提交 216fab0b authored 作者: james@mackie's avatar james@mackie

Upgraded op.compute() and result.compute() from gof.lib to gof.op, gof.result

Rationale: they are a simple wrapper around perform(), which is in the interface of gof.op. Fixed bugs in omega.grad.Grad to make it deal properly with mixed defined and undefined values. Added many more test cases to omega.grad. Changed the rule for upgrading grad() return values to lists, making them more strict. This caught bugs in pow_scalar functions, which were fixed. Added exp(), log() to omega.core
上级 9f5cd455
...@@ -256,6 +256,7 @@ class omega_op(gof.PythonOp): ...@@ -256,6 +256,7 @@ class omega_op(gof.PythonOp):
@staticmethod @staticmethod
def __clsinit__(cls, name, bases, dct): def __clsinit__(cls, name, bases, dct):
for fname in ['grad', 'c_impl']: for fname in ['grad', 'c_impl']:
if hasattr(cls, fname):
gof.make_static(cls, fname) gof.make_static(cls, fname)
# make impl a static method # make impl a static method
...@@ -269,15 +270,32 @@ class omega_op(gof.PythonOp): ...@@ -269,15 +270,32 @@ class omega_op(gof.PythonOp):
return [NumpyR() for i in xrange(self.nout)] return [NumpyR() for i in xrange(self.nout)]
def update_gradient(self, grad_d): def update_gradient(self, grad_d):
"""Call self.grad() and add the result to grad_d
This function is called by grad.Grad.bprop() to construct a symbolic gradient graph.
self.grad is called like this:
self.grad(*(self.inputs + [grad_d[output] for output in self.outputs]))
In general, grad() should return a list of PythonR instances whose
length matches that of self.inputs, and whose elements are the
gradients of self.inputs.
There is a small (but often used) special feature in place to automatically
wrap the return value of grad() in a list if it is a PythonR instance
and the op is unary. This makes many grad implementations a little
cuter.
"""
inputgs = self.grad(*(self.inputs + [grad_d[output] for output in self.outputs])) inputgs = self.grad(*(self.inputs + [grad_d[output] for output in self.outputs]))
if not isinstance(inputgs, (list, tuple)): if len(self.inputs) == 1 and isinstance(inputgs, gof.PythonR):
inputgs = [inputgs] * len(self.inputs) inputgs = [inputgs]
else:
assert len(inputgs) == len(self.inputs)
for input, inputg in zip(self.inputs, inputgs): for input, inputg in zip(self.inputs, inputgs):
grad_d.add(input, inputg) grad_d.add(input, inputg)
def grad(*args):
return UNDEFINED
def c_code(self, converters = None): def c_code(self, converters = None):
(inames, onames) = self.variable_names() (inames, onames) = self.variable_names()
behavior = self._c_impl() behavior = self._c_impl()
...@@ -760,7 +778,7 @@ class tensor_scalar_op(elemwise): ...@@ -760,7 +778,7 @@ class tensor_scalar_op(elemwise):
class add_elemwise(elemwise): class add_elemwise(elemwise):
impl = assert_same_shapes(numpy.ndarray.__add__) impl = assert_same_shapes(numpy.ndarray.__add__)
def grad(x, y, gz): def grad(x, y, gz):
return gz return gz, gz
def c_foreach((x_i, y_i), (z_i, )): def c_foreach((x_i, y_i), (z_i, )):
return "z_i = x_i + y_i;" return "z_i = x_i + y_i;"
...@@ -778,10 +796,10 @@ add_scalar_inplace = add_scalar.inplace_version() ...@@ -778,10 +796,10 @@ add_scalar_inplace = add_scalar.inplace_version()
add_scalar_inplace.set_impl(tensor_scalar_impl(numpy.ndarray.__iadd__)) add_scalar_inplace.set_impl(tensor_scalar_impl(numpy.ndarray.__iadd__))
class twice(elemwise): class twice(elemwise):
def impl(x):
return 2.0 * x
def grad(x, gz): def grad(x, gz):
return scale(gz, 2.0) return scale(gz, 2.0)
def impl(x):
return x + x
def c_foreach((x_i, ), (z_i, )): def c_foreach((x_i, ), (z_i, )):
"z_i = x_i + x_i;" "z_i = x_i + x_i;"
...@@ -1254,9 +1272,18 @@ class array_copy(elemwise): ...@@ -1254,9 +1272,18 @@ class array_copy(elemwise):
## Power ## ## Power ##
class exp(elemwise):
def impl(x): return numpy.exp(x)
def grad(x, gz): return gz * exp(x)
class log(elemwise):
def impl(x): return numpy.log(x)
def grad(x, gz): return gz / x
class pow_elemwise(elemwise): class pow_elemwise(elemwise):
impl = assert_same_shapes(numpy.ndarray.__pow__) impl = assert_same_shapes(numpy.ndarray.__pow__)
def grad(x, s, gz): def grad(x, s, gz):
raise NotImplemented # no gs
return gz * s * (pow_elemwise(x, s-1.0)) return gz * s * (pow_elemwise(x, s-1.0))
def c_foreach((x_i, s_i), (z_i, )): def c_foreach((x_i, s_i), (z_i, )):
return "z_i = pow(x_i, s_i)" return "z_i = pow(x_i, s_i)"
...@@ -1264,17 +1291,19 @@ class pow_elemwise(elemwise): ...@@ -1264,17 +1291,19 @@ class pow_elemwise(elemwise):
pow_elemwise_inplace = pow_elemwise.inplace_version() pow_elemwise_inplace = pow_elemwise.inplace_version()
pow_elemwise_inplace.set_impl(assert_same_shapes(numpy.ndarray.__ipow__)) pow_elemwise_inplace.set_impl(assert_same_shapes(numpy.ndarray.__ipow__))
class pow_scalar_l(tensor_scalar_op): class pow_scalar_l(tensor_scalar_op):
impl = tensor_scalar_impl(lambda x, y: numpy.ndarray.__pow__(y, x)) impl = tensor_scalar_impl(lambda x, y: numpy.ndarray.__pow__(y, x))
def grad(x, s, gz): def grad(x, s, gz):
raise NotImplemented # no gs
return gz * x * (pow_scalar_l(s,x-1.0)) return gz * x * (pow_scalar_l(s,x-1.0))
c_expr = "pow(a, x_i)" c_expr = "pow(a, x_i)"
class pow_scalar_r(tensor_scalar_op): class pow_scalar_r(tensor_scalar_op):
impl = tensor_scalar_impl(numpy.ndarray.__pow__) impl = tensor_scalar_impl(numpy.ndarray.__pow__)
def grad(x, s, gz): def grad(x, s, gz):
return gz * s * (pow_scalar_r(x,s-1.0)) gx = gz * s * (pow_scalar_r(x,s-1.0))
gs = sum(gz * pow_scalar_r(x,s) * log(x))
return gx, gs
c_expr = "pow(x_i, a)" c_expr = "pow(x_i, a)"
pow_scalar_r_inplace = pow_scalar_r.inplace_version() pow_scalar_r_inplace = pow_scalar_r.inplace_version()
......
...@@ -114,12 +114,9 @@ class PythonR(Result): ...@@ -114,12 +114,9 @@ class PythonR(Result):
def alloc(self): def alloc(self):
raise TypeError("Cannot allocate following this specification.") raise TypeError("Cannot allocate following this specification.")
def perform(self):
if self.owner:
self.owner.perform()
def compute(self): def compute(self):
if self.owner: """Overrides Op.compute(). Only recurses if self.data is UNCOMPUTED"""
if self.data is UNCOMPUTED:
self.owner.compute() self.owner.compute()
...@@ -239,14 +236,6 @@ class PythonOp(Op): ...@@ -239,14 +236,6 @@ class PythonOp(Op):
for result, output in zip(results, self.outputs): for result, output in zip(results, self.outputs):
output.data[:] = result output.data[:] = result
def compute(self):
for input in self.inputs:
if input.data is UNCOMPUTED:
if input.owner:
input.owner.compute()
else:
raise Exception("Uncomputed input: %s in %s" % (input, self))
self.perform()
def _impl(self): def _impl(self):
return self.impl(*[input.data for input in self.inputs]) return self.impl(*[input.data for input in self.inputs])
......
...@@ -216,6 +216,11 @@ class Op(object): ...@@ -216,6 +216,11 @@ class Op(object):
""" """
return self.perform() return self.perform()
def compute(self):
"""Recursively perform() the ancestors of this node and then perform(self)."""
for input in self.inputs:
input.compute()
self.perform()
@classmethod @classmethod
def require(cls): def require(cls):
......
...@@ -102,6 +102,17 @@ class Result(object): ...@@ -102,6 +102,17 @@ class Result(object):
""" """
raise NotImplementedError("This Result does not support set_value.") raise NotImplementedError("This Result does not support set_value.")
def compute(self):
"""If self has an owner, recursively compute it."""
if self.owner:
self.owner.compute()
def perform(self):
"""Calls self.owner.perform() if self.owner exists."""
if self.owner:
self.owner.perform()
# def extract(self): # def extract(self):
# """ # """
# Returns a representation of this datum for use in Op.impl. # Returns a representation of this datum for use in Op.impl.
......
...@@ -21,13 +21,18 @@ class Grad(object): ...@@ -21,13 +21,18 @@ class Grad(object):
self.outputs = [] self.outputs = []
for key,val in dct.items(): for key,val in dct.items():
self.add_output(key,val) self.add_output(key,val)
self.did_bprop = False
def __contains__(self, item): def __contains__(self, item):
return item in self.map return item in self.map
def __getitem__(self, item): def __getitem__(self, item):
"""Map item to its id and retrieve it.""" """Map item to its id and retrieve it."""
return self.map[core.wrap(item)] key = core.wrap(item)
try:
return self.map[key]
except KeyError:
return core.UNDEFINED
def __setitem__(self, item, val): def __setitem__(self, item, val):
"""Map item to its id and store internally.""" """Map item to its id and store internally."""
...@@ -73,7 +78,7 @@ class Grad(object): ...@@ -73,7 +78,7 @@ class Grad(object):
else: else:
self[r] = dr self[r] = dr
def bprop(self): def bprop(self, maybe_redo=False):
"""Build a backpropagation graph. """Build a backpropagation graph.
The gradient associated with each value is stored in <self> which The gradient associated with each value is stored in <self> which
...@@ -92,6 +97,8 @@ class Grad(object): ...@@ -92,6 +97,8 @@ class Grad(object):
bprop sets the omega evaluation mode to be 'build', so no computations bprop sets the omega evaluation mode to be 'build', so no computations
or allocations are done by bprop. or allocations are done by bprop.
""" """
if not maybe_redo and self.did_bprop:
raise Exception('bprop has already been done. Consider calling with maybe_redo=True.')
core.build_mode() core.build_mode()
try: try:
outputs = self.outputs outputs = self.outputs
...@@ -100,6 +107,7 @@ class Grad(object): ...@@ -100,6 +107,7 @@ class Grad(object):
op.update_gradient(self) op.update_gradient(self)
finally: finally:
core.pop_mode() core.pop_mode()
self.did_bprop = True
def __call__(self, item): def __call__(self, item):
"""Return a derivative term. """Return a derivative term.
...@@ -107,8 +115,11 @@ class Grad(object): ...@@ -107,8 +115,11 @@ class Grad(object):
If the current omega evaluation mode is 'build_eval' then the node is If the current omega evaluation mode is 'build_eval' then the node is
computed if necessary. computed if necessary.
""" """
if not self.did_bprop:
raise Exception('Grad.__call__ only makes sense after a bprop')
rval = self[item] rval = self[item]
if core.current_mode() == 'build_eval': if rval is not core.UNDEFINED \
and core.current_mode() == 'build_eval':
rval.compute() rval.compute()
return rval return rval
...@@ -141,8 +152,18 @@ import unittest ...@@ -141,8 +152,18 @@ import unittest
import numpy import numpy
import compile import compile
class _testCase (unittest.TestCase): class _testCase (unittest.TestCase):
class posneg(core.omega_op):
nout=2
def impl(x): return x, -x
def grad(x, gpos, gneg): return gpos - gneg
class posnegzero(core.omega_op):
nout=3
def impl(x): return x, -x, 0.0
def grad(x, gpos, gneg, gzero): return gpos - gneg
def setUp(self): def setUp(self):
numpy.random.seed(1) numpy.random.seed(1)
core.build_eval_mode() core.build_eval_mode()
...@@ -189,17 +210,16 @@ class _testCase (unittest.TestCase): ...@@ -189,17 +210,16 @@ class _testCase (unittest.TestCase):
return str0, str(ssdiff) return str0, str(ssdiff)
def test0(self): def test0(self):
"""Matrix inversion by gradient descent (eval mode)"""
self.assertEqual(('2.67327580893', '0.000438649434819'), self.matinv(3)) self.assertEqual(('2.67327580893', '0.000438649434819'), self.matinv(3))
def test1(self): def test1(self):
"""Matrix inversion by gradient descent (compiled mode)"""
self.assertEqual(('2.67327580893', '0.000438649434819'), self.assertEqual(('2.67327580893', '0.000438649434819'),
self.matinv_compiled(3)) self.matinv_compiled(3))
def test_grad_wrt_ndarray_pointer(self): def test_grad_wrt_ndarray_pointer(self):
""" """Grad indexing by un-wrapped ndarray"""
Tests if it is possible to index the gradient by a pointer to a ndarray
that is used as a node of the computation graph.
"""
a = numpy.ones((4, 4)) a = numpy.ones((4, 4))
b = numpy.ones((4, 4)) b = numpy.ones((4, 4))
c = numpy.ones((4, 4)) c = numpy.ones((4, 4))
...@@ -207,10 +227,108 @@ class _testCase (unittest.TestCase): ...@@ -207,10 +227,108 @@ class _testCase (unittest.TestCase):
g = grad(expr) g = grad(expr)
g[a] g[a]
def test_bprop_call_order(self):
"""Ensure call before bprop is illegal"""
a = numpy.ones((3,3,3))
b = core.exp(a)
gb = Grad({b:core.wrap(a)})
try:
gb(a)
self.assertEqual('should have raised',0)
except Exception, e:
self.assertEqual(e.message, 'Grad.__call__ only makes sense after a bprop')
return
self.assertEqual('should have caught, returned',0)
def test_undefined_grad0(self):
"""Make sure posneg works with fully specified gradients"""
a = numpy.ones((3,3,3))
b,c = _testCase.posneg(a)
g = Grad({b:core.wrap(a),c:core.wrap(a)})
g.bprop()
max = numpy.max(g(a))
min = numpy.min(g(a))
self.assertEqual(max, min)
self.assertEqual(max, 0.0)
def test_undefined_grad1(self):
"""Propagate undefined values through posneg's first gradient"""
a = numpy.ones((3,3,3))
b,c = _testCase.posneg(a)
gb = Grad({b:core.wrap(a)})
try:
gb.bprop()
self.assertEqual('should have raised',0)
except AttributeError, e:
self.assertEqual(e.message, "Keyword instance has no attribute 'shape'")
return
self.assertEqual("Should have been error", 0)
def test_undefined_grad2(self):
"""Propagate undefined values through posneg's second gradient"""
a = numpy.ones((3,3,3))
b,c = _testCase.posneg(a)
gc = Grad({c:core.wrap(a)})
try:
gc.bprop()
self.assertEqual('should have raised',0)
except AttributeError, e:
self.assertEqual(e.message, "Keyword instance has no attribute 'shape'")
return
self.assertEqual("Should have been error", 0)
def test_undefined_grad3(self):
"""Ignore undefined values properly"""
a = numpy.ones((3,3,3))
b,c,d = _testCase.posnegzero(a)
#print b, c, d
g = Grad({b:core.wrap(a), c:core.wrap(a)})
g.bprop()
max = numpy.max(g(a))
min = numpy.min(g(a))
self.assertEqual(max, min)
self.assertEqual(max, 0.0)
def test_repeat_bprop(self):
"""Refuse to repeat bprop"""
a = numpy.ones((3,3,3))
b,c,d = _testCase.posnegzero(a)
#print b, c, d
g = Grad({b:core.wrap(a), c:core.wrap(a)})
g.bprop()
try:
g.bprop()
self.assertEqual('should have raised')
except Exception, e:
self.assertEqual(e.message, 'bprop has already been done. Consider calling with maybe_redo=True.')
return
self.assertEqual('should have caught')
def test_repeat_bprop1(self):
"""Force repeat bprop"""
a = numpy.ones((3,3,3))
z = numpy.zeros((3,3,3))
b,c,d = _testCase.posnegzero(a)
#print b, c, d
g = Grad({b:core.wrap(a), c:core.wrap(z)})
g.bprop()
g.bprop(maybe_redo=True)
max = numpy.max(g(a))
min = numpy.min(g(a))
self.assertEqual(max, min)
self.assertEqual(max, 2.0)
def tearDown(self): def tearDown(self):
core.pop_mode() core.pop_mode()
if __name__ == '__main__': if __name__ == '__main__':
suite = unittest.TestLoader().loadTestsFromTestCase(_testCase) unittest.main()
unittest.TextTestRunner(verbosity=3).run(suite)
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论