提交 216fab0b authored 作者: james@mackie's avatar james@mackie

Upgraded op.compute() and result.compute() from gof.lib to gof.op, gof.result

Rationale: they are a simple wrapper around perform(), which is in the interface of gof.op. Fixed bugs in omega.grad.Grad to make it deal properly with mixed defined and undefined values. Added many more test cases to omega.grad. Changed the rule for upgrading grad() return values to lists, making them more strict. This caught bugs in pow_scalar functions, which were fixed. Added exp(), log() to omega.core
上级 9f5cd455
...@@ -256,6 +256,7 @@ class omega_op(gof.PythonOp): ...@@ -256,6 +256,7 @@ class omega_op(gof.PythonOp):
@staticmethod @staticmethod
def __clsinit__(cls, name, bases, dct): def __clsinit__(cls, name, bases, dct):
for fname in ['grad', 'c_impl']: for fname in ['grad', 'c_impl']:
if hasattr(cls, fname):
gof.make_static(cls, fname) gof.make_static(cls, fname)
# make impl a static method # make impl a static method
...@@ -269,15 +270,32 @@ class omega_op(gof.PythonOp): ...@@ -269,15 +270,32 @@ class omega_op(gof.PythonOp):
return [NumpyR() for i in xrange(self.nout)] return [NumpyR() for i in xrange(self.nout)]
def update_gradient(self, grad_d): def update_gradient(self, grad_d):
"""Call self.grad() and add the result to grad_d
This function is called by grad.Grad.bprop() to construct a symbolic gradient graph.
self.grad is called like this:
self.grad(*(self.inputs + [grad_d[output] for output in self.outputs]))
In general, grad() should return a list of PythonR instances whose
length matches that of self.inputs, and whose elements are the
gradients of self.inputs.
There is a small (but often used) special feature in place to automatically
wrap the return value of grad() in a list if it is a PythonR instance
and the op is unary. This makes many grad implementations a little
cuter.
"""
inputgs = self.grad(*(self.inputs + [grad_d[output] for output in self.outputs])) inputgs = self.grad(*(self.inputs + [grad_d[output] for output in self.outputs]))
if not isinstance(inputgs, (list, tuple)): if len(self.inputs) == 1 and isinstance(inputgs, gof.PythonR):
inputgs = [inputgs] * len(self.inputs) inputgs = [inputgs]
else:
assert len(inputgs) == len(self.inputs)
for input, inputg in zip(self.inputs, inputgs): for input, inputg in zip(self.inputs, inputgs):
grad_d.add(input, inputg) grad_d.add(input, inputg)
def grad(*args):
return UNDEFINED
def c_code(self, converters = None): def c_code(self, converters = None):
(inames, onames) = self.variable_names() (inames, onames) = self.variable_names()
behavior = self._c_impl() behavior = self._c_impl()
...@@ -760,7 +778,7 @@ class tensor_scalar_op(elemwise): ...@@ -760,7 +778,7 @@ class tensor_scalar_op(elemwise):
class add_elemwise(elemwise): class add_elemwise(elemwise):
impl = assert_same_shapes(numpy.ndarray.__add__) impl = assert_same_shapes(numpy.ndarray.__add__)
def grad(x, y, gz): def grad(x, y, gz):
return gz return gz, gz
def c_foreach((x_i, y_i), (z_i, )): def c_foreach((x_i, y_i), (z_i, )):
return "z_i = x_i + y_i;" return "z_i = x_i + y_i;"
...@@ -778,10 +796,10 @@ add_scalar_inplace = add_scalar.inplace_version() ...@@ -778,10 +796,10 @@ add_scalar_inplace = add_scalar.inplace_version()
add_scalar_inplace.set_impl(tensor_scalar_impl(numpy.ndarray.__iadd__)) add_scalar_inplace.set_impl(tensor_scalar_impl(numpy.ndarray.__iadd__))
class twice(elemwise): class twice(elemwise):
def impl(x):
return 2.0 * x
def grad(x, gz): def grad(x, gz):
return scale(gz, 2.0) return scale(gz, 2.0)
def impl(x):
return x + x
def c_foreach((x_i, ), (z_i, )): def c_foreach((x_i, ), (z_i, )):
"z_i = x_i + x_i;" "z_i = x_i + x_i;"
...@@ -1254,9 +1272,18 @@ class array_copy(elemwise): ...@@ -1254,9 +1272,18 @@ class array_copy(elemwise):
## Power ## ## Power ##
class exp(elemwise):
def impl(x): return numpy.exp(x)
def grad(x, gz): return gz * exp(x)
class log(elemwise):
def impl(x): return numpy.log(x)
def grad(x, gz): return gz / x
class pow_elemwise(elemwise): class pow_elemwise(elemwise):
impl = assert_same_shapes(numpy.ndarray.__pow__) impl = assert_same_shapes(numpy.ndarray.__pow__)
def grad(x, s, gz): def grad(x, s, gz):
raise NotImplemented # no gs
return gz * s * (pow_elemwise(x, s-1.0)) return gz * s * (pow_elemwise(x, s-1.0))
def c_foreach((x_i, s_i), (z_i, )): def c_foreach((x_i, s_i), (z_i, )):
return "z_i = pow(x_i, s_i)" return "z_i = pow(x_i, s_i)"
...@@ -1264,17 +1291,19 @@ class pow_elemwise(elemwise): ...@@ -1264,17 +1291,19 @@ class pow_elemwise(elemwise):
pow_elemwise_inplace = pow_elemwise.inplace_version() pow_elemwise_inplace = pow_elemwise.inplace_version()
pow_elemwise_inplace.set_impl(assert_same_shapes(numpy.ndarray.__ipow__)) pow_elemwise_inplace.set_impl(assert_same_shapes(numpy.ndarray.__ipow__))
class pow_scalar_l(tensor_scalar_op): class pow_scalar_l(tensor_scalar_op):
impl = tensor_scalar_impl(lambda x, y: numpy.ndarray.__pow__(y, x)) impl = tensor_scalar_impl(lambda x, y: numpy.ndarray.__pow__(y, x))
def grad(x, s, gz): def grad(x, s, gz):
raise NotImplemented # no gs
return gz * x * (pow_scalar_l(s,x-1.0)) return gz * x * (pow_scalar_l(s,x-1.0))
c_expr = "pow(a, x_i)" c_expr = "pow(a, x_i)"
class pow_scalar_r(tensor_scalar_op): class pow_scalar_r(tensor_scalar_op):
impl = tensor_scalar_impl(numpy.ndarray.__pow__) impl = tensor_scalar_impl(numpy.ndarray.__pow__)
def grad(x, s, gz): def grad(x, s, gz):
return gz * s * (pow_scalar_r(x,s-1.0)) gx = gz * s * (pow_scalar_r(x,s-1.0))
gs = sum(gz * pow_scalar_r(x,s) * log(x))
return gx, gs
c_expr = "pow(x_i, a)" c_expr = "pow(x_i, a)"
pow_scalar_r_inplace = pow_scalar_r.inplace_version() pow_scalar_r_inplace = pow_scalar_r.inplace_version()
......
...@@ -114,12 +114,9 @@ class PythonR(Result): ...@@ -114,12 +114,9 @@ class PythonR(Result):
def alloc(self): def alloc(self):
raise TypeError("Cannot allocate following this specification.") raise TypeError("Cannot allocate following this specification.")
def perform(self):
if self.owner:
self.owner.perform()
def compute(self): def compute(self):
if self.owner: """Overrides Op.compute(). Only recurses if self.data is UNCOMPUTED"""
if self.data is UNCOMPUTED:
self.owner.compute() self.owner.compute()
...@@ -239,14 +236,6 @@ class PythonOp(Op): ...@@ -239,14 +236,6 @@ class PythonOp(Op):
for result, output in zip(results, self.outputs): for result, output in zip(results, self.outputs):
output.data[:] = result output.data[:] = result
def compute(self):
for input in self.inputs:
if input.data is UNCOMPUTED:
if input.owner:
input.owner.compute()
else:
raise Exception("Uncomputed input: %s in %s" % (input, self))
self.perform()
def _impl(self): def _impl(self):
return self.impl(*[input.data for input in self.inputs]) return self.impl(*[input.data for input in self.inputs])
......
...@@ -216,6 +216,11 @@ class Op(object): ...@@ -216,6 +216,11 @@ class Op(object):
""" """
return self.perform() return self.perform()
def compute(self):
"""Recursively perform() the ancestors of this node and then perform(self)."""
for input in self.inputs:
input.compute()
self.perform()
@classmethod @classmethod
def require(cls): def require(cls):
......
...@@ -102,6 +102,17 @@ class Result(object): ...@@ -102,6 +102,17 @@ class Result(object):
""" """
raise NotImplementedError("This Result does not support set_value.") raise NotImplementedError("This Result does not support set_value.")
def compute(self):
"""If self has an owner, recursively compute it."""
if self.owner:
self.owner.compute()
def perform(self):
"""Calls self.owner.perform() if self.owner exists."""
if self.owner:
self.owner.perform()
# def extract(self): # def extract(self):
# """ # """
# Returns a representation of this datum for use in Op.impl. # Returns a representation of this datum for use in Op.impl.
......
...@@ -21,13 +21,18 @@ class Grad(object): ...@@ -21,13 +21,18 @@ class Grad(object):
self.outputs = [] self.outputs = []
for key,val in dct.items(): for key,val in dct.items():
self.add_output(key,val) self.add_output(key,val)
self.did_bprop = False
def __contains__(self, item): def __contains__(self, item):
return item in self.map return item in self.map
def __getitem__(self, item): def __getitem__(self, item):
"""Map item to its id and retrieve it.""" """Map item to its id and retrieve it."""
return self.map[core.wrap(item)] key = core.wrap(item)
try:
return self.map[key]
except KeyError:
return core.UNDEFINED
def __setitem__(self, item, val): def __setitem__(self, item, val):
"""Map item to its id and store internally.""" """Map item to its id and store internally."""
...@@ -73,7 +78,7 @@ class Grad(object): ...@@ -73,7 +78,7 @@ class Grad(object):
else: else:
self[r] = dr self[r] = dr
def bprop(self): def bprop(self, maybe_redo=False):
"""Build a backpropagation graph. """Build a backpropagation graph.
The gradient associated with each value is stored in <self> which The gradient associated with each value is stored in <self> which
...@@ -92,6 +97,8 @@ class Grad(object): ...@@ -92,6 +97,8 @@ class Grad(object):
bprop sets the omega evaluation mode to be 'build', so no computations bprop sets the omega evaluation mode to be 'build', so no computations
or allocations are done by bprop. or allocations are done by bprop.
""" """
if not maybe_redo and self.did_bprop:
raise Exception('bprop has already been done. Consider calling with maybe_redo=True.')
core.build_mode() core.build_mode()
try: try:
outputs = self.outputs outputs = self.outputs
...@@ -100,6 +107,7 @@ class Grad(object): ...@@ -100,6 +107,7 @@ class Grad(object):
op.update_gradient(self) op.update_gradient(self)
finally: finally:
core.pop_mode() core.pop_mode()
self.did_bprop = True
def __call__(self, item): def __call__(self, item):
"""Return a derivative term. """Return a derivative term.
...@@ -107,8 +115,11 @@ class Grad(object): ...@@ -107,8 +115,11 @@ class Grad(object):
If the current omega evaluation mode is 'build_eval' then the node is If the current omega evaluation mode is 'build_eval' then the node is
computed if necessary. computed if necessary.
""" """
if not self.did_bprop:
raise Exception('Grad.__call__ only makes sense after a bprop')
rval = self[item] rval = self[item]
if core.current_mode() == 'build_eval': if rval is not core.UNDEFINED \
and core.current_mode() == 'build_eval':
rval.compute() rval.compute()
return rval return rval
...@@ -141,8 +152,18 @@ import unittest ...@@ -141,8 +152,18 @@ import unittest
import numpy import numpy
import compile import compile
class _testCase (unittest.TestCase): class _testCase (unittest.TestCase):
class posneg(core.omega_op):
nout=2
def impl(x): return x, -x
def grad(x, gpos, gneg): return gpos - gneg
class posnegzero(core.omega_op):
nout=3
def impl(x): return x, -x, 0.0
def grad(x, gpos, gneg, gzero): return gpos - gneg
def setUp(self): def setUp(self):
numpy.random.seed(1) numpy.random.seed(1)
core.build_eval_mode() core.build_eval_mode()
...@@ -189,17 +210,16 @@ class _testCase (unittest.TestCase): ...@@ -189,17 +210,16 @@ class _testCase (unittest.TestCase):
return str0, str(ssdiff) return str0, str(ssdiff)
def test0(self): def test0(self):
"""Matrix inversion by gradient descent (eval mode)"""
self.assertEqual(('2.67327580893', '0.000438649434819'), self.matinv(3)) self.assertEqual(('2.67327580893', '0.000438649434819'), self.matinv(3))
def test1(self): def test1(self):
"""Matrix inversion by gradient descent (compiled mode)"""
self.assertEqual(('2.67327580893', '0.000438649434819'), self.assertEqual(('2.67327580893', '0.000438649434819'),
self.matinv_compiled(3)) self.matinv_compiled(3))
def test_grad_wrt_ndarray_pointer(self): def test_grad_wrt_ndarray_pointer(self):
""" """Grad indexing by un-wrapped ndarray"""
Tests if it is possible to index the gradient by a pointer to a ndarray
that is used as a node of the computation graph.
"""
a = numpy.ones((4, 4)) a = numpy.ones((4, 4))
b = numpy.ones((4, 4)) b = numpy.ones((4, 4))
c = numpy.ones((4, 4)) c = numpy.ones((4, 4))
...@@ -207,10 +227,108 @@ class _testCase (unittest.TestCase): ...@@ -207,10 +227,108 @@ class _testCase (unittest.TestCase):
g = grad(expr) g = grad(expr)
g[a] g[a]
def test_bprop_call_order(self):
"""Ensure call before bprop is illegal"""
a = numpy.ones((3,3,3))
b = core.exp(a)
gb = Grad({b:core.wrap(a)})
try:
gb(a)
self.assertEqual('should have raised',0)
except Exception, e:
self.assertEqual(e.message, 'Grad.__call__ only makes sense after a bprop')
return
self.assertEqual('should have caught, returned',0)
def test_undefined_grad0(self):
"""Make sure posneg works with fully specified gradients"""
a = numpy.ones((3,3,3))
b,c = _testCase.posneg(a)
g = Grad({b:core.wrap(a),c:core.wrap(a)})
g.bprop()
max = numpy.max(g(a))
min = numpy.min(g(a))
self.assertEqual(max, min)
self.assertEqual(max, 0.0)
def test_undefined_grad1(self):
"""Propagate undefined values through posneg's first gradient"""
a = numpy.ones((3,3,3))
b,c = _testCase.posneg(a)
gb = Grad({b:core.wrap(a)})
try:
gb.bprop()
self.assertEqual('should have raised',0)
except AttributeError, e:
self.assertEqual(e.message, "Keyword instance has no attribute 'shape'")
return
self.assertEqual("Should have been error", 0)
def test_undefined_grad2(self):
"""Propagate undefined values through posneg's second gradient"""
a = numpy.ones((3,3,3))
b,c = _testCase.posneg(a)
gc = Grad({c:core.wrap(a)})
try:
gc.bprop()
self.assertEqual('should have raised',0)
except AttributeError, e:
self.assertEqual(e.message, "Keyword instance has no attribute 'shape'")
return
self.assertEqual("Should have been error", 0)
def test_undefined_grad3(self):
"""Ignore undefined values properly"""
a = numpy.ones((3,3,3))
b,c,d = _testCase.posnegzero(a)
#print b, c, d
g = Grad({b:core.wrap(a), c:core.wrap(a)})
g.bprop()
max = numpy.max(g(a))
min = numpy.min(g(a))
self.assertEqual(max, min)
self.assertEqual(max, 0.0)
def test_repeat_bprop(self):
"""Refuse to repeat bprop"""
a = numpy.ones((3,3,3))
b,c,d = _testCase.posnegzero(a)
#print b, c, d
g = Grad({b:core.wrap(a), c:core.wrap(a)})
g.bprop()
try:
g.bprop()
self.assertEqual('should have raised')
except Exception, e:
self.assertEqual(e.message, 'bprop has already been done. Consider calling with maybe_redo=True.')
return
self.assertEqual('should have caught')
def test_repeat_bprop1(self):
"""Force repeat bprop"""
a = numpy.ones((3,3,3))
z = numpy.zeros((3,3,3))
b,c,d = _testCase.posnegzero(a)
#print b, c, d
g = Grad({b:core.wrap(a), c:core.wrap(z)})
g.bprop()
g.bprop(maybe_redo=True)
max = numpy.max(g(a))
min = numpy.min(g(a))
self.assertEqual(max, min)
self.assertEqual(max, 2.0)
def tearDown(self): def tearDown(self):
core.pop_mode() core.pop_mode()
if __name__ == '__main__': if __name__ == '__main__':
suite = unittest.TestLoader().loadTestsFromTestCase(_testCase) unittest.main()
unittest.TextTestRunner(verbosity=3).run(suite)
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论