merge

1c00d792 · Olivier Breuleux · d949809a · 77c9fd0b · 1c00d792 · 1c00d792
--- a/_test_gradient.py
+++ b/_test_gradient.py
@@ -4,11 +4,279 @@
 #
 import unittest
 import numpy
-import compile
-import tensor
 import tensor_ops as T
+import tensor
+import gof
 from gradient import *
+import gradient
+class posneg(T.TensorOp):
+    nout=2
+    def impl(self, x): return x, -x
+    def grad(self, x, (gpos, gneg)): return gpos - gneg
+class posnegzero(T.TensorOp):
+    nout=3
+    def impl(self, x): return x, -x, 0.0
+    def grad(self, x, (gpos, gneg, gzero)): return gpos - gneg
+class _test_grad_sources_inputs(unittest.TestCase):
+    def test_retNone1(self): 
+        """Test that it is not ok to return None from op.grad()"""
+        class retNone(gof.op.Op):
+            def __init__(self, arg):
+                self.inputs = [gof.result.ResultBase()]
+                self.outputs = [gof.result.ResultBase()]
+            def grad(self, x, gz):
+                pass
+        a = retNone(5)
+        try:
+            grad_sources_inputs([(a.out, 1)], None)
+        except ValueError, e:
+            self.failUnless(e[0] is gradient._msg_retNone)
+            return
+        self.fail()
+    def test_retNone1_b(self): 
+        """Test that it is ok to return [None] from op.grad()"""
+        class retNone(gof.op.Op):
+            def __init__(self, arg):
+                self.inputs = arg
+                self.outputs = [gof.result.ResultBase()]
+            def grad(self, x, gz):
+                return [None]
+        i = gof.result.ResultBase()
+        a = retNone([i])
+        g = grad_sources_inputs([(a.out, 1)], None)
+        self.failUnless(not i in g)
+    def test_wrong_rval_len1(self): 
+        """Test that it is not ok to return the wrong number of gradients"""
+        class retNone(gof.op.Op):
+            def __init__(self, arg):
+                self.inputs = arg
+                self.outputs = [gof.result.ResultBase()]
+            def grad(self, inputs, gz):
+                return [None]
+        i = gof.result.ResultBase()
+        j = gof.result.ResultBase()
+        a1 = retNone([i])
+        g = grad_sources_inputs([(a1.out, 1)], None)
+        a2 = retNone([i,j])
+        try:
+            g = grad_sources_inputs([(a2.out, 1)], None)
+        except ValueError, e:
+            self.failUnless(e[0] is gradient._msg_badlen)
+            return
+        self.fail()
+    def test_stop_on_all_none(self):
+        """Test that op.grad() is not called when output grads are all None"""
+        class retNone(gof.op.Op):
+            def __init__(self, arg, tst):
+                self.inputs = arg
+                self.outputs = [gof.result.ResultBase()]
+                self.tst = tst
+            def grad(self, inputs, gz):
+                self.tst.fail()
+        i = gof.result.ResultBase()
+        a1 = retNone([i],self)
+        g = grad_sources_inputs([(a1.out, None)], None)
+    def test_1in_1out(self):
+        """Test grad is called correctly for a 1-to-1 op"""
+        gval = gof.result.ResultBase()
+        class O(gof.op.Op):
+            def __init__(self):
+                self.inputs = [gof.result.ResultBase()]
+                self.outputs = [gof.result.ResultBase()]
+            def grad(self, x, gz):
+                return gval
+        a1 = O()
+        g = grad_sources_inputs([(a1.outputs[0], 1)], None)
+        self.failUnless(g[a1.inputs[0]] is gval)
+    def test_1in_Nout(self):
+        """Test grad is called correctly for a 1-to-many op"""
+        gval = gof.result.ResultBase()
+        class O(gof.op.Op):
+            def __init__(self):
+                self.inputs = [gof.result.ResultBase()]
+                self.outputs = [gof.result.ResultBase(),gof.result.ResultBase()]
+            def grad(self, x, (gz1, gz2)):
+                return gval
+        a1 = O()
+        g = grad_sources_inputs([(a1.outputs[0], 1)], None)
+        self.failUnless(g[a1.inputs[0]] is gval)
+    def test_Nin_1out(self):
+        """Test grad is called correctly for a many-to-1 op"""
+        gval0 = gof.result.ResultBase()
+        gval1 = gof.result.ResultBase()
+        class O(gof.op.Op):
+            def __init__(self):
+                self.inputs = [gof.result.ResultBase(),gof.result.ResultBase()]
+                self.outputs = [gof.result.ResultBase()]
+            def grad(self, (x0,x1), gz):
+                return (gval0, gval1)
+        a1 = O()
+        g = grad_sources_inputs([(a1.outputs[0], 1)], None)
+        self.failUnless(g[a1.inputs[0]] is gval0)
+        self.failUnless(g[a1.inputs[1]] is gval1)
+    def test_Nin_Nout(self):
+        """Test grad is called correctly for a many-to-many op"""
+        gval0 = gof.result.ResultBase()
+        gval1 = gof.result.ResultBase()
+        class O(gof.op.Op):
+            def __init__(self):
+                self.inputs = [gof.result.ResultBase(),gof.result.ResultBase()]
+                self.outputs = [gof.result.ResultBase(),gof.result.ResultBase()]
+            def grad(self, (x0,x1), (gz0,gz1)):
+                return gval0, gval1
+        a1 = O()
+        g = grad_sources_inputs([(a1.outputs[0], 1)], None)
+        self.failUnless(g[a1.inputs[0]] is gval0)
+        self.failUnless(g[a1.inputs[1]] is gval1)
+    def test_some_None_ograds(self):
+        """Test grad is called when some output gradients are None"""
+        class O(gof.op.Op):
+            def __init__(self, arg, tst):
+                self.inputs = arg
+                self.outputs = [gof.result.ResultBase(),gof.result.ResultBase()]
+                self.tst = tst
+            def grad(self, inputs, g_out):
+                return [1]
+        i = gof.result.ResultBase()
+        a1 = O([i],self)
+        g = grad_sources_inputs([(a1.outputs[0], 1)], None)
+        self.failUnless(g[i] is 1)
+    def test_some_None_igrads(self):
+        """Test that traversal works properly when an op returns some None"""
+        class O(gof.op.Op):
+            def __init__(self, arg, tst, grad_ok):
+                self.inputs = arg
+                self.outputs = [gof.result.ResultBase(),gof.result.ResultBase()]
+                self.tst = tst
+                self.grad_ok = grad_ok
+            def grad(self, inputs, g_out):
+                if not self.grad_ok:
+                    self.tst.fail()
+                else:
+                    return [1, None]
+        i = gof.result.ResultBase()
+        j = gof.result.ResultBase()
+        k = gof.result.ResultBase()
+        a1 = O([i,j],self,True)
+        a2 = O([a1.outputs[1], k], self, True)
+        g = grad_sources_inputs([(a2.outputs[0], 1)], None)
+        self.failUnless(g[i] is 1 and j not in g and k not in g)
+        a1 = O([i,j],self,True)
+        a2 = O([k, a1.outputs[1]], self, True)
+        g = grad_sources_inputs([(a2.outputs[0], 1)], None)
+        self.failUnless(g[k] is 1 and i not in g and j not in g)
+    def test_inputs(self):
+        """Test that passing inputs shortens the traversal"""
+        class O(gof.op.Op):
+            def __init__(self, arg, tst, grad_ok):
+                self.inputs = arg
+                self.outputs = [gof.result.ResultBase(),gof.result.ResultBase()]
+                self.tst = tst
+                self.grad_ok = grad_ok
+            def grad(self, inputs, (g0,g1)):
+                if not self.grad_ok:
+                    self.tst.fail()
+                else:
+                    if g1:
+                        return [g0, g0+g1]
+                    else:
+                        return [g0, g0]
+        i = gof.result.ResultBase()
+        j = gof.result.ResultBase()
+        k = gof.result.ResultBase()
+        a1 = O([i,j],self,True)
+        a2 = O([k,a1.outputs[1]], self, True)
+        g = grad_sources_inputs([(a2.outputs[0], 1), (a1.outputs[1],4),
+            (a1.outputs[0], 3), (a1.outputs[0], 3)], a1.outputs)
+        self.failUnless(g[a2.inputs[0]] == 1)
+        self.failUnless(g[a2.inputs[1]] == 5)
+        self.failUnless(g[a1.outputs[0]] == 6)
+        self.failUnless(g[a1.outputs[1]] == 5)
+        self.failUnless(a1.inputs[0] not in g)
+        self.failUnless(a1.inputs[1] not in g)
+    def test_multiple_sources(self):
+        """Test that passing multiple sources works"""
+        class O(gof.op.Op):
+            def __init__(self, arg, tst, grad_ok):
+                self.inputs = arg
+                self.outputs = [gof.result.ResultBase(),gof.result.ResultBase()]
+                self.tst = tst
+                self.grad_ok = grad_ok
+            def grad(self, inputs, (g0,g1)):
+                if not self.grad_ok:
+                    self.tst.fail()
+                else:
+                    if g1:
+                        return [g0, g0+g1]
+                    else:
+                        return [g0, g0]
+        i = gof.result.ResultBase()
+        j = gof.result.ResultBase()
+        k = gof.result.ResultBase()
+        a1 = O([i,j],self,True)
+        a2 = O([k,a1.outputs[1]], self, True)
+        g = grad_sources_inputs([(a2.outputs[0], 1), (a1.outputs[1],4),
+            (a1.outputs[0], 3), (a1.outputs[0], 3)], None)
+        self.failUnless(g[a2.inputs[0]] == 1)
+        self.failUnless(g[a2.inputs[1]] == 5)
+        self.failUnless(g[a1.outputs[0]] == 6)
+        self.failUnless(g[a1.outputs[1]] == 5)
+        self.failUnless(g[a1.inputs[0]] == 6)
+        self.failUnless(g[a1.inputs[1]] == 11)
+class _test_grad(unittest.TestCase):
+    class O(gof.op.Op):
+        def __init__(self):
+            self.inputs = [gof.result.ResultBase(),gof.result.ResultBase()]
+            self.outputs = [gof.result.ResultBase(),gof.result.ResultBase()]
+            self.gval0 = gof.result.ResultBase()
+            self.gval1 = gof.result.ResultBase()
+        def grad(self, (x0,x1), (gz0,gz1)):
+            return self.gval0, self.gval1
+    def test_1param(self):
+        """grad: Test passing a single result param"""
+        a1 = _test_grad.O()
+        self.failUnless(a1.gval0 is grad(a1.outputs[0], a1.inputs[0]))
+    def test_Nparam(self):
+        """grad: Test passing multiple result params"""
+        a1 = _test_grad.O()
+        g0,g1 = grad(a1.outputs[0], a1.inputs)
+        self.failUnless(a1.gval0 is g0)
+        self.failUnless(a1.gval1 is g1)
+    def test_1None_rval(self):
+        """grad: Test returning a single None from grad"""
+        a1 = _test_grad.O()
+        self.failUnless(None is grad(a1.outputs[0], a1.outputs[1]))
+        self.failUnless(None is grad(a1.outputs[0], 'wtf'))
+    def test_NNone_rval(self):
+        """grad: Test returning some Nones from grad"""
+        a1 = _test_grad.O()
+        g0,g1,g2 = grad(a1.outputs[0], a1.inputs + ['wtf'])
+        self.failUnless(a1.gval0 is g0)
+        self.failUnless(a1.gval1 is g1)
+        self.failUnless(None is g2)
 def matrix():
    return tensor.Tensor('float64', [0,0])
@@ -17,9 +285,13 @@ def matrices(n):
    return [matrix() for i in xrange(n)]
-class _testCase (unittest.TestCase):
+#TODO: move this to _test_tensor_ops.py
+class _testCase_matinv:# (unittest.TestCase):
    def setUp(self):
        numpy.random.seed(1)
    def matinv(self,dim):
        # symbolic program
        a,b = matrices(2)
@@ -47,130 +319,5 @@ class _testCase (unittest.TestCase):
        """Matrix inversion by gradient descent (eval mode)"""
        self.assertEqual(('2.67327580893', '0.000438649434819'), self.matinv(3))
-class _testCase_old:
-    class posneg(T._TensorOp):
-        nout=2
-        def impl(x): return x, -x
-        def grad(x, gpos, gneg): return gpos - gneg
-    class posnegzero(T._TensorOp):
-        nout=3
-        def impl(x): return x, -x, 0.0
-        def grad(x, gpos, gneg, gzero): return gpos - gneg
-    def setUp(self):
-        numpy.random.seed(1)
-    def test_grad_wrt_ndarray_pointer(self):
-        """Grad indexing by un-wrapped ndarray"""
-        a = numpy.ones((4, 4))
-        b = numpy.ones((4, 4))
-        c = numpy.ones((4, 4))
-        expr = core.sum(core.dot(core.add(a, b), c))
-        g = grad(expr)
-        g[a]
-    def test_bprop_call_order(self):
-        """Ensure call before bprop is illegal"""
-        a = numpy.ones((3,3,3))
-        b = core.exp(a)
-        gb = Grad({b:wrappers.wrap(a)})
-        try:
-            gb(a)
-            self.assertEqual('should have raised',0)
-        except Exception, e:
-            self.assertEqual(str(e), 'Grad.__call__ only makes sense after a bprop')
-            return
-        self.assertEqual('should have caught, returned',0)
-    def test_undefined_grad0(self):
-        """Make sure posneg works with fully specified gradients"""
-        a = numpy.ones((3,3,3))
-        b,c = _testCase.posneg(a)
-        g = Grad({b:wrappers.wrap(a),c:wrappers.wrap(a)})
-        g.bprop()
-        max = numpy.max(g(a))
-        min = numpy.min(g(a))
-        self.assertEqual(max, min)
-        self.assertEqual(max, 0.0)
-    def test_undefined_grad1(self):
-        """Propagate undefined values through posneg's first gradient"""
-        a = numpy.ones((3,3,3))
-        b,c = _testCase.posneg(a)
-        gb = Grad({b:wrappers.wrap(a)})
-        try:
-            gb.bprop()
-            self.assertEqual('should have raised',0)
-        except UndefinedError:
-            return
-        self.assertEqual("Should have been error", 0)
-    def test_undefined_grad2(self):
-        """Propagate undefined values through posneg's second gradient"""
-        a = numpy.ones((3,3,3))
-        b,c = _testCase.posneg(a)
-        gc = Grad({c:wrappers.wrap(a)})
-        try:
-            gc.bprop()
-            self.assertEqual('should have raised',0)
-        except UndefinedError:
-            return
-        self.assertEqual("Should have been error", 0)
-    def test_undefined_grad3(self):
-        """Ignore undefined values properly"""
-        a = numpy.ones((3,3,3))
-        b,c,d = _testCase.posnegzero(a)
-        #print b, c, d
-        g = Grad({b:wrappers.wrap(a), c:wrappers.wrap(a)})
-        g.bprop()
-        max = numpy.max(g(a))
-        min = numpy.min(g(a))
-        self.assertEqual(max, min)
-        self.assertEqual(max, 0.0)
-    def test_repeat_bprop(self):
-        """Refuse to repeat bprop"""
-        a = numpy.ones((3,3,3))
-        b,c,d = _testCase.posnegzero(a)
-        #print b, c, d
-        g = Grad({b:wrappers.wrap(a), c:wrappers.wrap(a)})
-        g.bprop()
-        try:
-            g.bprop()
-            self.assertEqual('should have raised')
-        except Exception, e:
-            self.assertEqual(str(e), 'bprop has already been done. Consider calling with maybe_redo=True.')
-            return
-        self.assertEqual('should have caught')
-    def test_repeat_bprop1(self):
-        """Force repeat bprop"""
-        a = numpy.ones((3,3,3))
-        z = numpy.zeros((3,3,3))
-        b,c,d = _testCase.posnegzero(a)
-        #print b, c, d
-        g = Grad({b:wrappers.wrap(a), c:wrappers.wrap(z)})
-        g.bprop()
-        g.bprop(maybe_redo=True)
-        max = numpy.max(g(a))
-        min = numpy.min(g(a))
-        self.assertEqual(max, min)
-        self.assertEqual(max, 2.0)
-    def tearDown(self):
-        core.pop_mode()
 if __name__ == '__main__':
    unittest.main()
--- a/gradient.py
+++ b/gradient.py
-import gof
+import gof, gof.result
-class OrderError(Exception):
+_msg_retNone = 'op.grad(...) returned None, consider returning [None]'
-    """Grad has been manipulated in the wrong order"""
+_msg_badlen = 'op.grad(...) returned wrong number of gradients'
-class Grad(object):
+def _unpack_result(lst):
-    """A dictionary-like class, into which derivative expressions may be added.
+    if len(lst) > 1:
+        return lst
+    else:
+        return lst[0]
-    Attributes:
+def _pack_result(arg):
-    map - dict: result -> grad(result)
+    if isinstance(arg, gof.result.ResultBase):
-    outputs - list: results from which to backpropagate gradient
+        return [arg]
-    did_bprop - bool: has bprop been called?
+    else:
-    items_got - set: results for which we have returned the gradient
+        return arg
+def grad_sources_inputs(sources, graph_inputs):
+    """Return a dictionary mapping each result necessary for a source to its gradient
-    Methods:
+    sources - a list of gradient sources (explained below)
+    graph_inputs - a list of results considered to be constant
-    add() - accumulate a gradient expression
+    A gradient source is a pair (r, g_r), in which r is a result, and g_r is a
-    bprop() - recursively construct gradient expressions
+    result that is a gradient wrt r.
-    __call__() - retrieve the gradient wrt a given Op or result
-    __getitem__() - retrieve the gradient wrt a given Op or result
-    This class operates on graphs of nodes which implement the UpdateGradient interface.
+    This function traverses the graph backward from the 'r' sources,
+    calling op.grad(...) when it is provided by an op, and at least one of the
+    outputs of the op has an associated gradient.
-    """
+    The op.grad(...) functions may be called in several ways (for the
+    convenience of the op implementer) depending on the number of inputs and
+    outputs.  
-    def __init__(self, dct={}):
+    If there is one input and one output:
-        self.map = {}
+        op.grad( op.inputs[0], grad(op.outputs[0]))
-        self.outputs = []
-        self.did_bprop = False
-        self.items_got = set([])
-        for key,val in dct.items():
-            self.add_output(key,val)
-    def __contains__(self, item):
+    If there are several inputs and one output:
-        return item in self.map
+        op.grad( op.inputs, grad(op.outputs[0]))
-    def __getitem__(self, r):
+    If there is one input and several outputs:
-        """Return the gradient wrt result r
+        op.grad( op.inputs[0], [grad(o) for o in op.outputs])
-        r is also added to the set of things for which the gradient has been
-        given.  Subsequent attempts to modify the gradient wrt r will fail
-        with exception FixedGradientError.
-        """
-        self.items_got.add(r)
-        try:
-            return self.map[r]
-        except KeyError:
-            return None
-    def __call__(self, r):
-        """Return the gradient wrt result r"""
-        return self.__getitem__(r)
-    def add_output(self, r, dr):
-        self.add(r, dr)
-        self.outputs.append(r)
-    def add(self, r, dr):
-        """Add dr to the sum of gradients associated with r."""
-        if r in self.items_got:
-            raise OrderError('gradient has already been retrieved', r)
-        if r in self.map:
-            self.map[r] = self.map[r] + dr
-        else:
-            self.map[r] = dr
-    def bprop(self):
-        """Build a backpropagation graph.
-        This function traverses the graph backward from self.outputs, calling
-        update_gradient on the ops as it goes.  Ops without an update_gradient
-        function are considered not differentiable.  The update_gradient
-        function is defined in the UpdateGradient class.
-        maybe_redo
-        """
-        if self.did_bprop:
-            raise OrderError('bprop has already been done')
-        try:
-            outputs = self.outputs
-            inputs = gof.graph.inputs(outputs)
-            for op in gof.graph.io_toposort(inputs, outputs).__reversed__():
-                op.update_gradient(self)
-        finally:
-            self.did_bprop = True
-def grad(cost, param=None, cost_grad = 1.0):
-    """Return symbolic expression of gradient of <cost> wrt <param>.
-    If <param> is None, then return a Grad instance, from which the gradients of
+    If there are multiple inputs and outputs:
-    multiple objects can be retrieved using the __getitem__ or __call__ methods
+        op.grad( op.inputs, [grad(o) for o in op.outputs])
-    (as in function currying in languages such as scheme and OCaML).
-    If <param> is not None, then return the gradient expression for 
+    This function expects the op.grad(...) function to return the gradient
-    d cost / d param.
+    expression [results] associated with the inputs of the op.  If the op has a
+    single input, it should return a single result; if the op has multiple
+    inputs, it should return a list of results corresponding to the gradients in
+    the same order as the inputs.
-    """
+    For each input wrt which an op is not differentiable, it should return
-    rval = Grad({cost:cost_grad})
+    None instead of a result instance.
-    rval.bprop()
-    if param is None:
-        return rval
-    else:
-        return rval(param)
-class UpdateGradient:
-    """This class defines the interface that Grad.bprop expects of each
-    differentiable Op"""
-    def update_gradient(self, grad_d):
-        """Override this function to call grad_d.add(r,grad_r) for each
-        differentiable input result, r.
-        You can assume that the gradient with respect to all output results
-        has been accumulated in grad_d.  These expressions are available by
-        calling grad_d[o] for o in self.outputs.  If grad_d[o] returns None,
-        then this function should assume that grad_d[o] is an appropriate sort
-        of zero.
-        """
-        raise AbstractFunctionError()
-class SelfGrad (UpdateGradient):
-    """This class implements update_gradient in terms of the popular self.grad
-    This class defines update_gradient (necessary for Grad.bprop) to call a
-    self.grad function like this:
-        passed_inputs = self.inputs
-        if len(self.inputs) == 1: passed_inputs = passed_inputs[0]
-        passed_ograds = [grad_d[o] for o in self.outputs]
-        if len(self.outputs) == 1: passed_ograds = passed_ograds[0]
-        igrads = self.grad(passed_inputs, passed_ograds)
-        if len(self.inputs) == 1: igrads = [igrads]
-    self.grad() is an Abstract function, see its documentation for the
-    expected behaviour.
    """
+    gmap = {}
-    def update_gradient(self, grad_d):
+    for (r, g_r) in sources:
-        #Call self.grad(inputs, output_gradients) and add the result to grad_d
+        if g_r is not None:
+            if r in gmap:
-        inputgs = gof.utils.from_return_values(
+                gmap[r] = gmap[r] + g_r
-            self.grad(gof.utils.to_return_values(self.inputs),
+            else:
-                      gof.utils.to_return_values([grad_d[o] for o in self.outputs])))
+                gmap[r] = g_r
-        assert len(inputgs) == len(self.inputs)
+    graph_outputs = gmap.keys()
+    if graph_inputs is None:
+        graph_inputs = gof.graph.inputs(graph_outputs)
-        for input, inputgrad in zip(self.inputs, inputgs):
+    for op in gof.graph.io_toposort(graph_inputs, graph_outputs).__reversed__():
-            grad_d.add(input, inputgrad)
+        g_outputs = [gmap.get(o,None) for o in op.outputs]
-    def grad(self, *args):
+        #if all output gradients are None, continue
-        """Return gradient expressions wrt input arguments
+        if all(map(lambda x:x is None, g_outputs)): continue
-        If len(self.inputs)==1 : return the input gradient expression
+        output_arg = _unpack_result(g_outputs)
-        If len(self.inputs)>=2 : return a list of input gradient expressions 
+        input_arg = _unpack_result(op.inputs)
-        """
+        op_grad = op.grad(input_arg, output_arg)
-        raise AbstractFunctionError()
+        if op_grad is None:
+            raise ValueError(_msg_retNone, op.__class__)
+        g_inputs = _pack_result(op_grad)
+        if len(g_inputs) != len(op.inputs):
+            raise ValueError(_msg_badlen, 
+                    op.__class__, 
+                    len(g_inputs),
+                    len(op.inputs))
+        for r, g_r in zip(op.inputs, g_inputs):
+            if g_r is not None: 
+                if r in gmap:
+                    gmap[r] = gmap[r] + g_r
+                else:
+                    gmap[r] = g_r
+    return gmap
+def grad(cost, param):
+    """Return symbolic expression of gradient of <cost> wrt <param>.
+    If <param> is a list, then return a list containing the gradient of cost wrt
+    each element of the list.
+    """
+    inputs = gof.graph.inputs([cost])
+    gmap = grad_sources_inputs([(cost, 1.0)], inputs)
+    if isinstance(param, list):
+        return [gmap.get(p, None) for p in param]
+    else:
+        return gmap.get(param, None)
--- a/tensor_ops.py
+++ b/tensor_ops.py
@@ -2,7 +2,6 @@
 from gof import Op, utils, Destroyer, Viewer
 import gof.op
-import gradient
 from tensor import *
@@ -24,7 +23,7 @@ def _wrap_as_tensor(x):
 # Ops in this file.
 # It is not necessary to inherit from TensorOp to make an Op that manipulates
 # Tensors.
-class TensorOp(Op, gradient.SelfGrad):
+class TensorOp(Op):
    nin = -1
    nout = 1