moving away from Grad

30d14420 · bergstrj@iro.umontreal.ca · 403d94df · 30d14420 · 30d14420 · 30d14420
--- a/_test_gradient.py
+++ b/_test_gradient.py
@@ -17,7 +17,12 @@ def matrices(n):
    return [matrix() for i in xrange(n)]


-class _testCase (unittest.TestCase):
+class _testNone(unitTest.TestCase):
+    def test0(self):
+
+
+
+class _testCase_matinv:# (unittest.TestCase):
    def setUp(self):
        numpy.random.seed(1)
    def matinv(self,dim):
@@ -48,7 +53,7 @@ class _testCase (unittest.TestCase):
        self.assertEqual(('2.67327580893', '0.000438649434819'), self.matinv(3))


-class _testCase_old:
+class _testCase_old:#(unittest.TestCase):

    class posneg(T._TensorOp):
        nout=2

--- a/gradient.py
+++ b/gradient.py
 import gof

-class OrderError(Exception):
-    """Grad has been manipulated in the wrong order"""
+def _unpack_result(lst):
+    if len(lst) > 1:
+        return lst
+    else
+        return lst[0]

-class Grad(object):
-    """A dictionary-like class, into which derivative expressions may be added.
+def _pack_result(arg):
+    if gof.result.is_result(arg): return [arg]
+    return arg

-    Attributes:
-    map - dict: result -> grad(result)
-    outputs - list: results from which to backpropagate gradient
-    did_bprop - bool: has bprop been called?
-    items_got - set: results for which we have returned the gradient
+def grad_sources_inputs(sources, inputs):
+    """Return a dictionary mapping each result necessary for a source to its gradient

+    sources - a list of gradient sources (explained below)
+    inputs - a list of results considered to be constant

-    Methods:
+    A gradient source is a pair (r, g_r), in which r is a result, and g_r is a
+    result that is a gradient wrt r.

-    add() - accumulate a gradient expression
-    bprop() - recursively construct gradient expressions
-    __call__() - retrieve the gradient wrt a given Op or result
-    __getitem__() - retrieve the gradient wrt a given Op or result
+    This function traverses the graph backward from the 'r' sources,
+    calling op.grad(...) when it is provided by an op, and at least one of the
+    outputs of the op has an associated gradient.

-    This class operates on graphs of nodes which implement the UpdateGradient interface.
+    The op.grad(...) functions may be called in several ways (for the
+    convenience of the op implementer) depending on the number of inputs and
+    outputs.  

-    """
+    If there is one input and one output:
+        op.grad( op.inputs[0], grad(op.outputs[0]))

-    def __init__(self, dct={}):
-        self.map = {}
-        self.outputs = []
-        self.did_bprop = False
-        self.items_got = set([])
-        for key,val in dct.items():
-            self.add_output(key,val)
+    If there are several inputs and one output:
+        op.grad( op.inputs, grad(op.outputs[0]))

-    def __contains__(self, item):
-        return item in self.map
+    If there is one input and several outputs:
+        op.grad( op.inputs[0], [grad(o) for o in op.outputs[0]])

-    def __getitem__(self, r):
-        """Return the gradient wrt result r
-        
-        r is also added to the set of things for which the gradient has been
-        given.  Subsequent attempts to modify the gradient wrt r will fail
-        with exception FixedGradientError.
-        """
-        self.items_got.add(r)
-        try:
-            return self.map[r]
-        except KeyError:
-            return None
-    def __call__(self, r):
-        """Return the gradient wrt result r"""
-        return self.__getitem__(r)
-
-    def add_output(self, r, dr):
-        self.add(r, dr)
-        self.outputs.append(r)
-        
-    def add(self, r, dr):
-        """Add dr to the sum of gradients associated with r."""
-        if r in self.items_got:
-            raise OrderError('gradient has already been retrieved', r)
-        if r in self.map:
-            self.map[r] = self.map[r] + dr
-        else:
-            self.map[r] = dr
-
-    def bprop(self):
-        """Build a backpropagation graph.
-
-        This function traverses the graph backward from self.outputs, calling
-        update_gradient on the ops as it goes.  Ops without an update_gradient
-        function are considered not differentiable.  The update_gradient
-        function is defined in the UpdateGradient class.
-
-        maybe_redo
-        """
-        if self.did_bprop:
-            raise OrderError('bprop has already been done')
-        try:
-            outputs = self.outputs
-            inputs = gof.graph.inputs(outputs)
-            for op in gof.graph.io_toposort(inputs, outputs).__reversed__():
-                op.update_gradient(self)
-        finally:
-            self.did_bprop = True
-
-def grad(cost, param=None, cost_grad = 1.0):
-    """Return symbolic expression of gradient of <cost> wrt <param>.
+    If there are multiple inputs and outputs:
+        op.grad( op.inputs, [grad(o) for o in op.outputs[0]])

-    If <param> is None, then return a Grad instance, from which the gradients of
-    multiple objects can be retrieved using the __getitem__ or __call__ methods
-    (as in function currying in languages such as scheme and OCaML).
+    This function expects the op.grad(...) function to return the gradient
+    expression [results] associated with the inputs of the op.  If the op has a
+    single input, it should return a single result; if the op has multiple
+    inputs, it should return a list of results corresponding to the gradients in
+    the same order as the inputs.

-    If <param> is not None, then return the gradient expression for 
-    d cost / d param.
+    For each input wrt to which an op is not differentiable, it should return
+    None instead of a result instance.

    """
-    rval = Grad({cost:cost_grad})
-    rval.bprop()
-    if param is None:
-        return rval
-    else:
-        return rval(param)

+    gmap = {}
+    for (r, g_r) in self.sources:
+        if r in gmap:
+            gmap[r] = gmap[r] + dr
+        else:
+            gmap[r] = dr

-class UpdateGradient:
-    """This class defines the interface that Grad.bprop expects of each
-    differentiable Op"""
-
-    def update_gradient(self, grad_d):
-        """Override this function to call grad_d.add(r,grad_r) for each
-        differentiable input result, r.
-
-        You can assume that the gradient with respect to all output results
-        has been accumulated in grad_d.  These expressions are available by
-        calling grad_d[o] for o in self.outputs.  If grad_d[o] returns None,
-        then this function should assume that grad_d[o] is an appropriate sort
-        of zero.
+    outputs = gmap.keys()
+    
+    if inputs is None:
+        inputs = gof.graph.inputs(outputs)
        
-        """
-        raise AbstractFunctionError()
-
-class SelfGrad (UpdateGradient):
-    """This class implements update_gradient in terms of the popular self.grad
-
-    This class defines update_gradient (necessary for Grad.bprop) to call a
-    self.grad function like this:
-
-        if len(self.outputs) > 1:
-            self.grad(self.inputs, [grad_d[o] for o in self.outputs])
-        else
-            self.grad(self.inputs, grad_d[output[0]])
+    for op in gof.graph.io_toposort(inputs, outputs).__reversed__():
+        g_outputs = [gmap[o] for o in self.outputs]
+        if all(map(lambda x:x is None, g_outputs)):
+            continue
+        output_arg = unpack_singleton(g_outputs)
+        input_arg = unpack_singleton(op.inputs)
+        op_grad = op.grad(input_arg, output_arg)
+        if op_grad is None:
+            raise Exception('If you really mean for grad(...) to return None,
+            please return [None]', op.__class__)
+        g_inputs = pack_singleton(op_grad)
+        assert len(g_inputs) == len(op.inputs)
+
+        for r, g_r in zip(self.inputs, g_inputs):
+            if g_r is not None: 
+                if r in gmap:
+                    gmap[r] = gmap[r] + g_r
+                else:
+                    gmap[r] = g_r
+    return gmap
+
+def diff(cost, param):
+    """Return symbolic expression of gradient of <cost> wrt <param>.

-    self.grad() is an Abstract function, see its documentation for the
-    expected behaviour.
-    
+    If <param> is a list, then return a list containing the gradient of cost wrt
+    each element of the list.
    """
-
-    def update_gradient(self, grad_d):
-        #Call self.grad(inputs, output_gradients) and add the result to grad_d
-
-        if len(self.outputs) > 1:
-            inputgs = self.grad(self.inputs, [grad_d[o] for o in self.outputs])
-        else:
-            inputgs = self.grad(self.inputs, grad_d[self.outputs[0]])
-
-        if len(self.inputs) == 1 and is_result(inputgs):
-            inputgs = [inputgs]
-        else:
-            assert len(inputgs) == len(self.inputs)
-        for input, inputgrad in zip(self.inputs, inputgs):
-            grad_d.add(input, inputgrad)
-
-    def grad(self, *args):
-        """Return gradient expressions wrt input arguments
-
-        If len(self.inputs)==1 : return the input gradient expression
-        If len(self.inputs)>=2 : return a list of input gradient expressions 
-        """
-        raise AbstractFunctionError()
+    inputs = gof.graph.inputs([cost])
+    gmap = grad_sources_inputs([(cost, 1.0)], inputs)
+    if isinstance(param, lst):
+        return [gmap[p] for p in param]
+    else:
+        return gmap[param]


--- a/tensor_ops.py
+++ b/tensor_ops.py
@@ -2,7 +2,6 @@
 from gof import Op, utils, Destroyer, Viewer
 import gof.op

-import gradient
 from tensor import *


@@ -24,7 +23,7 @@ def _wrap_as_tensor(x):
 # Ops in this file.
 # It is not necessary to inherit from TensorOp to make an Op that manipulates
 # Tensors.
-class TensorOp(Op, gradient.SelfGrad):
+class TensorOp(Op):

    nin = -1
    nout = 1