Commit 1ce5cffc authored by Ian Goodfellow

added implementation of grad_sources_inputs

Parent cebbef1b
@@ -600,6 +600,116 @@ def grad(cost, wrt, g_cost = None, consider_constant = None, warn_type = 'ignore
    return rval


def grad_sources_inputs(sources, graph_inputs, warn_type = 'ignored'):
    global tensor
    if tensor is None:
        from theano import tensor

    outputs, output_grads = zip(*sources)

    wrt = graph_inputs

    #set of variables that have had children added to them
    marked = set([])
    #set of variables that have been added to their parents
    accounted_for = set([])

    #use a try/finally to make sure we don't leave any marks
    #on the variables
    try:
        #mark the variables in the relevant subgraph with
        #a dictionary called _children
        #var._children[node] gives the index of var in node.inputs
        def account_for(var):
            if var in accounted_for:
                return
            accounted_for.add(var)
            if var.owner is not None:
                node = var.owner
                for i, ipt in enumerate(node.inputs):
                    if not hasattr(ipt, '_children'):
                        marked.add(ipt)
                        ipt._children = {}
                    if node not in ipt._children:
                        ipt._children[node] = i
                    account_for(ipt)

        for output in outputs:
            account_for(output)

        #build a dict mapping var to the gradient of cost with respect to var
        grad_dict = {}

        #by default, the gradient of the cost is 1
        for output, output_grad in sources:
            grad_dict[output] = output_grad

        #variables that do not influence the cost have zero gradient.
        #if wrt is such a variable, populate the grad_dict with this info
        #so that wrt not having _children won't cause an error below
        #according to the flag, possibly raise an error if wrt is disconnected
        for elem in wrt:
            if elem not in marked and elem not in outputs:
                message = ("grad method was asked to compute the gradient "
                        "with respect to a variable that is not part of "
                        "the computational graph of the cost, or is used "
                        "only by a non-differentiable operator: %s" % elem)
                #raise ValueError(message)
                grad_dict[elem] = elem.zeros_like()

        #build a dict mapping node to the terms node contributes to each of
        #its inputs' gradients
        term_dict = {}

        #populate term_dict[node] and return it
        def access_term_cache(node):
            if node not in term_dict:
                #must convert to list in case the op returns a tuple
                #we won't be able to post-process out the Nones if it does that
                term_dict[node] = list(node.op.grad(node.inputs,
                        [access_grad_cache(var) for var in node.outputs]))
                for i in xrange(len(term_dict[node])):
                    if term_dict[node][i] is None:
                        term_dict[node][i] = tensor.zeros_like(node.inputs[i])
                    if isinstance(term_dict[node][i].type, NaNType):
                        raise TypeError("tensor.grad encountered a NaN. " +
                                term_dict[node][i].type.why_nan)
            return term_dict[node]

        #built-in python sum adds an extraneous TensorConstant(0)
        #we can exploit the knowledge that the iterable always has at
        #least one element to avoid starting the sum at 0
        def nonempty_sum(iterable):
            rval = iterable[0]
            for elem in iterable[1:]:
                rval = rval + elem
            return rval

        #populate grad_dict[var] and return it
        def access_grad_cache(var):
            if var not in grad_dict:
                if hasattr(var, '_children'):
                    terms = []
                    for child in var._children.keys():
                        idx = var._children[child]
                        terms.append(access_term_cache(child)[idx])
                    grad_dict[var] = nonempty_sum(terms)
                else:
                    #this variable is not connected to the cost in the
                    #computational graph so the gradient on it is zero
                    grad_dict[var] = tensor.zeros_like(var)
            return grad_dict[var]

        #walking the wrt list populates grad_dict for every requested variable
        #(the list itself is not returned; callers read grad_dict)
        rval = [access_grad_cache(elem) for elem in wrt]
    finally:
        #take the marks out
        for node in marked:
            del node._children

    return grad_dict
def grad_wrong(cost, wrt, g_cost=None, consider_constant=None, warn_type=False,
               disconnected_inputs='raise'):
    """
......
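For reference, a minimal usage sketch (not part of the commit) of how grad_sources_inputs might be invoked, assuming it is accessible in the current scope since it is defined in the same module as grad(); the variables x, w, and cost below are illustrative only. Each source pairs an output with the gradient seeded on it, and the returned dict maps graph variables to their gradients.

from theano import tensor

x = tensor.vector('x')
w = tensor.vector('w')
cost = tensor.dot(x, w)  #scalar cost built from x and w

#seed the gradient of the cost with 1, matching the default used by grad()
sources = [(cost, tensor.ones_like(cost))]

grad_map = grad_sources_inputs(sources, graph_inputs=[x, w])
g_x, g_w = grad_map[x], grad_map[w]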