Represent unimplemented and undefined gradients with NaNType

cb94334d · Ian Goodfellow · 122d7246 · cb94334d · cb94334d · cb94334d
--- a/theano/gradient.py
+++ b/theano/gradient.py
@@ -21,6 +21,7 @@ from theano.gof import Variable
 from theano.gof.python25 import all
 import theano.gof.utils
 tensor = None
+from theano.gof.nan_type import NaNType

 _msg_retType = 'op.grad(...) returned a non-list'
 _msg_badlen = 'op.grad(...) returned wrong number of gradients'
@@ -193,32 +194,6 @@ def grad_sources_inputs(sources, graph_inputs, warn_type=True):
                    gmap[r] = g_r
    return gmap

-
-class GradNotImplementedOp(gof.op.UncomputableOp):
-    """ An UncomputableOp representing a gradient that hasn't been implemented yet.
-    """
-
-    def __init__(self, op, x_pos, comment = ""):
-        """
-            op: A theano op  whose grad is not implemented for some input
-            x_pos: An int, giving the index in the op's input list of
-                a variable for which the gradient is not implemented
-                (if op has unimplemented gradients for several inputs,
-                it must still return a separate UnimplementedGradOp for
-                each)
-            comment: An optional comment explaining why the gradient isn't
-                implemented.
-        """
-
-        assert isinstance(op, gof.Op)
-        assert isinstance(x_pos, int)
-        assert x_pos >= 0
-
-        super(GradNotImplementedOp,self).__init__(NotImplementedError,
-            "%s does not implement its gradient with respect to input %d. %s" \
-            % (str(type(op)), x_pos, comment))
-
-
 def grad_not_implemented(op, x_pos, x, comment = ""):
    """
    Return an un-computable symbolic variable of type `x.type`.
@@ -233,38 +208,9 @@ def grad_not_implemented(op, x_pos, x, comment = ""):
    gradient is not implemented.
    """

-    return GradNotImplementedOp(op, x_pos, comment)(x)
-
-class GradUndefinedError(Exception):
-    """ An exception raised upon attempts to use an undefined gradient.
-    """
-
-class GradUndefinedOp(gof.op.UncomputableOp):
-    """ An UncomputableOp representing a gradient that is mathematically
-        undefined.
-    """
-
-    def __init__(self, op, x_pos, comment = ""):
-        """
-            op: A theano op  whose grad is mathematically undefined for
-                some input
-            x_pos: An int, giving the index in the op's input list of
-                a variable for which the gradient is undefined
-                (if op has undefined gradients for several inputs,
-                it must still return a separate GradUndefinedOp for
-                each)
-            comment: An optional comment explaining why the gradient isn't
-                defined.
-        """
-
-        assert isinstance(op, gof.Op)
-        assert isinstance(x_pos, int)
-        assert x_pos >= 0
-
-        super(GradUndefinedOp,self).__init__(GradUndefinedError,
-            "%s does not implement its gradient with respect to input %d. %s" \
-            % (str(type(op)), x_pos, comment))
-
+    return NaNType("This variable is NaN because the grad method for " + \
+            "input "+str(x_pos)+" ("+str(x)+") of the "+str(op)+" op is " + \
+            "not implemented.")()

 def grad_undefined(op, x_pos, x, comment = ""):
    """
@@ -280,7 +226,9 @@ def grad_undefined(op, x_pos, x, comment = ""):
    gradient is not defined.
    """

-    return GradUndefinedOp(op, x_pos, comment)(x)
+    return NaNType("This variable is NaN because the gradient for " + \
+            "input "+str(x_pos)+" ("+str(x)+") of the "+str(op)+" op is " + \
+            "mathematically undefined.")()



@@ -503,6 +451,11 @@ def grad(cost, wrt, g_cost = None, consider_constant = None, warn_type = 'ignore
    if tensor is None:
        from theano import tensor

+
+    if isinstance(cost.type, NaNType):
+        raise ValueError("Can't differentiate a NaN cost. cost is NaN because "+\
+                cost.type.why_nan)
+
    if consider_constant is None:
        consider_constant = []
    else:
@@ -593,6 +546,9 @@ def grad(cost, wrt, g_cost = None, consider_constant = None, warn_type = 'ignore
                term_dict[node] = node.op.grad(node.inputs,
                        [access_grad_cache(var) for var in node.outputs])
                for i in xrange(len(term_dict[node])):
+                    if isinstance(term_dict[node][i].type,NaNType):
+                        raise TypeError("tensor.grad encountered a NaN. "+\
+                                term_dict[node][i].type.why_nan)
                    if term_dict[node][i] is None:
                        term_dict[node][i] = tensor.zeros_like(node.inputs[i])
            return term_dict[node]

--- a/theano/tensor/tests/test_basic.py
+++ b/theano/tensor/tests/test_basic.py
@@ -2217,6 +2217,7 @@ class T_argmin_argmax(unittest.TestCase):
    def test_grad_argmin(self):
        data = rand(2, 3)
        n = as_tensor_variable(data)
+        n.name = 'n'

        #test grad of argmin
        utt.verify_grad(lambda v: argmin(v, axis=-1), [data])
@@ -2228,7 +2229,11 @@ class T_argmin_argmax(unittest.TestCase):
        utt.verify_grad(lambda v: argmin(v.flatten()), [data])

        try:
-            grad(argmin(n, axis=-1), n)
+            cost = argmin(n, axis=-1)
+            cost.name = None
+            g = grad(cost, n)
+            from theano.printing import min_informative_str
+            print min_informative_str(g)
            raise Exception('Expected an error')
        except TypeError:
            pass

--- a/theano/tests/test_gradient.py
+++ b/theano/tests/test_gradient.py
@@ -6,264 +6,269 @@ import unittest
 import theano
 from theano import gof

-from theano.gradient import grad_sources_inputs
+#from theano.gradient import grad_sources_inputs
 from theano import gradient
 from theano.tensor.nnet.Conv3D import conv3D
 from theano import config


-def _grad_sources_inputs(*args):
+#def _grad_sources_inputs(*args):
    # warn_type was introduced after this code, it complains throughout for nothing.
-    return grad_sources_inputs(warn_type=False, *args)
+#    return grad_sources_inputs(warn_type=False, *args)

-class test_grad_sources_inputs(unittest.TestCase):
-    def test_retNone1(self):
-        """Test that it is not ok to return None from op.grad()"""
-        class retNone(gof.op.Op):
-            def make_node(self):
-                inputs = [gof.generic()]
-                outputs = [gof.generic()]
-                return gof.Apply(self, inputs, outputs)
-            def grad(self, inp, grads):
-                x, = inp
-                gz, = grads
-                pass
-        a = retNone().make_node()
-        try:
-            _grad_sources_inputs([(a.out, 1)], None)
-        except ValueError, e:
-            self.assertTrue(e[0] is gradient._msg_retType)
-            return
-        self.fail()
-    def test_retNone1_b(self):
-        """Test that it is ok to return [None] from op.grad()"""
-        class retNone(gof.op.Op):
-            def make_node(self, *inputs):
-                outputs = [gof.generic()]
-                return gof.Apply(self, inputs, outputs)
-            def grad(self, inp, grads):
-                return [None]
-        i = gof.generic()
-        a = retNone().make_node(i)
-        g = _grad_sources_inputs([(a.out, 1)], None)
-        self.assertTrue(not i in g)
+if 0:
+    # Most of these tests are no longer relevant now that grad_sources_inputs
+    # is gone, and some of our policies about what is allowed have changed.
+    # Nonetheless, it may be a good idea to resurrect some of these tests and
+    # rewrite them in terms of tensor.grad directly.
+    class test_grad_sources_inputs(unittest.TestCase):
+        def test_retNone1(self):
+            """Test that it is not ok to return None from op.grad()"""
+            class retNone(gof.op.Op):
+                def make_node(self):
+                    inputs = [gof.generic()]
+                    outputs = [gof.generic()]
+                    return gof.Apply(self, inputs, outputs)
+                def grad(self, inp, grads):
+                    x, = inp
+                    gz, = grads
+                    pass
+            a = retNone().make_node()
+            try:
+                _grad_sources_inputs([(a.out, 1)], None)
+            except ValueError, e:
+                self.assertTrue(e[0] is gradient._msg_retType)
+                return
+            self.fail()
+        def test_retNone1_b(self):
+            """Test that it is ok to return [None] from op.grad()"""
+            class retNone(gof.op.Op):
+                def make_node(self, *inputs):
+                    outputs = [gof.generic()]
+                    return gof.Apply(self, inputs, outputs)
+                def grad(self, inp, grads):
+                    return [None]
+            i = gof.generic()
+            a = retNone().make_node(i)
+            g = _grad_sources_inputs([(a.out, 1)], None)
+            self.assertTrue(not i in g)

-    def test_wrong_rval_len1(self):
-        """Test that it is not ok to return the wrong number of gradients"""
-        class retNone(gof.op.Op):
-            def make_node(self, *inputs):
-                outputs = [gof.generic()]
-                return gof.Apply(self, inputs, outputs)
-            def grad(self, inputs, grads):
-                return [None]
+        def test_wrong_rval_len1(self):
+            """Test that it is not ok to return the wrong number of gradients"""
+            class retNone(gof.op.Op):
+                def make_node(self, *inputs):
+                    outputs = [gof.generic()]
+                    return gof.Apply(self, inputs, outputs)
+                def grad(self, inputs, grads):
+                    return [None]

-        i = gof.generic()
-        j = gof.generic()
-        a1 = retNone().make_node(i)
-        g = _grad_sources_inputs([(a1.out, 1)], None)
-        a2 = retNone().make_node(i,j)
-        try:
-            g = _grad_sources_inputs([(a2.out, 1)], None)
-        except ValueError, e:
-            self.assertTrue(e[0] is gradient._msg_badlen)
-            return
-        self.fail()
+            i = gof.generic()
+            j = gof.generic()
+            a1 = retNone().make_node(i)
+            g = _grad_sources_inputs([(a1.out, 1)], None)
+            a2 = retNone().make_node(i,j)
+            try:
+                g = _grad_sources_inputs([(a2.out, 1)], None)
+            except ValueError, e:
+                self.assertTrue(e[0] is gradient._msg_badlen)
+                return
+            self.fail()


-    def test_stop_on_all_none(self):
-        """Test that op.grad() is not called when output grads are all None"""
-        class retNone(gof.op.Op):
-            def __init__(self, tst):
-                self.tst = tst
-            def make_node(self, *inputs):
-                outputs = [gof.generic()]
-                return gof.Apply(self, inputs, outputs)
-            def grad(self, inputs, grads):
-                self.tst.fail()
+        def test_stop_on_all_none(self):
+            """Test that op.grad() is not called when output grads are all None"""
+            class retNone(gof.op.Op):
+                def __init__(self, tst):
+                    self.tst = tst
+                def make_node(self, *inputs):
+                    outputs = [gof.generic()]
+                    return gof.Apply(self, inputs, outputs)
+                def grad(self, inputs, grads):
+                    self.tst.fail()

-        i = gof.generic()
-        a1 = retNone(self).make_node(i)
-        g = _grad_sources_inputs([(a1.out, None)], None)
+            i = gof.generic()
+            a1 = retNone(self).make_node(i)
+            g = _grad_sources_inputs([(a1.out, None)], None)

-    def test_1in_1out(self):
-        """Test grad is called correctly for a 1-to-1 op"""
-        gval = gof.generic()
-        class O(gof.op.Op):
-            def make_node(self):
-                inputs = [gof.generic()]
-                outputs = [gof.generic()]
-                return gof.Apply(self, inputs, outputs)
-            def grad(self, inp, grads):
-                return gval,
-        a1 = O().make_node()
-        g = _grad_sources_inputs([(a1.outputs[0], 1)], None)
-        self.assertTrue(g[a1.inputs[0]] is gval)
+        def test_1in_1out(self):
+            """Test grad is called correctly for a 1-to-1 op"""
+            gval = gof.generic()
+            class O(gof.op.Op):
+                def make_node(self):
+                    inputs = [gof.generic()]
+                    outputs = [gof.generic()]
+                    return gof.Apply(self, inputs, outputs)
+                def grad(self, inp, grads):
+                    return gval,
+            a1 = O().make_node()
+            g = _grad_sources_inputs([(a1.outputs[0], 1)], None)
+            self.assertTrue(g[a1.inputs[0]] is gval)

-    def test_1in_Nout(self):
-        """Test grad is called correctly for a 1-to-many op"""
-        gval = gof.generic()
-        class O(gof.op.Op):
-            def make_node(self):
-                inputs = [gof.generic()]
-                outputs = [gof.generic(),gof.generic()]
-                return gof.Apply(self, inputs, outputs)
-            def grad(self, inp, grads):
-                x, = inp
-                gz1, gz2 = grads
-                return gval,
-        a1 = O().make_node()
-        g = _grad_sources_inputs([(a1.outputs[0], 1)], None)
-        self.assertTrue(g[a1.inputs[0]] is gval)
-    def test_Nin_1out(self):
-        """Test grad is called correctly for a many-to-1 op"""
-        gval0 = gof.generic()
-        gval1 = gof.generic()
-        class O(gof.op.Op):
-            def make_node(self):
-                inputs = [gof.generic(),gof.generic()]
-                outputs = [gof.generic()]
-                return gof.Apply(self, inputs, outputs)
-            def grad(self, inp, grads):
-                x0, x1 = inp
-                gz, = grads
-                return (gval0, gval1)
-        a1 = O().make_node()
-        g = _grad_sources_inputs([(a1.outputs[0], 1)], None)
-        self.assertTrue(g[a1.inputs[0]] is gval0)
-        self.assertTrue(g[a1.inputs[1]] is gval1)
-    def test_Nin_Nout(self):
-        """Test grad is called correctly for a many-to-many op"""
-        gval0 = gof.generic()
-        gval1 = gof.generic()
-        class O(gof.op.Op):
-            def make_node(self):
-                inputs = [gof.generic(),gof.generic()]
-                outputs = [gof.generic(),gof.generic()]
-                return gof.Apply(self, inputs, outputs)
-            def grad(self, inp, grads):
-                return gval0, gval1
-        a1 = O().make_node()
-        g = _grad_sources_inputs([(a1.outputs[0], 1)], None)
-        self.assertTrue(g[a1.inputs[0]] is gval0)
-        self.assertTrue(g[a1.inputs[1]] is gval1)
-    def test_some_None_ograds(self):
-        """Test grad is called when some output gradients are None"""
-        class O(gof.op.Op):
-            def __init__(self, tst):
-                self.tst = tst
-            def make_node(self, *inputs):
-                outputs = [gof.generic(),gof.generic()]
-                return gof.Apply(self, inputs, outputs)
-            def grad(self, inputs, g_out):
-                return [1]
-        i = gof.generic()
-        a1 = O(self).make_node(i)
-        g = grad_sources_inputs([(a1.outputs[0], 1)], None, warn_type=False)
-        self.assertTrue(g[i] is 1)
+        def test_1in_Nout(self):
+            """Test grad is called correctly for a 1-to-many op"""
+            gval = gof.generic()
+            class O(gof.op.Op):
+                def make_node(self):
+                    inputs = [gof.generic()]
+                    outputs = [gof.generic(),gof.generic()]
+                    return gof.Apply(self, inputs, outputs)
+                def grad(self, inp, grads):
+                    x, = inp
+                    gz1, gz2 = grads
+                    return gval,
+            a1 = O().make_node()
+            g = _grad_sources_inputs([(a1.outputs[0], 1)], None)
+            self.assertTrue(g[a1.inputs[0]] is gval)
+        def test_Nin_1out(self):
+            """Test grad is called correctly for a many-to-1 op"""
+            gval0 = gof.generic()
+            gval1 = gof.generic()
+            class O(gof.op.Op):
+                def make_node(self):
+                    inputs = [gof.generic(),gof.generic()]
+                    outputs = [gof.generic()]
+                    return gof.Apply(self, inputs, outputs)
+                def grad(self, inp, grads):
+                    x0, x1 = inp
+                    gz, = grads
+                    return (gval0, gval1)
+            a1 = O().make_node()
+            g = _grad_sources_inputs([(a1.outputs[0], 1)], None)
+            self.assertTrue(g[a1.inputs[0]] is gval0)
+            self.assertTrue(g[a1.inputs[1]] is gval1)
+        def test_Nin_Nout(self):
+            """Test grad is called correctly for a many-to-many op"""
+            gval0 = gof.generic()
+            gval1 = gof.generic()
+            class O(gof.op.Op):
+                def make_node(self):
+                    inputs = [gof.generic(),gof.generic()]
+                    outputs = [gof.generic(),gof.generic()]
+                    return gof.Apply(self, inputs, outputs)
+                def grad(self, inp, grads):
+                    return gval0, gval1
+            a1 = O().make_node()
+            g = _grad_sources_inputs([(a1.outputs[0], 1)], None)
+            self.assertTrue(g[a1.inputs[0]] is gval0)
+            self.assertTrue(g[a1.inputs[1]] is gval1)
+        def test_some_None_ograds(self):
+            """Test grad is called when some output gradients are None"""
+            class O(gof.op.Op):
+                def __init__(self, tst):
+                    self.tst = tst
+                def make_node(self, *inputs):
+                    outputs = [gof.generic(),gof.generic()]
+                    return gof.Apply(self, inputs, outputs)
+                def grad(self, inputs, g_out):
+                    return [1]
+            i = gof.generic()
+            a1 = O(self).make_node(i)
+            g = grad_sources_inputs([(a1.outputs[0], 1)], None, warn_type=False)
+            self.assertTrue(g[i] is 1)

-    def test_some_None_igrads(self):
-        """Test that traversal works properly when an op return some None"""
-        class O(gof.op.Op):
-            def __init__(self, tst, grad_ok):
-                self.tst = tst
-                self.grad_ok = grad_ok
-            def make_node(self, *inputs):
-                outputs = [gof.generic(),gof.generic()]
-                return gof.Apply(self, inputs, outputs)
-            def grad(self, inputs, g_out):
-                if not self.grad_ok:
-                    self.tst.fail()
-                else:
-                    return [1, None]
-        i = gof.generic()
-        j = gof.generic()
-        k = gof.generic()
-        a1 = O(self, True).make_node(i,j)
-        a2 = O(self, True).make_node(a1.outputs[1], k)
-        g = grad_sources_inputs([(a2.outputs[0], 1)], None, warn_type=False)
-        self.assertTrue(g[i] is 1 and j not in g and k not in g)
+        def test_some_None_igrads(self):
+            """Test that traversal works properly when an op return some None"""
+            class O(gof.op.Op):
+                def __init__(self, tst, grad_ok):
+                    self.tst = tst
+                    self.grad_ok = grad_ok
+                def make_node(self, *inputs):
+                    outputs = [gof.generic(),gof.generic()]
+                    return gof.Apply(self, inputs, outputs)
+                def grad(self, inputs, g_out):
+                    if not self.grad_ok:
+                        self.tst.fail()
+                    else:
+                        return [1, None]
+            i = gof.generic()
+            j = gof.generic()
+            k = gof.generic()
+            a1 = O(self, True).make_node(i,j)
+            a2 = O(self, True).make_node(a1.outputs[1], k)
+            g = grad_sources_inputs([(a2.outputs[0], 1)], None, warn_type=False)
+            self.assertTrue(g[i] is 1 and j not in g and k not in g)

-        a1 = O(self, True).make_node(i,j)
-        a2 = O(self, True).make_node(k, a1.outputs[1])
-        g = _grad_sources_inputs([(a2.outputs[0], 1)], None)
-        self.assertTrue(g[k] is 1 and i not in g and j not in g)
+            a1 = O(self, True).make_node(i,j)
+            a2 = O(self, True).make_node(k, a1.outputs[1])
+            g = _grad_sources_inputs([(a2.outputs[0], 1)], None)
+            self.assertTrue(g[k] is 1 and i not in g and j not in g)

-    def test_inputs(self):
-        """Test that passing inputs shortens the traversal"""
-        class O(gof.op.Op):
-            def __init__(self, tst, grad_ok):
-                self.tst = tst
-                self.grad_ok = grad_ok
-            def make_node(self, *inputs):
-                outputs = [gof.generic(),gof.generic()]
-                return gof.Apply(self, inputs, outputs)
-            def grad(self, inputs, grads):
-                g0, g1 = grads
-                if not self.grad_ok:
-                    self.tst.fail()
-                else:
-                    if g1:
-                        return [g0, g0+g1]
+        def test_inputs(self):
+            """Test that passing inputs shortens the traversal"""
+            class O(gof.op.Op):
+                def __init__(self, tst, grad_ok):
+                    self.tst = tst
+                    self.grad_ok = grad_ok
+                def make_node(self, *inputs):
+                    outputs = [gof.generic(),gof.generic()]
+                    return gof.Apply(self, inputs, outputs)
+                def grad(self, inputs, grads):
+                    g0, g1 = grads
+                    if not self.grad_ok:
+                        self.tst.fail()
                    else:
-                        return [g0, g0]
-        i = gof.generic()
-        j = gof.generic()
-        k = gof.generic()
-        a1 = O(self, True).make_node(i,j)
-        a2 = O(self, True).make_node(k,a1.outputs[1])
-        g = _grad_sources_inputs([(a2.outputs[0], 1), (a1.outputs[1],4),
-            (a1.outputs[0], 3), (a1.outputs[0], 3)], a1.outputs)
-        self.assertTrue(g[a2.inputs[0]] == 1)
-        self.assertTrue(g[a2.inputs[1]] == 5)
-        self.assertTrue(g[a1.outputs[0]] == 6)
-        self.assertTrue(g[a1.outputs[1]] == 5)
-        self.assertTrue(a1.inputs[0] not in g)
-        self.assertTrue(a1.inputs[1] not in g)
+                        if g1:
+                            return [g0, g0+g1]
+                        else:
+                            return [g0, g0]
+            i = gof.generic()
+            j = gof.generic()
+            k = gof.generic()
+            a1 = O(self, True).make_node(i,j)
+            a2 = O(self, True).make_node(k,a1.outputs[1])
+            g = _grad_sources_inputs([(a2.outputs[0], 1), (a1.outputs[1],4),
+                (a1.outputs[0], 3), (a1.outputs[0], 3)], a1.outputs)
+            self.assertTrue(g[a2.inputs[0]] == 1)
+            self.assertTrue(g[a2.inputs[1]] == 5)
+            self.assertTrue(g[a1.outputs[0]] == 6)
+            self.assertTrue(g[a1.outputs[1]] == 5)
+            self.assertTrue(a1.inputs[0] not in g)
+            self.assertTrue(a1.inputs[1] not in g)

-    def test_multiple_sources(self):
-        """Test that passing multiple sources works"""
-        class O(gof.op.Op):
-            def __init__(self, tst, grad_ok):
-                self.tst = tst
-                self.grad_ok = grad_ok
-            def make_node(self, *inputs):
-                outputs = [gof.generic(),gof.generic()]
-                return gof.Apply(self, inputs, outputs)
-            def grad(self, inputs, grads):
-                g0, g1 = grads
-                if not self.grad_ok:
-                    self.tst.fail()
-                else:
-                    if g1:
-                        return [g0, g0+g1]
+        def test_multiple_sources(self):
+            """Test that passing multiple sources works"""
+            class O(gof.op.Op):
+                def __init__(self, tst, grad_ok):
+                    self.tst = tst
+                    self.grad_ok = grad_ok
+                def make_node(self, *inputs):
+                    outputs = [gof.generic(),gof.generic()]
+                    return gof.Apply(self, inputs, outputs)
+                def grad(self, inputs, grads):
+                    g0, g1 = grads
+                    if not self.grad_ok:
+                        self.tst.fail()
                    else:
-                        return [g0, g0]
-        i = gof.generic()
-        j = gof.generic()
-        k = gof.generic()
-        a1 = O(self,True).make_node(i,j)
-        a2 = O(self,True).make_node(k,a1.outputs[1])
-        g = _grad_sources_inputs([(a2.outputs[0], 1), (a1.outputs[1],4),
-            (a1.outputs[0], 3), (a1.outputs[0], 3)], None)
-        self.assertTrue(g[a2.inputs[0]] == 1)
-        self.assertTrue(g[a2.inputs[1]] == 5)
-        self.assertTrue(g[a1.outputs[0]] == 6)
-        self.assertTrue(g[a1.outputs[1]] == 5)
-        self.assertTrue(g[a1.inputs[0]] == 6)
-        self.assertTrue(g[a1.inputs[1]] == 11)
+                        if g1:
+                            return [g0, g0+g1]
+                        else:
+                            return [g0, g0]
+            i = gof.generic()
+            j = gof.generic()
+            k = gof.generic()
+            a1 = O(self,True).make_node(i,j)
+            a2 = O(self,True).make_node(k,a1.outputs[1])
+            g = _grad_sources_inputs([(a2.outputs[0], 1), (a1.outputs[1],4),
+                (a1.outputs[0], 3), (a1.outputs[0], 3)], None)
+            self.assertTrue(g[a2.inputs[0]] == 1)
+            self.assertTrue(g[a2.inputs[1]] == 5)
+            self.assertTrue(g[a1.outputs[0]] == 6)
+            self.assertTrue(g[a1.outputs[1]] == 5)
+            self.assertTrue(g[a1.inputs[0]] == 6)
+            self.assertTrue(g[a1.inputs[1]] == 11)

 def test_unimplemented_grad_func():
    #tests that function compilation catches unimplemented grads in the graph
    a = theano.tensor.vector()
    b = theano.gradient.grad_not_implemented(theano.tensor.add, 0, a)
    try:
-        f = theano.function([a], b)
+        f = theano.function([a], b, on_unused_input = 'ignore')
        assert 0
        #Note: it's important that the NotImplementedGradOp is caught
        #at COMPILATION time, not execution time.
        #If the uncomputable variable is, for example, multiplied by 0,
        #it could be optimized out of the final graph.
-    except NotImplementedError:
+    except TypeError:
        pass

 def test_undefined_grad_func():
@@ -271,13 +276,13 @@ def test_undefined_grad_func():
    a = theano.tensor.vector()
    b = theano.gradient.grad_undefined(theano.tensor.add, 0, a)
    try:
-        f = theano.function([a],b)
+        f = theano.function([a],b, on_unused_input = 'ignore')
        assert 0
-        #Note: it's important that the GradUndefinedOp is cauhgt at
+        #Note: it's important that the undefined gradient is caught at
        #COMPILATION time, not execution time.
        #If the uncomputable variable is, for example, multiplied by0,
        #it could be optimized out of the final graph
-    except theano.gradient.GradUndefinedError:
+    except TypeError:
        pass

 def test_unimplemented_grad_grad():
@@ -296,7 +301,7 @@ def test_unimplemented_grad_grad():
    try:
        g = theano.gradient.grad(b,a)
        assert False
-    except NotImplementedError:
+    except TypeError:
        pass

 def test_undefined_grad_grad():
@@ -314,7 +319,7 @@ def test_undefined_grad_grad():
    try:
        g = theano.gradient.grad(Z.sum(),d)
        assert False
-    except theano.gradient.GradUndefinedError:
+    except TypeError:
        pass

 def test_grad_name():