提交 cb94334d authored 作者: Ian Goodfellow's avatar Ian Goodfellow

made unimplemented and undefined grads handled by NaNType

上级 122d7246
......@@ -21,6 +21,7 @@ from theano.gof import Variable
from theano.gof.python25 import all
import theano.gof.utils
tensor = None
from theano.gof.nan_type import NaNType
_msg_retType = 'op.grad(...) returned a non-list'
_msg_badlen = 'op.grad(...) returned wrong number of gradients'
......@@ -193,32 +194,6 @@ def grad_sources_inputs(sources, graph_inputs, warn_type=True):
gmap[r] = g_r
return gmap
class GradNotImplementedOp(gof.op.UncomputableOp):
    """An UncomputableOp standing in for a gradient that has not been
    implemented yet.

    Attempting to compile/evaluate a graph containing this op raises
    NotImplementedError (via the UncomputableOp machinery).
    """

    def __init__(self, op, x_pos, comment=""):
        """
        op: the theano op whose gradient is not implemented for one of
            its inputs
        x_pos: index (>= 0) into ``op``'s input list of the variable
            whose gradient is unimplemented. An op with several
            unimplemented gradients must still return a separate
            op of this kind for each such input.
        comment: optional text explaining why the gradient is not
            implemented.
        """
        assert isinstance(op, gof.Op)
        assert isinstance(x_pos, int)
        assert x_pos >= 0
        message = (
            "%s does not implement its gradient with respect to input %d. %s"
            % (str(type(op)), x_pos, comment))
        super(GradNotImplementedOp, self).__init__(NotImplementedError,
                                                   message)
def grad_not_implemented(op, x_pos, x, comment = ""):
"""
Return an un-computable symbolic variable of type `x.type`.
......@@ -233,38 +208,9 @@ def grad_not_implemented(op, x_pos, x, comment = ""):
gradient is not implemented.
"""
return GradNotImplementedOp(op, x_pos, comment)(x)
class GradUndefinedError(Exception):
    """Raised when something attempts to use a gradient that is
    mathematically undefined.
    """
class GradUndefinedOp(gof.op.UncomputableOp):
    """An UncomputableOp representing a gradient that is mathematically
    undefined.

    Attempting to compile/evaluate a graph containing this op raises
    GradUndefinedError (via the UncomputableOp machinery).
    """

    def __init__(self, op, x_pos, comment=""):
        """
        op: a theano op whose gradient is mathematically undefined for
            some input
        x_pos: an int >= 0 giving the index in ``op``'s input list of
            the variable for which the gradient is undefined (if op has
            undefined gradients for several inputs, it must still
            return a separate GradUndefinedOp for each)
        comment: an optional comment explaining why the gradient isn't
            defined.
        """
        assert isinstance(op, gof.Op)
        assert isinstance(x_pos, int)
        assert x_pos >= 0
        # Fix: the previous message was copy-pasted from
        # GradNotImplementedOp and wrongly said the gradient "is not
        # implemented"; this op means the gradient does not exist at all.
        super(GradUndefinedOp, self).__init__(
            GradUndefinedError,
            "The gradient of %s with respect to input %d is "
            "mathematically undefined. %s"
            % (str(type(op)), x_pos, comment))
return NaNType("This variable is NaN because the grad method for " + \
"input "+str(x_pos)+" ("+str(x)+") of the "+str(op)+" op is" + \
"not implemented.")()
def grad_undefined(op, x_pos, x, comment = ""):
"""
......@@ -280,7 +226,9 @@ def grad_undefined(op, x_pos, x, comment = ""):
gradient is not defined.
"""
return GradUndefinedOp(op, x_pos, comment)(x)
return NaNType("This variable is NaN because the gradient for " + \
"input "+str(x_pos)+" ("+str(x)+") of the "+str(op)+" op is" + \
"mathematically undefined.")()
......@@ -503,6 +451,11 @@ def grad(cost, wrt, g_cost = None, consider_constant = None, warn_type = 'ignore
if tensor is None:
from theano import tensor
if isinstance(cost.type, NaNType):
raise ValueError("Can't differentiate a NaN cost. cost is NaN because "+\
cost.type.why_nan)
if consider_constant is None:
consider_constant = []
else:
......@@ -593,6 +546,9 @@ def grad(cost, wrt, g_cost = None, consider_constant = None, warn_type = 'ignore
term_dict[node] = node.op.grad(node.inputs,
[access_grad_cache(var) for var in node.outputs])
for i in xrange(len(term_dict[node])):
if isinstance(term_dict[node][i].type,NaNType):
raise TypeError("tensor.grad encountered a NaN. "+\
term_dict[node][i].type.why_nan)
if term_dict[node][i] is None:
term_dict[node][i] = tensor.zeros_like(node.inputs[i])
return term_dict[node]
......
......@@ -2217,6 +2217,7 @@ class T_argmin_argmax(unittest.TestCase):
def test_grad_argmin(self):
data = rand(2, 3)
n = as_tensor_variable(data)
n.name = 'n'
#test grad of argmin
utt.verify_grad(lambda v: argmin(v, axis=-1), [data])
......@@ -2228,7 +2229,11 @@ class T_argmin_argmax(unittest.TestCase):
utt.verify_grad(lambda v: argmin(v.flatten()), [data])
try:
grad(argmin(n, axis=-1), n)
cost = argmin(n, axis=-1)
cost.name = None
g = grad(cost, n)
from theano.printing import min_informative_str
print min_informative_str(g)
raise Exception('Expected an error')
except TypeError:
pass
......
......@@ -6,17 +6,22 @@ import unittest
import theano
from theano import gof
from theano.gradient import grad_sources_inputs
#from theano.gradient import grad_sources_inputs
from theano import gradient
from theano.tensor.nnet.Conv3D import conv3D
from theano import config
def _grad_sources_inputs(*args):
#def _grad_sources_inputs(*args):
# warn_type was introduced after this code; it complains throughout for nothing.
return grad_sources_inputs(warn_type=False, *args)
# return grad_sources_inputs(warn_type=False, *args)
class test_grad_sources_inputs(unittest.TestCase):
if 0:
#most of these tests are no longer relevant now that grad_sources_inputs is gone
#also, some of our policies about what is allowed or not have changed
#nonetheless, it may be a good idea to resurrect some of these tests and write
#them in terms of tensor.grad directly
class test_grad_sources_inputs(unittest.TestCase):
def test_retNone1(self):
"""Test that it is not ok to return None from op.grad()"""
class retNone(gof.op.Op):
......@@ -257,13 +262,13 @@ def test_unimplemented_grad_func():
a = theano.tensor.vector()
b = theano.gradient.grad_not_implemented(theano.tensor.add, 0, a)
try:
f = theano.function([a], b)
f = theano.function([a], b, on_unused_input = 'ignore')
assert 0
#Note: it's important that the NotImplementedGradOp is caught
#at COMPILATION time, not execution time.
#If the uncomputable variable is, for example, multiplied by 0,
#it could be optimized out of the final graph.
except NotImplementedError:
except TypeError:
pass
def test_undefined_grad_func():
......@@ -271,13 +276,13 @@ def test_undefined_grad_func():
a = theano.tensor.vector()
b = theano.gradient.grad_undefined(theano.tensor.add, 0, a)
try:
f = theano.function([a],b)
f = theano.function([a],b, on_unused_input = 'ignore')
assert 0
#Note: it's important that the GradUndefinedOp is cauhgt at
#Note: it's important that the GradUndefinedOp is caught at
#COMPILATION time, not execution time.
#If the uncomputable variable is, for example, multiplied by 0,
#it could be optimized out of the final graph
except theano.gradient.GradUndefinedError:
except TypeError:
pass
def test_unimplemented_grad_grad():
......@@ -296,7 +301,7 @@ def test_unimplemented_grad_grad():
try:
g = theano.gradient.grad(b,a)
assert False
except NotImplementedError:
except TypeError:
pass
def test_undefined_grad_grad():
......@@ -314,7 +319,7 @@ def test_undefined_grad_grad():
try:
g = theano.gradient.grad(Z.sum(),d)
assert False
except theano.gradient.GradUndefinedError:
except TypeError:
pass
def test_grad_name():
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论