Commit 83d77e84 authored by lamblin

Merge pull request #1326 from delallea/notimplemented_elemwise_grad

Fixed crash for unimplemented elemwise gradient
...@@ -127,7 +127,8 @@ class DimShuffle(Op): ...@@ -127,7 +127,8 @@ class DimShuffle(Op):
for i, j in enumerate(new_order): for i, j in enumerate(new_order):
if j != 'x': if j != 'x':
# There is a bug in numpy that results in isinstance(x, int) returning False for numpy integers. # There is a bug in numpy that results in isinstance(x, int)
# returning False for numpy integers.
# See <http://projects.scipy.org/numpy/ticket/2235>. # See <http://projects.scipy.org/numpy/ticket/2235>.
if not isinstance(j, (int, numpy.integer)): if not isinstance(j, (int, numpy.integer)):
raise TypeError( raise TypeError(
...@@ -135,7 +136,7 @@ class DimShuffle(Op): ...@@ -135,7 +136,7 @@ class DimShuffle(Op):
if j >= len(input_broadcastable): if j >= len(input_broadcastable):
raise ValueError(("new_order[%d] is %d, but the input " raise ValueError(("new_order[%d] is %d, but the input "
"only has %d axes.") % "only has %d axes.") %
(i,j,len(input_broadcastable))) (i, j, len(input_broadcastable)))
if j in new_order[(i + 1):]: if j in new_order[(i + 1):]:
raise ValueError(( raise ValueError((
"The same input dimension may not appear twice in the " "The same input dimension may not appear twice in the "
...@@ -659,11 +660,9 @@ class Elemwise(Op): ...@@ -659,11 +660,9 @@ class Elemwise(Op):
def grad(self, inputs, ograds): def grad(self, inputs, ograds):
outs = self(*inputs) outs = self(*inputs)
if not isinstance(outs, (list,tuple)): if not isinstance(outs, (list, tuple)):
outs = [ outs ] outs = [outs]
#compute grad with respect to broadcasted input #compute grad with respect to broadcasted input
rval = self._bgrad(inputs, ograds) rval = self._bgrad(inputs, ograds)
...@@ -694,7 +693,6 @@ class Elemwise(Op): ...@@ -694,7 +693,6 @@ class Elemwise(Op):
new_rval.append(elem) new_rval.append(elem)
return new_rval return new_rval
#sum out the broadcasted dimensions #sum out the broadcasted dimensions
for i, ipt in enumerate(inputs): for i, ipt in enumerate(inputs):
if rval[i] is None: if rval[i] is None:
...@@ -758,7 +756,7 @@ class Elemwise(Op): ...@@ -758,7 +756,7 @@ class Elemwise(Op):
def transform(r): def transform(r):
# From a graph of ScalarOps, make a graph of Broadcast ops. # From a graph of ScalarOps, make a graph of Broadcast ops.
if isinstance(r.type, DisconnectedType): if isinstance(r.type, (NullType, DisconnectedType)):
return r return r
if r in scalar_inputs: if r in scalar_inputs:
return inputs[scalar_inputs.index(r)] return inputs[scalar_inputs.index(r)]
...@@ -1183,7 +1181,8 @@ class CAReduce(Op): ...@@ -1183,7 +1181,8 @@ class CAReduce(Op):
if axis is None: if axis is None:
self.axis = axis self.axis = axis
# There is a bug in numpy that results in isinstance(x, int) returning False for numpy integers. # There is a bug in numpy that results in isinstance(x, int) returning
# False for numpy integers.
# See <http://projects.scipy.org/numpy/ticket/2235>. # See <http://projects.scipy.org/numpy/ticket/2235>.
elif isinstance(axis, (int, numpy.integer)): elif isinstance(axis, (int, numpy.integer)):
self.axis = (axis,) self.axis = (axis,)
......
...@@ -98,6 +98,7 @@ class test_DimShuffle(unittest_tools.InferShapeTester): ...@@ -98,6 +98,7 @@ class test_DimShuffle(unittest_tools.InferShapeTester):
y = x.dimshuffle(('x',) * (numpy.MAXDIMS + 1)) y = x.dimshuffle(('x',) * (numpy.MAXDIMS + 1))
self.assertRaises(ValueError, y.eval, {x: 0}) self.assertRaises(ValueError, y.eval, {x: 0})
class test_Broadcast(unittest.TestCase): class test_Broadcast(unittest.TestCase):
def setUp(self): def setUp(self):
unittest_tools.seed_rng() unittest_tools.seed_rng()
...@@ -749,7 +750,8 @@ class T_mean_dtype(unittest.TestCase): ...@@ -749,7 +750,8 @@ class T_mean_dtype(unittest.TestCase):
x = tensor.matrix(dtype=input_dtype) x = tensor.matrix(dtype=input_dtype)
for sum_dtype in imap(str, theano.scalar.all_types): for sum_dtype in imap(str, theano.scalar.all_types):
axis = axes[idx % len(axes)] axis = axes[idx % len(axes)]
# If the inner sum cannot be created, it will raise a TypeError. # If the inner sum cannot be created, it will raise a
# TypeError.
try: try:
mean_var = x.mean(dtype=sum_dtype, axis=axis) mean_var = x.mean(dtype=sum_dtype, axis=axis)
except TypeError: except TypeError:
...@@ -757,10 +759,11 @@ class T_mean_dtype(unittest.TestCase): ...@@ -757,10 +759,11 @@ class T_mean_dtype(unittest.TestCase):
else: else:
# Executed if no TypeError was raised # Executed if no TypeError was raised
if sum_dtype in tensor.discrete_dtypes: if sum_dtype in tensor.discrete_dtypes:
assert mean_var.dtype == 'float64', (mean_var.dtype, sum_dtype) assert mean_var.dtype == 'float64', (
(mean_var.dtype, sum_dtype))
else: else:
assert mean_var.dtype == sum_dtype, (mean_var.dtype, sum_dtype) assert mean_var.dtype == sum_dtype, (
(mean_var.dtype, sum_dtype))
# Check that we can take the gradient, when implemented # Check that we can take the gradient, when implemented
if "complex" in mean_var.dtype: if "complex" in mean_var.dtype:
continue continue
...@@ -920,7 +923,7 @@ class T_prod_without_zeros_dtype(unittest.TestCase): ...@@ -920,7 +923,7 @@ class T_prod_without_zeros_dtype(unittest.TestCase):
def test_prod_without_zeros_custom_dtype(self): def test_prod_without_zeros_custom_dtype(self):
""" """
Test the ability to provide your own output dtype for a ProdWithoutZeros(). Test ability to provide your own output dtype for a ProdWithoutZeros().
""" """
# We try multiple axis combinations even though axis should not matter. # We try multiple axis combinations even though axis should not matter.
axes = [None, 0, 1, [0], [1], [0, 1]] axes = [None, 0, 1, [0], [1], [0, 1]]
...@@ -936,7 +939,7 @@ class T_prod_without_zeros_dtype(unittest.TestCase): ...@@ -936,7 +939,7 @@ class T_prod_without_zeros_dtype(unittest.TestCase):
def test_prod_without_zeros_custom_acc_dtype(self): def test_prod_without_zeros_custom_acc_dtype(self):
""" """
Test the ability to provide your own acc_dtype for a ProdWithoutZeros(). Test ability to provide your own acc_dtype for a ProdWithoutZeros().
""" """
# We try multiple axis combinations even though axis should not matter. # We try multiple axis combinations even though axis should not matter.
axes = [None, 0, 1, [0], [1], [0, 1]] axes = [None, 0, 1, [0], [1], [0, 1]]
...@@ -1010,7 +1013,8 @@ def test_gt_grad(): ...@@ -1010,7 +1013,8 @@ def test_gt_grad():
T = theano.tensor T = theano.tensor
input_ = T.vector(dtype=floatX) input_ = T.vector(dtype=floatX)
random_values = numpy.random.RandomState(1234).uniform(low=-1, high=1, size=(2,2)) random_values = numpy.random.RandomState(1234).uniform(
low=-1, high=1, size=(2, 2))
W_values = numpy.asarray(random_values, dtype=floatX) W_values = numpy.asarray(random_values, dtype=floatX)
W = theano.shared(value=W_values, name='weights') W = theano.shared(value=W_values, name='weights')
correct_score = T.dot(input_, W) correct_score = T.dot(input_, W)
...@@ -1032,15 +1036,17 @@ if __name__ == '__main__': ...@@ -1032,15 +1036,17 @@ if __name__ == '__main__':
unittest.TextTestRunner().run(suite) unittest.TextTestRunner().run(suite)
""" """
def test_clip_grad():
    """Check the symbolic gradient of clip via finite differences."""
    def clip_fn(value, lower, upper):
        return theano.tensor.clip(value, lower, upper)

    # The sample vector deliberately contains one entry below the lower
    # bound, one strictly inside [lower, upper], and one above the upper
    # bound, so all three branches of clip are exercised.
    sample = numpy.asarray([-1., 0.5, 2.])
    unittest_tools.verify_grad(clip_fn, [sample, 0., 1.])
def test_clip_grad_int(): def test_clip_grad_int():
...@@ -1048,10 +1054,40 @@ def test_clip_grad_int(): ...@@ -1048,10 +1054,40 @@ def test_clip_grad_int():
x = tensor.iscalar() x = tensor.iscalar()
y = tensor.iscalar() y = tensor.iscalar()
z = tensor.iscalar() z = tensor.iscalar()
c = tensor.clip(x,y,z) c = tensor.clip(x, y, z)
tensor.grad(c, [x, y, z]) tensor.grad(c, [x, y, z])
def test_not_implemented_elemwise_grad():
    """
    Regression test for unimplemented gradient in an Elemwise Op.

    Builds a scalar Op whose gradient w.r.t. its first input is declared
    not implemented, wraps it in an Elemwise, and checks that:
      * taking the gradient w.r.t. the other (implemented) input works
        (this used to crash), and
      * requesting the not-implemented gradient raises
        theano.gradient.NullTypeGradError instead of crashing.
    """
    class TestOp(scalar.ScalarOp):
        def __init__(self):
            self.output_types_preference = scalar.upgrade_to_float

        def impl(self, n, x):
            return x * n

        def grad(self, inputs, output_grads):
            # NOTE: tuple parameters in the signature
            # (`def grad(self, (n, x), (gz,))`) are Python-2-only syntax
            # (removed by PEP 3113); unpack explicitly instead. Theano
            # calls grad positionally, so the interface is unchanged.
            n, x = inputs
            gz, = output_grads
            dy_dx = n
            return [theano.gradient.grad_not_implemented(self, 0, n),
                    gz * dy_dx]

    test_op = tensor.Elemwise(TestOp())
    x = tensor.scalar()
    # The call to `grad` used to crash.
    tensor.grad(test_op(2, x), x)
    # Verify that trying to use the not implemented gradient fails.
    try:
        tensor.grad(test_op(x, 2), x)
        assert False
    except theano.gradient.NullTypeGradError:
        pass
if __name__ == '__main__': if __name__ == '__main__':
t = TestElemwise('setUp') t = TestElemwise('setUp')
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论