Merge pull request #1537 from lamblin/fix_grad_none

Do not return None in scalar op's grad, add tests

Merge pull request #1537 from lamblin/fix_grad_none
14ab4945 · Frédéric Bastien · 6937f122 · 8e5725ea · 14ab4945 · 14ab4945
--- a/theano/scalar/basic.py
+++ b/theano/scalar/basic.py
@@ -579,9 +579,11 @@ class _scalar_py_operators:
    def __rpow__(self, other):
        return pow(other, self)
-    def zeros_like(self):
+    def zeros_like(self, dtype=None):
        # The second is needed for Elemwise ops to work right
-        return second(self, ScalarConstant(Scalar(str(self.type.dtype)), 0))
+        if dtype is None:
+            dtype = str(self.type.dtype)
+        return second(self, ScalarConstant(Scalar(dtype), 0))
    def astype(self, dtype):
        return cast(self, dtype)
@@ -1244,8 +1246,10 @@ class Maximum(BinaryScalarOp):
                '((%(x)s)>=(%(y)s)? (%(x)s): nan("")));' % locals())
    def grad(self, (x, y), (gz, )):
-        assert gz.type not in complex_types
+        if gz.type in complex_types:
-        # max is not defined for complex_types
+            # max is currently defined for complex_types,
+            # but the gradient for complex is not.
+            raise NotImplementedError()
        output = self(x, y)
@@ -1275,8 +1279,10 @@ class Minimum(BinaryScalarOp):
                '((%(x)s)<=(%(y)s)? (%(x)s): nan("")));' % locals())
    def grad(self, (x, y), (gz, )):
-        assert gz.type not in complex_types
+        if gz.type in complex_types:
-        # max is not defined for complex_types
+            # min is currently defined for complex_types,
+            # but the gradient for complex is not.
+            raise NotImplementedError()
        output = minimum(x, y)
        if output.type in discrete_types:
@@ -1562,7 +1568,8 @@ class IntDiv(BinaryScalarOp):
        return (2,)
    def grad(self, inputs, g_output):
-        return [None] * len(inputs)
+        return [inp.zeros_like(dtype=theano.config.floatX)
+                for inp in inputs]
 int_div = IntDiv(upcast_out, name='int_div')
@@ -1648,7 +1655,8 @@ class Mod(BinaryScalarOp):
            """) % locals()
    def grad(self, (x, y), (gz, )):
-        return None, None
+        return [x.zeros_like(dtype=theano.config.floatX),
+                y.zeros_like(dtype=theano.config.floatX)]
 mod = Mod(upcast_out, name='mod')
@@ -1886,10 +1894,13 @@ class Abs(UnaryScalarOp):
        return numpy.abs(x)
    def grad(self, (x, ), (gz, )):
-        if x.type in float_types + complex_types:
+        if self(x).type in discrete_types:
-            return gz * x / abs(x),  # formula works for complex and real
+            if x.type in discrete_types:
-        else:
+                return [x.zeros_like(dtype=theano.config.floatX)]
-            return None,
+            else:
+                return [x.zeros_like()]
+        return gz * x / abs(x),  # formula works for complex and real
    def c_code(self, node, name, (x, ), (z, ), sub):
        type = node.inputs[0].type
@@ -2090,10 +2101,13 @@ class Neg(UnaryScalarOp):
        return -x
    def grad(self, (x,), (gz,)):
-        if x.type in continuous_types:
+        if self(x).type in discrete_types:
-            return -gz,
+            if x.type in discrete_types:
-        else:
+                return [x.zeros_like(dtype=theano.config.floatX)]
-            return None,
+            else:
+                return [x.zeros_like()]
+        return -gz,
    def c_code(self, node, name, (x,), (z,), sub):
        return "%(z)s = -%(x)s;" % locals()
@@ -2108,10 +2122,13 @@ class Inv(UnaryScalarOp):
    def grad(self, (x,), (gz,)):
        if x.type in complex_types:
            raise NotImplementedError()
-        if x.type in float_types:
+        if self(x).type in discrete_types:
-            return -gz / (x * x),
+            if x.type in discrete_types:
-        else:
+                return [x.zeros_like(dtype=theano.config.floatX)]
-            return None,
+            else:
+                return [x.zeros_like()]
+        return -gz / (x * x),
    def c_code(self, node, name, (x,), (z,), sub):
        return "%(z)s = 1.0 / %(x)s;" % locals()
@@ -2129,10 +2146,13 @@ class Log(UnaryScalarOp):
    def grad(self, (x,), (gz,)):
        if x.type in complex_types:
            raise NotImplementedError()
-        if x.type in float_types:
+        if self(x).type in discrete_types:
-            return gz / x,
+            if x.type in discrete_types:
-        else:
+                return [x.zeros_like(dtype=theano.config.floatX)]
-            return None,
+            else:
+                return [x.zeros_like()]
+        return gz / x,
    def c_code(self, node, name, (x,), (z,), sub):
        #todo: the version using log2 seems to be very slightly faster
@@ -2155,10 +2175,13 @@ class Log2(UnaryScalarOp):
    def grad(self, (x,), (gz,)):
        if x.type in complex_types:
            raise NotImplementedError()
-        if x.type in float_types:
+        if self(x).type in discrete_types:
-            return gz / (x * math.log(2.0)),
+            if x.type in discrete_types:
-        else:
+                return [x.zeros_like(dtype=theano.config.floatX)]
-            return None,
+            else:
+                return [x.zeros_like()]
+        return gz / (x * math.log(2.0)),
    def c_code(self, node, name, (x, ), (z, ), sub):
        if node.inputs[0].type in complex_types:
@@ -2178,10 +2201,13 @@ class Log10(UnaryScalarOp):
    def grad(self, (x,), (gz,)):
        if x.type in complex_types:
            raise NotImplementedError()
-        if x.type in float_types:
+        if self(x).type in discrete_types:
-            return gz / (x * numpy.log(10.0)),
+            if x.type in discrete_types:
-        else:
+                return [x.zeros_like(dtype=theano.config.floatX)]
-            return None
+            else:
+                return [x.zeros_like()]
+        return gz / (x * numpy.log(10.0)),
    def c_code(self, node, name, (x, ), (z, ), sub):
        if node.inputs[0].type in complex_types:
@@ -2198,9 +2224,13 @@ class Log1p(UnaryScalarOp):
    def grad(self, (x,), (gz,)):
        if gz.type in complex_types:
            raise NotImplementedError()
-        if gz.type in float_types:
+        if self(x).type in discrete_types:
-            return [gz / (1 + x)]
+            if x.type in discrete_types:
-        return [None]
+                return [x.zeros_like(dtype=theano.config.floatX)]
+            else:
+                return [x.zeros_like()]
+        return [gz / (1 + x)]
    def c_code(self, node, name, (x, ), (z, ), sub):
        if node.inputs[0].type in complex_types:
@@ -2219,10 +2249,13 @@ class Exp(UnaryScalarOp):
    def grad(self, (x, ), (gz, )):
        if x.type in complex_types:
            raise NotImplementedError()
-        elif x.type in float_types:
+        if self(x).type in discrete_types:
-            return gz * exp(x),
+            if x.type in discrete_types:
-        else:
+                return [x.zeros_like(dtype=theano.config.floatX)]
-            return None,
+            else:
+                return [x.zeros_like()]
+        return gz * exp(x),
    def c_code(self, node, name, (x, ), (z, ), sub):
        if node.inputs[0].type in complex_types:
@@ -2238,10 +2271,13 @@ class Exp2(UnaryScalarOp):
    def grad(self, (x, ), (gz, )):
        if x.type in complex_types:
            raise NotImplementedError()
-        elif x.type in float_types:
+        if self(x).type in discrete_types:
-            return gz * exp2(x) * log(numpy.cast[x.type](2)),
+            if x.type in discrete_types:
-        else:
+                return [x.zeros_like(dtype=theano.config.floatX)]
-            return None,
+            else:
+                return [x.zeros_like()]
+        return gz * exp2(x) * log(numpy.cast[x.type](2)),
    def c_code(self, node, name, (x, ), (z, ), sub):
        if node.inputs[0].type in complex_types:
@@ -2257,10 +2293,13 @@ class Expm1(UnaryScalarOp):
    def grad(self, (x, ), (gz, )):
        if x.type in complex_types:
            raise NotImplementedError()
-        elif x.type in float_types:
+        if self(x).type in discrete_types:
-            return gz * exp(x),
+            if x.type in discrete_types:
-        else:
+                return [x.zeros_like(dtype=theano.config.floatX)]
-            return None,
+            else:
+                return [x.zeros_like()]
+        return gz * exp(x),
    def c_code(self, node, name, (x, ), (z, ), sub):
        if node.inputs[0].type in complex_types:
@@ -2276,10 +2315,13 @@ class Sqr(UnaryScalarOp):
    def grad(self, (x, ), (gz, )):
        if gz.type in complex_types:
            raise NotImplementedError()
-        if x.type in float_types:
+        if self(x).type in discrete_types:
-            return gz * x * 2,
+            if x.type in discrete_types:
-        else:
+                return [x.zeros_like(dtype=theano.config.floatX)]
-            return None,
+            else:
+                return [x.zeros_like()]
+        return gz * x * 2,
    def c_code(self, node, name, (x, ), (z, ), sub):
        return "%(z)s = %(x)s * %(x)s;" % locals()
@@ -2293,10 +2335,13 @@ class Sqrt(UnaryScalarOp):
    def grad(self, (x,), (gz,)):
        if gz.type in complex_types:
            raise NotImplementedError()
-        if x.type in float_types:
+        if self(x).type in discrete_types:
-            return (gz * 0.5) / sqrt(x),
+            if x.type in discrete_types:
-        else:
+                return [x.zeros_like(dtype=theano.config.floatX)]
-            return None,
+            else:
+                return [x.zeros_like()]
+        return (gz * 0.5) / sqrt(x),
    def c_code(self, node, name, (x,), (z,), sub):
        if node.inputs[0].type in complex_types:
@@ -2312,10 +2357,13 @@ class Deg2Rad(UnaryScalarOp):
    def grad(self, (x,), (gz,)):
        if gz.type in complex_types:
            raise NotImplementedError()
-        if x.type in float_types:
+        if self(x).type in discrete_types:
-            return gz * numpy.asarray(numpy.pi / 180, gz.type),
+            if x.type in discrete_types:
-        else:
+                return [x.zeros_like(dtype=theano.config.floatX)]
-            return None,
+            else:
+                return [x.zeros_like()]
+        return gz * numpy.asarray(numpy.pi / 180, gz.type),
    def c_code(self, node, name, (x,), (z,), sub):
        if node.inputs[0].type in complex_types:
@@ -2331,10 +2379,13 @@ class Rad2Deg(UnaryScalarOp):
    def grad(self, (x,), (gz,)):
        if gz.type in complex_types:
            raise NotImplementedError()
-        if x.type in float_types:
+        if self(x).type in discrete_types:
-            return gz * numpy.asarray(180. / numpy.pi, gz.type),
+            if x.type in discrete_types:
-        else:
+                return [x.zeros_like(dtype=theano.config.floatX)]
-            return None,
+            else:
+                return [x.zeros_like()]
+        return gz * numpy.asarray(180. / numpy.pi, gz.type),
    def c_code(self, node, name, (x,), (z,), sub):
        if node.inputs[0].type in complex_types:
@@ -2353,10 +2404,13 @@ class Cos(UnaryScalarOp):
    def grad(self, (x, ), (gz, )):
        if gz.type in complex_types:
            raise NotImplementedError()
-        if x.type in float_types:
+        if self(x).type in discrete_types:
-            return -gz * sin(x),
+            if x.type in discrete_types:
-        else:
+                return [x.zeros_like(dtype=theano.config.floatX)]
-            return None,
+            else:
+                return [x.zeros_like()]
+        return -gz * sin(x),
    def c_code(self, node, name, (x, ), (z, ), sub):
        if node.inputs[0].type in complex_types:
@@ -2372,10 +2426,13 @@ class ArcCos(UnaryScalarOp):
    def grad(self, (x,), (gz,)):
        if gz.type in complex_types:
            raise NotImplementedError()
-        if x.type in float_types:
+        if self(x).type in discrete_types:
-            return - gz / sqrt(numpy.cast[x.type](1) - sqr(x)),
+            if x.type in discrete_types:
-        else:
+                return [x.zeros_like(dtype=theano.config.floatX)]
-            return None,
+            else:
+                return [x.zeros_like()]
+        return - gz / sqrt(numpy.cast[x.type](1) - sqr(x)),
    def c_code(self, node, name, (x,), (z,), sub):
        if node.inputs[0].type in complex_types:
@@ -2394,10 +2451,13 @@ class Sin(UnaryScalarOp):
    def grad(self, (x, ), (gz, )):
        if x.type in complex_types:
            raise NotImplementedError()
-        if x.type in float_types:
+        if self(x).type in discrete_types:
-            return gz * cos(x),
+            if x.type in discrete_types:
-        else:
+                return [x.zeros_like(dtype=theano.config.floatX)]
-            return None,
+            else:
+                return [x.zeros_like()]
+        return gz * cos(x),
    def c_code(self, node, name, (x, ), (z, ), sub):
        if node.inputs[0].type in complex_types:
@@ -2413,10 +2473,13 @@ class ArcSin(UnaryScalarOp):
    def grad(self, (x,), (gz,)):
        if gz.type in complex_types:
            raise NotImplementedError()
-        if x.type in float_types:
+        if self(x).type in discrete_types:
-            return gz / sqrt(numpy.cast[x.type](1) - sqr(x)),
+            if x.type in discrete_types:
-        else:
+                return [x.zeros_like(dtype=theano.config.floatX)]
-            return None,
+            else:
+                return [x.zeros_like()]
+        return gz / sqrt(numpy.cast[x.type](1) - sqr(x)),
    def c_code(self, node, name, (x,), (z,), sub):
        if node.inputs[0].type in complex_types:
@@ -2432,10 +2495,13 @@ class Tan(UnaryScalarOp):
    def grad(self, (x,), (gz,)):
        if x.type in complex_types:
            raise NotImplementedError()
-        if x.type in float_types:
+        if self(x).type in discrete_types:
-            return gz / sqr(cos(x)),
+            if x.type in discrete_types:
-        else:
+                return [x.zeros_like(dtype=theano.config.floatX)]
-            return None,
+            else:
+                return [x.zeros_like()]
+        return gz / sqr(cos(x)),
    def c_code(self, node, name, (x, ), (z, ), sub):
        if node.inputs[0].type in complex_types:
@@ -2451,10 +2517,13 @@ class ArcTan(UnaryScalarOp):
    def grad(self, (x,), (gz,)):
        if gz.type in complex_types:
            raise NotImplementedError()
-        if x.type in float_types:
+        if self(x).type in discrete_types:
-            return gz / (numpy.cast[x.type](1) + sqr(x)),
+            if x.type in discrete_types:
-        else:
+                return [x.zeros_like(dtype=theano.config.floatX)]
-            return None,
+            else:
+                return [x.zeros_like()]
+        return gz / (numpy.cast[x.type](1) + sqr(x)),
    def c_code(self, node, name, (x,), (z,), sub):
        if node.inputs[0].type in complex_types:
@@ -2470,11 +2539,22 @@ class ArcTan2(BinaryScalarOp):
    def grad(self, (y, x), (gz,)):
        if gz.type in complex_types:
            raise NotImplementedError()
-        if x.type in float_types and y.type in float_types:
+        else:
+            if self(x, y).type in discrete_types:
+                if x.type in discrete_types:
+                    gx = x.zeros_like(dtype=theano.config.floatX)
+                else:
+                    gx = x.zeros_like()
+                if y.type in discrete_types:
+                    gy = y.zeros_like(dtype=theano.config.floatX)
+                else:
+                    gy = y.zeros_like()
+                return [gx, gy]
+            # If the output is float, the gradient should flow,
+            # even if the inputs are ints
            return [gz * x / (sqr(x) + sqr(y)),
                    gz * neg(y) / (sqr(x) + sqr(y))]
-        else:
-            return None,
    def c_code(self, node, name, (y, x), (z,), sub):
        if (node.inputs[0].type in complex_types or
@@ -2494,10 +2574,13 @@ class Cosh(UnaryScalarOp):
    def grad(self, (x, ), (gz, )):
        if x.type in complex_types:
            raise NotImplementedError()
-        if x.type in float_types:
+        if self(x).type in discrete_types:
-            return gz * sinh(x),
+            if x.type in discrete_types:
-        else:
+                return [x.zeros_like(dtype=theano.config.floatX)]
-            return None,
+            else:
+                return [x.zeros_like()]
+        return gz * sinh(x),
    def c_code(self, node, name, (x, ), (z, ), sub):
        if node.inputs[0].type in complex_types:
@@ -2513,10 +2596,13 @@ class ArcCosh(UnaryScalarOp):
    def grad(self, (x, ), (gz, )):
        if x.type in complex_types:
            raise NotImplementedError()
-        if x.type in float_types:
+        if self(x).type in discrete_types:
-            return gz / sqrt(sqr(x) - numpy.cast[x.type](1)),
+            if x.type in discrete_types:
-        else:
+                return [x.zeros_like(dtype=theano.config.floatX)]
-            return None,
+            else:
+                return [x.zeros_like()]
+        return gz / sqrt(sqr(x) - numpy.cast[x.type](1)),
    def c_code(self, node, name, (x, ), (z, ), sub):
        if node.inputs[0].type in complex_types:
@@ -2535,10 +2621,13 @@ class Sinh(UnaryScalarOp):
    def grad(self, (x, ), (gz, )):
        if x.type in complex_types:
            raise NotImplementedError()
-        if x.type in float_types:
+        if self(x).type in discrete_types:
-            return gz * cosh(x),
+            if x.type in discrete_types:
-        else:
+                return [x.zeros_like(dtype=theano.config.floatX)]
-            return None,
+            else:
+                return [x.zeros_like()]
+        return gz * cosh(x),
    def c_code(self, node, name, (x, ), (z, ), sub):
        if node.inputs[0].type in complex_types:
@@ -2554,10 +2643,13 @@ class ArcSinh(UnaryScalarOp):
    def grad(self, (x, ), (gz, )):
        if x.type in complex_types:
            raise NotImplementedError()
-        if x.type in float_types:
+        if self(x).type in discrete_types:
-            return gz / sqrt(sqr(x) + numpy.cast[x.type](1)),
+            if x.type in discrete_types:
-        else:
+                return [x.zeros_like(dtype=theano.config.floatX)]
-            return None,
+            else:
+                return [x.zeros_like()]
+        return gz / sqrt(sqr(x) + numpy.cast[x.type](1)),
    def c_code(self, node, name, (x, ), (z, ), sub):
        if node.inputs[0].type in complex_types:
@@ -2577,10 +2669,13 @@ class Tanh(UnaryScalarOp):
    def grad(self, (x, ), (gz, )):
        if x.type in complex_types:
            raise NotImplementedError()
-        if x.type in float_types:
+        if self(x).type in discrete_types:
-            return gz * (1 - sqr(tanh(x))),
+            if x.type in discrete_types:
-        else:
+                return [x.zeros_like(dtype=theano.config.floatX)]
-            return None,
+            else:
+                return [x.zeros_like()]
+        return gz * (1 - sqr(tanh(x))),
    def c_code(self, node, name, (x, ), (z, ), sub):
        if node.inputs[0].type in complex_types:
@@ -2596,10 +2691,13 @@ class ArcTanh(UnaryScalarOp):
    def grad(self, (x, ), (gz, )):
        if x.type in complex_types:
            raise NotImplementedError()
-        if x.type in float_types:
+        if self(x).type in discrete_types:
-            return gz / (numpy.cast[x.type](1) - sqr(x)),
+            if x.type in discrete_types:
-        else:
+                return [x.zeros_like(dtype=theano.config.floatX)]
-            return None,
+            else:
+                return [x.zeros_like()]
+        return gz / (numpy.cast[x.type](1) - sqr(x)),
    def c_code(self, node, name, (x, ), (z, ), sub):
        if node.inputs[0].type in complex_types:

--- a/theano/scalar/basic_scipy.py
+++ b/theano/scalar/basic_scipy.py
@@ -2,11 +2,12 @@
 #as scipy is not always available, we treat them separately
 import numpy
+import theano
 from theano.scalar.basic import (UnaryScalarOp, BinaryScalarOp,
                                 exp, upgrade_to_float,
                                 float_types)
 from theano.scalar.basic import (upgrade_to_float_no_complex,
-                                 complex_types,
+                                 complex_types, discrete_types,
                                 upcast)
 imported_scipy_special = False
@@ -32,12 +33,15 @@ class Erf(UnaryScalarOp):
        gz, = grads
        if x.type in complex_types:
            raise NotImplementedError()
-        elif x.type in float_types:
+        if self(x).type in discrete_types:
-            cst = numpy.asarray(2. / numpy.sqrt(numpy.pi),
+            if x.type in discrete_types:
-                                dtype=upcast(x.type.dtype, gz.type.dtype))
+                return [x.zeros_like(dtype=theano.config.floatX)]
-            return gz * cst * exp(-x * x),
+            else:
-        else:
+                return [x.zeros_like()]
-            return None,
+        cst = numpy.asarray(2. / numpy.sqrt(numpy.pi),
+                            dtype=upcast(x.type.dtype, gz.type.dtype))
+        return gz * cst * exp(-x * x),
    def c_code(self, node, name, inp, out, sub):
        x, = inp
@@ -60,12 +64,15 @@ class Erfc(UnaryScalarOp):
        gz, = grads
        if x.type in complex_types:
            raise NotImplementedError()
-        elif x.type in float_types:
+        if self(x).type in discrete_types:
-            cst = numpy.asarray(2. / numpy.sqrt(numpy.pi),
+            if x.type in discrete_types:
-                                dtype=upcast(x.type.dtype, gz.type.dtype))
+                return [x.zeros_like(dtype=theano.config.floatX)]
-            return - gz * cst * exp(-x * x),
+            else:
-        else:
+                return [x.zeros_like()]
-            return None,
+        cst = numpy.asarray(2. / numpy.sqrt(numpy.pi),
+                            dtype=upcast(x.type.dtype, gz.type.dtype))
+        return - gz * cst * exp(-x * x),
    def c_code(self, node, name, inp, out, sub):
        x, = inp
@@ -99,12 +106,15 @@ class Erfinv(UnaryScalarOp):
        gz, = grads
        if x.type in complex_types:
            raise NotImplementedError()
-        elif x.type in float_types:
+        if self(x).type in discrete_types:
-            cst = numpy.asarray(numpy.sqrt(numpy.pi) / 2.,
+            if x.type in discrete_types:
-                                dtype=upcast(x.type.dtype, gz.type.dtype))
+                return [x.zeros_like(dtype=theano.config.floatX)]
-            return gz * cst * exp(erfinv(x) ** 2),
+            else:
-        else:
+                return [x.zeros_like()]
-            return None,
+        cst = numpy.asarray(numpy.sqrt(numpy.pi) / 2.,
+                            dtype=upcast(x.type.dtype, gz.type.dtype))
+        return gz * cst * exp(erfinv(x) ** 2),
    # TODO: erfinv() is not provided by the C standard library
    #def c_code(self, node, name, inp, out, sub):
@@ -129,12 +139,15 @@ class Erfcinv(UnaryScalarOp):
        gz, = grads
        if x.type in complex_types:
            raise NotImplementedError()
-        elif x.type in float_types:
+        if self(x).type in discrete_types:
-            cst = numpy.asarray(numpy.sqrt(numpy.pi) / 2.,
+            if x.type in discrete_types:
-                                dtype=upcast(x.type.dtype, gz.type.dtype))
+                return [x.zeros_like(dtype=theano.config.floatX)]
-            return - gz * cst * exp(erfcinv(x) ** 2),
+            else:
-        else:
+                return [x.zeros_like()]
-            return None,
+        cst = numpy.asarray(numpy.sqrt(numpy.pi) / 2.,
+                            dtype=upcast(x.type.dtype, gz.type.dtype))
+        return - gz * cst * exp(erfcinv(x) ** 2),
    # TODO: erfcinv() is not provided by the C standard library
    #def c_code(self, node, name, inp, out, sub):
@@ -159,6 +172,14 @@ class Gamma(UnaryScalarOp):
            super(Gamma, self).impl(x)
    def grad(self, (x, ), (gz, )):
+        if x.type in complex_types:
+            raise NotImplementedError()
+        if self(x).type in discrete_types:
+            if x.type in discrete_types:
+                return [x.zeros_like(dtype=theano.config.floatX)]
+            else:
+                return [x.zeros_like()]
        return gz * gamma(x) * psi(x),
    def c_code(self, node, name, (x, ), (z, ), sub):
@@ -190,6 +211,14 @@ class GammaLn(UnaryScalarOp):
    def grad(self, inp, grads):
        x, = inp
        gz, = grads
+        if x.type in complex_types:
+            raise NotImplementedError()
+        if self(x).type in discrete_types:
+            if x.type in discrete_types:
+                return [x.zeros_like(dtype=theano.config.floatX)]
+            else:
+                return [x.zeros_like()]
        return [gz * psi(x)]
    def c_code(self, node, name, inp, out, sub):
@@ -224,7 +253,6 @@ class Psi(UnaryScalarOp):
    def grad(self, inputs, outputs_gradients):
        raise NotImplementedError()
-        return [None]
    def c_support_code(self):
        return (

--- a/theano/tensor/tests/test_basic.py
+++ b/theano/tensor/tests/test_basic.py
@@ -419,6 +419,54 @@ def makeTester(name, op, expected, checks=None, good=None, bad_build=None,
            finally:
                config.warn.sum_div_dimshuffle_bug = backup
+        def test_grad_none(self):
+            # Check that None is never returned as input gradient
+            # when calling self.op.grad
+            # We use all values in self.good because this has to be true
+            # whether or not the values work for utt.verify_grad.
+            if skip:
+                raise SkipTest(skip)
+            if not hasattr(self.op, 'grad'):
+                # This is not actually an Op
+                return
+            for testname, inputs in self.good.items():
+                inputs = [copy(input) for input in inputs]
+                inputrs = [TensorType(
+                            dtype=input.dtype,
+                            broadcastable=[shape_elem == 1
+                                           for shape_elem in input.shape]
+                            )() for input in inputs]
+                if (isinstance(self.expected, dict)
+                        and testname in self.expected):
+                    expecteds = self.expected[testname]
+                    # with numpy version, when we print a number and read it
+                    # back, we don't get exactly the same result, so we accept
+                    # rounding error in that case.
+                else:
+                    expecteds = self.expected(*inputs)
+                if not isinstance(expecteds, (list, tuple)):
+                    expecteds = (expecteds, )
+                out_grad_vars = []
+                for out in expecteds:
+                    if str(out.dtype) in tensor.discrete_dtypes:
+                        dtype = floatX
+                    else:
+                        dtype = str(out.dtype)
+                    bcast = [shape_elem == 1 for shape_elem in out.shape]
+                    var = TensorType(dtype=dtype, broadcastable=bcast)()
+                    out_grad_vars.append(var)
+                try:
+                    in_grad_vars = self.op.grad(inputrs, out_grad_vars)
+                except (gof.utils.MethodNotDefined, NotImplementedError):
+                    pass
+                else:
+                    assert None not in in_grad_vars
    Checker.__name__ = name
    return Checker