提交 14ab4945 authored 作者: Frédéric Bastien

Merge pull request #1537 from lamblin/fix_grad_none

Do not return None in scalar op's grad, add tests
差异被折叠。
......@@ -2,11 +2,12 @@
#as scipy is not always available, we treat them separatly
import numpy
import theano
from theano.scalar.basic import (UnaryScalarOp, BinaryScalarOp,
exp, upgrade_to_float,
float_types)
from theano.scalar.basic import (upgrade_to_float_no_complex,
complex_types,
complex_types, discrete_types,
upcast)
imported_scipy_special = False
......@@ -32,12 +33,15 @@ class Erf(UnaryScalarOp):
gz, = grads
if x.type in complex_types:
raise NotImplementedError()
elif x.type in float_types:
cst = numpy.asarray(2. / numpy.sqrt(numpy.pi),
dtype=upcast(x.type.dtype, gz.type.dtype))
return gz * cst * exp(-x * x),
else:
return None,
if self(x).type in discrete_types:
if x.type in discrete_types:
return [x.zeros_like(dtype=theano.config.floatX)]
else:
return [x.zeros_like()]
cst = numpy.asarray(2. / numpy.sqrt(numpy.pi),
dtype=upcast(x.type.dtype, gz.type.dtype))
return gz * cst * exp(-x * x),
def c_code(self, node, name, inp, out, sub):
x, = inp
......@@ -60,12 +64,15 @@ class Erfc(UnaryScalarOp):
gz, = grads
if x.type in complex_types:
raise NotImplementedError()
elif x.type in float_types:
cst = numpy.asarray(2. / numpy.sqrt(numpy.pi),
dtype=upcast(x.type.dtype, gz.type.dtype))
return - gz * cst * exp(-x * x),
else:
return None,
if self(x).type in discrete_types:
if x.type in discrete_types:
return [x.zeros_like(dtype=theano.config.floatX)]
else:
return [x.zeros_like()]
cst = numpy.asarray(2. / numpy.sqrt(numpy.pi),
dtype=upcast(x.type.dtype, gz.type.dtype))
return - gz * cst * exp(-x * x),
def c_code(self, node, name, inp, out, sub):
x, = inp
......@@ -99,12 +106,15 @@ class Erfinv(UnaryScalarOp):
gz, = grads
if x.type in complex_types:
raise NotImplementedError()
elif x.type in float_types:
cst = numpy.asarray(numpy.sqrt(numpy.pi) / 2.,
dtype=upcast(x.type.dtype, gz.type.dtype))
return gz * cst * exp(erfinv(x) ** 2),
else:
return None,
if self(x).type in discrete_types:
if x.type in discrete_types:
return [x.zeros_like(dtype=theano.config.floatX)]
else:
return [x.zeros_like()]
cst = numpy.asarray(numpy.sqrt(numpy.pi) / 2.,
dtype=upcast(x.type.dtype, gz.type.dtype))
return gz * cst * exp(erfinv(x) ** 2),
# TODO: erfinv() is not provided by the C standard library
#def c_code(self, node, name, inp, out, sub):
......@@ -129,12 +139,15 @@ class Erfcinv(UnaryScalarOp):
gz, = grads
if x.type in complex_types:
raise NotImplementedError()
elif x.type in float_types:
cst = numpy.asarray(numpy.sqrt(numpy.pi) / 2.,
dtype=upcast(x.type.dtype, gz.type.dtype))
return - gz * cst * exp(erfcinv(x) ** 2),
else:
return None,
if self(x).type in discrete_types:
if x.type in discrete_types:
return [x.zeros_like(dtype=theano.config.floatX)]
else:
return [x.zeros_like()]
cst = numpy.asarray(numpy.sqrt(numpy.pi) / 2.,
dtype=upcast(x.type.dtype, gz.type.dtype))
return - gz * cst * exp(erfcinv(x) ** 2),
# TODO: erfcinv() is not provided by the C standard library
#def c_code(self, node, name, inp, out, sub):
......@@ -159,6 +172,14 @@ class Gamma(UnaryScalarOp):
super(Gamma, self).impl(x)
def grad(self, (x, ), (gz, )):
    """Gradient of the gamma function w.r.t. its input.

    Raises NotImplementedError for complex inputs.  When the op's
    output is discrete, the op is treated as non-differentiable and a
    zero gradient is returned instead of None (this is what PR #1537
    changes); the zero uses floatX when x itself is discrete, else
    x's own dtype.
    """
    if x.type in complex_types:
        raise NotImplementedError()
    # Discrete output => not differentiable; return zeros, never None.
    if self(x).type in discrete_types:
        if x.type in discrete_types:
            return [x.zeros_like(dtype=theano.config.floatX)]
        else:
            return [x.zeros_like()]
    # d/dx gamma(x) = gamma(x) * psi(x), scaled by the output gradient.
    return gz * gamma(x) * psi(x),
def c_code(self, node, name, (x, ), (z, ), sub):
......@@ -190,6 +211,14 @@ class GammaLn(UnaryScalarOp):
def grad(self, inp, grads):
    """Gradient of log-gamma w.r.t. its input.

    Raises NotImplementedError for complex inputs.  When the op's
    output is discrete, a zero gradient is returned instead of None
    (floatX when x itself is discrete, else x's own dtype).
    """
    x, = inp
    gz, = grads
    if x.type in complex_types:
        raise NotImplementedError()
    # Discrete output => not differentiable; return zeros, never None.
    if self(x).type in discrete_types:
        if x.type in discrete_types:
            return [x.zeros_like(dtype=theano.config.floatX)]
        else:
            return [x.zeros_like()]
    # d/dx log(gamma(x)) = psi(x), scaled by the output gradient.
    return [gz * psi(x)]
def c_code(self, node, name, inp, out, sub):
......@@ -224,7 +253,6 @@ class Psi(UnaryScalarOp):
def grad(self, inputs, outputs_gradients):
raise NotImplementedError()
return [None]
def c_support_code(self):
return (
......
......@@ -419,6 +419,54 @@ def makeTester(name, op, expected, checks=None, good=None, bad_build=None,
finally:
config.warn.sum_div_dimshuffle_bug = backup
def test_grad_none(self):
    """Check that ``self.op.grad`` never returns None as an input gradient.

    Builds symbolic inputs matching each entry in ``self.good``, builds
    output-gradient variables of a matching (float) dtype, calls
    ``self.op.grad``, and asserts no None appears in the result.  Ops
    whose grad is not implemented are skipped, not failed.
    """
    # Check that None is never returned as input gradient
    # when calling self.op.grad
    # We use all values in self.good because this has to be true
    # whether or not the values work for utt.verify_grad.
    if skip:
        raise SkipTest(skip)
    if not hasattr(self.op, 'grad'):
        # This is not actually an Op
        return
    for testname, inputs in self.good.items():
        inputs = [copy(input) for input in inputs]
        # Symbolic variables mirroring each concrete input's dtype and
        # broadcastable pattern (dims of size 1 are broadcastable).
        inputrs = [TensorType(
            dtype=input.dtype,
            broadcastable=[shape_elem == 1
                           for shape_elem in input.shape]
        )() for input in inputs]
        if (isinstance(self.expected, dict)
                and testname in self.expected):
            expecteds = self.expected[testname]
            # with numpy version, when we print a number and read it
            # back, we don't get exactly the same result, so we accept
            # rounding error in that case.
        else:
            expecteds = self.expected(*inputs)
        if not isinstance(expecteds, (list, tuple)):
            expecteds = (expecteds, )
        # One output-gradient variable per expected output; discrete
        # output dtypes get floatX since gradients are floats.
        out_grad_vars = []
        for out in expecteds:
            if str(out.dtype) in tensor.discrete_dtypes:
                dtype = floatX
            else:
                dtype = str(out.dtype)
            bcast = [shape_elem == 1 for shape_elem in out.shape]
            var = TensorType(dtype=dtype, broadcastable=bcast)()
            out_grad_vars.append(var)
        try:
            in_grad_vars = self.op.grad(inputrs, out_grad_vars)
        except (gof.utils.MethodNotDefined, NotImplementedError):
            # A missing grad is acceptable; returning None is not.
            pass
        else:
            assert None not in in_grad_vars
Checker.__name__ = name
return Checker
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论