提交 787133a7 authored 作者: Pascal Lamblin's avatar Pascal Lamblin

Merge pull request #2364 from lamblin/scan_nonseq_grad

[WIP] Detect Null gradient wrt non-sequences in scan
......@@ -1883,13 +1883,27 @@ class Scan(PureOp):
type_outs.append('disconnected')
else:
type_outs.append('connected')
inner_inp_sitsot = dC_dXtm1s[ins_pos - self.n_seqs:]
outer_inp_sitsot = [
tensor.zeros([grad_steps + 1] +
[x.shape[i] for i in xrange(x.ndim)],
dtype=y.dtype)
for y, x in zip(inner_inp_sitsot,
self.outer_non_seqs(inputs))]
outer_inp_sitsot = []
for _idx, y in enumerate(inner_inp_sitsot):
x = self.outer_non_seqs(inputs)[_idx]
if isinstance(y.type, NullType):
# Cannot use dC_dXtm1s.dtype, so we use floatX instead.
outer_inp_sitsot.append(
tensor.zeros([grad_steps + 1] +
[x.shape[i] for i in xrange(x.ndim)],
dtype=theano.config.floatX))
# replace y by a zero tensor of the right shape
inner_inp_sitsot[_idx] = tensor.zeros(
diff_inputs[ins_pos + _idx].shape,
dtype=theano.config.floatX)
else:
outer_inp_sitsot.append(
tensor.zeros([grad_steps + 1] +
[x.shape[i] for i in xrange(x.ndim)],
dtype=y.dtype))
n_sitsot_outs = len(outer_inp_sitsot)
new_tap_array = mitmot_inp_taps + [[-1] for k in
......
......@@ -10,6 +10,7 @@ import cPickle
import numpy
from nose.plugins.skip import SkipTest
from nose.plugins.attrib import attr
from nose.tools import assert_raises
from numpy.testing import dec
import theano
......@@ -3122,6 +3123,38 @@ class T_Scan(unittest.TestCase):
assert out[4] == 19
# 19.0
def test_crash_nonseq_grad(self):
    # Regression test for a case originally reported by Bitton Tenessi:
    # taking the gradient used to crash. It must now raise
    # NullTypeGradError instead, because the gradient depends on the
    # intermediate states of the random number generators involved.
    # Simplified from the original report.
    rng = tensor.shared_randomstreams.RandomStreams()
    inp = tensor.matrix()
    normalized = inp / tensor.sum(inp, axis=0)

    def outer_step(out_idx):
        def inner_step(in_idx):
            # Draw one multinomial sample per column, weighted by the
            # normalized column values, and add it to the input.
            probs = normalized.T
            drawn = rng.multinomial(n=1, pvals=probs)
            return inp + drawn

        pooled, inner_updates = theano.scan(
            fn=inner_step,
            sequences=tensor.arange(inp.shape[0]))
        # Perturb the pooled result with random binomial noise.
        noise = rng.binomial(size=pooled.shape)
        return pooled + noise, inner_updates

    out, outer_updates = theano.scan(
        outer_step,
        sequences=[tensor.arange(inp.shape[0])])

    # The gradient wrt `inp` goes through the RNG states, so it must
    # be reported as a null gradient rather than crashing.
    assert_raises(theano.gradient.NullTypeGradError,
                  tensor.grad, out.sum(), inp)
def test_bugFunctioProvidesIntermediateNodesAsInputs(self):
# This is a bug recently reported by Ilya
# made it CPU friendly
......
Markdown 格式
0%
您将添加 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论