Merge pull request #1919 from nouiz/crash_fix_broadcast

Crash fix broadcast

Merge pull request #1919 from nouiz/crash_fix_broadcast
8eeaea6c · abergeron · 4b60641c · b0572f5c · 8eeaea6c · 8eeaea6c
--- a/theano/sandbox/gpuarray/subtensor.py
+++ b/theano/sandbox/gpuarray/subtensor.py
@@ -20,7 +20,6 @@ from theano.sandbox.gpuarray.elemwise import GpuElemwise
 from theano.sandbox.gpuarray.comp import NVCC_compiler


-
 class GpuSubtensor(HideC, Subtensor):
    def make_node(self, x, *inputs):
        rval = tensor.Subtensor.make_node(self, x, *inputs)
@@ -32,15 +31,10 @@ class GpuSubtensor(HideC, Subtensor):
    def perform(self, node, inputs, out_):
        out, = out_
        x = inputs[0]
-        if self.perform_cache_cdata is not None:
-            out[0] = x.__getitem__(self.perform_cache_cdata)
-            return

        cdata = get_idx_list(inputs, self.idx_list)
        if len(cdata) == 1:
            cdata = cdata[0]
-        if len(inputs) == 1:
-            self.perform_cache_cdata = cdata

        out[0] = x.__getitem__(cdata)

@@ -232,7 +226,8 @@ class GpuIncSubtensor(IncSubtensor):
            # scalar case
            if not self.set_instead_of_inc:
                #x.__setitem__(cdata, sub_x + y)
-                tmp = pygpu.elemwise.elemwise2(sub_x, '+', y,  sub_x, broadcast=False)
+                tmp = pygpu.elemwise.elemwise2(sub_x, '+', y,  sub_x,
+                                               broadcast=False)
                x.__setitem__(cdata, tmp)
            else:
                x.__setitem__(cdata, y)
@@ -592,4 +587,4 @@ class GpuAdvancedIncSubtensor1_dev20(GpuAdvancedIncSubtensor1):
                return;
        }

-        """ %locals()
+        """ % locals()
--- a/theano/tensor/subtensor.py
+++ b/theano/tensor/subtensor.py
--- a/theano/tensor/tests/test_subtensor.py
+++ b/theano/tensor/tests/test_subtensor.py
@@ -88,7 +88,7 @@ class T_subtensor(unittest.TestCase, utt.TestOptimizationMixin):
        f = inplace_func([], t, mode=self.mode)
        topo = f.maker.fgraph.toposort()
        topo_ = [node for node in topo if not isinstance(node.op,
-             self.ignore_topo)]
+                                                         self.ignore_topo)]
        assert len(topo_) == 1
        if not list:
            assert isinstance(topo_[0].op, self.sub)
@@ -365,19 +365,39 @@ class T_subtensor(unittest.TestCase, utt.TestOptimizationMixin):
        f = inplace_func([], gn, mode=self.mode)
        topo = f.maker.fgraph.toposort()
        topo_ = [node for node in topo if not isinstance(node.op,
-             self.ignore_topo)]
+                                                         self.ignore_topo)]
        if not self.fast_compile:
            assert len(topo_) == 6
        assert numpy.sum([isinstance(node.op, self.inc_sub)
-             for node in topo_]) == 1
+                          for node in topo_]) == 1
        assert numpy.sum([isinstance(node.op, self.sub)
-             for node in topo_]) == 1
+                          for node in topo_]) == 1
        gval = f()

        good = numpy.zeros_like(data)
        good[subi:, subi] = numpy.exp(data[subi:, subi])
        self.assertTrue(numpy.allclose(gval, good), (gval, good))

+    def test_grad_2d_inc_set_subtensor(self):  # grads of inc/set on n[:2, :2]
+        for n_shape, m_shape in [  # (shape of n, shape of the value mv)
+            [(2, 3), (2, 2)],
+            [(3, 2), (2, 2)],
+            [(3, 2), (1, 2)],  # value has a length-1 leading dimension
+            [(3, 2), (2,)],  # value is 1-d
+        ]:
+            for op in [inc_subtensor, set_subtensor]:
+                subi = 2  # slice bound used for both axes below
+                data = numpy.asarray(rand(*n_shape), dtype=self.dtype)
+                n = self.shared(data)
+                z = scal.constant(subi)
+                m = matrix('m', dtype=self.dtype)
+                mv = numpy.asarray(rand(*m_shape), dtype=self.dtype)
+                # Symbolic grad is built as well; gn and gm are unused below.
+                t = op(n[:z, :z], m)
+                gn, gm = theano.tensor.grad(theano.tensor.sum(t), [n, m])
+                utt.verify_grad(lambda m: op(n[:z, :z], m), [mv])
+                utt.verify_grad(lambda nn: op(nn[:z, :z], mv), [data])
+
    def test_grad_0d(self):
        data = numpy.asarray(rand(2, 3), dtype=self.dtype)
        n = self.shared(data)

--- a/theano/tensor/type.py
+++ b/theano/tensor/type.py
@@ -643,10 +643,10 @@ theano.compile.register_shape_i_c_code(
        TensorType,
        """
        if(!%(oname)s)
-            %(oname)s=(PyArrayObject*)PyArray_ZEROS(0, NULL, NPY_INT64, 0);
+            %(oname)s=(PyArrayObject*)PyArray_EMPTY(0, NULL, NPY_INT64, 0);
        ((npy_int64*)PyArray_DATA(%(oname)s))[0]=PyArray_DIMS(%(iname)s)[%(i)s];
        """,
-        version=1)
+        version=2)

 # Register TensorType C code for DeepCopyOp
 theano.compile.register_deep_copy_op_c_code(