Commit 8eeaea6c authored by abergeron

Merge pull request #1919 from nouiz/crash_fix_broadcast

Crash fix broadcast
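This merge does two substantive things: it deletes the `perform_cache_cdata` caching from `GpuSubtensor.perform` (the crash fix), and it teaches `IncSubtensor.grad` to sum the incoming gradient over the axes along which `y` was broadcast, so `gy` ends up with the same broadcastable pattern as `y`. The remaining hunks are line-wrapping and whitespace cleanups, a new test, and a small change to the Shape_i C code.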
@@ -20,7 +20,6 @@ from theano.sandbox.gpuarray.elemwise import GpuElemwise
 from theano.sandbox.gpuarray.comp import NVCC_compiler


 class GpuSubtensor(HideC, Subtensor):
     def make_node(self, x, *inputs):
         rval = tensor.Subtensor.make_node(self, x, *inputs)
@@ -32,15 +31,10 @@ class GpuSubtensor(HideC, Subtensor):
     def perform(self, node, inputs, out_):
         out, = out_
         x = inputs[0]
-        if self.perform_cache_cdata is not None:
-            out[0] = x.__getitem__(self.perform_cache_cdata)
-            return

         cdata = get_idx_list(inputs, self.idx_list)
         if len(cdata) == 1:
             cdata = cdata[0]
-        if len(inputs) == 1:
-            self.perform_cache_cdata = cdata

         out[0] = x.__getitem__(cdata)
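The deleted lines cached the computed index tuple on the Op instance itself. One plausible reading of why such a cache is unsafe, sketched in plain Python below (hypothetical `MyOp`, not the Theano API): ops are compared and merged by their constructor parameters, so hidden mutable state makes two "equal" ops behave differently.

# Hedged sketch: `a` and `b` compare equal, but only `a` carries the
# hidden cache that a perform()-style method would consult.
class MyOp(object):
    def __init__(self, idx_list):
        self.idx_list = idx_list
        self.perform_cache_cdata = None  # mutable state, invisible to __eq__

    def __eq__(self, other):
        return self.idx_list == other.idx_list

    def __hash__(self):
        return hash(self.idx_list)

a = MyOp((0,))
b = MyOp((0,))
a.perform_cache_cdata = (0,)  # primed by a previous perform() call
assert a == b  # a graph optimizer may substitute one for the other,
               # so results would depend on which instance survived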
@@ -232,7 +226,8 @@ class GpuIncSubtensor(IncSubtensor):
             # scalar case
             if not self.set_instead_of_inc:
                 #x.__setitem__(cdata, sub_x + y)
-                tmp = pygpu.elemwise.elemwise2(sub_x, '+', y, sub_x, broadcast=False)
+                tmp = pygpu.elemwise.elemwise2(sub_x, '+', y, sub_x,
+                                               broadcast=False)
                 x.__setitem__(cdata, tmp)
             else:
                 x.__setitem__(cdata, y)
@@ -592,4 +587,4 @@ class GpuAdvancedIncSubtensor1_dev20(GpuAdvancedIncSubtensor1):
             return;
         }
-        """ %locals()
+        """ % locals()
@@ -64,6 +64,7 @@ def make_constant(args):
         return a
     return tuple(map(conv, args))

+
 def get_idx_list(inputs, idx_list):
     '''
     Given a list of inputs to the subtensor and its idx_list reorders
@@ -395,6 +396,7 @@ class Subtensor(Op):
         NotScalarConstantError: v
         """
         real_idx = get_idx_list(inputs, self.idx_list)
+
         def conv(val):
             if val is None:
                 return None
@@ -441,7 +443,8 @@ class Subtensor(Op):
             raise exception

         input_types = Subtensor.collapse(idx_list,
-                                         lambda entry: isinstance(entry, gof.Type))
+                                         lambda entry: isinstance(entry,
+                                                                  gof.Type))
         if len(inputs) != len(input_types):
             raise IndexError(
                 "Not enough inputs to fill in the Subtensor template.",
@@ -693,7 +696,7 @@ class Subtensor(Op):
            npy_intp xview_dims[%(view_ndim)s];
            npy_intp xview_strides[%(view_ndim)s];
-        """% locals()
+        """ % locals()
     else:
         rval = """
         // Argument of the view
@@ -987,7 +990,8 @@ def inc_subtensor(x, y, inplace=False, set_instead_of_inc=False,
     if y.ndim > x.ndim:
         raise TypeError(("Trying to increment a %d-dimensional "
-                         "subtensor with a %d-dimensional value.") % (x.ndim, y.ndim))
+                         "subtensor with a %d-dimensional value.") % (x.ndim,
+                                                                      y.ndim))

     for dim in range(y.ndim):
         dim_offset = x.ndim - y.ndim
@@ -1042,7 +1046,8 @@ def inc_subtensor(x, y, inplace=False, set_instead_of_inc=False,
         # return something that has the same shape as x, not as x.T (inner_x).
         # So re-apply the outer dimshuffle on the new inc_subtensor,
         # and return advanced_inc_subtensor1(x.T, i, y).T.
-        inner_incsubtensor = inc_subtensor(inner_x, y,
+        inner_incsubtensor = inc_subtensor(
+            inner_x, y,
             inplace=inplace,
             set_instead_of_inc=set_instead_of_inc,
             tolerate_inplace_aliasing=tolerate_inplace_aliasing)
@@ -1052,7 +1057,8 @@ def inc_subtensor(x, y, inplace=False, set_instead_of_inc=False,
         # Try to apply inc_subtensor on inner_x.
         # If it works, there is no need to reshape, as the inc_subtensor
         # will have the same shape as inner_x, which is what we want.
-        inner_incsubtensor = inc_subtensor(inner_x, y.flatten(),
+        inner_incsubtensor = inc_subtensor(
+            inner_x, y.flatten(),
             inplace=inplace,
             set_instead_of_inc=set_instead_of_inc,
             tolerate_inplace_aliasing=tolerate_inplace_aliasing)
@@ -1140,8 +1146,8 @@ class IncSubtensor(Op):
         x, y = map(theano.tensor.as_tensor_variable, [x, y])
         if y.ndim > x.ndim:
             raise ValueError(("Trying to increment a %d-dimensional "
-                              "subtensor with a %d-dimensional value.") % (x.ndim,
-                              y.ndim))
+                              "subtensor with a %d-dimensional value.") % (
+                                  x.ndim, y.ndim))
         inputs = tuple(map(Subtensor.my_as_scalar, inputs))

         idx_list = list(self.idx_list)
@@ -1153,7 +1159,8 @@ class IncSubtensor(Op):
             exception.subtensor_invalid = True
             raise exception

-        input_types = Subtensor.collapse(idx_list,
+        input_types = Subtensor.collapse(
+            idx_list,
             lambda entry: isinstance(entry, gof.Type))
         if len(inputs) != len(input_types):
             raise IndexError(
@@ -1442,6 +1449,25 @@ class IncSubtensor(Op):
         else:
             gx = g_output
         gy = Subtensor(idx_list=self.idx_list)(g_output, *idx_list)
+        if gy.broadcastable != y.broadcastable:
+            y_broad = (True,) * (gy.ndim - y.ndim) + y.broadcastable
+            assert sum(gy.broadcastable) < sum(y_broad)
+            axis_to_sum = []
+            for i in range(gy.ndim):
+                if gy.broadcastable[i] is False and y_broad[i] is True:
+                    axis_to_sum.append(i)
+                elif (gy.broadcastable[i] is True and
+                      y_broad[i] is False):
+                    # This means that Theano was able to infer that
+                    # gy.shape[i] is 1, so y.shape[i] is 1, but we
+                    # did not know it. It is fine.
+                    pass
+                else:
+                    assert gy.broadcastable[i] == y_broad[i]
+            gy = gy.sum(axis=axis_to_sum, keepdims=True)
+            if gy.ndim != y.ndim:
+                gy = gy.dimshuffle(*range(y.ndim, gy.ndim))
+            assert gy.broadcastable == y.broadcastable
         return [gx, gy] + [DisconnectedType()()] * len(idx_list)
...
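In the new grad branch above, the rule is: the gradient flowing back through `Subtensor` has the broadcasted shape of the slice, and the gradient with respect to `y` is recovered by summing over exactly the axes where `y` has size 1 but the slice does not (`keepdims=True` preserves them as size-1 axes; the `dimshuffle` then drops leading axes when `y` has lower rank). A small NumPy sketch of that rule, not part of the diff:

import numpy

gy = numpy.arange(6.0).reshape(3, 2)  # gradient w.r.t. the (3, 2) slice
y_shape = (1, 2)                      # y was broadcast along axis 0
axis_to_sum = [i for i, ys in enumerate(y_shape)
               if ys == 1 and gy.shape[i] != 1]
dy = gy.sum(axis=tuple(axis_to_sum), keepdims=True)
assert dy.shape == y_shape  # matches y, as the final assert above demands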
@@ -378,6 +378,26 @@ class T_subtensor(unittest.TestCase, utt.TestOptimizationMixin):
         good[subi:, subi] = numpy.exp(data[subi:, subi])
         self.assertTrue(numpy.allclose(gval, good), (gval, good))

+    def test_grad_2d_inc_set_subtensor(self):
+        for n_shape, m_shape in [
+            [(2, 3), (2, 2)],
+            [(3, 2), (2, 2)],
+            [(3, 2), (1, 2)],
+            [(3, 2), (2,)],
+        ]:
+            for op in [inc_subtensor, set_subtensor]:
+                subi = 2
+                data = numpy.asarray(rand(*n_shape), dtype=self.dtype)
+                n = self.shared(data)
+                z = scal.constant(subi)
+                m = matrix('m', dtype=self.dtype)
+                mv = numpy.asarray(rand(*m_shape), dtype=self.dtype)
+
+                t = op(n[:z, :z], m)
+                gn, gm = theano.tensor.grad(theano.tensor.sum(t), [n, m])
+                utt.verify_grad(lambda m: op(n[:z, :z], m), [mv])
+                utt.verify_grad(lambda nn: op(nn[:z, :z], mv), [data])
+
     def test_grad_0d(self):
         data = numpy.asarray(rand(2, 3), dtype=self.dtype)
         n = self.shared(data)
...
@@ -643,10 +643,10 @@ theano.compile.register_shape_i_c_code(
     TensorType,
     """
     if(!%(oname)s)
-        %(oname)s=(PyArrayObject*)PyArray_ZEROS(0, NULL, NPY_INT64, 0);
+        %(oname)s=(PyArrayObject*)PyArray_EMPTY(0, NULL, NPY_INT64, 0);
     ((npy_int64*)PyArray_DATA(%(oname)s))[0]=PyArray_DIMS(%(iname)s)[%(i)s];
     """,
-    version=1)
+    version=2)

 # Register TensorType C code for DeepCopyOp
 theano.compile.register_deep_copy_op_c_code(
...
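Two notes on this last hunk: `PyArray_EMPTY` skips the zero-fill that `PyArray_ZEROS` performs, which is safe because the next line immediately overwrites the scalar's only element; and the bump from `version=1` to `version=2` tells Theano's C-code cache that the snippet changed, forcing recompilation instead of reusing a stale compiled module.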