提交 8eeaea6c authored 作者: abergeron's avatar abergeron

Merge pull request #1919 from nouiz/crash_fix_broadcast

Crash fix broadcast
......@@ -20,7 +20,6 @@ from theano.sandbox.gpuarray.elemwise import GpuElemwise
from theano.sandbox.gpuarray.comp import NVCC_compiler
class GpuSubtensor(HideC, Subtensor):
def make_node(self, x, *inputs):
rval = tensor.Subtensor.make_node(self, x, *inputs)
......@@ -32,15 +31,10 @@ class GpuSubtensor(HideC, Subtensor):
def perform(self, node, inputs, out_):
out, = out_
x = inputs[0]
if self.perform_cache_cdata is not None:
out[0] = x.__getitem__(self.perform_cache_cdata)
return
cdata = get_idx_list(inputs, self.idx_list)
if len(cdata) == 1:
cdata = cdata[0]
if len(inputs) == 1:
self.perform_cache_cdata = cdata
out[0] = x.__getitem__(cdata)
......@@ -232,7 +226,8 @@ class GpuIncSubtensor(IncSubtensor):
# scalar case
if not self.set_instead_of_inc:
#x.__setitem__(cdata, sub_x + y)
tmp = pygpu.elemwise.elemwise2(sub_x, '+', y, sub_x, broadcast=False)
tmp = pygpu.elemwise.elemwise2(sub_x, '+', y, sub_x,
broadcast=False)
x.__setitem__(cdata, tmp)
else:
x.__setitem__(cdata, y)
......@@ -592,4 +587,4 @@ class GpuAdvancedIncSubtensor1_dev20(GpuAdvancedIncSubtensor1):
return;
}
""" %locals()
""" % locals()
......@@ -64,6 +64,7 @@ def make_constant(args):
return a
return tuple(map(conv, args))
def get_idx_list(inputs, idx_list):
'''
Given a list of inputs to the subtensor and its idx_list reorders
......@@ -81,8 +82,8 @@ def get_idx_list(inputs, idx_list):
return indices.pop()
elif isinstance(entry, slice):
return slice(convert(entry.start),
convert(entry.stop),
convert(entry.step))
convert(entry.stop),
convert(entry.step))
else:
return entry
cdata = tuple(map(convert, idx_list))
......@@ -125,13 +126,13 @@ def get_canonical_form_slice(theslice, length):
# in the generic case below.
if step == 1:
is_start_0 = (
start in [None, 0] or
(is_start_constant and is_length_constant and
start < 0 and start + length <= 0))
start in [None, 0] or
(is_start_constant and is_length_constant and
start < 0 and start + length <= 0))
is_stop_length = (
stop in [None, length, maxsize] or
(is_stop_constant and is_length_constant and
stop >= length))
stop in [None, length, maxsize] or
(is_stop_constant and is_length_constant and
stop >= length))
if is_start_0:
# 0:stop:1
if is_stop_length:
......@@ -395,6 +396,7 @@ class Subtensor(Op):
NotScalarConstantError: v
"""
real_idx = get_idx_list(inputs, self.idx_list)
def conv(val):
if val is None:
return None
......@@ -441,11 +443,12 @@ class Subtensor(Op):
raise exception
input_types = Subtensor.collapse(idx_list,
lambda entry: isinstance(entry, gof.Type))
lambda entry: isinstance(entry,
gof.Type))
if len(inputs) != len(input_types):
raise IndexError(
"Not enough inputs to fill in the Subtensor template.",
inputs, idx_list)
"Not enough inputs to fill in the Subtensor template.",
inputs, idx_list)
for input, expected_type in izip(inputs, input_types):
if input.type != expected_type:
raise TypeError(
......@@ -473,7 +476,7 @@ class Subtensor(Op):
return gof.Apply(self,
(x, ) + inputs,
[theano.tensor.tensor(dtype=x.type.dtype,
broadcastable=broadcastable)])
broadcastable=broadcastable)])
def perform(self, node, inputs, out_):
out, = out_
......@@ -592,7 +595,7 @@ class Subtensor(Op):
def helper_c_code(node, name, inputs, outputs, sub, idx_list, view_ndim,
c_prefix=None,
strides_mul=None,
):
):
"""
The parameters c_prefix are there to allow reusing this
function on PyArray and CudaNdarray object.
......@@ -637,23 +640,23 @@ class Subtensor(Op):
def init_entry(entry, depth=0):
if isinstance(entry, (numpy.integer, int)):
init_cmds.append(
"subtensor_spec[%i] = %i;" % (spec_pos(),
entry))
"subtensor_spec[%i] = %i;" % (spec_pos(),
entry))
inc_spec_pos(1)
if depth == 0:
is_slice.append(0)
elif isinstance(entry, Type):
init_cmds.append(
"subtensor_spec[%i] = %s;" % (spec_pos(),
inputs[input_pos()]))
"subtensor_spec[%i] = %s;" % (spec_pos(),
inputs[input_pos()]))
inc_spec_pos(1)
inc_input_pos(1)
if depth == 0:
is_slice.append(0)
elif entry is None:
init_cmds.append(
"subtensor_spec[%i] = %i;" % (spec_pos(),
NONE_CODE))
"subtensor_spec[%i] = %i;" % (spec_pos(),
NONE_CODE))
inc_spec_pos(1)
if depth == 0:
is_slice.append(0)
......@@ -686,26 +689,26 @@ class Subtensor(Op):
x, = inputs[:1]
z, = outputs
if view_ndim:
rval = """
rval = """
// Argument of the view
npy_intp xview_dims[%(view_ndim)s];
npy_intp xview_strides[%(view_ndim)s];
"""% locals()
""" % locals()
else:
rval = """
rval = """
// Argument of the view
npy_intp* xview_dims = NULL;
npy_intp* xview_strides = NULL;
"""
rval += """
// One more argument of the view
npy_intp xview_offset = 0;
// The subtensor is created by iterating over the dimensions
// and updating stride, shape, and data pointers
......@@ -716,7 +719,7 @@ class Subtensor(Op):
int inner_ii = 0; // the current dimension of zview
int outer_ii = 0; // current dimension of z
for (; outer_ii < %(len_is_slice)s; ++outer_ii)
{
if (is_slice[outer_ii])
......@@ -944,11 +947,11 @@ class SubtensorPrinter:
raise TypeError("Can only print Subtensor.")
pprint.assign(lambda pstate, r: r.owner and isinstance(r.owner.op, Subtensor),
SubtensorPrinter())
SubtensorPrinter())
def set_subtensor(x, y, inplace=False,
tolerate_inplace_aliasing=False):
tolerate_inplace_aliasing=False):
"""Return x with the given subtensor overwritten by y.
Example: To replicate the numpy expression "r[10:] = 5", type
......@@ -960,11 +963,11 @@ def set_subtensor(x, y, inplace=False,
:param tolerate_inplace_aliasing: see inc_subtensor for documentation.
"""
return inc_subtensor(x, y, inplace, set_instead_of_inc=True,
tolerate_inplace_aliasing=tolerate_inplace_aliasing)
tolerate_inplace_aliasing=tolerate_inplace_aliasing)
def inc_subtensor(x, y, inplace=False, set_instead_of_inc=False,
tolerate_inplace_aliasing=False):
tolerate_inplace_aliasing=False):
"""Return x with the given subtensor incremented by y.
:param x: the symbolic result of a Subtensor operation.
......@@ -987,7 +990,8 @@ def inc_subtensor(x, y, inplace=False, set_instead_of_inc=False,
if y.ndim > x.ndim:
raise TypeError(("Trying to increment a %d-dimensional "
"subtensor with a %d-dimensional value.") % (x.ndim, y.ndim))
"subtensor with a %d-dimensional value.") % (x.ndim,
y.ndim))
for dim in range(y.ndim):
dim_offset = x.ndim - y.ndim
......@@ -1042,20 +1046,22 @@ def inc_subtensor(x, y, inplace=False, set_instead_of_inc=False,
# return something that has the same shape as x, not as x.T (inner_x).
# So re-apply the outer dimshuffle on the new inc_subtensor,
# and return advanced_inc_subtensor1(x.T, i, y).T.
inner_incsubtensor = inc_subtensor(inner_x, y,
inplace=inplace,
set_instead_of_inc=set_instead_of_inc,
tolerate_inplace_aliasing=tolerate_inplace_aliasing)
inner_incsubtensor = inc_subtensor(
inner_x, y,
inplace=inplace,
set_instead_of_inc=set_instead_of_inc,
tolerate_inplace_aliasing=tolerate_inplace_aliasing)
return x.owner.op(inner_incsubtensor, *x.owner.inputs[1:])
elif isinstance(x.owner.op, theano.tensor.Reshape):
inner_x = x.owner.inputs[0]
# Try to apply inc_subtensor on inner_x.
# If it works, there is no need to reshape, as the inc_subtensor
# will have the same shape as inner_x, which is what we want.
inner_incsubtensor = inc_subtensor(inner_x, y.flatten(),
inplace=inplace,
set_instead_of_inc=set_instead_of_inc,
tolerate_inplace_aliasing=tolerate_inplace_aliasing)
inner_incsubtensor = inc_subtensor(
inner_x, y.flatten(),
inplace=inplace,
set_instead_of_inc=set_instead_of_inc,
tolerate_inplace_aliasing=tolerate_inplace_aliasing)
return inner_incsubtensor
else:
raise TypeError('x must be the result of a subtensor operation')
......@@ -1077,7 +1083,7 @@ class IncSubtensor(Op):
check_input = False
def __init__(self, idx_list, inplace=False, set_instead_of_inc=False,
destroyhandler_tolerate_aliased=None):
destroyhandler_tolerate_aliased=None):
if destroyhandler_tolerate_aliased is None:
destroyhandler_tolerate_aliased = []
self.idx_list = map(Subtensor.convert, idx_list)
......@@ -1085,7 +1091,7 @@ class IncSubtensor(Op):
if inplace:
self.destroy_map = {0: [0]}
self.destroyhandler_tolerate_aliased = list(
destroyhandler_tolerate_aliased)
destroyhandler_tolerate_aliased)
self.set_instead_of_inc = set_instead_of_inc
def __eq__(self, other):
......@@ -1109,7 +1115,7 @@ class IncSubtensor(Op):
# else entry
# for entry in self.idx_list)
return hashtype(self) ^ hash(idx_list) ^ hash(self.inplace) \
^ hash(self.set_instead_of_inc)
^ hash(self.set_instead_of_inc)
def __str__(self):
indices = []
......@@ -1126,10 +1132,10 @@ class IncSubtensor(Op):
msg += 'Inc'
else:
msg += 'Set'
return "%s{%s;%s}" % (
self.__class__.__name__,
msg,
", ".join(indices))
return "%s{%s;%s}" % (
self.__class__.__name__,
msg,
", ".join(indices))
def make_node(self, x, y, *inputs):
"""
......@@ -1140,25 +1146,26 @@ class IncSubtensor(Op):
x, y = map(theano.tensor.as_tensor_variable, [x, y])
if y.ndim > x.ndim:
raise ValueError(("Trying to increment a %d-dimensional "
"subtensor with a %d-dimensional value.") % (x.ndim,
y.ndim))
"subtensor with a %d-dimensional value.") % (
x.ndim, y.ndim))
inputs = tuple(map(Subtensor.my_as_scalar, inputs))
idx_list = list(self.idx_list)
if len(idx_list) > x.type.ndim:
exception = ValueError(
Subtensor.e_invalid % (
len(idx_list),
x.type.ndim))
Subtensor.e_invalid % (
len(idx_list),
x.type.ndim))
exception.subtensor_invalid = True
raise exception
input_types = Subtensor.collapse(idx_list,
lambda entry: isinstance(entry, gof.Type))
input_types = Subtensor.collapse(
idx_list,
lambda entry: isinstance(entry, gof.Type))
if len(inputs) != len(input_types):
raise IndexError(
"Not enough inputs to fill in the Subtensor template.",
inputs, idx_list)
"Not enough inputs to fill in the Subtensor template.",
inputs, idx_list)
for input, expected_type in izip(inputs, input_types):
if input.type != expected_type:
raise TypeError(
......@@ -1442,6 +1449,25 @@ class IncSubtensor(Op):
else:
gx = g_output
gy = Subtensor(idx_list=self.idx_list)(g_output, *idx_list)
if gy.broadcastable != y.broadcastable:
y_broad = (True,) * (gy.ndim - y.ndim) + y.broadcastable
assert sum(gy.broadcastable) < sum(y_broad)
axis_to_sum = []
for i in range(gy.ndim):
if gy.broadcastable[i] is False and y_broad[i] is True:
axis_to_sum.append(i)
elif (gy.broadcastable[i] is True and
y_broad[i] is False):
# This means that Theano was able to infer that
# gy.shape[i] is 1, so y.shape[i] is 1, but we
# didn't know it. It is fine.
pass
else:
assert gy.broadcastable[i] == y_broad[i]
gy = gy.sum(axis=axis_to_sum, keepdims=True)
if gy.ndim != y.ndim:
gy = gy.dimshuffle(*range(y.ndim, gy.ndim))
assert gy.broadcastable == y.broadcastable
return [gx, gy] + [DisconnectedType()()] * len(idx_list)
......
......@@ -88,7 +88,7 @@ class T_subtensor(unittest.TestCase, utt.TestOptimizationMixin):
f = inplace_func([], t, mode=self.mode)
topo = f.maker.fgraph.toposort()
topo_ = [node for node in topo if not isinstance(node.op,
self.ignore_topo)]
self.ignore_topo)]
assert len(topo_) == 1
if not list:
assert isinstance(topo_[0].op, self.sub)
......@@ -365,19 +365,39 @@ class T_subtensor(unittest.TestCase, utt.TestOptimizationMixin):
f = inplace_func([], gn, mode=self.mode)
topo = f.maker.fgraph.toposort()
topo_ = [node for node in topo if not isinstance(node.op,
self.ignore_topo)]
self.ignore_topo)]
if not self.fast_compile:
assert len(topo_) == 6
assert numpy.sum([isinstance(node.op, self.inc_sub)
for node in topo_]) == 1
for node in topo_]) == 1
assert numpy.sum([isinstance(node.op, self.sub)
for node in topo_]) == 1
for node in topo_]) == 1
gval = f()
good = numpy.zeros_like(data)
good[subi:, subi] = numpy.exp(data[subi:, subi])
self.assertTrue(numpy.allclose(gval, good), (gval, good))
def test_grad_2d_inc_set_subtensor(self):
    """Regression test for the broadcast crash fix: the gradient of
    inc_subtensor/set_subtensor must have the same broadcastable
    pattern as the increment value ``m``, including shapes where a
    dimension of ``m`` is broadcastable ((1, 2)) or where ``m`` has
    fewer dimensions than the sliced region ((2,)).
    """
    # Pairs of (shape of the sliced variable n, shape of the value m).
    # The slice taken below is n[:2, :2], so m must broadcast against
    # a (2, 2) region.
    for n_shape, m_shape in [
        [(2, 3), (2, 2)],
        [(3, 2), (2, 2)],
        [(3, 2), (1, 2)],   # broadcastable first dimension
        [(3, 2), (2,)],     # m has lower rank than the slice
    ]:
        # Exercise both the increment and the overwrite variants.
        for op in [inc_subtensor, set_subtensor]:
            subi = 2
            data = numpy.asarray(rand(*n_shape), dtype=self.dtype)
            n = self.shared(data)
            # Symbolic scalar index so a Subtensor op (not constant
            # folding) is actually built.
            z = scal.constant(subi)
            m = matrix('m', dtype=self.dtype)
            mv = numpy.asarray(rand(*m_shape), dtype=self.dtype)

            t = op(n[:z, :z], m)
            # Building the gradient used to crash before this fix when
            # m's broadcastable pattern differed from the slice's.
            gn, gm = theano.tensor.grad(theano.tensor.sum(t), [n, m])
            # Numerically verify the gradient w.r.t. both inputs.
            utt.verify_grad(lambda m: op(n[:z, :z], m), [mv])
            utt.verify_grad(lambda nn: op(nn[:z, :z], mv), [data])
def test_grad_0d(self):
data = numpy.asarray(rand(2, 3), dtype=self.dtype)
n = self.shared(data)
......
......@@ -643,10 +643,10 @@ theano.compile.register_shape_i_c_code(
TensorType,
"""
if(!%(oname)s)
%(oname)s=(PyArrayObject*)PyArray_ZEROS(0, NULL, NPY_INT64, 0);
%(oname)s=(PyArrayObject*)PyArray_EMPTY(0, NULL, NPY_INT64, 0);
((npy_int64*)PyArray_DATA(%(oname)s))[0]=PyArray_DIMS(%(iname)s)[%(i)s];
""",
version=1)
version=2)
# Register TensorType C code for DeepCopyOp
theano.compile.register_deep_copy_op_c_code(
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论