提交 60c3a741 authored 作者: Frederic's avatar Frederic

Fix crash with GpuAdvancedIncSubtensor1 when y is broadcast

上级 2bfe3c82
...@@ -2609,11 +2609,17 @@ class GpuAdvancedIncSubtensor1(tensor.AdvancedIncSubtensor1, GpuOp): ...@@ -2609,11 +2609,17 @@ class GpuAdvancedIncSubtensor1(tensor.AdvancedIncSubtensor1, GpuOp):
# CudaNdarray __setitem__ doesn't do broadcast nor support # CudaNdarray __setitem__ doesn't do broadcast nor support
# list of index. # list of index.
if y.ndim == x.ndim: if y.ndim == x.ndim:
assert len(y) == len(idx) if len(y) == 1:
j = 0 # Allow broadcasting of y[0]
for i in idx: y_0 = y[0]
x[i] = y[j] for i in idx:
j += 1 x[i] = y_0
else:
assert len(y) == len(idx)
j = 0
for i in idx:
x[i] = y[j]
j += 1
else: else:
for i in idx: for i in idx:
x[i] = y x[i] = y
...@@ -2639,7 +2645,7 @@ class GpuAdvancedIncSubtensor1(tensor.AdvancedIncSubtensor1, GpuOp): ...@@ -2639,7 +2645,7 @@ class GpuAdvancedIncSubtensor1(tensor.AdvancedIncSubtensor1, GpuOp):
out[0] = x out[0] = x
def c_code_cache_version(self): def c_code_cache_version(self):
return (4,) return (5,)
def c_code(self, node, name, inputs, outputs, sub): def c_code(self, node, name, inputs, outputs, sub):
if (self.set_instead_of_inc) or \ if (self.set_instead_of_inc) or \
...@@ -2659,6 +2665,7 @@ class GpuAdvancedIncSubtensor1(tensor.AdvancedIncSubtensor1, GpuOp): ...@@ -2659,6 +2665,7 @@ class GpuAdvancedIncSubtensor1(tensor.AdvancedIncSubtensor1, GpuOp):
dtype_%(ind)s *p_index; dtype_%(ind)s *p_index;
int num_indices, j; int num_indices, j;
int ret; int ret;
int broadcast_y;
num_indices = PyArray_SIZE(%(ind)s); num_indices = PyArray_SIZE(%(ind)s);
if ((num_indices - 1) > LONG_MAX) { if ((num_indices - 1) > LONG_MAX) {
...@@ -2674,7 +2681,7 @@ class GpuAdvancedIncSubtensor1(tensor.AdvancedIncSubtensor1, GpuOp): ...@@ -2674,7 +2681,7 @@ class GpuAdvancedIncSubtensor1(tensor.AdvancedIncSubtensor1, GpuOp):
%(out)s = %(x)s; %(out)s = %(x)s;
Py_XINCREF(%(out)s); Py_XINCREF(%(out)s);
} }
broadcast_y = CudaNdarray_DIMS(%(y)s)[0] == 1;
for (j = 0;j < num_indices; j++) { for (j = 0;j < num_indices; j++) {
p_index = (dtype_%(ind)s *)PyArray_GETPTR1(%(ind)s, j); p_index = (dtype_%(ind)s *)PyArray_GETPTR1(%(ind)s, j);
...@@ -2690,11 +2697,20 @@ class GpuAdvancedIncSubtensor1(tensor.AdvancedIncSubtensor1, GpuOp): ...@@ -2690,11 +2697,20 @@ class GpuAdvancedIncSubtensor1(tensor.AdvancedIncSubtensor1, GpuOp):
%(fail)s; %(fail)s;
} }
y_rowind_obj = PyInt_FromLong(j);
row_x = CudaNdarray_Subscript((PyObject*)%(out)s, x_rowind_obj); row_x = CudaNdarray_Subscript((PyObject*)%(out)s, x_rowind_obj);
if (row_x == NULL) {
Py_XDECREF(row_x);
Py_XDECREF(x_rowind_obj);
%(fail)s;
}
if (broadcast_y) {
y_rowind_obj = PyInt_FromLong(0);
} else {
y_rowind_obj = PyInt_FromLong(j);
}
row_y = CudaNdarray_Subscript(py_%(y)s, y_rowind_obj); row_y = CudaNdarray_Subscript(py_%(y)s, y_rowind_obj);
if ((row_x == NULL) || (row_y == NULL)) { if (row_y == NULL) {
Py_XDECREF(row_y); Py_XDECREF(row_y);
Py_XDECREF(row_x); Py_XDECREF(row_x);
Py_XDECREF(y_rowind_obj); Py_XDECREF(y_rowind_obj);
......
...@@ -2277,7 +2277,9 @@ CudaNdarray_Subscript(PyObject * py_self, PyObject * key) ...@@ -2277,7 +2277,9 @@ CudaNdarray_Subscript(PyObject * py_self, PyObject * key)
} }
else else
{ {
PyErr_SetString(PyExc_IndexError, "index out of bounds"); PyErr_Format(PyExc_IndexError,
"index out of bounds. Asked %d, but size of %d",
d_idx, d_dim);
return NULL; return NULL;
} }
...@@ -2449,7 +2451,9 @@ CudaNdarray_Subscript(PyObject * py_self, PyObject * key) ...@@ -2449,7 +2451,9 @@ CudaNdarray_Subscript(PyObject * py_self, PyObject * key)
} }
else else
{ {
PyErr_SetString(PyExc_IndexError, "index out of bounds"); PyErr_Format(PyExc_IndexError,
"index out of bounds. Asked %d for dimensions %d, but size of %d",
d_idx, d, d_dim);
Py_DECREF(rval); Py_DECREF(rval);
return NULL; return NULL;
} }
......
...@@ -914,6 +914,8 @@ class T_subtensor(unittest.TestCase, utt.TestOptimizationMixin): ...@@ -914,6 +914,8 @@ class T_subtensor(unittest.TestCase, utt.TestOptimizationMixin):
data_num_init = data_num_init.reshape(data_shape) data_num_init = data_num_init.reshape(data_shape)
inc_shapes = [data_shape[i:] inc_shapes = [data_shape[i:]
for i in xrange(0, len(data_shape) + 1)] for i in xrange(0, len(data_shape) + 1)]
# Test broadcasting of y.
inc_shapes += [(1,) + inc_shapes[-1][1:]]
for inc_shape in inc_shapes: for inc_shape in inc_shapes:
inc_n_dims = len(inc_shape) inc_n_dims = len(inc_shape)
# We copy the numeric value to be 100% sure there is no # We copy the numeric value to be 100% sure there is no
...@@ -928,6 +930,11 @@ class T_subtensor(unittest.TestCase, utt.TestOptimizationMixin): ...@@ -928,6 +930,11 @@ class T_subtensor(unittest.TestCase, utt.TestOptimizationMixin):
# Symbolic variable with rows to be incremented. # Symbolic variable with rows to be incremented.
idx_var = theano.tensor.vector(dtype='int64') idx_var = theano.tensor.vector(dtype='int64')
n_to_inc = rng.randint(data_shape[0]) n_to_inc = rng.randint(data_shape[0])
if (n_to_inc == 1 and
len(inc_shape) > 0 and
inc_shape[0] == 1 and
data_shape[0] > 1):
n_to_inc = 2
# Corresponding numeric variable. # Corresponding numeric variable.
idx_num = rng.randint(0, data_shape[0], n_to_inc) idx_num = rng.randint(0, data_shape[0], n_to_inc)
idx_num = idx_num.astype('int64') idx_num = idx_num.astype('int64')
...@@ -939,7 +946,8 @@ class T_subtensor(unittest.TestCase, utt.TestOptimizationMixin): ...@@ -939,7 +946,8 @@ class T_subtensor(unittest.TestCase, utt.TestOptimizationMixin):
# `data_shape`: what we actually want is the first # `data_shape`: what we actually want is the first
# shape element to be equal to the number of rows to # shape element to be equal to the number of rows to
# increment. # increment.
if len(inc_shape) == len(data_shape): if len(inc_shape) == len(data_shape) and (
len(inc_shapes) == 0 or inc_shape[0] != 1):
inc_shape = (n_to_inc,) + inc_shape[1:] inc_shape = (n_to_inc,) + inc_shape[1:]
inc_size = numpy.product(inc_shape) inc_size = numpy.product(inc_shape)
# Corresponding numeric variable. # Corresponding numeric variable.
...@@ -958,11 +966,19 @@ class T_subtensor(unittest.TestCase, utt.TestOptimizationMixin): ...@@ -958,11 +966,19 @@ class T_subtensor(unittest.TestCase, utt.TestOptimizationMixin):
data_copy = data_num.copy() data_copy = data_num.copy()
for j, idx in enumerate(idx_num): for j, idx in enumerate(idx_num):
if len(inc_shape) == len(data_shape): if len(inc_shape) == len(data_shape):
# Special case where there is no broadcasting. if inc_shape[0] == 1:
if set_instead_of_inc: # Allow broadcasting of y[0]
data_copy[idx] = inc_num[j] inc_num0 = inc_num[0]
if set_instead_of_inc:
data_copy[idx] = inc_num0
else:
data_copy[idx] += inc_num0
else: else:
data_copy[idx] += inc_num[j] # Special case where there is no broadcasting.
if set_instead_of_inc:
data_copy[idx] = inc_num[j]
else:
data_copy[idx] += inc_num[j]
else: else:
if set_instead_of_inc: if set_instead_of_inc:
data_copy[idx] = inc_num data_copy[idx] = inc_num
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论