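- Flatten the array-indexed leading dimensions of `y` as well as `x`

- Modify `test_advinc_subtensor` to use 2-D index arrays so that the flattening is exercised

- Flatten the array-indexed leading dimensions of `y` as well as `x`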
a099b54c · Shawn Tan · d2cd02d0 · a099b54c · a099b54c
--- a/theano/gpuarray/subtensor.py
+++ b/theano/gpuarray/subtensor.py
@@ -658,14 +658,11 @@ class GpuAdvancedIncSubtensor(HideC, tensor.AdvancedIncSubtensor):
                    except Exception:
                        pass
        x_ = x_.transpose(*transp)
        idx_ = ([slice(None)] * p + nidx[p:])
        x_ = x_.__getitem__(idx_)
        # flatten the array-indexed dimensions
-        shape = ((np.prod(x_.shape[0: p]),) +
+        x_flat = x_.reshape((np.prod(x_.shape[0: p]),) + x_.shape[p:])
-                 x_.shape[p:])
+        y_flat = y.reshape((np.prod(y.shape[0: p]),) + y.shape[p:])
-        x_flat = x_.reshape(shape)
        # build the strides
        strides = [1]
@@ -674,11 +671,12 @@ class GpuAdvancedIncSubtensor(HideC, tensor.AdvancedIncSubtensor):
            strides.insert(0, stride)
        # build the indices and use it
-        take_idx = sum((i * s for i, s in zip(nidx, strides)))
+        take_idx = sum((i * s for i, s in zip(nidx, strides))).flatten()
        k = get_iadd(node.inputs[0], node.inputs[1])
-        y = pygpu.asarray(y, context=x_flat.context)
+        y_flat = pygpu.asarray(y_flat, context=x_flat.context)
        for j, i in enumerate(take_idx):
-            k(x_flat[i], y[j], broadcast=True)
+            k(x_flat[i], y_flat[j], broadcast=True)
        out[0] = x

--- a/theano/gpuarray/tests/test_subtensor.py
+++ b/theano/gpuarray/tests/test_subtensor.py
@@ -78,22 +78,27 @@ class G_subtensorF16(test_subtensor.T_subtensor):
 def test_advinc_subtensor():
-    shp = (3, 3, 3)
+    x_shp = (8, 8, 8)
+    y_shp = (2, 2, 8)
    shared = gpuarray_shared_constructor
-    xval = np.arange(np.prod(shp), dtype='float32').reshape(shp) + 1
+    xval = np.arange(np.prod(x_shp), dtype='float32').reshape(x_shp) + 1
-    yval = np.arange(np.prod(shp[1:]), dtype='float32').reshape(shp[1:])
+    yval = np.arange(np.prod(y_shp), dtype='float32').reshape(y_shp)
-    idx = ([0, 1, 2], [0, 1, 2])
+    idx = ([[0, 1],
+            [2, 3]],
+           [[0, 1],
+            [2, 3]])
+    rep = xval.copy()
+    rep[idx] += yval
    x = shared(xval, name='x')
    y = tensor.tensor(dtype='float32',
-                      broadcastable=(False, False),
+                      broadcastable=(False,) * len(yval.shape),
                      name='y')
    expr = tensor.advanced_inc_subtensor(x, y, *idx)
    f = theano.function([y], expr, mode=mode_with_gpu)
    assert sum([isinstance(node.op, GpuAdvancedIncSubtensor)
                for node in f.maker.fgraph.toposort()]) == 1
    rval = f(yval)
-    rep = xval.copy()
-    rep[idx] += yval
    assert np.allclose(rval, rep)
 >>>>>>> Initial additions for `GpuAdvancedIncSubtensor`