Commit 29b460a4 authored by Arnaud Bergeron

Properly handle cases where y has to be broadcasted in GpuAdvancedIncSubtensor1.

Parent 40a42060
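For context, the behaviour at issue is the one AdvancedIncSubtensor1 already has on the CPU: when `y` has a single row (or fewer dimensions than `x`), it must be broadcast over every selected row of `x` rather than consumed one row per index. A minimal NumPy sketch of that semantics, with illustrative array names that are not part of the patch:

    import numpy as np

    x = np.zeros((5, 3), dtype='float32')
    idx = np.array([0, 2, 2])                        # repeated indices accumulate
    y_rows = np.ones((3, 3), dtype='float32')        # one row of y per index: no broadcasting needed
    y_broadcast = np.ones((1, 3), dtype='float32')   # a single row: must be broadcast over all of idx

    # Reference semantics for the "increment" case (set_instead_of_inc=False):
    np.add.at(x, idx, y_rows)        # adds y_rows[j] to x[idx[j]] for each j
    np.add.at(x, idx, y_broadcast)   # adds the same single row to every x[idx[j]]
    print(x)

The diff below adjusts `perform` so the GPU op distinguishes these two cases explicitly instead of assuming `y` always carries one row per index.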
@@ -406,49 +406,47 @@ class GpuAdvancedIncSubtensor1(HideC, tensor.AdvancedIncSubtensor1):
x, y, idx = inp
out, = out_
if not self.inplace:
x = x.copy()
out[0] = x
if len(idx) == 0:
return
# Make sure idx is not a GpuArray otherwise we cannot use its content
# to index x and y
if isinstance(idx, gpuarray.GpuArray):
idx = numpy.asarray(idx)
if not self.inplace:
x = x.copy()
if self.set_instead_of_inc:
assert y.ndim <= x.ndim # Should be guaranteed by `make_node`
if y.ndim == x.ndim:
assert len(y) == len(idx)
for (j, i) in enumerate(idx):
x[i] = y[j]
else:
for i in idx:
x[i] = y
else:
# If `y` has as many dimensions as `x`, then we want to iterate
# jointly on `x` and `y`. Otherwise, it means `y` should be
# broadcasted to fill all relevant rows of `x`.
assert y.ndim <= x.ndim # Should be guaranteed by `make_node`
if len(idx) == 0:
pass
# if len(y) == 1, we need to broadcast it.
elif y.ndim == x.ndim and len(y) != 1:
if y.ndim == x.ndim and y.shape[0] != 1:
assert len(y) == len(idx)
if self.set_instead_of_inc:
for (j, i) in enumerate(idx):
x[i] = y[j]
else:
k = self.getInplElemwiseAdditionKernel(x[0], y[0])
for (j, i) in enumerate(idx):
k(x[i], y[j], broadcast=False)
k(x[i], y[j], broadcast=True)
else:
if y.ndim == x.ndim:
# First dim is always 1 in this case.
reshaped_y = y.reshape(y.shape[1:])
else:
nb_dims_to_add = (x.ndim - 1) - y.ndim
reshaped_y = y.reshape((1,)*nb_dims_to_add + y.shape)
k = self.getInplElemwiseAdditionKernel(x[0],
reshaped_y)
if self.set_instead_of_inc:
for i in idx:
x[i] = reshaped_y
else:
k = self.getInplElemwiseAdditionKernel(x[0], reshaped_y)
for i in idx:
k(x[i], reshaped_y, broadcast=True)
out[0] = x
class GpuAdvancedIncSubtensor1_dev20(GpuAdvancedIncSubtensor1):
"""Implement AdvancedIncSubtensor1 on the gpu, but use function