Commit 189b1352 authored by Frédéric Bastien (avatar: Frédéric Bastien)

Merge pull request #4381 from nouiz/set_subtensor1_crash

Fix crash in GpuAdvancedIncSubtensor1 when unbroadcasting the value.
......@@ -2888,7 +2888,7 @@ class GpuAdvancedIncSubtensor1(tensor.AdvancedIncSubtensor1, GpuOp):
out[0] = x
def c_code_cache_version(self):
return (7,)
return (8,)
def c_code(self, node, name, inputs, outputs, sub):
if (node.inputs[0].ndim != node.inputs[1].ndim):
......@@ -2961,7 +2961,7 @@ class GpuAdvancedIncSubtensor1(tensor.AdvancedIncSubtensor1, GpuOp):
%(fail)s;
}
if (%(set_instead_of_inc)s) {
ret = CudaNdarray_CopyFromCudaNdarray((CudaNdarray *) row_x, (CudaNdarray *) row_y);
ret = CudaNdarray_CopyFromCudaNdarray((CudaNdarray *) row_x, (CudaNdarray *) row_y, 1);
} else {
ret = CudaNdarray_inplace_elemwise(row_x, row_y, IADD);
}
......
......@@ -560,6 +560,25 @@ class T_subtensor(unittest.TestCase, utt.TestOptimizationMixin):
utt.verify_grad(fun, [numpy.random.rand(5, 5).astype(self.dtype),
numpy.random.rand(2, 5).astype(self.dtype)])
# test set_subtensor broadcast
self.dtype = 'float32'
from theano.sandbox.cuda.dnn import dnn_conv
x = tensor.tensor4('x', dtype=self.dtype)
indexes = theano.shared(numpy.int32([1, 2, 3, 4]))
W = self.shared(numpy.random.random(
(10, 10, 3, 3)).astype(self.dtype))
h = x + W
h = tensor.set_subtensor(h[indexes], h[indexes])
g = tensor.grad(h.sum(), W)
N = 2
if theano.config.mode == "FAST_COMPILE" and self.adv_incsub1 is tensor.AdvancedIncSubtensor1:
N = 3
f = self.function([x], g, op=self.adv_incsub1, N=N)
f(numpy.random.random((10, 10, 3, 3)).astype(self.dtype))
def test_adv_sub1_idx_broadcast(self):
# The idx can be a broadcastable vector.
ones = numpy.ones((4, 3), dtype=self.dtype)
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论