Introduce GpuAdvancedIncSubtensor1_dev20 for the vector_scalar case…

Make the optimizer introduce GpuAdvancedIncSubtensor1_dev20 for the vector_scalar case (x is a vector, y is a scalar) instead of GpuAdvancedIncSubtensor1

Introduce GpuAdvancedIncSubtensor1_dev20 for the vector_scalar case…
d0043caf · Frederic Bastien · 3d716fd6 · d0043caf · d0043caf
--- a/theano/gpuarray/opt.py
+++ b/theano/gpuarray/opt.py
@@ -1036,7 +1036,13 @@ def local_gpua_advanced_incsubtensor(op, context_name, inputs, outputs):
    set_instead_of_inc = op.set_instead_of_inc
    compute_capability = int(context.bin_id[-2])
-    if compute_capability < 2 or x.ndim != 2 or y.ndim != 2:
+    if compute_capability >= 2 and x.ndim == 1 and y.ndim == 0:
+        x = x.dimshuffle(0, 'x')
+        y = y.dimshuffle('x', 'x')
+        ret = GpuAdvancedIncSubtensor1_dev20(
+            set_instead_of_inc=set_instead_of_inc)(x, y, ilist).dimshuffle(0)
+        return ret
+    elif compute_capability < 2 or x.ndim != 2 or y.ndim != 2:
        return GpuAdvancedIncSubtensor1(
            set_instead_of_inc=set_instead_of_inc)
    else:

--- a/theano/gpuarray/tests/test_subtensor.py
+++ b/theano/gpuarray/tests/test_subtensor.py
@@ -66,7 +66,7 @@ def test_advinc_subtensor1():
 def test_advinc_subtensor1_dtype():
    # Test the mixed dtype case
-    shp = (3, 3)
+    shp = (3, 4)
    for dtype1, dtype2 in [('float32', 'int8'), ('float32', 'float64')]:
        shared = gpuarray_shared_constructor
        xval = numpy.arange(numpy.prod(shp), dtype=dtype1).reshape(shp) + 1
@@ -74,7 +74,28 @@ def test_advinc_subtensor1_dtype():
        yval[:] = 10
        x = shared(xval, name='x')
        y = tensor.tensor(dtype=yval.dtype,
-                          broadcastable=(False,) * len(shp),
+                          broadcastable=(False,) * len(yval.shape),
+                          name='y')
+        expr = tensor.advanced_inc_subtensor1(x, y, [0, 2])
+        f = theano.function([y], expr, mode=mode_with_gpu)
+        assert sum([isinstance(node.op, GpuAdvancedIncSubtensor1_dev20)
+                    for node in f.maker.fgraph.toposort()]) == 1
+        rval = f(yval)
+        rep = xval.copy()
+        rep[[0, 2]] += yval
+        assert numpy.allclose(rval, rep)
+def test_advinc_subtensor1_vector_scalar():
+    # Test the case where x is a vector and y a scalar
+    shp = (3,)
+    for dtype1, dtype2 in [('float32', 'int8'), ('float32', 'float64')]:
+        shared = gpuarray_shared_constructor
+        xval = numpy.arange(numpy.prod(shp), dtype=dtype1).reshape(shp) + 1
+        yval = numpy.asarray(10, dtype=dtype2)
+        x = shared(xval, name='x')
+        y = tensor.tensor(dtype=yval.dtype,
+                          broadcastable=(False,) * len(yval.shape),
                          name='y')
        expr = tensor.advanced_inc_subtensor1(x, y, [0, 2])
        f = theano.function([y], expr, mode=mode_with_gpu)