Change optimizer (split AdvancedIncSubtensor1 / AdvancedIncSubtensor lifters) and include more tests

f71035bf · Shawn Tan · 80a1017d · f71035bf · f71035bf
--- a/theano/gpuarray/opt.py
+++ b/theano/gpuarray/opt.py
@@ -1066,36 +1066,39 @@ def local_gpua_advanced_subtensor(op, context_name, inputs, outputs):
 @register_opt('fast_compile')
 @op_lifter([tensor.AdvancedIncSubtensor1])
-@op_lifter([tensor.AdvancedIncSubtensor])
+@register_opt2([tensor.AdvancedIncSubtensor1], 'fast_compile')
-@register_opt2([tensor.AdvancedIncSubtensor1, tensor.AdvancedIncSubtensor], 'fast_compile')
+def local_gpua_advanced_incsubtensor1(op, context_name, inputs, outputs):
-def local_gpua_advanced_incsubtensor(op, context_name, inputs, outputs):
    if isinstance(op, (tensor.AdvancedIncSubtensor1)):
        context = get_context(context_name)
        # This is disabled on non-cuda contexts
        if context.kind != b'cuda':
            return None
-        x, y, ilist = inputs
+    x, y, ilist = inputs
-        set_instead_of_inc = op.set_instead_of_inc
+    set_instead_of_inc = op.set_instead_of_inc
-        compute_capability = int(context.bin_id[-2])
+    compute_capability = int(context.bin_id[-2])
-        if compute_capability >= 2 and x.ndim == 1 and y.ndim == 0:
+    if compute_capability >= 2 and x.ndim == 1 and y.ndim == 0:
-            x = x.dimshuffle(0, 'x')
+        x = x.dimshuffle(0, 'x')
-            y = y.dimshuffle('x', 'x')
+        y = y.dimshuffle('x', 'x')
-            ret = GpuAdvancedIncSubtensor1_dev20(
+        ret = GpuAdvancedIncSubtensor1_dev20(
-                set_instead_of_inc=set_instead_of_inc)(x, y, ilist)
+            set_instead_of_inc=set_instead_of_inc)(x, y, ilist)
-            ret = GpuDimShuffle(ret.type.broadcastable, [0])(ret)
+        ret = GpuDimShuffle(ret.type.broadcastable, [0])(ret)
-            return ret
+        return ret
-        elif compute_capability < 2 or x.ndim != 2 or y.ndim != 2:
+    elif compute_capability < 2 or x.ndim != 2 or y.ndim != 2:
-            return GpuAdvancedIncSubtensor1(
+        return GpuAdvancedIncSubtensor1(
-                set_instead_of_inc=set_instead_of_inc)
+            set_instead_of_inc=set_instead_of_inc)
-        else:
+    else:
-            return GpuAdvancedIncSubtensor1_dev20(
+        return GpuAdvancedIncSubtensor1_dev20(
-                set_instead_of_inc=set_instead_of_inc)
+            set_instead_of_inc=set_instead_of_inc)
-    elif isinstance(op, (tensor.AdvancedIncSubtensor)):
-        return GpuAdvancedIncSubtensor()
+@register_opt('fast_compile')
+@op_lifter([tensor.AdvancedIncSubtensor])
+@register_opt2([tensor.AdvancedIncSubtensor], 'fast_compile')
+def local_gpua_advanced_incsubtensor(op, context_name, inputs, outputs):
+    return GpuAdvancedIncSubtensor()
 @register_inplace()

--- a/theano/gpuarray/tests/test_subtensor.py
+++ b/theano/gpuarray/tests/test_subtensor.py
@@ -78,23 +78,45 @@ class G_subtensorF16(test_subtensor.T_subtensor):
 def test_advinc_subtensor():
-    shp = (3, 3, 3)
+    x_shp = (20, 15, 10, 5)
    shared = gpuarray_shared_constructor
-    xval = np.arange(np.prod(shp), dtype='float32').reshape(shp) + 1
-    yval = np.arange(np.prod(shp[1:]), dtype='float32').reshape(shp[1:])
+    def check(idx, y_val, x_val, true):
-    idx = ([0, 1, 2], [0, 1, 2])
+        x = shared(x_val, name='x')
-    x = shared(xval, name='x')
+        y = tensor.tensor(dtype='float32',
-    y = tensor.tensor(dtype='float32',
+                          broadcastable=(False,) * len(y_val.shape),
-                      broadcastable=(False, False),
+                          name='y')
-                      name='y')
+        sym_idx = [tensor.as_tensor_variable(ix) for ix in idx]
-    expr = tensor.advanced_inc_subtensor(x, y, *idx)
+        expr = tensor.advanced_inc_subtensor(x, y, *sym_idx)
-    f = theano.function([y], expr, mode=mode_with_gpu)
+        f = theano.function([y], expr, mode=mode_with_gpu)
-    assert sum([isinstance(node.op, GpuAdvancedIncSubtensor)
+        assert sum([isinstance(node.op, GpuAdvancedIncSubtensor)
-                for node in f.maker.fgraph.toposort()]) == 1
+                    for node in f.maker.fgraph.toposort()]) == 1
-    rval = f(yval)
+        rval = f(y_val)
-    rep = xval.copy()
+        assert np.allclose(rval, true)
-    rep[idx] += yval
-    assert np.allclose(rval, rep)
+    idxs_y_shp_pairs = [
+        ((0, [1, 3, 5], 1), (3, 5)),
+        (([1, 2, 4, 8],), (4, 15, 10, 5)),
+        (([0, 1, 2], 0, [0, 1, 2]), (3, 3, 5)),
+        (([[0, 1], [2, 3]], [[0, 1], [2, 3]]), (2, 2, 10, 5)),
+    ]
+    for idx, y_shps in idxs_y_shp_pairs:
+        for i in range(len(y_shps) - 1):
+            y_shp = y_shps[i:]
+            x_val = np.arange(np.prod(x_shp), dtype='float32').reshape(x_shp) + 1
+            y_val = np.arange(np.prod(y_shp), dtype='float32').reshape(y_shp) + 1
+            rep = x_val.copy()
+            try:
+                rep[idx] += y_val
+            except ValueError:
+                continue
+            yield check, idx, y_val, x_val, rep
+        x_val = np.arange(np.prod(x_shp), dtype='float32').reshape(x_shp) + 1
+        y_val = np.array(1).astype(np.float32)
+        rep = x_val.copy()
+        rep[idx] += y_val
+        yield check, idx, y_val, x_val, rep
 def test_advinc_subtensor1():
@@ -157,6 +179,7 @@ def test_advinc_subtensor1_vector_scalar():
                          name='y')
        expr = tensor.advanced_inc_subtensor1(x, y, [0, 2])
        f = theano.function([y], expr, mode=mode_with_gpu)
        assert sum([isinstance(node.op, (GpuAdvancedIncSubtensor1_dev20,
                                         GpuAdvancedIncSubtensor1))
                    for node in f.maker.fgraph.toposort()]) == 1