Add opt local_adv_sub1_adv_inc_sub1

This optimization applies two rewrites:

    AdvancedSubtensor1(AdvancedIncSubtensor1(0s, y, idx), idx) -> y
    AdvancedSubtensor1(AdvancedSetSubtensor1(x, y, idx), idx) -> y

See gh-2246. TODO: document the new tag.

Add opt local_adv_sub1_adv_inc_sub1
e89232cb · Frederic · 2e5f6c41 · e89232cb · e89232cb
--- a/theano/tensor/opt.py
+++ b/theano/tensor/opt.py
@@ -2452,6 +2452,40 @@ def local_setsubtensor_of_constants(node):
            return False


+@register_canonicalize("rm_idx_err", "rm_shape_err")
+@register_stabilize("rm_idx_err", "rm_shape_err")
+@gof.local_optimizer([AdvancedSubtensor1])
+def local_adv_sub1_adv_inc_sub1(node):
+    """Remove an AdvancedSubtensor1 that reads back what was just written.
+
+    AdvancedSubtensor1(AdvancedIncSubtensor1(0s, y, idx), idx) -> y
+    AdvancedSubtensor1(AdvancedSetSubtensor1(x, y, idx), idx) -> y
+
+    :note: This opt can remove index errors.  Ideally it would assert that
+        idx values are in range and that x and y have compatible shapes.
+
+    :todo: add an AssertOp so that shape errors are not removed.
+    """
+    if not isinstance(node.op, AdvancedSubtensor1):
+        return
+    out = node.inputs[0]
+    producer = out.owner
+    if not producer or not isinstance(producer.op, AdvancedIncSubtensor1):
+        return
+    # Both ops must use the very same index variable.
+    if node.inputs[1] is not producer.inputs[2]:
+        return
+    # An increment only gives y back when the incremented base is 0.
+    is_set = producer.op.set_instead_of_inc
+    if not is_set and T.extract_constant(producer.inputs[0]) != 0:
+        return
+    y = producer.inputs[1]
+    if y.dtype != node.outputs[0].dtype:
+        # It is possible that y is upcast or downcast to x.dtype.
+        # As we set, or add to 0, casting y is all that is needed.
+        return [T.cast(y, out.dtype)]
+    return [y]
+
 ####################
 # Rebroadcast opts #
 ####################

--- a/theano/tensor/tests/test_opt.py
+++ b/theano/tensor/tests/test_opt.py
@@ -2417,6 +2417,64 @@ class test_local_subtensor_merge(unittest.TestCase):
                        f(x_val, *i_val)


+class test_local_adv_sub1_adv_inc_sub1(unittest.TestCase):
+    """Test the local_adv_sub1_adv_inc_sub1 graph optimization."""
+    def setUp(self):
+        utt.seed_rng()
+        mode = theano.compile.mode.get_default_mode()
+        self.mode = mode.including("local_adv_sub1_adv_inc_sub1")
+
+    def test0(self):
+        for dtype1, dtype2 in [("float32", "float32"), ("float32", "float64"),
+                               ("float64", "float32"), ("float64", "float64")]:
+            x = tensor.matrix(dtype=dtype1)
+            y = tensor.matrix(dtype=dtype2)
+            idx = tensor.ivector()
+
+            dx = numpy.random.rand(4, 5).astype(dtype1)
+            dy = numpy.random.rand(2, 5).astype(dtype2)
+            didx = numpy.asarray([1, 3], "int32")
+
+            # set_subtensor
+            inc = tensor.set_subtensor(x[idx], y)
+            o = inc[idx]
+            f = theano.function([x, y, idx], o, self.mode)
+
+            res = f(dx, dy, didx)
+            assert numpy.allclose(dy, res)
+            topo = f.maker.fgraph.toposort()
+            # NOTE(review): is `opt` always truthy here? If so the else is dead.
+            if opt:
+                assert len(topo) == 1
+                assert isinstance(topo[0].op, (compile.DeepCopyOp, T.Elemwise))
+            else:
+                assert len(topo) == 2
+
+            # inc_subtensor(x[idx], y): x is not 0s, the opt must not apply
+            inc = tensor.inc_subtensor(x[idx], y)
+            o = inc[idx]
+            f = theano.function([x, y, idx], o, self.mode)
+
+            res = f(dx, dy, didx)
+            assert numpy.allclose((dx[didx] + dy), res)
+            topo = f.maker.fgraph.toposort()
+            assert len(topo) == 2
+
+            # inc_subtensor(0[idx], y)
+            inc = tensor.inc_subtensor(x.zeros_like()[idx], y)
+            o = inc[idx]
+            f = theano.function([x, y, idx], o, self.mode)
+
+            res = f(dx, dy, didx)
+            assert numpy.allclose(dy, res)
+            topo = f.maker.fgraph.toposort()
+            if opt:
+                assert len(topo) == 1
+                assert isinstance(topo[0].op, (compile.DeepCopyOp, T.Elemwise))
+            else:
+                assert len(topo) > 2
+
+
 class Test_alloc_zero(unittest.TestCase):
    def setUp(self):
        mode = theano.compile.mode.get_default_mode()