Commit 86cb447e, authored by Frédéric Bastien, committed by GitHub

Merge pull request #5212 from lamblin/fix_subtensor_setsubtensor

Fix issue in local_subtensor_incsubtensor
@@ -1910,14 +1910,22 @@ def local_subtensor_inc_subtensor(node):
         if not x.owner.op.set_instead_of_inc:
             return
-        if x.owner.inputs[2:] == node.inputs[1:] and tuple(x.owner.op.idx_list) == tuple(node.op.idx_list):
-            # if x[idx] and y have the same ndim (and shape), directly return y
-            if x.owner.inputs[0].ndim - (len(node.op.idx_list) - sum([isinstance(idx, slice) for idx in node.op.idx_list])) == x.owner.inputs[1].ndim:
-                return [x.owner.inputs[1]]
-            # else y is broadcastable, return alloc of broadcastable y
-            x_subtensor = node.op(x.owner.inputs[0], *x.owner.inputs[2:])
-            return [T.alloc(x.owner.inputs[1], *x_subtensor.shape)]
+        if (x.owner.inputs[2:] == node.inputs[1:] and
+                tuple(x.owner.op.idx_list) == tuple(node.op.idx_list)):
+            out = node.outputs[0]
+            y = x.owner.inputs[1]
+            # If the dtypes differ, cast y into x.dtype
+            if x.dtype != y.dtype:
+                y = y.astype(x.dtype)
+            if out.type == y.type:
+                # if x[idx] and y have the same type, directly return y
+                return [y]
+            else:
+                # The difference is related to broadcasting pattern
+                assert out.broadcastable != y.broadcastable
+                # We have to alloc y to the shape of x[idx]
+                x_subtensor = node.op(x.owner.inputs[0], *x.owner.inputs[2:])
+                return [T.alloc(y, *x_subtensor.shape)]
         else:
             return
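For context, a minimal sketch (not part of the patch; variable names are illustrative, and it assumes, as the patch implies, that a set_subtensor graph can carry a y whose dtype differs from x's) of the pattern this rewrite targets: Subtensor(SetSubtensor(x, y, idx), idx), i.e. set_subtensor(x[idx], y)[idx], simplifies to y, but y must first be cast to x.dtype when the dtypes differ.

    import numpy
    import theano
    import theano.tensor as T

    x = T.dmatrix('x')   # float64
    y = T.fmatrix('y')   # float32, so y.dtype != x.dtype
    # set_subtensor(x[1:3], y)[1:3] should simplify to y cast to
    # float64, not to y itself.
    out = T.set_subtensor(x[1:3], y)[1:3]

    f = theano.function([x, y], out)
    res = f(numpy.zeros((5, 4)), numpy.ones((2, 4), dtype='float32'))
    assert res.dtype == numpy.dtype('float64')   # keeps x's dtype
    assert numpy.allclose(res, 1.0)

If y instead broadcasts over x[idx] (e.g. a row that fills several rows), out.type and y.type differ only in broadcastable pattern, and the rewrite allocates y to the shape of x[idx] with T.alloc, which is the else branch in the hunk above.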
@@ -989,6 +989,7 @@ class T_subtensor(unittest.TestCase, utt.TestOptimizationMixin):
         all_inputs_num = []
         all_outputs_var = []
         all_outputs_num = []
+        all_params = []
         for set_instead_of_inc in (False, True):
             for inplace in (False, True):
                 for data_shape in ((10,), (4, 5), (1, 2, 3), (4, 5, 6, 7)):
@@ -1021,7 +1022,11 @@ class T_subtensor(unittest.TestCase, utt.TestOptimizationMixin):
                                 data_shape[0] > 1):
                             n_to_inc = 2
                         # Corresponding numeric variable.
-                        idx_num = rng.randint(0, data_shape[0], n_to_inc)
+                        # If set_instead_of_inc, we want to avoid repeating
+                        # indices, as the order is not guaranteed.
+                        idx_num = rng.choice(numpy.arange(data_shape[0]),
+                                             n_to_inc,
+                                             replace=(not set_instead_of_inc))
                         idx_num = idx_num.astype('int64')
                         # Symbolic variable with increment value.
                         inc_var = self.type(
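A standalone numpy illustration (separate from the test suite) of why the sampling switches to rng.choice without replacement when set_instead_of_inc is true: setting through a duplicated index leaves the final value dependent on the order of the writes, which the backends do not guarantee, so a numpy-computed reference could legitimately disagree.

    import numpy

    rng = numpy.random.RandomState(0)

    x = numpy.zeros(4)
    x[[1, 1]] = [10., 20.]   # numpy happens to keep the last write, but
    print(x[1])              # another backend may apply the writes in a
                             # different order, so a "set" with repeated
                             # indices is ambiguous

    # Sampling without replacement guarantees distinct indices, making
    # x[idx_num] = inc deterministic; incrementing stays order-independent,
    # so repeats (replace=True) remain fine in that case.
    idx_num = rng.choice(numpy.arange(4), 2, replace=False)
    assert len(set(idx_num)) == 2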
@@ -1079,6 +1084,7 @@ class T_subtensor(unittest.TestCase, utt.TestOptimizationMixin):
                         all_inputs_num += [data_num, idx_num, inc_num]
                         all_outputs_var.append(output)
                         all_outputs_num.append(data_copy)
+                        all_params.append((set_instead_of_inc, inplace, data_shape, inc_shape))
                         if False:  # Enable for debugging purpose.
                             f = self.function([data_var, idx_var, inc_var],
                                               output, accept_inplace=inplace,
@@ -1105,10 +1111,10 @@ class T_subtensor(unittest.TestCase, utt.TestOptimizationMixin):
         f_outs = f(*all_inputs_num)
         assert len(f_outs) == len(all_outputs_num)
-        for f_out, output_num in izip(f_outs, all_outputs_num):
+        for params, f_out, output_num in izip(all_params, f_outs, all_outputs_num):
             # NB: if this assert fails, it will probably be easier to debug if
             # you enable the debug code above.
-            assert numpy.allclose(f_out, output_num)
+            assert numpy.allclose(f_out, output_num), (params, f_out, output_num)

     def test_adv_constant_arg(self):
         # Test case provided (and bug detected, gh-607) by John Salvatier
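The all_params bookkeeping threaded through the test hunks above exists only to make the final assert self-describing: a tuple attached as the assert message is printed on failure, so the report names the (set_instead_of_inc, inplace, data_shape, inc_shape) combination that broke. A toy sketch with made-up values:

    import numpy

    params = (True, False, (4, 5), (2, 5))   # illustrative only
    f_out = numpy.zeros(3)
    output_num = numpy.ones(3)
    try:
        assert numpy.allclose(f_out, output_num), (params, f_out, output_num)
    except AssertionError as e:
        print(e)   # shows the failing configuration and both arrays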