Commit 560fb116 authored by Frédéric Bastien

Merge pull request #2326 from lamblin/fix_inc_set_subtensor1

[BUG] Fix bug in {inc,set}_subtensor in advanced1 case
......@@ -398,7 +398,7 @@ import theano and print the config variable, as in:
.. attribute:: config.warn.ignore_bug_before
String value: 'None', 'all', '0.3', '0.4', '0.4.1', '0.5', '0.6'
String value: 'None', 'all', '0.3', '0.4', '0.4.1', '0.5', '0.6', '0.7'
Default: 'None'
......
......@@ -462,6 +462,14 @@ AddConfigVar('warn.reduce_join',
BoolParam(warn_default('0.7')),
in_c_key=False)
# Register the `warn.inc_set_subtensor1` configuration flag.  When True,
# Theano emits a warning on the {inc,set}_subtensor code paths that, before
# version 0.7, could silently return incorrect results for advanced integer
# indexing (indexing with a single int vector or matrix).
# The default is warn_default('0.7') — presumably this honours the user's
# `warn.ignore_bug_before` setting (see the config docs above); confirm
# against warn_default's definition.
# in_c_key=False: the flag only controls Python-side warnings, so it is
# excluded from the C compilation cache key.
AddConfigVar('warn.inc_set_subtensor1',
        ('Warn if previous versions of Theano (before 0.7) could have '
         'given incorrect results for inc_subtensor and set_subtensor '
         'when using some patterns of advanced indexing (indexing with '
         'one vector or matrix of ints).'),
        BoolParam(warn_default('0.7')),
        in_c_key=False)
AddConfigVar('compute_test_value',
("If 'True', Theano will run each op at graph build time, using "
"Constants, SharedVariables and the tag 'test_value' as inputs "
......
......@@ -15,6 +15,7 @@ from theano.gof import Apply, Constant, hashtype, Op, Type, MethodNotDefined
from theano.gof.python25 import maxsize
from theano.printing import pprint
from theano import scalar as scal
from theano.tensor.basic import alloc
from theano.tensor.basic import (addbroadcast, clip, get_scalar_constant_value,
ARange, TensorType, NotScalarConstantError)
from theano.tensor.elemwise import DimShuffle
......@@ -1022,8 +1023,8 @@ def inc_subtensor(x, y, inplace=False, set_instead_of_inc=False,
"subtensor with a %d-dimensional value.") % (x.ndim,
y.ndim))
dim_offset = x.ndim - y.ndim
for dim in range(y.ndim):
dim_offset = x.ndim - y.ndim
if (x.broadcastable[dim + dim_offset]
and not y.broadcastable[dim]):
# It is acceptable to try to increment a subtensor with a
......@@ -1071,23 +1072,77 @@ def inc_subtensor(x, y, inplace=False, set_instead_of_inc=False,
# completely, but the problem is that advanced_inc_subtensor1 can only
# work on the first (outer-most, left-most) dimension of x,
# just like advanced_subtensor1.
# So we call advanced_inc_subtensor1(x.T, i, y), but then we need to
# So we call advanced_inc_subtensor1(x.T, i, y.T) (as we also need to
# transpose y if it is not a scalar or a vector), but then we need to
# return something that has the same shape as x, not as x.T (inner_x).
# So re-apply the outer dimshuffle on the new inc_subtensor,
# and return advanced_inc_subtensor1(x.T, i, y).T.
# and return advanced_inc_subtensor1(x.T, i, y.T).T.
# Get the dimshuffle pattern to apply to y.
x_order = x.owner.op.new_order
y_order = ['x'] * x.ndim
for i, v in enumerate(x_order):
if v != 'x' and (v - dim_offset) >= 0:
y_order[v - dim_offset] = i
# Warn if this code path would have produced wrong results in the past
if config.warn.inc_set_subtensor1:
# Dimshuffle pattern for y that would be equivalent to past code
prev_y_order = ['x'] * (dim_offset) + list(range(y.ndim))
if y_order != prev_y_order:
warnings.warn(
'Although your current code is fine, please note that '
'earlier versions prior to 0.7 (or this development '
'version) may have yielded an incorrect result in '
'this `inc_subtensor` or `set_subtensor` operation. '
'To remove this warning, you can either set the '
'`warn.inc_set_subtensor1` config option to `False`, '
'or `warn.ignore_bug_before` to at least "0.7".',
stacklevel=2)
inner_incsubtensor = inc_subtensor(
inner_x, y,
inner_x,
y.dimshuffle(y_order),
inplace=inplace,
set_instead_of_inc=set_instead_of_inc,
tolerate_inplace_aliasing=tolerate_inplace_aliasing)
return x.owner.op(inner_incsubtensor, *x.owner.inputs[1:])
elif isinstance(x.owner.op, theano.tensor.Reshape):
# This case happens when the indices are not arranged as a vector, but
# as a higher-dimensional array. This is handled by the subtensor
# by flattening this list, taking the subtensor, then reshaping the
# result.
inner_x = x.owner.inputs[0]
# Try to apply inc_subtensor on inner_x.
# If it works, there is no need to reshape, as the inc_subtensor
# will have the same shape as inner_x, which is what we want.
# We also explicitly duplicate y to its broadcasted shape
# before we partially flatten it to inner_x dimension. This is
# not strictly needed in all cases, but it is easier this way.
if y.ndim > 0:
# This if is needed to prevent some useless warning about
# old code bug.
expanded_y = alloc(y, *[x.shape[i] for i in range(x.ndim)])
flattened_y = expanded_y.flatten(inner_x.ndim)
else:
flattened_y = y
# Warn if this code path would have produced wrong results in the past
if config.warn.inc_set_subtensor1:
if inner_x.ndim > 1 and sum(y.broadcastable) > 0:
warnings.warn(
'Although your current code is fine, please note that '
'earlier versions prior to 0.7 (or this development '
'version) may have yielded an incorrect result in '
'this `inc_subtensor` or `set_subtensor` operation. '
'To remove this warning, you can either set the '
'`warn.inc_set_subtensor1` config option to `False`, '
'or `warn.ignore_bug_before` to at least "0.7".',
stacklevel=2)
inner_incsubtensor = inc_subtensor(
inner_x, y.flatten(),
inner_x,
flattened_y,
inplace=inplace,
set_instead_of_inc=set_instead_of_inc,
tolerate_inplace_aliasing=tolerate_inplace_aliasing)
......@@ -1784,9 +1839,14 @@ class AdvancedIncSubtensor1(Op):
# broadcasted to fill all relevant rows of `x`.
assert y.ndim <= x.ndim # Should be guaranteed by `make_node`
if y.ndim == x.ndim:
assert len(y) == len(idx)
for (j, i) in enumerate(idx):
x[i] += y[j]
if len(y) == 1:
# Allow broadcasting of y[0]
for i in idx:
x[i] += y[0]
else:
assert len(y) == len(idx)
for (j, i) in enumerate(idx):
x[i] += y[j]
else:
for i in idx:
x[i] += y
......
......@@ -1037,6 +1037,7 @@ class T_subtensor(unittest.TestCase, utt.TestOptimizationMixin):
m1 = set_subtensor(m[:, i], 0)
m2 = inc_subtensor(m[:, i], 1)
f = theano.function([m, i], [m1, m2])
m_val = rand(5, 7)
......@@ -1052,6 +1053,80 @@ class T_subtensor(unittest.TestCase, utt.TestOptimizationMixin):
assert numpy.allclose(m1_val, m1_ref), (m1_val, m1_ref)
assert numpy.allclose(m2_val, m2_ref), (m2_val, m2_ref)
def test_adv1_inc_sub_notlastdim_1_2dval_broadcast(self):
    """1-d advanced indexing on a non-outer dimension, incrementing or
    setting with a value that is broadcasted along the indexed axis."""
    m = matrix('m')
    # Each case: (symbolic index, index shape, value shape).  The value
    # shapes have broadcastable trailing dimension(s) of size 1.
    cases = [
        (lvector('i'), (4,), (3, 1)),
        (lmatrix('i'), (4, 2), (3, 1, 1)),
    ]
    # This pattern triggers the inc_set_subtensor1 warning; silence it
    # for the duration of the test and restore the flag afterwards.
    saved_flag = config.warn.inc_set_subtensor1
    try:
        config.warn.inc_set_subtensor1 = False
        for idx_var, idx_shape, val_shape in cases:
            indexed = m[:, idx_var]
            set_out = set_subtensor(indexed, numpy.zeros(val_shape))
            inc_out = inc_subtensor(indexed, numpy.ones(val_shape))
            fn = theano.function([m, idx_var], [set_out, inc_out])
            m_val = rand(3, 5)
            idx_val = randint_ranged(min=0, max=4, shape=idx_shape)
            set_ref = m_val.copy()
            inc_ref = m_val.copy()
            set_val, inc_val = fn(m_val, idx_val)
            # Build the numpy reference column by column; repeated
            # indices accumulate increments, matching inc_subtensor.
            for col in idx_val.ravel():
                set_ref[:, col] = 0
                inc_ref[:, col] += 1
            assert numpy.allclose(set_val, set_ref), (set_val, set_ref)
            assert numpy.allclose(inc_val, inc_ref), (inc_val, inc_ref)
    finally:
        config.warn.inc_set_subtensor1 = saved_flag
def test_adv1_inc_sub_notlastdim_1_2dval_no_broadcast(self):
    """1-d advanced indexing on a non-outer dimension, incrementing or
    setting with a full-shape (non-broadcasted) value."""
    m = matrix('m')
    # Each case: (symbolic index, index shape, value shape).  Here the
    # value shape matches the indexed subtensor exactly.
    cases = [
        (lvector('i'), (4,), (3, 4)),
        (lmatrix('i'), (4, 2), (3, 4, 2)),
    ]
    # Silence the inc_set_subtensor1 warning for this pattern and
    # restore the flag afterwards.
    saved_flag = config.warn.inc_set_subtensor1
    try:
        config.warn.inc_set_subtensor1 = False
        for idx_var, idx_shape, val_shape in cases:
            indexed = m[:, idx_var]
            set_out = set_subtensor(indexed, numpy.zeros(val_shape))
            inc_out = inc_subtensor(indexed, numpy.ones(val_shape))
            fn = theano.function([m, idx_var], [set_out, inc_out])
            m_val = rand(3, 5)
            idx_val = randint_ranged(min=0, max=4, shape=idx_shape)
            set_ref = m_val.copy()
            inc_ref = m_val.copy()
            set_val, inc_val = fn(m_val, idx_val)
            # NOTE: iterating idx_val yields scalars for the vector case
            # and whole rows (used as fancy indices) for the matrix case.
            for sub_idx in idx_val:
                set_ref[:, sub_idx] = 0
                inc_ref[:, sub_idx] += 1
            assert numpy.allclose(set_val, set_ref), (set_val, set_ref)
            assert numpy.allclose(inc_val, inc_ref), (inc_val, inc_ref)
    finally:
        config.warn.inc_set_subtensor1 = saved_flag
class TestIncSubtensor1(unittest.TestCase):
# test inc_subtensor
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论