Commit 560fb116 authored by Frédéric Bastien

Merge pull request #2326 from lamblin/fix_inc_set_subtensor1

[BUG] Fix bug in {inc,set}_subtensor in advanced1 case
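For reference, a minimal sketch (not part of the commit; names, shapes and index values are illustrative) of the indexing pattern the fix addresses: advanced indexing with one vector of ints on a non-outer-most dimension, incremented with a value that broadcasts along the indexed axis.

    import numpy
    import theano
    import theano.tensor as T

    m = T.matrix('m')
    i = T.lvector('i')
    # Index the second (not outer-most) dimension with a vector of ints,
    # then increment with a value broadcast along the indexed axis.
    y_val = numpy.ones((3, 1), dtype=theano.config.floatX)
    out = T.inc_subtensor(m[:, i], y_val)
    f = theano.function([m, i], out)

    m_val = numpy.zeros((3, 5), dtype=theano.config.floatX)
    i_val = numpy.array([0, 2, 4])
    ref = m_val.copy()
    ref[:, i_val] += 1  # reference NumPy semantics
    assert numpy.allclose(f(m_val, i_val), ref)

Theano versions before 0.7 could return incorrect results for this pattern, which is what this merge fixes.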
doc/library/config.txt
@@ -398,7 +398,7 @@ import theano and print the config variable, as in:

 .. attribute:: config.warn.ignore_bug_before

-    String value: 'None', 'all', '0.3', '0.4', '0.4.1', '0.5', '0.6'
+    String value: 'None', 'all', '0.3', '0.4', '0.4.1', '0.5', '0.6', '0.7'

     Default: 'None'
theano/configdefaults.py
@@ -462,6 +462,14 @@ AddConfigVar('warn.reduce_join',
         BoolParam(warn_default('0.7')),
         in_c_key=False)

+AddConfigVar('warn.inc_set_subtensor1',
+        ('Warn if previous versions of Theano (before 0.7) could have '
+         'given incorrect results for inc_subtensor and set_subtensor '
+         'when using some patterns of advanced indexing (indexing with '
+         'one vector or matrix of ints).'),
+        BoolParam(warn_default('0.7')),
+        in_c_key=False)
+
 AddConfigVar('compute_test_value',
         ("If 'True', Theano will run each op at graph build time, using "
          "Constants, SharedVariables and the tag 'test_value' as inputs "
theano/tensor/subtensor.py
@@ -15,6 +15,7 @@ from theano.gof import Apply, Constant, hashtype, Op, Type, MethodNotDefined
 from theano.gof.python25 import maxsize
 from theano.printing import pprint
 from theano import scalar as scal
+from theano.tensor.basic import alloc
 from theano.tensor.basic import (addbroadcast, clip, get_scalar_constant_value,
                                  ARange, TensorType, NotScalarConstantError)
 from theano.tensor.elemwise import DimShuffle
@@ -1022,8 +1023,8 @@ def inc_subtensor(x, y, inplace=False, set_instead_of_inc=False,
                          "subtensor with a %d-dimensional value.") % (x.ndim,
                                                                       y.ndim))

-    for dim in range(y.ndim):
-        dim_offset = x.ndim - y.ndim
+    dim_offset = x.ndim - y.ndim
+    for dim in range(y.ndim):
         if (x.broadcastable[dim + dim_offset]
                 and not y.broadcastable[dim]):
             # It is acceptable to try to increment a subtensor with a
@@ -1071,23 +1072,77 @@ def inc_subtensor(x, y, inplace=False, set_instead_of_inc=False,
         # completely, but the problem is that advanced_inc_subtensor1 can only
         # work on the first (outer-most, left-most) dimension of x,
         # just like advanced_subtensor1.
-        # So we call advanced_inc_subtensor1(x.T, i, y), but then we need to
+        # So we call advanced_inc_subtensor1(x.T, i, y.T) (as we also need to
+        # transpose y if it is not a scalar or a vector), but then we need to
         # return something that has the same shape as x, not as x.T (inner_x).
         # So re-apply the outer dimshuffle on the new inc_subtensor,
-        # and return advanced_inc_subtensor1(x.T, i, y).T.
+        # and return advanced_inc_subtensor1(x.T, i, y.T).T.
+
+        # Get the dimshuffle pattern to apply to y.
+        x_order = x.owner.op.new_order
+        y_order = ['x'] * x.ndim
+        for i, v in enumerate(x_order):
+            if v != 'x' and (v - dim_offset) >= 0:
+                y_order[v - dim_offset] = i
+
+        # Warn if this code path would have produced wrong results in the past
+        if config.warn.inc_set_subtensor1:
+            # Dimshuffle pattern for y that would match the previous
+            # (buggy) code
+            prev_y_order = ['x'] * dim_offset + list(range(y.ndim))
+            if y_order != prev_y_order:
+                warnings.warn(
+                    'Although your current code is fine, please note that '
+                    'earlier versions prior to 0.7 (or this development '
+                    'version) may have yielded an incorrect result in '
+                    'this `inc_subtensor` or `set_subtensor` operation. '
+                    'To remove this warning, you can either set the '
+                    '`warn.inc_set_subtensor1` config option to `False`, '
+                    'or `warn.ignore_bug_before` to at least "0.7".',
+                    stacklevel=2)
+
         inner_incsubtensor = inc_subtensor(
-            inner_x, y,
+            inner_x,
+            y.dimshuffle(y_order),
             inplace=inplace,
             set_instead_of_inc=set_instead_of_inc,
             tolerate_inplace_aliasing=tolerate_inplace_aliasing)
         return x.owner.op(inner_incsubtensor, *x.owner.inputs[1:])
     elif isinstance(x.owner.op, theano.tensor.Reshape):
+        # This case happens when the indices are not arranged as a vector,
+        # but as a higher-dimensional array. It is handled by flattening
+        # the index array, taking the subtensor, then reshaping the result.
         inner_x = x.owner.inputs[0]
         # Try to apply inc_subtensor on inner_x.
         # If it works, there is no need to reshape, as the inc_subtensor
         # will have the same shape as inner_x, which is what we want.
+        # We also explicitly duplicate y to its broadcasted shape before
+        # partially flattening it to inner_x's number of dimensions. This
+        # is not strictly needed in all cases, but it is easier this way.
+        if y.ndim > 0:
+            # This `if` is needed to avoid a spurious warning about the
+            # old code's bug.
+            expanded_y = alloc(y, *[x.shape[i] for i in range(x.ndim)])
+            flattened_y = expanded_y.flatten(inner_x.ndim)
+        else:
+            flattened_y = y
+
+        # Warn if this code path would have produced wrong results in the past
+        if config.warn.inc_set_subtensor1:
+            if inner_x.ndim > 1 and sum(y.broadcastable) > 0:
+                warnings.warn(
+                    'Although your current code is fine, please note that '
+                    'earlier versions prior to 0.7 (or this development '
+                    'version) may have yielded an incorrect result in '
+                    'this `inc_subtensor` or `set_subtensor` operation. '
+                    'To remove this warning, you can either set the '
+                    '`warn.inc_set_subtensor1` config option to `False`, '
+                    'or `warn.ignore_bug_before` to at least "0.7".',
+                    stacklevel=2)
+
         inner_incsubtensor = inc_subtensor(
-            inner_x, y.flatten(),
+            inner_x,
+            flattened_y,
             inplace=inplace,
             set_instead_of_inc=set_instead_of_inc,
             tolerate_inplace_aliasing=tolerate_inplace_aliasing)
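To make the `y_order` computation above concrete, here is a plain-Python sketch; the helper name is made up, but the loop body mirrors the added lines:

    def y_dimshuffle_pattern(x_order, x_ndim, y_ndim):
        # Build the pattern that realigns y with inner_x, inserting
        # broadcastable ('x') axes where needed.
        dim_offset = x_ndim - y_ndim
        y_order = ['x'] * x_ndim
        for i, v in enumerate(x_order):
            if v != 'x' and (v - dim_offset) >= 0:
                y_order[v - dim_offset] = i
        return y_order

    # m[:, i] is built as a transpose (DimShuffle pattern (1, 0)) of an
    # advanced_subtensor1, so a 2-d y must be transposed as well:
    assert y_dimshuffle_pattern((1, 0), x_ndim=2, y_ndim=2) == [1, 0]
    # A 1-d y gains a broadcastable axis and becomes a column:
    assert y_dimshuffle_pattern((1, 0), x_ndim=2, y_ndim=1) == [0, 'x']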
@@ -1784,6 +1839,11 @@ class AdvancedIncSubtensor1(Op):
             # broadcasted to fill all relevant rows of `x`.
             assert y.ndim <= x.ndim   # Should be guaranteed by `make_node`
             if y.ndim == x.ndim:
+                if len(y) == 1:
+                    # Allow broadcasting of y[0]
+                    for i in idx:
+                        x[i] += y[0]
+                else:
                     assert len(y) == len(idx)
                     for (j, i) in enumerate(idx):
                         x[i] += y[j]
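A NumPy-only sketch of the runtime behaviour this adds to `perform`: when `y` has as many dimensions as `x` but length 1 along the first axis, its single row is broadcast over every index instead of being paired with `idx` one-to-one:

    import numpy

    x = numpy.zeros((4, 3))
    idx = [0, 2, 3]

    y = numpy.ones((1, 3))                # len(y) == 1: broadcast y[0]
    for i in idx:
        x[i] += y[0]

    y2 = numpy.arange(9.0).reshape(3, 3)  # len(y2) == len(idx): zipped
    for j, i in enumerate(idx):
        x[i] += y2[j]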
theano/tensor/tests/test_subtensor.py
@@ -1037,6 +1037,7 @@ class T_subtensor(unittest.TestCase, utt.TestOptimizationMixin):
         m1 = set_subtensor(m[:, i], 0)
         m2 = inc_subtensor(m[:, i], 1)
         f = theano.function([m, i], [m1, m2])
+
         m_val = rand(5, 7)
@@ -1052,6 +1053,80 @@ class T_subtensor(unittest.TestCase, utt.TestOptimizationMixin):
         assert numpy.allclose(m1_val, m1_ref), (m1_val, m1_ref)
         assert numpy.allclose(m2_val, m2_ref), (m2_val, m2_ref)

+    def test_adv1_inc_sub_notlastdim_1_2dval_broadcast(self):
+        # Test taking 1-dimensional advanced indexing over a dimension
+        # that is not the first (outer-most) one, and incrementing or
+        # setting it with a value that needs broadcasting.
+        m = matrix('m')
+
+        # Test for both vector and matrix as index
+        sym_i = (lvector('i'), lmatrix('i'))
+        shape_i = ((4,), (4, 2))
+        shape_val = ((3, 1), (3, 1, 1))
+
+        # Disable the warning emitted for that case
+        orig_warn = config.warn.inc_set_subtensor1
+        try:
+            config.warn.inc_set_subtensor1 = False
+            for i, shp_i, shp_v in zip(sym_i, shape_i, shape_val):
+                sub_m = m[:, i]
+                m1 = set_subtensor(sub_m, numpy.zeros(shp_v))
+                m2 = inc_subtensor(sub_m, numpy.ones(shp_v))
+                f = theano.function([m, i], [m1, m2])
+
+                m_val = rand(3, 5)
+                i_val = randint_ranged(min=0, max=4, shape=shp_i)
+                m1_ref = m_val.copy()
+                m2_ref = m_val.copy()
+
+                m1_val, m2_val = f(m_val, i_val)
+                for idx in i_val.ravel():
+                    m1_ref[:, idx] = 0
+                    m2_ref[:, idx] += 1
+
+                assert numpy.allclose(m1_val, m1_ref), (m1_val, m1_ref)
+                assert numpy.allclose(m2_val, m2_ref), (m2_val, m2_ref)
+        finally:
+            config.warn.inc_set_subtensor1 = orig_warn
+
+    def test_adv1_inc_sub_notlastdim_1_2dval_no_broadcast(self):
+        # Test taking 1-dimensional advanced indexing over a dimension
+        # that is not the first (outer-most) one, and incrementing or
+        # setting it with a value that needs no broadcasting.
+        m = matrix('m')
+
+        # Test for both vector and matrix as index
+        sym_i = (lvector('i'), lmatrix('i'))
+        shape_i = ((4,), (4, 2))
+        shape_val = ((3, 4), (3, 4, 2))
+
+        # Disable the warning emitted for that case
+        orig_warn = config.warn.inc_set_subtensor1
+        try:
+            config.warn.inc_set_subtensor1 = False
+            for i, shp_i, shp_v in zip(sym_i, shape_i, shape_val):
+                sub_m = m[:, i]
+                m1 = set_subtensor(sub_m, numpy.zeros(shp_v))
+                m2 = inc_subtensor(sub_m, numpy.ones(shp_v))
+                f = theano.function([m, i], [m1, m2])
+
+                m_val = rand(3, 5)
+                i_val = randint_ranged(min=0, max=4, shape=shp_i)
+                m1_ref = m_val.copy()
+                m2_ref = m_val.copy()
+
+                m1_val, m2_val = f(m_val, i_val)
+                for idx in i_val:
+                    m1_ref[:, idx] = 0
+                    m2_ref[:, idx] += 1
+
+                assert numpy.allclose(m1_val, m1_ref), (m1_val, m1_ref)
+                assert numpy.allclose(m2_val, m2_ref), (m2_val, m2_ref)
+        finally:
+            config.warn.inc_set_subtensor1 = orig_warn
 class TestIncSubtensor1(unittest.TestCase):
     # test inc_subtensor
...