提交 8571cb47 authored 作者: Iulian Vlad Serban's avatar Iulian Vlad Serban

Continued work on adding stack traces to optimizations #3018

上级 2ebc24ff
...@@ -199,7 +199,7 @@ optdb.register('merge1', gof.MergeOptimizer(), ...@@ -199,7 +199,7 @@ optdb.register('merge1', gof.MergeOptimizer(),
# rearranges elemwise expressions # rearranges elemwise expressions
optdb.register('canonicalize', gof.EquilibriumDB(ignore_newtrees=False), optdb.register('canonicalize', gof.EquilibriumDB(ignore_newtrees=False),
1, 'fast_run', 'fast_compile') 1, 'fast_run', 'fast_compile', 'canonicalize_db')
# Register in the canonizer Equilibrium as a clean up opt the merge opt. # Register in the canonizer Equilibrium as a clean up opt the merge opt.
# Without this, as the equilibrium have ignore_newtrees=False, we # Without this, as the equilibrium have ignore_newtrees=False, we
# won't merge all nodes if it is set as a global optimizer with # won't merge all nodes if it is set as a global optimizer with
......
...@@ -1841,7 +1841,7 @@ def local_subtensor_make_vector(node): ...@@ -1841,7 +1841,7 @@ def local_subtensor_make_vector(node):
elif isinstance(idx, Variable): elif isinstance(idx, Variable):
if idx.ndim == 0: if idx.ndim == 0:
# if it is a constant we can do something with it # if it is a constant we can do something with it
try: try:
v = get_scalar_constant_value(idx) v = get_scalar_constant_value(idx)
if isinstance(v, numpy.integer): if isinstance(v, numpy.integer):
# Python 2.4 wants to index only with Python integers # Python 2.4 wants to index only with Python integers
...@@ -1851,6 +1851,8 @@ def local_subtensor_make_vector(node): ...@@ -1851,6 +1851,8 @@ def local_subtensor_make_vector(node):
ret = [x.owner.inputs[v]] ret = [x.owner.inputs[v]]
except IndexError: except IndexError:
raise NotScalarConstantError("Bad user graph!") raise NotScalarConstantError("Bad user graph!")
# Copy over stack trace from previous output to new output
return ret return ret
except NotScalarConstantError: except NotScalarConstantError:
pass pass
...@@ -2960,8 +2962,20 @@ def local_subtensor_of_dot(node): ...@@ -2960,8 +2962,20 @@ def local_subtensor_of_dot(node):
a_sub = a.__getitem__(tuple(a_indices)) a_sub = a.__getitem__(tuple(a_indices))
b_sub = b.__getitem__(tuple(b_indices)) if b_indices else b b_sub = b.__getitem__(tuple(b_indices)) if b_indices else b
return [T.dot(a_sub, b_sub)] # Copy over previous output stacktrace to a_sub and b_sub,
# because an error in the subtensor operation (e.g. an index error)
# on either a or b must correspond to an error in the
# subtensor operation on their dot product.
copy_stack_trace(node.outputs[0], [a_sub, b_sub])
# Copy over previous output stacktrace and previous dot product stacktrace,
# because an error here may correspond to an error in either the original
# dot product, or in the dot product after the subtensor operation.
r = T.dot(a_sub, b_sub)
copy_stack_trace([node.outputs[0], node.inputs[0]], r)
return [r]
@register_canonicalize @register_canonicalize
...@@ -3016,6 +3030,11 @@ def local_IncSubtensor_serialize(node): ...@@ -3016,6 +3030,11 @@ def local_IncSubtensor_serialize(node):
new_inputs = ([i for i in node.inputs if not movable(i)] + new_inputs = ([i for i in node.inputs if not movable(i)] +
[mi.owner.inputs[0] for mi in movable_inputs]) [mi.owner.inputs[0] for mi in movable_inputs])
new_add = T.add(*new_inputs) new_add = T.add(*new_inputs)
# Copy over stacktrace from original output, as an error
# (e.g. an index error) in this add operation should
# correspond to an error in the original add operation.
copy_stack_trace(node.outputs[0], new_add)
# stack up the new incsubtensors # stack up the new incsubtensors
tip = new_add tip = new_add
...@@ -3023,6 +3042,11 @@ def local_IncSubtensor_serialize(node): ...@@ -3023,6 +3042,11 @@ def local_IncSubtensor_serialize(node):
assert tip.type == o_type assert tip.type == o_type
assert tip.type == mi.owner.inputs[0].type assert tip.type == mi.owner.inputs[0].type
tip = mi.owner.op(tip, *mi.owner.inputs[1:]) tip = mi.owner.op(tip, *mi.owner.inputs[1:])
# Copy over stacktrace from outputs of the original
# "movable" operation to the new operation.
# Julian: Do we want to also include the stacktrace of the output (node.outputs[0])?
copy_stack_trace(mi.owner.outputs, tip)
return [tip] return [tip]
# print incsub_inputs, [id(i.owner.inputs[0]) for i in incsub_inputs] # print incsub_inputs, [id(i.owner.inputs[0]) for i in incsub_inputs]
...@@ -3052,6 +3076,11 @@ def local_inplace_setsubtensor(node): ...@@ -3052,6 +3076,11 @@ def local_inplace_setsubtensor(node):
set_instead_of_inc=node.op.set_instead_of_inc, set_instead_of_inc=node.op.set_instead_of_inc,
destroyhandler_tolerate_aliased=dta) destroyhandler_tolerate_aliased=dta)
new_node = new_op(*node.inputs) new_node = new_op(*node.inputs)
# Copy stacktrace from original outputs to new outputs.
# This should be sensible, because the new operation is the
# same as the old one, but now with different attributes?
# Julian: Pascal, is this correct?
copy_stack_trace(node.outputs, new_node)
return [new_node] return [new_node]
return False return False
compile.optdb.register('local_inplace_setsubtensor', compile.optdb.register('local_inplace_setsubtensor',
...@@ -3070,6 +3099,12 @@ def local_inplace_incsubtensor1(node): ...@@ -3070,6 +3099,12 @@ def local_inplace_incsubtensor1(node):
if isinstance(node.op, AdvancedIncSubtensor1) and not node.op.inplace: if isinstance(node.op, AdvancedIncSubtensor1) and not node.op.inplace:
new_op = node.op.clone_inplace() new_op = node.op.clone_inplace()
new_node = new_op(*node.inputs) new_node = new_op(*node.inputs)
# Copy stacktrace from original outputs to new outputs.
# This should be sensible, because the new operation is the
# same as the old one, but now with different attributes?
# Julian: same as above, is this correct?
copy_stack_trace(node.outputs, new_node)
return [new_node] return [new_node]
return False return False
compile.optdb.register('local_inplace_incsubtensor1', compile.optdb.register('local_inplace_incsubtensor1',
...@@ -3104,6 +3139,8 @@ def local_incsubtensor_of_zeros(node): ...@@ -3104,6 +3139,8 @@ def local_incsubtensor_of_zeros(node):
pass pass
if replace: if replace:
# No need to copy over the stacktrace,
# because x should already have a stacktrace
return [x] return [x]
else: else:
return False return False
...@@ -3138,6 +3175,9 @@ def local_setsubtensor_of_constants(node): ...@@ -3138,6 +3175,9 @@ def local_setsubtensor_of_constants(node):
if (replace_x is not None and if (replace_x is not None and
replace_y is not None and replace_y is not None and
replace_x == replace_y): replace_x == replace_y):
# No need to copy over the stacktrace,
# because x should already have a stacktrace
return [x] return [x]
else: else:
return False return False
...@@ -3184,7 +3224,13 @@ def local_adv_sub1_adv_inc_sub1(node): ...@@ -3184,7 +3224,13 @@ def local_adv_sub1_adv_inc_sub1(node):
return [y] return [y]
# It is possible that y is upcast or downcast to x.dtype. # It is possible that y is upcast or downcast to x.dtype.
# In all case, as we set or add with 0, we can just cast y. # In all case, as we set or add with 0, we can just cast y.
return [T.cast(y, node.outputs[0].dtype)] r = T.cast(y, node.outputs[0].dtype)
# Copy over stacktrace from before casting, since
# we don't expect problems in the casting operation,
# and any problems in the indexing would have been spotted above.
copy_stack_trace(y, r)
return [r]
@register_specialize @register_specialize
...@@ -3287,7 +3333,14 @@ def local_useless_inc_subtensor_alloc(node): ...@@ -3287,7 +3333,14 @@ def local_useless_inc_subtensor_alloc(node):
msg = '`x[i]` and `y` do not have the same shape.' msg = '`x[i]` and `y` do not have the same shape.'
z = Assert(msg)(z, *cond) z = Assert(msg)(z, *cond)
return [node.op(x, z, *i)] r = node.op(x, z, *i)
# Copy over stacktrace from previous output, since
# we don't expect problems when removing the intermediate
# alloc operation and so we still want to point at the line
# of the inc_subtensor operation.
copy_stack_trace(node.outputs, r)
return [r]
#################### ####################
...@@ -3306,6 +3359,8 @@ def local_useless_rebroadcast(node): ...@@ -3306,6 +3359,8 @@ def local_useless_rebroadcast(node):
x = node.inputs[0] x = node.inputs[0]
if numpy.all(x.broadcastable == node.outputs[0].broadcastable): if numpy.all(x.broadcastable == node.outputs[0].broadcastable):
# No broadcastable flag was modified # No broadcastable flag was modified
# No need to copy over stack trace,
# because x should already have a stack trace.
return [x] return [x]
else: else:
# Keep the flags that modify something # Keep the flags that modify something
...@@ -3317,7 +3372,10 @@ def local_useless_rebroadcast(node): ...@@ -3317,7 +3372,10 @@ def local_useless_rebroadcast(node):
# All flags are useful # All flags are useful
return return
else: else:
return [T.Rebroadcast(*list(new_axis.items()))(x)] r = T.Rebroadcast(*list(new_axis.items()))(x)
# Copy over stacktrace from previous output
copy_stack_trace(node.outputs, r)
return [r]
@register_canonicalize @register_canonicalize
......
...@@ -1622,6 +1622,11 @@ def test_local_useless_slice(): ...@@ -1622,6 +1622,11 @@ def test_local_useless_slice():
subtens = apply_node.op subtens = apply_node.op
assert not any(isinstance(idx, slice) for idx in subtens.idx_list), "Slice should be gone" assert not any(isinstance(idx, slice) for idx in subtens.idx_list), "Slice should be gone"
# Now test that the stack trace is copied over properly,
# before and after optimization. # before and after optimization.
assert hasattr(f_unopt.outputs[0].variable.tag, 'trace')
assert hasattr(f_opt.outputs[0].variable.tag, 'trace')
# test a 4d tensor # test a 4d tensor
z = tensor.tensor4('z') z = tensor.tensor4('z')
o2 = z[1, :, :, 1] o2 = z[1, :, :, 1]
...@@ -1638,6 +1643,10 @@ def test_local_useless_slice(): ...@@ -1638,6 +1643,10 @@ def test_local_useless_slice():
subtens = apply_node.op subtens = apply_node.op
assert not any(isinstance(idx, slice) for idx in subtens.idx_list) assert not any(isinstance(idx, slice) for idx in subtens.idx_list)
# Finally, test that the stack trace is copied over properly,
# before and after optimization. # before and after optimization.
assert hasattr(f_opt_check.outputs[0].variable.tag, 'trace')
assert hasattr(f_opt_check_apply.outputs[0].variable.tag, 'trace')
def test_local_useless_inc_subtensor(): def test_local_useless_inc_subtensor():
x = tensor.matrix('x') x = tensor.matrix('x')
...@@ -1851,17 +1860,23 @@ class test_local_subtensor_make_vector(unittest.TestCase): ...@@ -1851,17 +1860,23 @@ class test_local_subtensor_make_vector(unittest.TestCase):
def test_stacktrace(self): def test_stacktrace(self):
x, y, z = tensor.lscalars('xyz') x, y, z = tensor.lscalars('xyz')
v = make_vector(x, y, z) v = make_vector(x, y, z)
#mode = theano.compile.mode.get_default_mode().including("local_subtensor_make_vector")
# Compile function using only the 'local_subtensor_make_vector' optimization,
# which requires us to add the 'canonicalize' phase.
mode = theano.compile.mode.Mode(optimizer=None).including('canonicalize_db').including("local_subtensor_make_vector")
f = function([x, y, z], v[0], mode=mode)
# Check stacktrace was copied over correctly after opt was applied
self.assertTrue(hasattr(f.outputs[0].variable.tag, 'trace'))
#import ipdb; ipdb.set_trace()
# Compile function using all optimizations in fast_compile mode,
# including the 'local_subtensor_make_vector' optimization
mode = theano.compile.mode.get_mode('FAST_COMPILE').including("local_subtensor_make_vector") mode = theano.compile.mode.get_mode('FAST_COMPILE').including("local_subtensor_make_vector")
f = function([x, y, z], v[0], mode=mode) f = function([x, y, z], v[0], mode=mode)
# TODO Pascal is there some way I can disable ALL optimizations except the 'local_subtensor_make_vector' opt?
# Right now there is some other optimization removing the stack trace
print ('Before optimization')
print (v[0].tag)
print ('After optimization')
print (f.outputs[0].tag)
# Check stacktrace was copied over correctly after opt was applied # Check stacktrace was copied over correctly after opt was applied
#self.assertTrue(hasattr(f.outputs[0].tag, 'trace')) self.assertTrue(hasattr(f.outputs[0].variable.tag, 'trace'))
class test_local_subtensor_lift(unittest.TestCase): class test_local_subtensor_lift(unittest.TestCase):
def _verify_stack_trace(self, f): def _verify_stack_trace(self, f):
...@@ -2661,6 +2676,32 @@ class test_local_adv_sub1_adv_inc_sub1(unittest.TestCase): ...@@ -2661,6 +2676,32 @@ class test_local_adv_sub1_adv_inc_sub1(unittest.TestCase):
self.assertRaises((AssertionError, ValueError), self.assertRaises((AssertionError, ValueError),
f, dx, dy, [1]) f, dx, dy, [1])
def test_stacktrace(self):
x = tensor.matrix("x")
y = tensor.matrix("y")
idx = tensor.ivector()
dx = numpy.random.rand(4, 5).astype(config.floatX)
dy = numpy.random.rand(2, 5).astype(config.floatX)
didx = numpy.asarray([1, 3], "int32")
# set_subtensor
inc = tensor.set_subtensor(x[idx], y)
o = inc[idx]
# Compile function using only the 'local_adv_sub1_adv_inc_sub1' optimization,
# which requires us to add the 'canonicalize' phase.
mode = theano.compile.mode.Mode(optimizer=None).including('canonicalize').including("local_adv_sub1_adv_inc_sub1")
f = theano.function([x, y, idx], o, self.mode)
# Check stacktrace was copied over correctly after opt was applied
self.assertTrue(hasattr(f.outputs[0].variable.tag, 'trace'))
# Compile function using all optimizations in fast_compile mode,
# including the 'local_adv_sub1_adv_inc_sub1' optimization
mode = theano.compile.mode.get_mode('FAST_COMPILE').including("local_adv_sub1_adv_inc_sub1")
f = theano.function([x, y, idx], o, self.mode)
# Check stacktrace was copied over correctly after opt was applied
self.assertTrue(hasattr(f.outputs[0].variable.tag, 'trace'))
class Test_alloc_zero(unittest.TestCase): class Test_alloc_zero(unittest.TestCase):
def setUp(self): def setUp(self):
...@@ -2861,7 +2902,11 @@ def test_local_IncSubtensor_serialize(): ...@@ -2861,7 +2902,11 @@ def test_local_IncSubtensor_serialize():
tensor.AdvancedIncSubtensor1)) tensor.AdvancedIncSubtensor1))
for inp in a.inputs]) for inp in a.inputs])
# Now test that the stack trace is copied over properly,
# if we return the gradients. We need to use same mode as before.
f = theano.function([i, j, t], dW, mode=mode)
assert hasattr(f.outputs[0].variable.tag, 'trace')
def test_local_set_to_inc_subtensor(): def test_local_set_to_inc_subtensor():
v = theano.tensor.fmatrix() v = theano.tensor.fmatrix()
s = v[[2, 1]] s = v[[2, 1]]
...@@ -2890,7 +2935,12 @@ def test_local_set_to_inc_subtensor(): ...@@ -2890,7 +2935,12 @@ def test_local_set_to_inc_subtensor():
utt.assert_allclose(r1, r2) utt.assert_allclose(r1, r2)
# Finally, test that the stack trace is copied over properly,
# before and after optimization.
assert hasattr(f1.outputs[0].variable.tag, 'trace')
assert hasattr(f2.outputs[0].variable.tag, 'trace')
def test_local_subtensor_of_dot(): def test_local_subtensor_of_dot():
m1 = theano.tensor.matrix() m1 = theano.tensor.matrix()
m2 = theano.tensor.matrix() m2 = theano.tensor.matrix()
...@@ -2922,10 +2972,16 @@ def test_local_subtensor_of_dot(): ...@@ -2922,10 +2972,16 @@ def test_local_subtensor_of_dot():
f = theano.function([m1, m2, idx], theano.dot(m1, m2)[idx, 1:4, :, idx:], mode=mode) f = theano.function([m1, m2, idx], theano.dot(m1, m2)[idx, 1:4, :, idx:], mode=mode)
assert test_equality(f(d1, d2, 1), numpy.dot(d1, d2)[1, 1:4, :, 1:]) assert test_equality(f(d1, d2, 1), numpy.dot(d1, d2)[1, 1:4, :, 1:])
# Now test that the stack trace is copied over properly,
# if we return the gradients. We need to use the same mode as before.
assert hasattr(f.outputs[0].variable.tag, 'trace')
f = theano.function([m1, m2, idx], theano.dot(m1, m2)[1:4, :, idx:, idx], mode=mode) f = theano.function([m1, m2, idx], theano.dot(m1, m2)[1:4, :, idx:, idx], mode=mode)
assert test_equality(f(d1, d2, 1), numpy.dot(d1, d2)[1:4, :, 1:, 1]) assert test_equality(f(d1, d2, 1), numpy.dot(d1, d2)[1:4, :, 1:, 1])
# Now test that the stack trace is copied over properly,
# if we return the gradients. We need to use same mode as before.
assert hasattr(f.outputs[0].variable.tag, 'trace')
class Test_local_elemwise_alloc(unittest.TestCase): class Test_local_elemwise_alloc(unittest.TestCase):
dtype = config.floatX dtype = config.floatX
...@@ -3428,6 +3484,11 @@ class Test_local_useless_elemwise_comparison(unittest.TestCase): ...@@ -3428,6 +3484,11 @@ class Test_local_useless_elemwise_comparison(unittest.TestCase):
class Test_local_useless_alloc(unittest.TestCase): class Test_local_useless_alloc(unittest.TestCase):
def _verify_stack_trace(self, f):
for output in f.outputs:
# Check stacktrace was copied over correctly after opt was applied
self.assertTrue(hasattr(output.variable.tag, 'trace'))
def setUp(self): def setUp(self):
self.rng = numpy.random.RandomState(utt.fetch_seed()) self.rng = numpy.random.RandomState(utt.fetch_seed())
...@@ -3448,6 +3509,8 @@ class Test_local_useless_alloc(unittest.TestCase): ...@@ -3448,6 +3509,8 @@ class Test_local_useless_alloc(unittest.TestCase):
if isinstance(mode_opt, compile.DebugMode): if isinstance(mode_opt, compile.DebugMode):
self.assertRaises(ValueError, f) self.assertRaises(ValueError, f)
self._verify_stack_trace(f)
def test1(self): def test1(self):
# Test that alloc never gets instantiated during optimization # Test that alloc never gets instantiated during optimization
mode = mode_opt.excluding('local_useless_alloc') mode = mode_opt.excluding('local_useless_alloc')
...@@ -3461,6 +3524,8 @@ class Test_local_useless_alloc(unittest.TestCase): ...@@ -3461,6 +3524,8 @@ class Test_local_useless_alloc(unittest.TestCase):
op_classes = [node.op.__class__ for node in f.maker.fgraph.toposort()] op_classes = [node.op.__class__ for node in f.maker.fgraph.toposort()]
assert tensor.Alloc not in op_classes assert tensor.Alloc not in op_classes
self._verify_stack_trace(f)
def test2(self): def test2(self):
# Test that alloc never gets instantiated during optimization # Test that alloc never gets instantiated during optimization
mode = mode_opt.excluding('local_useless_alloc') mode = mode_opt.excluding('local_useless_alloc')
...@@ -3479,10 +3544,17 @@ class Test_local_useless_alloc(unittest.TestCase): ...@@ -3479,10 +3544,17 @@ class Test_local_useless_alloc(unittest.TestCase):
# in op_classes and we have to change the assert. # in op_classes and we have to change the assert.
assert tensor.Alloc in op_classes assert tensor.Alloc in op_classes
self._verify_stack_trace(f)
class Test_local_useless_inc_subtensor_alloc(unittest.TestCase): class Test_local_useless_inc_subtensor_alloc(unittest.TestCase):
opt_name = 'local_useless_inc_subtensor_alloc' opt_name = 'local_useless_inc_subtensor_alloc'
def _verify_stack_trace(self, f):
for output in f.outputs:
# Check stacktrace was copied over correctly after opt was applied
self.assertTrue(hasattr(output.variable.tag, 'trace'))
def setUp(self): def setUp(self):
# The optimization requires the shape feature so we need to compile in # The optimization requires the shape feature so we need to compile in
# FAST_RUN mode. # FAST_RUN mode.
...@@ -3519,6 +3591,10 @@ class Test_local_useless_inc_subtensor_alloc(unittest.TestCase): ...@@ -3519,6 +3591,10 @@ class Test_local_useless_inc_subtensor_alloc(unittest.TestCase):
r2 = f2(x_value, i_value, y_value) r2 = f2(x_value, i_value, y_value)
utt.assert_allclose(r1, r2) utt.assert_allclose(r1, r2)
self._verify_stack_trace(f1)
self._verify_stack_trace(f2)
def test_advanced_inc_subtensor1(self): def test_advanced_inc_subtensor1(self):
if tensor.inplace_increment is None: if tensor.inplace_increment is None:
...@@ -3548,6 +3624,9 @@ class Test_local_useless_inc_subtensor_alloc(unittest.TestCase): ...@@ -3548,6 +3624,9 @@ class Test_local_useless_inc_subtensor_alloc(unittest.TestCase):
r2 = f2(x_value, i_value, y_value) r2 = f2(x_value, i_value, y_value)
utt.assert_allclose(r1, r2) utt.assert_allclose(r1, r2)
self._verify_stack_trace(f1)
self._verify_stack_trace(f2)
def test_incsubtensor(self): def test_incsubtensor(self):
x = tensor.vector('x') x = tensor.vector('x')
...@@ -3574,6 +3653,9 @@ class Test_local_useless_inc_subtensor_alloc(unittest.TestCase): ...@@ -3574,6 +3653,9 @@ class Test_local_useless_inc_subtensor_alloc(unittest.TestCase):
r2 = f2(x_value, i_value, y_value) r2 = f2(x_value, i_value, y_value)
utt.assert_allclose(r1, r2) utt.assert_allclose(r1, r2)
self._verify_stack_trace(f1)
self._verify_stack_trace(f2)
class test_shapeoptimizer(unittest.TestCase): class test_shapeoptimizer(unittest.TestCase):
...@@ -4082,6 +4164,8 @@ class T_Rebroadcast(unittest.TestCase): ...@@ -4082,6 +4164,8 @@ class T_Rebroadcast(unittest.TestCase):
e = f.maker.fgraph.toposort() e = f.maker.fgraph.toposort()
assert len([n for n in e if isinstance(n.op, T.Rebroadcast)]) == 0 assert len([n for n in e if isinstance(n.op, T.Rebroadcast)]) == 0
assert hasattr(f.outputs[0].variable.tag, 'trace')
def test_rebroadcast_rebroadcast(self): def test_rebroadcast_rebroadcast(self):
mode = theano.compile.get_default_mode().including('canonicalize') mode = theano.compile.get_default_mode().including('canonicalize')
m = T.matrix() m = T.matrix()
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论