提交 8571cb47 authored 作者: Iulian Vlad Serban's avatar Iulian Vlad Serban

Continued work on adding stack traces to optimizations #3018

上级 2ebc24ff
......@@ -199,7 +199,7 @@ optdb.register('merge1', gof.MergeOptimizer(),
# rearranges elemwise expressions
optdb.register('canonicalize', gof.EquilibriumDB(ignore_newtrees=False),
1, 'fast_run', 'fast_compile')
1, 'fast_run', 'fast_compile', 'canonicalize_db')
# Register in the canonizer Equilibrium as a clean up opt the merge opt.
# Without this, as the equilibrium has ignore_newtrees=False, we
# won't merge all nodes if it is set as a global optimizer with
......
......@@ -1841,7 +1841,7 @@ def local_subtensor_make_vector(node):
elif isinstance(idx, Variable):
if idx.ndim == 0:
# if it is a constant we can do something with it
try:
try:
v = get_scalar_constant_value(idx)
if isinstance(v, numpy.integer):
# Python 2.4 wants to index only with Python integers
......@@ -1851,6 +1851,8 @@ def local_subtensor_make_vector(node):
ret = [x.owner.inputs[v]]
except IndexError:
raise NotScalarConstantError("Bad user graph!")
# Copy over stack trace from previous output to new output
return ret
except NotScalarConstantError:
pass
......@@ -2960,8 +2962,20 @@ def local_subtensor_of_dot(node):
a_sub = a.__getitem__(tuple(a_indices))
b_sub = b.__getitem__(tuple(b_indices)) if b_indices else b
return [T.dot(a_sub, b_sub)]
# Copy over previous output stacktrace to a_sub and b_sub,
# because an error in the subtensor operation (e.g. an index error)
# on either a or b must correspond to an error in the
# subtensor operation on their dot product.
copy_stack_trace(node.outputs[0], [a_sub, b_sub])
# Copy over previous output stacktrace and previous dot product stacktrace,
# because an error here may correspond to an error in either the original
# dot product, or in the dot product after the subtensor operation.
r = T.dot(a_sub, b_sub)
copy_stack_trace([node.outputs[0], node.inputs[0]], r)
return [r]
@register_canonicalize
......@@ -3016,6 +3030,11 @@ def local_IncSubtensor_serialize(node):
new_inputs = ([i for i in node.inputs if not movable(i)] +
[mi.owner.inputs[0] for mi in movable_inputs])
new_add = T.add(*new_inputs)
# Copy over stacktrace from original output, as an error
# (e.g. an index error) in this add operation should
# correspond to an error in the original add operation.
copy_stack_trace(node.outputs[0], new_add)
# stack up the new incsubtensors
tip = new_add
......@@ -3023,6 +3042,11 @@ def local_IncSubtensor_serialize(node):
assert tip.type == o_type
assert tip.type == mi.owner.inputs[0].type
tip = mi.owner.op(tip, *mi.owner.inputs[1:])
# Copy over stacktrace from outputs of the original
# "movable" operation to the new operation.
# Julian: Do we want to also include the stacktrace of the output (node.outputs[0])?
copy_stack_trace(mi.owner.outputs, tip)
return [tip]
# print incsub_inputs, [id(i.owner.inputs[0]) for i in incsub_inputs]
......@@ -3052,6 +3076,11 @@ def local_inplace_setsubtensor(node):
set_instead_of_inc=node.op.set_instead_of_inc,
destroyhandler_tolerate_aliased=dta)
new_node = new_op(*node.inputs)
# Copy stacktrace from original outputs to new outputs.
# This should be sensible, because the new operation is the
# same as the old one, but now with different attributes?
# Julian: Pascal, is this correct?
copy_stack_trace(node.outputs, new_node)
return [new_node]
return False
compile.optdb.register('local_inplace_setsubtensor',
......@@ -3070,6 +3099,12 @@ def local_inplace_incsubtensor1(node):
if isinstance(node.op, AdvancedIncSubtensor1) and not node.op.inplace:
new_op = node.op.clone_inplace()
new_node = new_op(*node.inputs)
# Copy stacktrace from original outputs to new outputs.
# This should be sensible, because the new operation is the
# same as the old one, but now with different attributes?
# Julian: same as above, is this correct?
copy_stack_trace(node.outputs, new_node)
return [new_node]
return False
compile.optdb.register('local_inplace_incsubtensor1',
......@@ -3104,6 +3139,8 @@ def local_incsubtensor_of_zeros(node):
pass
if replace:
# No need to copy over the stacktrace,
# because x should already have a stacktrace
return [x]
else:
return False
......@@ -3138,6 +3175,9 @@ def local_setsubtensor_of_constants(node):
if (replace_x is not None and
replace_y is not None and
replace_x == replace_y):
# No need to copy over the stacktrace,
# because x should already have a stacktrace
return [x]
else:
return False
......@@ -3184,7 +3224,13 @@ def local_adv_sub1_adv_inc_sub1(node):
return [y]
# It is possible that y is upcast or downcast to x.dtype.
# In all cases, as we set or add with 0, we can just cast y.
return [T.cast(y, node.outputs[0].dtype)]
r = T.cast(y, node.outputs[0].dtype)
# Copy over stacktrace from before casting, since
# we don't expect problems in the casting operation,
# and any problems in the indexing would have been spotted above.
copy_stack_trace(y, r)
return [r]
@register_specialize
......@@ -3287,7 +3333,14 @@ def local_useless_inc_subtensor_alloc(node):
msg = '`x[i]` and `y` do not have the same shape.'
z = Assert(msg)(z, *cond)
return [node.op(x, z, *i)]
r = node.op(x, z, *i)
# Copy over stacktrace from previous output, since
# we don't expect problems when removing the intermediate
# alloc operation and so we still want to point at the line
# of the inc_subtensor operation.
copy_stack_trace(node.outputs, r)
return [r]
####################
......@@ -3306,6 +3359,8 @@ def local_useless_rebroadcast(node):
x = node.inputs[0]
if numpy.all(x.broadcastable == node.outputs[0].broadcastable):
# No broadcastable flag was modified
# No need to copy over stack trace,
# because x should already have a stack trace.
return [x]
else:
# Keep the flags that modify something
......@@ -3317,7 +3372,10 @@ def local_useless_rebroadcast(node):
# All flags are useful
return
else:
return [T.Rebroadcast(*list(new_axis.items()))(x)]
r = T.Rebroadcast(*list(new_axis.items()))(x)
# Copy over stacktrace from previous output
copy_stack_trace(node.outputs, r)
return [r]
@register_canonicalize
......
......@@ -1622,6 +1622,11 @@ def test_local_useless_slice():
subtens = apply_node.op
assert not any(isinstance(idx, slice) for idx in subtens.idx_list), "Slice should be gone"
# Now test that the stack trace is copied over properly,
# both before and after optimization.
assert hasattr(f_unopt.outputs[0].variable.tag, 'trace')
assert hasattr(f_opt.outputs[0].variable.tag, 'trace')
# test a 4d tensor
z = tensor.tensor4('z')
o2 = z[1, :, :, 1]
......@@ -1638,6 +1643,10 @@ def test_local_useless_slice():
subtens = apply_node.op
assert not any(isinstance(idx, slice) for idx in subtens.idx_list)
# Finally, test that the stack trace is copied over properly,
# both before and after optimization.
assert hasattr(f_opt_check.outputs[0].variable.tag, 'trace')
assert hasattr(f_opt_check_apply.outputs[0].variable.tag, 'trace')
def test_local_useless_inc_subtensor():
x = tensor.matrix('x')
......@@ -1851,17 +1860,23 @@ class test_local_subtensor_make_vector(unittest.TestCase):
def test_stacktrace(self):
    """Check that 'local_subtensor_make_vector' copies the stack trace
    from the original output to the optimized output.
    """
    x, y, z = tensor.lscalars('xyz')
    v = make_vector(x, y, z)

    # Compile function using only the 'local_subtensor_make_vector'
    # optimization, which requires us to add the 'canonicalize_db' phase
    # so the EquilibriumDB that hosts the local opt is run at all.
    mode = theano.compile.mode.Mode(optimizer=None).including(
        'canonicalize_db').including("local_subtensor_make_vector")
    f = function([x, y, z], v[0], mode=mode)

    # Check stacktrace was copied over correctly after opt was applied
    self.assertTrue(hasattr(f.outputs[0].variable.tag, 'trace'))

    # Compile function using all optimizations in fast_compile mode,
    # including the 'local_subtensor_make_vector' optimization.
    mode = theano.compile.mode.get_mode(
        'FAST_COMPILE').including("local_subtensor_make_vector")
    f = function([x, y, z], v[0], mode=mode)

    # TODO(review): is there some way to disable ALL optimizations except
    # 'local_subtensor_make_vector'? Right now some other optimization may
    # remove the stack trace.
    # Check stacktrace was copied over correctly after opt was applied
    self.assertTrue(hasattr(f.outputs[0].variable.tag, 'trace'))
class test_local_subtensor_lift(unittest.TestCase):
def _verify_stack_trace(self, f):
......@@ -2661,6 +2676,32 @@ class test_local_adv_sub1_adv_inc_sub1(unittest.TestCase):
self.assertRaises((AssertionError, ValueError),
f, dx, dy, [1])
def test_stacktrace(self):
    """Check that 'local_adv_sub1_adv_inc_sub1' copies the stack trace
    from the original output to the optimized output.
    """
    x = tensor.matrix("x")
    y = tensor.matrix("y")
    idx = tensor.ivector()

    # set_subtensor followed by indexing with the same indices, which is
    # the pattern 'local_adv_sub1_adv_inc_sub1' rewrites.
    inc = tensor.set_subtensor(x[idx], y)
    o = inc[idx]

    # Compile function using only the 'local_adv_sub1_adv_inc_sub1'
    # optimization, which requires us to add the 'canonicalize' phase.
    # Note: pass the locally-built mode; previously self.mode was passed,
    # which silently ignored the mode constructed just above.
    mode = theano.compile.mode.Mode(optimizer=None).including(
        'canonicalize').including("local_adv_sub1_adv_inc_sub1")
    f = theano.function([x, y, idx], o, mode=mode)

    # Check stacktrace was copied over correctly after opt was applied
    self.assertTrue(hasattr(f.outputs[0].variable.tag, 'trace'))

    # Compile function using all optimizations in fast_compile mode,
    # including the 'local_adv_sub1_adv_inc_sub1' optimization.
    mode = theano.compile.mode.get_mode(
        'FAST_COMPILE').including("local_adv_sub1_adv_inc_sub1")
    f = theano.function([x, y, idx], o, mode=mode)

    # Check stacktrace was copied over correctly after opt was applied
    self.assertTrue(hasattr(f.outputs[0].variable.tag, 'trace'))
class Test_alloc_zero(unittest.TestCase):
def setUp(self):
......@@ -2861,7 +2902,11 @@ def test_local_IncSubtensor_serialize():
tensor.AdvancedIncSubtensor1))
for inp in a.inputs])
# Now test that the stack trace is copied over properly,
# if we return the gradients. We need to use same mode as before.
f = theano.function([i, j, t], dW, mode=mode)
assert hasattr(f.outputs[0].variable.tag, 'trace')
def test_local_set_to_inc_subtensor():
v = theano.tensor.fmatrix()
s = v[[2, 1]]
......@@ -2890,7 +2935,12 @@ def test_local_set_to_inc_subtensor():
utt.assert_allclose(r1, r2)
# Finally, test that the stack trace is copied over properly,
# both before and after optimization.
assert hasattr(f1.outputs[0].variable.tag, 'trace')
assert hasattr(f2.outputs[0].variable.tag, 'trace')
def test_local_subtensor_of_dot():
m1 = theano.tensor.matrix()
m2 = theano.tensor.matrix()
......@@ -2922,10 +2972,16 @@ def test_local_subtensor_of_dot():
f = theano.function([m1, m2, idx], theano.dot(m1, m2)[idx, 1:4, :, idx:], mode=mode)
assert test_equality(f(d1, d2, 1), numpy.dot(d1, d2)[1, 1:4, :, 1:])
# if we return the gradients. We need to use same mode as before.
assert hasattr(f.outputs[0].variable.tag, 'trace')
f = theano.function([m1, m2, idx], theano.dot(m1, m2)[1:4, :, idx:, idx], mode=mode)
assert test_equality(f(d1, d2, 1), numpy.dot(d1, d2)[1:4, :, 1:, 1])
# Now test that the stack trace is copied over properly,
# if we return the gradients. We need to use same mode as before.
assert hasattr(f.outputs[0].variable.tag, 'trace')
class Test_local_elemwise_alloc(unittest.TestCase):
dtype = config.floatX
......@@ -3428,6 +3484,11 @@ class Test_local_useless_elemwise_comparison(unittest.TestCase):
class Test_local_useless_alloc(unittest.TestCase):
def _verify_stack_trace(self, f):
for output in f.outputs:
# Check stacktrace was copied over correctly after opt was applied
self.assertTrue(hasattr(output.variable.tag, 'trace'))
def setUp(self):
self.rng = numpy.random.RandomState(utt.fetch_seed())
......@@ -3448,6 +3509,8 @@ class Test_local_useless_alloc(unittest.TestCase):
if isinstance(mode_opt, compile.DebugMode):
self.assertRaises(ValueError, f)
self._verify_stack_trace(f)
def test1(self):
# Test that alloc never gets instantiated during optimization
mode = mode_opt.excluding('local_useless_alloc')
......@@ -3461,6 +3524,8 @@ class Test_local_useless_alloc(unittest.TestCase):
op_classes = [node.op.__class__ for node in f.maker.fgraph.toposort()]
assert tensor.Alloc not in op_classes
self._verify_stack_trace(f)
def test2(self):
# Test that alloc never gets instantiated during optimization
mode = mode_opt.excluding('local_useless_alloc')
......@@ -3479,10 +3544,17 @@ class Test_local_useless_alloc(unittest.TestCase):
# in op_classes and we have to change the assert.
assert tensor.Alloc in op_classes
self._verify_stack_trace(f)
class Test_local_useless_inc_subtensor_alloc(unittest.TestCase):
opt_name = 'local_useless_inc_subtensor_alloc'
def _verify_stack_trace(self, f):
for output in f.outputs:
# Check stacktrace was copied over correctly after opt was applied
self.assertTrue(hasattr(output.variable.tag, 'trace'))
def setUp(self):
# The optimization requires the shape feature so we need to compile in
# FAST_RUN mode.
......@@ -3519,6 +3591,10 @@ class Test_local_useless_inc_subtensor_alloc(unittest.TestCase):
r2 = f2(x_value, i_value, y_value)
utt.assert_allclose(r1, r2)
self._verify_stack_trace(f1)
self._verify_stack_trace(f2)
def test_advanced_inc_subtensor1(self):
if tensor.inplace_increment is None:
......@@ -3548,6 +3624,9 @@ class Test_local_useless_inc_subtensor_alloc(unittest.TestCase):
r2 = f2(x_value, i_value, y_value)
utt.assert_allclose(r1, r2)
self._verify_stack_trace(f1)
self._verify_stack_trace(f2)
def test_incsubtensor(self):
x = tensor.vector('x')
......@@ -3574,6 +3653,9 @@ class Test_local_useless_inc_subtensor_alloc(unittest.TestCase):
r2 = f2(x_value, i_value, y_value)
utt.assert_allclose(r1, r2)
self._verify_stack_trace(f1)
self._verify_stack_trace(f2)
class test_shapeoptimizer(unittest.TestCase):
......@@ -4082,6 +4164,8 @@ class T_Rebroadcast(unittest.TestCase):
e = f.maker.fgraph.toposort()
assert len([n for n in e if isinstance(n.op, T.Rebroadcast)]) == 0
assert hasattr(f.outputs[0].variable.tag, 'trace')
def test_rebroadcast_rebroadcast(self):
mode = theano.compile.get_default_mode().including('canonicalize')
m = T.matrix()
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论