提交 fa9a870d authored 作者: Pascal Lamblin's avatar Pascal Lamblin 提交者: GitHub

Merge pull request #4659 from julianser/master

Merged previous work implementing stack trace copy over and tests for…
......@@ -91,7 +91,7 @@ Optimization FAST_RUN FAST_COMPILE
* ``f(fill(a,b), c) -> f(b, c)``
* ``f(fill(a, b), fill(c, d), e) -> fill(a, fill(c, f(b, d, e)))``
See :func:`opt.local_fill_cut`, :func:`opt.local_fill_sink`
See :func:`opt.local_fill_sink`
inc_subtensor serialization
Incrementing a small subregion of a large tensor can be done quickly
......
......@@ -53,7 +53,7 @@ class MissingInputError(Exception):
# The call to list is needed for Python 3
assert list(kwargs.keys()) == ["variable"]
tr = getattr(list(kwargs.values())[0].tag, 'trace', [])
if type(tr) is list and len(tr) > 0:
if isinstance(tr, list) and len(tr) > 0:
sio = StringIO()
print("\nBacktrace when the variable is created:", file=sio)
for subtr in list(kwargs.values())[0].tag.trace:
......
......@@ -179,7 +179,7 @@ def raise_with_op(node, thunk=None, exc_info=None, storage_map=None):
# Print node backtraces
tr = getattr(node.outputs[0].tag, 'trace', [])
if type(tr) is list and len(tr) > 0:
if isinstance(tr, list) and len(tr) > 0:
detailed_err_msg += "\nBacktrace when the node is created(use Theano flag traceback.limit=N to make it longer):\n"
# Print separate message for each element in the list of backtraces
......
......@@ -4222,7 +4222,18 @@ def local_flatten_lift(node):
isinstance(node.inputs[0].owner.op, T.Elemwise) and
len(node.inputs[0].owner.inputs) == 1):
f = node.op(node.inputs[0].owner.inputs[0])
# Copy over stacktrace from previous output node (flatten op),
# since this is the op which may cause an error for f.
copy_stack_trace(node.outputs, f)
e = node.inputs[0].owner.op(f)
# Copy over stacktrace from previous output node and from unary
# elementwise output node since if there was an error, it would
# probably have come from that operation.
copy_stack_trace(node.outputs + [node.inputs[0]], e)
return [e]
##################
......@@ -4243,6 +4254,12 @@ def local_reshape_chain(op):
# TODO: this can permit a failing program to run by eliminating
# the lower reshape
rval = node.op(node.inputs[0].owner.inputs[0], node.inputs[1])
# Copy over stacktrace from previous output node, as any error
# in new computational graph would have been caused by last op
# in the old computational graph.
copy_stack_trace(node.outputs, rval)
# It might happen that the desired output of this node has a
# broadcastable pattern that does not match that of 'rval'. This is
# when originally, we were able to figure out that one of the
......@@ -4365,7 +4382,6 @@ def local_reshape_to_dimshuffle(node):
- reshape(x, (1, n)) --> dimshuffle{x,0}(reshape(x, (n,))
- reshape(x, (1, m, 1, n, 1, 1))
--> dimshuffle{x,0,x,1,x,x}(reshape(x, (m, n)))
"""
op = node.op
if not isinstance(op, Reshape):
......@@ -4414,16 +4430,33 @@ def local_reshape_lift(node):
isinstance(node.inputs[0].owner.op, T.Elemwise) and
len(node.inputs[0].owner.inputs) == 1):
r = node.op(node.inputs[0].owner.inputs[0], node.inputs[1])
# Copy stacktrace from previous Reshape op, as an error in new
# Reshape op could only have been caused by old one.
copy_stack_trace(node.outputs, r)
e = node.inputs[0].owner.op(r)
# Copy stacktrace from both previous Reshape and UnaryElemwise op
# because an error in new cg could have been caused by either ops.
copy_stack_trace(node.outputs + node.inputs, e)
# In rare case the original broadcast was (False, True), but
# the new one is (False, False). So don't crash in that case.
if e.type != node.outputs[0].type:
e = T.patternbroadcast(e, node.outputs[0].broadcastable)
return [e]
re = T.patternbroadcast(e, node.outputs[0].broadcastable)
# Copy over stack trace.
# If the graph fails it is usually due to the fact that a dimension
# that should be broadcastable does not actually have length 1.
copy_stack_trace(e, re)
else:
re = e
return [re]
if 0:
# TODO: Test that this optimization works.
# TODO: Once it works, copy over stacktrace appropriately.
@register_canonicalize
@gof.local_optimizer([T.Reshape])
def local_scalar_reshape(node):
......@@ -4440,6 +4473,7 @@ if 0:
# appropriately typed and broadcasted zero.
# TODO: Remember to take into account the new sum dtype argument if this
# optimization is enabled.
# TODO: Once it works, copy over stacktrace appropriately.
@register_canonicalize
@gof.local_optimizer([T.Sum])
def local_sum_over_empty(node):
......@@ -4463,60 +4497,6 @@ if 0:
# Middleman cuts #
##################
@gof.local_optimizer([T.Elemwise])
def local_fill_cut(node):
    """
    f(fill(a, b), c) -> f(b, c)

    Only applies when dropping the fill leaves an input that still
    determines the output type (i.e. c.type == a.type).
    """
    # This optimization is essentially for getting broadcasting to
    # replace fill.  This is always possible when using a compound
    # Elemwise operation, but it is not always possible without one
    # (consider filling a large matrix with a scalar, and then adding
    # another scalar.  The only numbers that count are the two scalars,
    # but we can't ignore the large matrix because it gives the shape
    # of the result).
    #
    # BUG FIX: the original check was ``node.op != T.Elemwise``, which
    # compares an op *instance* against the Elemwise *class* and is
    # therefore always True, so the optimizer never fired.  Use
    # isinstance, consistent with the other local optimizers here.
    if not isinstance(node.op, T.Elemwise):
        return False
    output = node.outputs[0]
    try:
        # ``reference`` is some input with the same type as the output
        # that is not itself produced by a fill; it guarantees the
        # output shape is preserved when fills are cut.
        reference = [inp
                     for inp in node.inputs
                     if inp.type == output.type and
                     (not inp.owner or inp.owner.op != T.fill)][0]
    except IndexError:
        # No suitable reference input: cutting a fill could change
        # the output shape, so bail out.
        return False
    new_inputs = []
    replaced_any = False
    for inp in node.inputs:
        if inp.owner and inp.owner.op == T.fill:
            _model, filling = inp.owner.inputs
            # Only cut the fill when the filling value still
            # broadcasts to the reference's pattern.
            if encompasses_broadcastable(reference.type.broadcastable,
                                         filling.type.broadcastable):
                new_inputs.append(filling)
                replaced_any = True
                continue
        new_inputs.append(inp)
    if not replaced_any:
        return False
    rval = node.op(*new_inputs)
    if isinstance(rval, gof.Variable):
        return rval.owner.outputs
    else:
        return rval[0].owner.outputs
register_canonicalize(local_fill_cut)
register_canonicalize(gof.OpRemove(T.tensor_copy), name='remove_tensor_copy')
################
......@@ -4972,6 +4952,9 @@ class Canonizer(gof.LocalOptimizer):
# This happen with test
# theano/tensor/tests/test_opt.py:T_local_switch_sink
new.tag.values_eq_approx = values_eq_approx_remove_inf_nan
# We need to implement the copy over of the stacktrace.
# See issue #5104.
return [new]
else:
_logger.warning(' '.join(('CANONIZE FAILED: new, out = ',
......@@ -5056,9 +5039,19 @@ def local_sum_prod_mul_by_scalar(node):
new_op_output = node.op(non_scalars[0])
else:
new_op_input = T.mul(*non_scalars)
# We assume that errors always come from the prod/mul op in the
# original computational graph, and therefore need to only
# copy over its output stacktrace.
copy_stack_trace(node.outputs, new_op_input)
new_op_input_nb_elements = new_op_input.size
new_op_output = node.op(new_op_input)
if not len(non_scalars) == 0:
# Copy over stacktrace from previous output to new mul op,
# for same reason as above.
copy_stack_trace(node.outputs, new_op_output)
# If node.op is a T.elemwise.Prod, then the scalars need to be
# raised to the power of the number of elements in the input
# to the Prod
......@@ -5074,12 +5067,28 @@ def local_sum_prod_mul_by_scalar(node):
mul_inputs.append(new_op_output)
if len(mul_inputs) == 1:
# Copy over stacktrace from previous output to new mul op,
# for same reason as above.
copy_stack_trace(node.outputs, mul_inputs)
return mul_inputs
else:
return [T.mul(*mul_inputs)]
ret = T.mul(*mul_inputs)
# Copy over stacktrace from previous output to new mul op,
# for same reason as above.
copy_stack_trace(node.outputs, [ret] + mul_inputs)
return [ret]
if isinstance(node.op, T.Sum) and node_inps.owner and node_inps.owner.op == T.neg:
return [T.neg(node.op(node_inps.owner.inputs[0]))]
s = node.op(node_inps.owner.inputs[0])
ret = T.neg(s)
# There are never errors in the negative op, thus
# we need only to copy over stacktrace from previous output node to
# the two new ops.
copy_stack_trace(node.outputs, [s, ret])
return [ret]
@register_specialize
......@@ -5092,7 +5101,11 @@ def local_elemwise_sub_zeros(node):
node.op.scalar_op.nin == 2 and
node.op.scalar_op == scalar.sub and
node.inputs[0] == node.inputs[1]):
return [T.zeros_like(node.inputs[0])]
res = T.zeros_like(node.inputs[0])
# Copy over stacktrace from previous output.
# This could help for failures due to out-of-memory.
copy_stack_trace(node.outputs, res)
return [res]
@register_useless
......@@ -5139,54 +5152,77 @@ def local_useless_elemwise_comparison(node):
# Elemwise[{LT,GT}](X, X) -> Elemwise[zeros](X)
if isinstance(node.op.scalar_op, (scalar.LT, scalar.GT)) and \
node.inputs[0] is node.inputs[1]:
return [T.zeros_like(node.inputs[0], dtype=dtype, opt=True)]
res = T.zeros_like(node.inputs[0], dtype=dtype, opt=True)
# Copy over stacktrace from previous output.
copy_stack_trace(node.outputs, res)
return [res]
# Elemwise[{LE,GE}](X, X) -> Elemwise[ones](X)
if isinstance(node.op.scalar_op, (scalar.LE, scalar.GE)) and \
node.inputs[0] is node.inputs[1]:
return [T.ones_like(node.inputs[0], dtype=dtype, opt=True)]
res = T.ones_like(node.inputs[0], dtype=dtype, opt=True)
# Copy over stacktrace from previous output.
copy_stack_trace(node.outputs, res)
return [res]
# Elemwise[{minimum,maximum}](X, X) -> X
if isinstance(node.op.scalar_op, (scalar.Minimum, scalar.Maximum)) and \
node.inputs[0] is node.inputs[1]:
return [node.inputs[0]]
res = node.inputs[0]
# Copy over stacktrace from previous output.
copy_stack_trace(node.outputs, res)
return [res]
# Elemwise[LT](X.shape[i], 0) -> Elemwise[zeros](X)
if isinstance(node.op.scalar_op, scalar.LT) and \
node.inputs[0].owner and \
isinstance(node.inputs[0].owner.op, Shape_i) and \
T.extract_constant(node.inputs[1], only_process_constants=True) == 0:
return [T.zeros_like(node.inputs[0], dtype=dtype, opt=True)]
res = T.zeros_like(node.inputs[0], dtype=dtype, opt=True)
# Copy over stacktrace from previous output.
copy_stack_trace(node.outputs, res)
return [res]
# Elemwise[GE](X.shape[i], 0) -> Elemwise[ones](X)
if isinstance(node.op.scalar_op, scalar.GE) and \
node.inputs[0].owner and \
isinstance(node.inputs[0].owner.op, Shape_i) and \
T.extract_constant(node.inputs[1], only_process_constants=True) == 0:
return [T.ones_like(node.inputs[0], dtype=dtype, opt=True)]
res = T.ones_like(node.inputs[0], dtype=dtype, opt=True)
# Copy over stacktrace from previous output.
copy_stack_trace(node.outputs, res)
return [res]
# Elemwise[maximum](X.shape[i], 0) -> X.shape[i]
if isinstance(node.op.scalar_op, scalar.Maximum) and \
node.inputs[0].owner and \
isinstance(node.inputs[0].owner.op, Shape_i) and \
T.extract_constant(node.inputs[1], only_process_constants=True) == 0:
# No need to copy over stacktrace.
return [node.inputs[0]]
# Elemwise[maximum](0, X.shape[i]) -> X.shape[i]
if isinstance(node.op.scalar_op, scalar.Maximum) and \
T.extract_constant(node.inputs[0], only_process_constants=True) == 0 and \
node.inputs[1].owner and \
isinstance(node.inputs[1].owner.op, Shape_i):
# No need to copy over stacktrace.
return [node.inputs[1]]
# Elemwise[minimum](X.shape[i], 0) -> 0
if isinstance(node.op.scalar_op, scalar.Minimum) and \
node.inputs[0].owner and \
isinstance(node.inputs[0].owner.op, Shape_i) and \
T.extract_constant(node.inputs[1], only_process_constants=True) == 0:
return [T.zeros_like(node.inputs[0], dtype=dtype, opt=True)]
res = T.zeros_like(node.inputs[0], dtype=dtype, opt=True)
# Copy over stacktrace from previous output.
copy_stack_trace(node.outputs, res)
return [res]
# It doesn't detect the case when the 0 is all zeros with ndim > 0.
# Elemwise[minimum](0, X.shape[i]) -> 0
if isinstance(node.op.scalar_op, scalar.Minimum) and \
T.extract_constant(node.inputs[0], only_process_constants=True) == 0 and \
node.inputs[1].owner and \
isinstance(node.inputs[1].owner.op, Shape_i):
return [T.zeros_like(node.inputs[1], dtype=dtype, opt=True)]
res = T.zeros_like(node.inputs[1], dtype=dtype, opt=True)
# Copy over stacktrace from previous output.
copy_stack_trace(node.outputs, res)
return [res]
# Elemwise[LT](add([anything that is shapes]), 0) -> Elemwise[zeros](X)
if isinstance(node.op.scalar_op, scalar.LT) and \
......@@ -5196,8 +5232,10 @@ def local_useless_elemwise_comparison(node):
all([isinstance(var.owner and var.owner.op, Shape_i)
for var in node.inputs[0].owner.inputs]) and \
T.extract_constant(node.inputs[1], only_process_constants=True) == 0:
return [T.zeros_like(node.inputs[0], dtype=dtype, opt=True)]
res = T.zeros_like(node.inputs[0], dtype=dtype, opt=True)
# Copy over stacktrace from previous output.
copy_stack_trace(node.outputs, res)
return [res]
# Elemwise[GE](add([anything that is shapes]), 0) -> Elemwise[ones](X)
if isinstance(node.op.scalar_op, scalar.GE) and \
node.inputs[0].owner and \
......@@ -5206,7 +5244,11 @@ def local_useless_elemwise_comparison(node):
all([isinstance(var.owner and var.owner.op, Shape_i)
for var in node.inputs[0].owner.inputs]) and \
T.extract_constant(node.inputs[1], only_process_constants=True) == 0:
return [T.ones_like(node.inputs[0], dtype=dtype, opt=True)]
res = T.ones_like(node.inputs[0], dtype=dtype, opt=True)
# Copy over stacktrace from previous output.
copy_stack_trace(node.outputs, res)
return [res]
# Elemwise[EQ](Subtensor(Shape(x)), -N)
# Elemwise[EQ](somegraph that only depend of shape, -N)
......@@ -5238,9 +5280,15 @@ def local_useless_elemwise_comparison(node):
try:
cst = get_scalar_constant_value(node.inputs[1],
only_process_constants=True)
res = T.zeros_like(node.inputs[0], dtype=dtype, opt=True)
if cst < 0:
return [T.zeros_like(node.inputs[0],
dtype=dtype, opt=True)]
# Copy over stacktrace from previous output.
copy_stack_trace(node.outputs, res)
return [res]
except NotScalarConstantError:
pass
return
......@@ -6015,7 +6063,7 @@ def local_add_specialize(node):
return False
register_specialize(local_add_specialize)
mul_canonizer = in2out(gof.LocalOptGroup(local_mul_canonizer, local_fill_cut,
mul_canonizer = in2out(gof.LocalOptGroup(local_mul_canonizer,
local_fill_sink, apply_all_opts=True),
name='mul_canonizer_groups')
......@@ -6221,7 +6269,7 @@ def add_calculate(num, denum, aslist=False, out_type=None):
local_add_canonizer = Canonizer(T.add, T.sub, T.neg, add_calculate)
add_canonizer = in2out(gof.LocalOptGroup(local_add_canonizer, local_fill_cut,
add_canonizer = in2out(gof.LocalOptGroup(local_add_canonizer,
local_fill_sink, apply_all_opts=True),
name='add_canonizer_group')
......
......@@ -3451,7 +3451,7 @@ def test_local_subtensor_of_alloc():
def test_local_fill_useless():
# Test opt local_fill_cut
# Test opt local_fill_useless
x = dvector()
y = dvector()
z = lvector()
......@@ -3500,6 +3500,67 @@ def test_local_fill_useless():
f(m_, x_)
def test_local_elemwise_sub_zeros():
    """Test opt local_elemwise_sub_zeros: x - x -> zeros_like(x).

    We test separately for scalars, vectors and matrices.
    """
    rng = numpy.random.RandomState(seed=utt.fetch_seed())
    # (symbolic variable, concrete test value) pairs; the third case is
    # a matrix (the original comment mislabeled it "vector minus vector").
    cases = [
        (T.scalar(), rng.rand(1).astype(config.floatX)[0]),
        (T.vector(), rng.rand(5).astype(config.floatX)),
        (T.matrix(), rng.rand(3, 2).astype(config.floatX)),
    ]
    mode = theano.compile.get_default_mode()\
        .excluding('canonicalize', 'uncanonicalize',
                   'ShapeOpt', 'local_fill_to_alloc',
                   'local_elemwise_alloc')\
        .including('local_elemwise_sub_zeros')

    for var, val in cases:
        f = function([var], var - var, mode=mode)
        # Check optimized graph is correct: x - x should have been
        # rewritten to Elemwise{Second} with a constant zero input.
        node = f.maker.fgraph.toposort()[0]
        assert isinstance(node.op, T.Elemwise)
        assert isinstance(node.op.scalar_op, theano.scalar.Second)
        # BUG FIX: the original asserted
        # ``isinstance(inputs[1], TensorConstant) or
        #   isinstance(inputs[1], TensorConstant)`` -- the same test on
        # both sides of ``or``.  A single check is what was intended.
        assert isinstance(node.inputs[1], T.TensorConstant)
        # zeros(shape(val)) handles the scalar case (0-d) as well as
        # the vector and matrix cases.
        utt.assert_allclose(f(val), numpy.zeros(numpy.shape(val)))
        # Check stack trace is copied over.
        assert check_stack_trace(f, ops_to_check='all')
class Test_local_useless_elemwise_comparison(unittest.TestCase):
def setUp(self):
self.rng = numpy.random.RandomState(utt.fetch_seed())
......@@ -3743,6 +3804,17 @@ class Test_local_useless_elemwise_comparison(unittest.TestCase):
f = theano.function([x], T.xor(x, x), mode=mode)
self.assert_eqs_const(f, 0)
def test_stacktrace(self):
    """Check that local_useless_elemwise_comparison keeps stack traces."""
    mode = theano.compile.get_default_mode().including(
        'local_useless_elemwise_comparison')
    x = T.vector('x', dtype=config.floatX)
    # Both a strict (gt -> zeros) and a non-strict (le -> ones)
    # comparison of x with itself must carry the trace over.
    for comparison in (T.gt, T.le):
        f = theano.function([x], comparison(x, x), mode=mode)
        self.assertTrue(check_stack_trace(f, ops_to_check='last'))
class Test_local_canonicalize_alloc(unittest.TestCase):
def setUp(self):
......@@ -5604,6 +5676,35 @@ class T_local_sum_prod(unittest.TestCase):
finally:
config.on_opt_error = backup
def test_local_sum_prod_mul_by_scalar_stack_trace(self):
    """Check that local_sum_prod_mul_by_scalar copies stack traces."""
    m0 = theano.compile.get_default_mode()\
        .excluding('inplace_elemwise_opt')\
        .including('canonicalize', 'specialize')
    vect = T.dvector()
    mat = T.dmatrix()
    scalar = T.dscalar()

    # Each case: (function inputs, expression, ops whose trace to check).
    cases = [
        ([vect, scalar], T.sum(vect * scalar), 'all'),
        ([vect], T.sum(-vect), [T.Sum]),
        ([vect, scalar], T.elemwise.Prod()(vect * scalar),
         [T.elemwise.Prod]),
        ([vect], T.elemwise.Prod()(-vect), [T.elemwise.Prod]),
        ([mat, scalar], T.sum(mat * scalar), 'all'),
        ([mat], T.sum(-mat), [T.Sum]),
    ]
    for inputs, expr, ops_to_check in cases:
        f = theano.function(inputs, expr, mode=m0)
        assert check_stack_trace(f, ops_to_check=ops_to_check)
class T_local_opt_alloc(unittest.TestCase):
def test_sum_upcast(self):
......@@ -6287,6 +6388,9 @@ class Test_Reshape(unittest.TestCase):
topo = f.maker.fgraph.toposort()
assert sum(isinstance(node.op, self.op) for node in topo) == 1
# Check stack trace
self.assertTrue(check_stack_trace(f, ops_to_check=[self.op]))
class Test_local_useless_reshape(unittest.TestCase):
def setUp(self):
......@@ -6316,6 +6420,9 @@ class Test_local_useless_reshape(unittest.TestCase):
topo = f2.maker.fgraph.toposort()
assert not any(isinstance(n.op, tensor.basic.Reshape) for n in topo)
# We do not need tests checking that stack traces are copied over,
# because local_useless_reshape only removes nodes from the graph
def test_2(self):
x = theano.tensor.matrix('x')
r = x.reshape([Shape_i(i)(x) for i in xrange(x.ndim)])
......@@ -6361,7 +6468,7 @@ class Test_local_reshape_to_dimshuffle(unittest.TestCase):
"TensorConstant{[5 6]}))]")
# Check stacktrace was copied over correctly after opt was applied
check_stack_trace(g, ops_to_check=(T.DimShuffle, T.Reshape))
assert check_stack_trace(g, ops_to_check=(T.DimShuffle, T.Reshape))
def test_local_reshape_lift():
......@@ -6375,6 +6482,8 @@ def test_local_reshape_lift():
topo = f.maker.fgraph.toposort()
assert isinstance(topo[-2].op, tensor.Reshape)
assert isinstance(topo[-1].op, tensor.Elemwise)
# Check stacktrace was copied over correctly after opt was applied
assert check_stack_trace(f, ops_to_check='last')
class Test_lift_transpose_through_dot(unittest.TestCase):
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论