提交 66b29241 authored 作者: Iulian Vlad Serban's avatar Iulian Vlad Serban

Fixed stack trace copying for several local optimizations according to Pascal's advice.

上级 21d70281
...@@ -62,22 +62,23 @@ theano.configparser.AddConfigVar('on_shape_error', ...@@ -62,22 +62,23 @@ theano.configparser.AddConfigVar('on_shape_error',
# Utilities # Utilities
def copy_stack_trace(from_var, to_var): def copy_stack_trace(from_var, to_var):
""" """
Copies the stack trace from one or more tensor variables to Copies the stack trace from one or more tensor variables to
one or more tensor variables. one or more tensor variables.
:param from_var: tensor variable or list of tensor variables to :param from_var: tensor variable or list of tensor variables to
copy stack traces from. copy stack traces from.
:param to_var: tensor variable or list of tensor variables to :param to_var: tensor variable or list of tensor variables to
copy stack traces to. copy stack traces to.
.. note:: The stacktrace is assumed to be of the form of a list of lists .. note:: The stacktrace is assumed to be of the form of a list of lists
of tuples. Each tuple contains the filename, line number, function name of tuples. Each tuple contains the filename, line number, function name
and so on. Each list of tuples contains the tuples belonging to a and so on. Each list of tuples contains the tuples belonging to a
particular variable. particular variable.
""" """
# Store stack traces from from_var # Store stack traces from from_var
tr = [] tr = []
if type(from_var) is list: if type(from_var) is list:
...@@ -547,6 +548,7 @@ def local_dimshuffle_lift(node): ...@@ -547,6 +548,7 @@ def local_dimshuffle_lift(node):
copy_stack_trace(node.outputs[0], ret) copy_stack_trace(node.outputs[0], ret)
return [ret] return [ret]
@register_canonicalize @register_canonicalize
@gof.local_optimizer([T.DimShuffle]) @gof.local_optimizer([T.DimShuffle])
def local_lift_transpose_through_dot(node): def local_lift_transpose_through_dot(node):
...@@ -572,12 +574,11 @@ def local_lift_transpose_through_dot(node): ...@@ -572,12 +574,11 @@ def local_lift_transpose_through_dot(node):
# Output is dot product of transposed inputs in reverse order # Output is dot product of transposed inputs in reverse order
ret = [T.dot(y.T, x.T)] ret = [T.dot(y.T, x.T)]
# Copy over stack trace to output from x and y to output # Copy over stack trace to output from result of dot-product
copy_stack_trace([x, y], ret) copy_stack_trace(node.inputs[0], ret)
return ret return ret
@gof.local_optimizer([DimShuffle]) @gof.local_optimizer([DimShuffle])
def dimshuffle_as_view(node): def dimshuffle_as_view(node):
op = node.op op = node.op
...@@ -1422,11 +1423,10 @@ def local_shape_to_shape_i(node): ...@@ -1422,11 +1423,10 @@ def local_shape_to_shape_i(node):
ret = shape_feature.make_vector_shape(node.inputs[0]) ret = shape_feature.make_vector_shape(node.inputs[0])
# We need to copy over stack trace from input to output # We need to copy over stack trace from input to output
copy_stack_trace(node.inputs[0], ret) copy_stack_trace(node.outputs[0], ret)
return [ret] return [ret]
# TODO: Not sure what type of node we are expecting here # TODO: Not sure what type of node we are expecting here
@register_specialize @register_specialize
@register_canonicalize @register_canonicalize
...@@ -1441,7 +1441,7 @@ def local_track_shape_i(node): ...@@ -1441,7 +1441,7 @@ def local_track_shape_i(node):
# fgraph as we don't change it in the shapefeature internal # fgraph as we don't change it in the shapefeature internal
# structure. # structure.
assert isinstance(node.op, Shape_i) assert isinstance(node.op, Shape_i)
replacement = shape_feature.scheduled[node] replacement = shape_feature.scheduled[node]
return [shape_feature.shape_of[replacement][node.op.i]] return [shape_feature.shape_of[replacement][node.op.i]]
...@@ -1500,9 +1500,8 @@ def local_subtensor_make_vector(node): ...@@ -1500,9 +1500,8 @@ def local_subtensor_make_vector(node):
values = list(map(int, list(idx.value))) values = list(map(int, list(idx.value)))
ret = [make_vector(*[x.owner.inputs[v] for v in values])] ret = [make_vector(*[x.owner.inputs[v] for v in values])]
# Copy over stack traces from each index to every element of new list? # Copy over stack trace from previous output to new output
# If yes, then same should be done for const_slice just below... copy_stack_trace(node.outputs[0], ret)
copy_stack_trace([x.owner.inputs[v] for v in values], ret)
return ret return ret
else: else:
raise TypeError('case not expected') raise TypeError('case not expected')
...@@ -1540,21 +1539,21 @@ def local_useless_elemwise(node): ...@@ -1540,21 +1539,21 @@ def local_useless_elemwise(node):
if node.inputs[0] == node.inputs[1]: if node.inputs[0] == node.inputs[1]:
# it is the same var in the graph. That will always be true # it is the same var in the graph. That will always be true
ret = [T.fill(node.inputs[0], ret = [T.fill(node.inputs[0],
T.constant(1.0, T.constant(1.0,
dtype=node.outputs[0].type.dtype))] dtype=node.outputs[0].type.dtype))]
# Copy stack trace from input to constant output # Copy stack trace from input to constant output
copy_stack_trace(node.inputs[0], ret) copy_stack_trace(node.outputs[0], ret)
return ret return ret
elif node.op.scalar_op == theano.scalar.neq and len(node.inputs) == 2: elif node.op.scalar_op == theano.scalar.neq and len(node.inputs) == 2:
if node.inputs[0] == node.inputs[1]: if node.inputs[0] == node.inputs[1]:
# it is the same var in the graph. That will always be false # it is the same var in the graph. That will always be false
ret = [T.fill(node.inputs[0], ret = [T.fill(node.inputs[0],
T.constant(0.0, T.constant(0.0,
dtype=node.outputs[0].type.dtype))] dtype=node.outputs[0].type.dtype))]
# Copy stack trace from input to constant output # Copy stack trace from input to constant output
copy_stack_trace(node.inputs[0], ret) copy_stack_trace(node.outputs[0], ret)
return ret return ret
elif node.op.scalar_op == theano.scalar.mul and len(node.inputs) == 1: elif node.op.scalar_op == theano.scalar.mul and len(node.inputs) == 1:
...@@ -1580,6 +1579,7 @@ def local_alloc_unary(node): ...@@ -1580,6 +1579,7 @@ def local_alloc_unary(node):
x = a.owner.inputs[0] x = a.owner.inputs[0]
shp = a.owner.inputs[1:] shp = a.owner.inputs[1:]
v = node.op(x) v = node.op(x)
copy_stack_trace(node.outputs[0], v)
ret = T.alloc(T.cast(v, node.outputs[0].dtype), *shp) ret = T.alloc(T.cast(v, node.outputs[0].dtype), *shp)
# Is it really necessary to copy over stack trace here? # Is it really necessary to copy over stack trace here?
...@@ -1643,7 +1643,7 @@ def local_func_inv(node): ...@@ -1643,7 +1643,7 @@ def local_func_inv(node):
for inv_pair in inv_pairs: for inv_pair in inv_pairs:
if is_inverse_pair(node_op, prev_op, inv_pair): if is_inverse_pair(node_op, prev_op, inv_pair):
# We don't need to copy stack trace, because the optimization # We don't need to copy stack trace, because the optimization
# is trivial and maintains the earlier stack trace # is trivial and maintains the earlier stack trace
return x.owner.inputs return x.owner.inputs
...@@ -1770,8 +1770,11 @@ def local_remove_useless_assert(node): ...@@ -1770,8 +1770,11 @@ def local_remove_useless_assert(node):
# We don't need to copy over any stack traces here # We don't need to copy over any stack traces here
return [node.inputs[0]] return [node.inputs[0]]
if len(cond) != len(node.inputs) - 1: if len(cond) != len(node.inputs) - 1:
# We don't need to copy over any stack traces here ret = assert_(node.inputs[0], *cond)
return [assert_(node.inputs[0], *cond)]
# We copy over stack trace from the output of the original assert
copy_stack_trace(node.outputs[0], ret)
return [ret]
@gof.local_optimizer([Assert]) @gof.local_optimizer([Assert])
...@@ -1920,17 +1923,19 @@ def local_elemwise_alloc_op(ElemwiseOP, AllocOP, DimShuffleOP): ...@@ -1920,17 +1923,19 @@ def local_elemwise_alloc_op(ElemwiseOP, AllocOP, DimShuffleOP):
# We need to keep the dimshuffle. It could swap axes or # We need to keep the dimshuffle. It could swap axes or
# add dimensions anywhere. # add dimensions anywhere.
# Do we need to copy stack trace from alloc_input to new element here? r_i = i.owner.op(alloc_input)
new_i.append(i.owner.op(alloc_input))
# Copy stack trace from i to new_i
copy_stack_trace(i, r_i)
new_i.append(r_i)
else: else:
new_i.append(i) new_i.append(i)
new_i[assert_op_idx] = assert_op new_i[assert_op_idx] = assert_op
ret = node.op(*new_i, return_list=True) ret = node.op(*new_i, return_list=True)
# Copy over stack trace from inputs to outputs.
# Maybe we want to do this elementwise to keep the trace cleaner, # Copy over stack trace from previous outputs to new outputs.
# but that's not really clear. copy_stack_trace(node.outputs, ret)
copy_stack_trace(new_i, ret)
return ret return ret
return local_elemwise_alloc return local_elemwise_alloc
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论