提交 e0a4f15a authored 作者: Kelvin Xu's avatar Kelvin Xu

add stack traces to local optimizers

上级 9b10aaee
...@@ -23,6 +23,7 @@ from theano.tensor import basic as tensor ...@@ -23,6 +23,7 @@ from theano.tensor import basic as tensor
from theano.tensor import subtensor from theano.tensor import subtensor
from theano.tensor import elemwise from theano.tensor import elemwise
from theano.tensor import opt from theano.tensor import opt
from theano.tensor.opt import copy_stack_trace
from theano.compile import optdb from theano.compile import optdb
from theano.gof import Apply from theano.gof import Apply
...@@ -31,6 +32,7 @@ from theano.gradient import DisconnectedType ...@@ -31,6 +32,7 @@ from theano.gradient import DisconnectedType
from theano.gradient import grad_not_implemented from theano.gradient import grad_not_implemented
from theano.tensor.type import values_eq_approx_remove_nan from theano.tensor.type import values_eq_approx_remove_nan
############ ############
# #
# TENSOR OPS # TENSOR OPS
...@@ -591,7 +593,7 @@ def softmax_graph(c): ...@@ -591,7 +593,7 @@ def softmax_graph(c):
def softmax(c): def softmax(c):
return softmax_op(c) return softmax_op(c)
# NOTE: it seems softmax_with_bias itself may also need to be changed.
@opt.register_specialize('fast_compile_gpu') @opt.register_specialize('fast_compile_gpu')
@gof.local_optimizer([softmax_op]) @gof.local_optimizer([softmax_op])
def local_softmax_with_bias(node): def local_softmax_with_bias(node):
...@@ -636,14 +638,17 @@ def local_softmax_with_bias(node): ...@@ -636,14 +638,17 @@ def local_softmax_with_bias(node):
vector_sum = tensor.add(*vectors) vector_sum = tensor.add(*vectors)
else: else:
vector_sum = vectors[0] vector_sum = vectors[0]
copy_stack_trace(x_in, vector_sum)
if len(non_vectors) > 1: if len(non_vectors) > 1:
non_vector_sum = tensor.add(*non_vectors) non_vector_sum = tensor.add(*non_vectors)
else: else:
non_vector_sum = non_vectors[0] non_vector_sum = non_vectors[0]
copy_stack_trace(x_in, non_vector_sum)
try: try:
sm_bias = softmax_with_bias(non_vector_sum, vector_sum) sm_bias = softmax_with_bias(non_vector_sum, vector_sum)
copy_stack_trace(x_in, non_vector_sum)
except Exception: except Exception:
# if our arguments have the wrong types, then # if our arguments have the wrong types, then
# forget about it # forget about it
...@@ -692,6 +697,7 @@ def softmax_simplifier(numerators, denominators): ...@@ -692,6 +697,7 @@ def softmax_simplifier(numerators, denominators):
return numerators, denominators return numerators, denominators
opt.local_mul_canonizer.add_simplifier(softmax_simplifier, 'softmax_simplifier') opt.local_mul_canonizer.add_simplifier(softmax_simplifier, 'softmax_simplifier')
# TODO: the disabled (`if 0:`) block below should be removed in a separate commit.
if 0: if 0:
@opt.register_specialize @opt.register_specialize
@gof.local_optimizer([tensor.add]) @gof.local_optimizer([tensor.add])
...@@ -1457,6 +1463,7 @@ def local_softmax_grad_to_crossentropy_with_softmax_grad(node): ...@@ -1457,6 +1463,7 @@ def local_softmax_grad_to_crossentropy_with_softmax_grad(node):
g_nll, coding_dist, true_one_of_n = g_coding_dist.owner.inputs g_nll, coding_dist, true_one_of_n = g_coding_dist.owner.inputs
dx = crossentropy_softmax_1hot_with_bias_dx(g_nll, coding_dist, dx = crossentropy_softmax_1hot_with_bias_dx(g_nll, coding_dist,
true_one_of_n) true_one_of_n)
copy_stack_trace(node.outputs[0], dx)
return [dx] return [dx]
...@@ -1485,13 +1492,18 @@ def local_argmax_pushdown(node): ...@@ -1485,13 +1492,18 @@ def local_argmax_pushdown(node):
if x.owner and x.owner.op in (softmax_op, softplus, tensor.exp, if x.owner and x.owner.op in (softmax_op, softplus, tensor.exp,
tensor.log, tensor.tanh, sigmoid): tensor.log, tensor.tanh, sigmoid):
pre_x, = x.owner.inputs pre_x, = x.owner.inputs
return tensor._max_and_argmax(pre_x, axis) ret = tensor._max_and_argmax(pre_x, axis)
copy_stack_trace(pre_x, ret)
return ret
if x.owner and x.owner.op == softmax_with_bias: if x.owner and x.owner.op == softmax_with_bias:
pre_x, pre_bias = x.owner.inputs pre_x, pre_bias = x.owner.inputs
return tensor._max_and_argmax(pre_x + ret = tensor._max_and_argmax(pre_x +
tensor.DimShuffle( tensor.DimShuffle(
pre_bias.broadcastable, pre_bias.broadcastable,
('x', 0))(pre_bias), axis) ('x', 0))(pre_bias), axis)
# Copy the stack traces of both pre_x and pre_bias to ret.
copy_stack_trace([pre_x, pre_bias], ret)
return ret
# Utility function used by the two next optimizations # Utility function used by the two next optimizations
...@@ -1585,9 +1597,11 @@ def local_advanced_indexing_crossentropy_onehot(node): ...@@ -1585,9 +1597,11 @@ def local_advanced_indexing_crossentropy_onehot(node):
# Check that rows == arange(labels.shape[0]) # Check that rows == arange(labels.shape[0])
if _check_rows_is_arange_len_labels(rows, labels): if _check_rows_is_arange_len_labels(rows, labels):
if labels.ndim == 1 and x_var.ndim == 2: if labels.ndim == 1 and x_var.ndim == 2:
return [-crossentropy_softmax_argmax_1hot_with_bias(x_var, ret = -crossentropy_softmax_argmax_1hot_with_bias(x_var,
b_var, b_var,
labels)[0]] labels)[0]
copy_stack_trace([x_var, b_var, labels], ret)
return [ret]
@opt.register_specialize('fast_compile_gpu') @opt.register_specialize('fast_compile_gpu')
...@@ -1809,11 +1823,14 @@ def local_advanced_indexing_crossentropy_onehot_grad(node): ...@@ -1809,11 +1823,14 @@ def local_advanced_indexing_crossentropy_onehot_grad(node):
# Dimension check before substitution # Dimension check before substitution
if labels.ndim == 1 and x_var.ndim == 2: if labels.ndim == 1 and x_var.ndim == 2:
return [crossentropy_softmax_1hot_with_bias_dx(out_grad, sm, labels)] ret = crossentropy_softmax_1hot_with_bias_dx(out_grad, sm, labels)
# The stack traces of out_grad, sm and labels are not copied here,
# but they may need to be added in the future.
copy_stack_trace(node.outputs[0], ret)
return [ret]
else: else:
return return
@opt.register_specialize('fast_compile_gpu') @opt.register_specialize('fast_compile_gpu')
@gof.local_optimizer([softmax_with_bias]) @gof.local_optimizer([softmax_with_bias])
def graph_merge_softmax_with_crossentropy_softmax(node): def graph_merge_softmax_with_crossentropy_softmax(node):
...@@ -1825,6 +1842,7 @@ def graph_merge_softmax_with_crossentropy_softmax(node): ...@@ -1825,6 +1842,7 @@ def graph_merge_softmax_with_crossentropy_softmax(node):
if big_client in [b_client[0] for b_client in b.clients]: if big_client in [b_client[0] for b_client in b.clients]:
xx, bb, ll = big_client.inputs xx, bb, ll = big_client.inputs
mergeable_client = big_client.op(x, b, ll) mergeable_client = big_client.op(x, b, ll)
copy_stack_trace(node.ouputs[0], mergeable_client[1])
return [mergeable_client[1]] return [mergeable_client[1]]
...@@ -1885,7 +1903,10 @@ def local_useless_crossentropy_softmax_1hot_with_bias_dx_alloc(node): ...@@ -1885,7 +1903,10 @@ def local_useless_crossentropy_softmax_1hot_with_bias_dx_alloc(node):
msg = '`sm` and `dy` do not have the same shape.' msg = '`sm` and `dy` do not have the same shape.'
dz = opt.Assert(msg)(dz, cond) dz = opt.Assert(msg)(dz, cond)
return [node.op(dz, sm, y_idx)] ret = node.op(dz, sm, y_idx)
# Copy the stack trace from node.outputs[0] to ret (as suggested by Pascal).
copy_stack_trace(node.outputs[0], ret)
return [ret]
def binary_crossentropy(output, target): def binary_crossentropy(output, target):
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论