Commit a414e5f3 authored by Pascal Lamblin

Optimization of automatically computed gradient of softmax+nll by
crossentropy_softmax_1hot_with_bias_dx
Parent b72de011
@@ -953,6 +953,181 @@ def local_advanced_indexing_crossentropy_onehot(node):
    if labels.ndim == 1 and x_var.ndim == 2:
        return [-crossentropy_softmax_argmax_1hot_with_bias(x_var, b_var,
                                                            labels)[0]]


@opt.register_specialize
@gof.local_optimizer([softmax_grad])
def local_advanced_indexing_crossentropy_onehot_grad(node):
    if not (node.op == softmax_grad):
        return

    sm = None
    try:
        out_grad, sm = node.inputs
    except Exception:
        return

    if sm is not None and sm.owner and sm.owner.op == softmax:
        x_var = sm.owner.inputs[0]
    else:
        return
    # Two cases are supported:
    # 1. AdvancedIncSubtensor(
    #        zeros_like(softmax(x)),
    #        -1. / AdvancedSubtensor(softmax(x), arange(y.shape[0]), y),
    #        arange(y.shape[0]),
    #        y)
    #    which arises from the gradient of log(softmax(x)[arange(y.shape[0]), y])
    #
    # 2. AdvancedIncSubtensor(
    #        zeros_like(log(softmax(x))),
    #        -1. like (AdvancedSubtensor(log(softmax(x)), arange(y.shape[0]), y)),
    #        arange(y.shape[0]),
    #        y)
    #    / softmax(x)
    #    which arises from the gradient of log(softmax(x))[arange(y.shape[0]), y]
    #
    # In some instances of case 2, instead of "-1. like (AdvancedSubtensor...)",
    # we can have "-1. like ([-1] * AdvancedSubtensor...)". This variant is
    # recognized too, but other variants, even with the same shape, might not
    # be (yet).
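    #
    # For illustration, graphs of these two shapes typically come from user
    # code along the following lines (a sketch only; the variable names are
    # hypothetical and not defined in this file):
    #
    #     x = tensor.matrix('x')
    #     y = tensor.lvector('y')
    #     rows = tensor.arange(y.shape[0])
    #     cost1 = -tensor.mean(tensor.log(softmax(x)[rows, y]))  # case 1
    #     cost2 = -tensor.mean(tensor.log(softmax(x))[rows, y])  # case 2
    #     g = tensor.grad(cost1, x)  # builds the pattern matched below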
    # First case.
    # After the check for AdvancedIncSubtensor, if anything does not fit with
    # the formula above, there is no way to fit it with the second case,
    # so we return immediately.
    if out_grad.owner and isinstance(out_grad.owner.op,
                                     tensor.AdvancedIncSubtensor):
        try:
            z, incr, rows, labels = out_grad.owner.inputs
        except Exception:
            return

        # Check that z == zeros_like(softmax(x))
        if z.owner and z.owner.op == tensor.fill:
            model, value = z.owner.inputs
            if not (model is sm and numpy.all(value.data == 0)):
                return
            # else: OK
        else:
            return

        # Check that incr has the form -1. / sm[arange(len(y)), y]
        if incr.owner and incr.owner.op == tensor.true_div:
            num, denom = incr.owner.inputs
            if not numpy.all(num.data == -1):
                return
            # else: OK

            if denom.owner and isinstance(denom.owner.op,
                                          tensor.AdvancedSubtensor):
                try:
                    maybe_sm, maybe_rows, maybe_labels = denom.owner.inputs
                except Exception:
                    return
                if not (maybe_sm is sm and maybe_rows is rows
                        and maybe_labels is labels):
                    return
                # else: OK
            else:
                return
        else:
            return

        # Check that rows is arange(labels.shape[0])
        if not _check_rows_is_arange_len_labels(rows, labels):
            return
        # else, arguments of AdvancedIncSubtensor are OK:
        # it was really case 1.
    # Second case
    elif out_grad.owner and out_grad.owner.op == tensor.true_div:
        try:
            num, denom = out_grad.owner.inputs
        except Exception:
            return

        # Check the numerator (AdvancedIncSubtensor)
        if num.owner and isinstance(num.owner.op,
                                    tensor.AdvancedIncSubtensor):
            try:
                z, incr, rows, labels = num.owner.inputs
            except Exception:
                return

            # Check that z is zeros_like(log(sm))
            if z.owner and z.owner.op == tensor.fill:
                model, value = z.owner.inputs
                if model.owner and model.owner.op == tensor.log:
                    if sm is model.owner.inputs[0]:
                        log_sm = model
                    else:
                        return
                    if not numpy.all(value.data == 0):
                        return
                    # else: OK
                else:
                    return
            else:
                return

            # Check that incr is (-1.) like log(softmax(x))[arange(len(y)), y]
            if incr.owner and incr.owner.op == tensor.fill:
                model, value = incr.owner.inputs
                adv_subtensor = None
                if model.owner and isinstance(model.owner.op,
                                              tensor.AdvancedSubtensor):
                    adv_subtensor = model
                elif model.owner and isinstance(model.owner.op,
                                                tensor.Elemwise):
                    for inp in model.owner.inputs:
                        if inp.owner and isinstance(inp.owner.op,
                                                    tensor.AdvancedSubtensor):
                            adv_subtensor = inp
                            break
                    # TODO: try them all, not just the first one
                else:
                    return
                if adv_subtensor is not None:
                    try:
                        (maybe_log_sm, maybe_rows,
                         maybe_labels) = adv_subtensor.owner.inputs
                    except Exception:
                        return
                    if not (maybe_log_sm is log_sm and maybe_rows is rows
                            and maybe_labels is labels):
                        return
                    # else: OK
                if not numpy.all(value.data == -1):
                    return
            else:
                return

            # Check that rows is arange(labels.shape[0])
            if not _check_rows_is_arange_len_labels(rows, labels):
                return
            # else, arguments of AdvancedIncSubtensor are OK
        else:
            return

        # Check the denominator (sm)
        if denom is not sm:
            return
        # else, numerator and denominator are OK:
        # it was really case 2.
    else:
        return
    # Dimension check before substitution
    if labels.ndim == 1 and x_var.ndim == 2:
        return [crossentropy_softmax_1hot_with_bias_dx(
            tensor.ones_like(sm[:, 0]), sm, labels)]
    else:
        return
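
# Note on the substitution above: crossentropy_softmax_1hot_with_bias_dx
# computes the closed-form gradient of softmax followed by the negative
# log-likelihood, dx[i, j] = dy[i] * (sm[i, j] - 1{j == labels[i]}),
# instead of building the indexing graph matched above. A minimal numpy
# sketch of that computation (illustrative only; this helper is not part
# of the module and its name is hypothetical):
#
#     def nll_softmax_dx(dy, sm, y_idx):
#         dx = sm * dy[:, numpy.newaxis]
#         dx[numpy.arange(y_idx.shape[0]), y_idx] -= dy
#         return dx
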
def binary_crossentropy(output, target):