提交 3964a349 作者: Pascal Lamblin

Review changes in Xent optimization.

上级 d9b62292
...@@ -1088,10 +1088,12 @@ def local_advanced_indexing_crossentropy_onehot_grad(node): ...@@ -1088,10 +1088,12 @@ def local_advanced_indexing_crossentropy_onehot_grad(node):
adv_subtensor = input adv_subtensor = input
outgrad_factor /= rest outgrad_factor /= rest
break break
else: else:
return return
# The output gradient needs to be a vector
out_grad = tensor.fill(x_var[:,0], outgrad_factor)
if adv_subtensor is not None: if adv_subtensor is not None:
try: try:
maybe_sm, maybe_rows, maybe_labels = adv_subtensor.owner.inputs maybe_sm, maybe_rows, maybe_labels = adv_subtensor.owner.inputs
...@@ -1115,7 +1117,6 @@ def local_advanced_indexing_crossentropy_onehot_grad(node): ...@@ -1115,7 +1117,6 @@ def local_advanced_indexing_crossentropy_onehot_grad(node):
# Second case # Second case
elif out_grad.owner and out_grad.owner.op == tensor.true_div: elif out_grad.owner and out_grad.owner.op == tensor.true_div:
# we know
# we're looking for # we're looking for
# AdvIncSubtensor(zeros, grad_nll, arange(len(y)), y) / softmax # AdvIncSubtensor(zeros, grad_nll, arange(len(y)), y) / softmax
try: try:
...@@ -1134,30 +1135,14 @@ def local_advanced_indexing_crossentropy_onehot_grad(node): ...@@ -1134,30 +1135,14 @@ def local_advanced_indexing_crossentropy_onehot_grad(node):
return return
# Check z is zeros_like(log(sm)) # Check z is zeros_like(log(sm))
# JB - do we really care if this is zeros?
if not _is_const(z, 0): if not _is_const(z, 0):
return return
if z.type not in (dmatrix, fmatrix): if z.type not in (dmatrix, fmatrix):
return return
# here we know that we are incrementing a matrix of zeros # here we know that we are incrementing a matrix of zeros
# Since out_grad and sm are the inputs of softmax_grad,
if 0: # if the graph is valid, they have the same shape, so we
if z.owner and z.owner.op == tensor.fill: # also know that z has the right shape.
model, value = z.owner.inputs
if model.owner and model.owner.op == tensor.log:
if sm is model.owner.inputs[0]:
log_sm = model
else:
return
if not (hasattr(value, 'data') and numpy.all(value.data == 0)):
return
#else: OK
else:
return
else:
return
if incr.type not in (dvector, fvector): if incr.type not in (dvector, fvector):
return return
...@@ -1171,57 +1156,12 @@ def local_advanced_indexing_crossentropy_onehot_grad(node): ...@@ -1171,57 +1156,12 @@ def local_advanced_indexing_crossentropy_onehot_grad(node):
# We leave it to the Op to crash (and the user to complain) if this assumption is # We leave it to the Op to crash (and the user to complain) if this assumption is
# ever not true. # ever not true.
outgrad_factor = None out_grad = -incr
if 0:
# Check incr is ((-1.) like log(softmax(x))[arange(len(y)), y])
if incr.owner and incr.owner.op == tensor.fill:
model, value = incr.owner.inputs
adv_subtensor = None
outgrad_factor = None
if model.owner and isinstance(model.owner.op, tensor.AdvancedSubtensor):
adv_subtensor = model
else:
if model.owner and isinstance(model.owner.op, tensor.Elemwise):
for input in model.owner.inputs:
if input.owner and isinstance(input.owner.op, tensor.AdvancedSubtensor):
adv_subtensor = input
break
#TODO: try them all, not just the first one
else:
return
if adv_subtensor is not None:
try:
maybe_log_sm, maybe_rows, maybe_labels = adv_subtensor.owner.inputs
except:
return
if not (maybe_log_sm is log_sm and maybe_rows is rows and maybe_labels is labels):
return
#else: OK
else:
return
# In the base case, value is the constant '-1'
if hasattr(value, 'data') and numpy.all(value.data == -1):
outgrad_factor = 1.
# Otherwise, it should be a scalar, and the output gradient
# would be -value
elif numpy.all(value.broadcastable):
outgrad_factor = -value
else:
return
else:
return
# Check that rows is arange(labels.shape[0]) # Check that rows is arange(labels.shape[0])
if not _check_rows_is_arange_len_labels(rows, labels): if not _check_rows_is_arange_len_labels(rows, labels):
return return
# else, arguments of AdvancedIncSubtensor are OK # else, arguments of AdvancedIncSubtensor are OK
return [crossentropy_softmax_1hot_with_bias_dx(-incr, sm, labels)]
# else, numerator and denominator are OK, # else, numerator and denominator are OK,
# it was really case 2. # it was really case 2.
...@@ -1231,11 +1171,7 @@ def local_advanced_indexing_crossentropy_onehot_grad(node): ...@@ -1231,11 +1171,7 @@ def local_advanced_indexing_crossentropy_onehot_grad(node):
# Dimension check before substitution # Dimension check before substitution
if labels.ndim == 1 and x_var.ndim == 2: if labels.ndim == 1 and x_var.ndim == 2:
if outgrad_factor is not None: return [crossentropy_softmax_1hot_with_bias_dx(out_grad, sm, labels)]
out_grad = tensor.fill(x_var[:,0], outgrad_factor)
return [crossentropy_softmax_1hot_with_bias_dx(out_grad, sm, labels)]
else:
return
else: else:
return return
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论