Commit f036a5d1, authored by Pascal Lamblin

Implement the gradient of CrossEntropySoftmax[...]Dx with respect to dy.

Parent commit: 29ea70d6
...@@ -701,9 +701,14 @@ class CrossentropySoftmax1HotWithBiasDx (gof.Op): ...@@ -701,9 +701,14 @@ class CrossentropySoftmax1HotWithBiasDx (gof.Op):
# advanced indexing is not working yet. When it works, do it to avoid # advanced indexing is not working yet. When it works, do it to avoid
# potentially misleading behavior in gradient computations! (although # potentially misleading behavior in gradient computations! (although
# typically we should not need the gradient w.r.t. dy). # typically we should not need the gradient w.r.t. dy).
# y_idx_range = tensor.arange(y_idx.shape[0]) y_idx_range = tensor.arange(y_idx.shape[0])
# return [g_dx * tensor.AdvancedIncSubtensor((y_idx_range, y_idx))(sm, -1, y_idx_range, y_idx), dy.dimshuffle(0, 'x') * g_dx, None] g_dy = tensor.sum(
return [None, dy.dimshuffle(0, 'x') * g_dx, None] g_dx * tensor.AdvancedIncSubtensor((y_idx_range, y_idx))(
sm, tensor.fill(dy, -1), y_idx_range, y_idx),
axis=1)
g_sm = dy.dimshuffle(0, 'x') * g_dx
g_y_idx = None
return [g_dy, g_sm, g_y_idx]
def c_code_cache_version(self):
    """Return the cache-version tag for this Op's generated C code.

    Bumping this tuple invalidates previously compiled C implementations
    so they are regenerated after a change to ``c_code``.
    """
    version = (2,)
    return version
def c_code(self, node, name, (dnll, sm, y_idx), (dx,), sub): def c_code(self, node, name, (dnll, sm, y_idx), (dx,), sub):
......
Markdown format supported
0%
You are adding 0 people to this discussion. Please proceed with caution.
Please finish editing this comment first!
Register or sign in to post a comment.