提交 a78e35e3 authored 作者: Olivier Delalleau's avatar Olivier Delalleau

Implemented gradient of CrossentropySoftmaxArgmax1HotWithBias when there is an…

Implemented gradient of CrossentropySoftmaxArgmax1HotWithBias when there is an incoming gradient on the softmax's output
上级 38515d4f
......@@ -565,13 +565,21 @@ class CrossentropySoftmaxArgmax1HotWithBias(gof.Op):
output_storage[1][0] = sm
output_storage[2][0] = am
# NOTE(review): this span is a web-scraped unified-diff hunk — the +/- gutter
# and all leading indentation were lost in extraction, so it interleaves the
# pre-commit and post-commit bodies of grad(). It is not runnable as-is.
# Python 2 tuple-parameter syntax: the Op's inputs (x, b, y_idx) and the
# gradients on its three outputs (g_nll, g_sm, g_am) arrive as tuples.
# Returns a gradient triple (dx, db, None): no gradient is defined w.r.t.
# the integer targets y_idx.
def grad(self, (x, b, y_idx), (g_nll, g_sm, g_am)):
# Presumably the removed (pre-commit) guard: any incoming gradient on the
# softmax or argmax outputs used to be unimplemented — TODO confirm against
# the repository diff.
if g_sm is not None or g_am is not None:
# Post-commit logic: only a gradient on the argmax output stays unsupported.
if g_am is not None:
raise NotImplementedError()
# Gradient through the NLL output: recompute sm, then use the dedicated
# Dx op; the bias gradient is the column-sum of dx.
nll, sm = crossentropy_softmax_1hot_with_bias(x, b, y_idx)
#dx = CrossentropySoftmax1HotWithBiasDx()(g_nll, sm, y_idx)
dx = crossentropy_softmax_1hot_with_bias_dx(g_nll, sm, y_idx)
db = tensor.sum(dx, axis = [0])
return dx, db, None
elif g_sm is not None:
# There is a gradient w.r.t. the softmax's output itself.
if g_nll is not None or g_am is not None:
raise NotImplementedError()
# Delegate to softmax_with_bias's own grad; append None for y_idx.
return softmax_with_bias.grad((x, b, ), (g_sm, )) + (None, )
else:
# There is a gradient w.r.t. the NLL.
assert g_nll is not None
nll, sm = crossentropy_softmax_1hot_with_bias(x, b, y_idx)
#dx = CrossentropySoftmax1HotWithBiasDx()(g_nll, sm, y_idx)
dx = crossentropy_softmax_1hot_with_bias_dx(g_nll, sm, y_idx)
db = tensor.sum(dx, axis = [0])
return dx, db, None
def c_headers(self):
    """Return the C/C++ headers required by this Op's generated C code."""
    required = ['<iostream>', '<cmath>']
    return required
......
......@@ -96,6 +96,27 @@ class T_CrossentropySoftmax1HotWithBiasDx(unittest.TestCase):
softmax_output /= softmax_output.sum(axis=1).reshape(10, 1)
utt.verify_grad(f, [softmax_output])
class T_CrossentropySoftmaxArgmax1HotWithBias(unittest.TestCase):
    """Gradient checks for the fused crossentropy/softmax/argmax op."""

    def setUp(self):
        utt.seed_rng()
        self.op = theano.tensor.nnet.crossentropy_softmax_argmax_1hot_with_bias

    def test0(self):
        n_classes = 5
        n_samples = 3

        # First test gradient when getting a gradient on the NLL output.
        def nll_output(x, b):
            y_idx = numpy.random.randint(low=0, high=n_classes,
                                         size=n_samples)
            return self.op(x, b, y_idx=y_idx)[0]
        utt.verify_grad(nll_output,
                        [numpy.random.rand(n_samples, n_classes),
                         numpy.random.rand(n_classes)])

        # Then test gradient when getting a gradient on the softmax output.
        def softmax_output(x, b):
            y_idx = numpy.random.randint(low=0, high=n_classes,
                                         size=n_samples)
            return self.op(x, b, y_idx=y_idx)[1]
        utt.verify_grad(softmax_output,
                        [numpy.random.rand(n_samples, n_classes),
                         numpy.random.rand(n_classes)])
class T_prepend(unittest.TestCase):
def setUp(self):
utt.seed_rng()
......
Markdown 格式
0%
您将要添加 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论