Commit 96acd167 authored by Olivier Delalleau

Fixed recent buggy implementation of CrossentropySoftmax1HotWithBiasDx.grad and added a unit test to check it

Parent: a7d3d3e5
...@@ -682,16 +682,20 @@ class CrossentropySoftmax1HotWithBiasDx (gof.Op): ...@@ -682,16 +682,20 @@ class CrossentropySoftmax1HotWithBiasDx (gof.Op):
y_idx = tensor.as_tensor_variable(y_idx) y_idx = tensor.as_tensor_variable(y_idx)
return gof.Apply(self, [dy, sm, y_idx],[sm.type.make_variable()]) return gof.Apply(self, [dy, sm, y_idx],[sm.type.make_variable()])
def perform(self, node, input_storage, output_storage):
    """Compute dx, the gradient of the NLL cost w.r.t. the softmax input.

    input_storage holds (dy, sm, y_idx):
      dy    -- gradient of the cost w.r.t. the NLL output, one scalar per row
      sm    -- softmax output, one probability row per example
      y_idx -- integer target-class index for each row
    The result dx (same shape as sm) is written into output_storage[0][0].
    """
    dy, sm, y_idx = input_storage
    dx = numpy.zeros_like(sm)
    # `range` instead of the Python-2-only `xrange`: identical behavior
    # on Python 2 and keeps the op runnable on Python 3.
    for i in range(sm.shape[0]):
        dx[i] = dy[i] * sm[i]       # vector scale: dy_i * softmax row i
        dx[i, y_idx[i]] -= dy[i]    # scalar decrement at the target class
    output_storage[0][0] = dx
def grad(self, inputs, g_outputs):
    """Return symbolic gradients of dx w.r.t. the op's inputs (dy, sm, y_idx).

    Takes the input and output-gradient sequences as plain parameters and
    unpacks them in the body, instead of the Python-2-only tuple-unpacking
    parameter syntax (removed by PEP 3113); callers passing the two
    sequences positionally are unaffected.
    """
    dy, sm, y_idx = inputs
    (g_dx,) = g_outputs
    # TODO: currently we do not compute the gradient w.r.t. dy, because
    # advanced indexing is not working yet. When it works, do it to avoid
    # potentially misleading behavior in gradient computations! (although
    # typically we should not need the gradient w.r.t. dy).
    # y_idx_range = tensor.arange(y_idx.shape[0])
    # return [g_dx * tensor.AdvancedIncSubtensor((y_idx_range, y_idx))(
    #             sm, -1, y_idx_range, y_idx),
    #         dy.dimshuffle(0, 'x') * g_dx, None]
    # dy is one scalar per row; broadcast it across the class axis before
    # multiplying by the row-shaped g_dx.
    return [None, dy.dimshuffle(0, 'x') * g_dx, None]
def c_code_cache_version(self):
    """Version tag used to invalidate previously cached compiled C code."""
    version = (2,)
    return version
def c_code(self, node, name, (dnll, sm, y_idx), (dx,), sub): def c_code(self, node, name, (dnll, sm, y_idx), (dx,), sub):
......
...@@ -82,6 +82,20 @@ class T_CrossentropySoftmax1Hot(unittest.TestCase): ...@@ -82,6 +82,20 @@ class T_CrossentropySoftmax1Hot(unittest.TestCase):
return crossentropy_softmax_1hot(a, y_idx)[0] return crossentropy_softmax_1hot(a, y_idx)[0]
utt.verify_grad(f, [numpy.random.rand(3,4)]) utt.verify_grad(f, [numpy.random.rand(3,4)])
class T_CrossentropySoftmax1HotWithBiasDx(unittest.TestCase):
    """Numerical gradient check for crossentropy_softmax_1hot_with_bias_dx."""

    def setUp(self):
        utt.seed_rng()

    def test0(self):
        def f(sm):
            # dy and y_idx are drawn once per call and treated as constants;
            # only sm is differentiated by verify_grad.
            return theano.tensor.nnet.crossentropy_softmax_1hot_with_bias_dx(
                numpy.random.rand(10),                          # gradient w.r.t. NLL
                sm,                                             # softmax output
                numpy.random.randint(low=0, high=5, size=10))   # class indices
        # Build a random softmax output whose rows each sum to one.
        softmax_output = numpy.random.rand(10, 5)
        softmax_output /= softmax_output.sum(axis=1).reshape(10, 1)
        utt.verify_grad(f, [softmax_output])
class T_prepend(unittest.TestCase): class T_prepend(unittest.TestCase):
def setUp(self): def setUp(self):
utt.seed_rng() utt.seed_rng()
......
Markdown format
0%
You are adding 0 people to this discussion. Please proceed with caution.
Please finish editing this comment first!
Register or sign in to comment