Commit 96acd167 authored by Olivier Delalleau

Fixed recent buggy implementation of CrossentropySoftmax1HotWithBiasDx.grad and added a unit test to check it

Parent: a7d3d3e5
...@@ -682,16 +682,20 @@ class CrossentropySoftmax1HotWithBiasDx (gof.Op): ...@@ -682,16 +682,20 @@ class CrossentropySoftmax1HotWithBiasDx (gof.Op):
y_idx = tensor.as_tensor_variable(y_idx) y_idx = tensor.as_tensor_variable(y_idx)
return gof.Apply(self, [dy, sm, y_idx],[sm.type.make_variable()]) return gof.Apply(self, [dy, sm, y_idx],[sm.type.make_variable()])
def perform(self, node, input_storage, output_storage):
    """Compute dx, the gradient of the NLL cost w.r.t. the softmax input.

    input_storage holds (dy, sm, y_idx):
      dy    -- gradient of the cost w.r.t. the NLL output, one scalar per row
      sm    -- softmax output, one probability row per example
      y_idx -- integer target-class index for each row
    The result dx (same shape as sm) is written into output_storage[0][0].
    """
    dy, sm, y_idx = input_storage
    dx = numpy.zeros_like(sm)
    # `range` instead of the Python-2-only `xrange`: identical behavior
    # on Python 2 and keeps the op runnable on Python 3.
    for i in range(sm.shape[0]):
        dx[i] = dy[i] * sm[i]       # vector scale: dy_i * softmax row i
        dx[i, y_idx[i]] -= dy[i]    # scalar decrement at the target class
    output_storage[0][0] = dx
def grad(self, inputs, g_outputs):
    """Return symbolic gradients of dx w.r.t. the op's inputs (dy, sm, y_idx).

    Takes the input and output-gradient sequences as plain parameters and
    unpacks them in the body, instead of the Python-2-only tuple-unpacking
    parameter syntax (removed by PEP 3113); callers passing the two
    sequences positionally are unaffected.
    """
    dy, sm, y_idx = inputs
    (g_dx,) = g_outputs
    # TODO: currently we do not compute the gradient w.r.t. dy, because
    # advanced indexing is not working yet. When it works, do it to avoid
    # potentially misleading behavior in gradient computations! (although
    # typically we should not need the gradient w.r.t. dy).
    # y_idx_range = tensor.arange(y_idx.shape[0])
    # return [g_dx * tensor.AdvancedIncSubtensor((y_idx_range, y_idx))(
    #             sm, -1, y_idx_range, y_idx),
    #         dy.dimshuffle(0, 'x') * g_dx, None]
    # dy is one scalar per row; broadcast it across the class axis before
    # multiplying by the row-shaped g_dx.
    return [None, dy.dimshuffle(0, 'x') * g_dx, None]
def c_code_cache_version(self):
    """Version tag used to invalidate previously cached compiled C code."""
    version = (2,)
    return version
def c_code(self, node, name, (dnll, sm, y_idx), (dx,), sub): def c_code(self, node, name, (dnll, sm, y_idx), (dx,), sub):
......
...@@ -82,6 +82,20 @@ class T_CrossentropySoftmax1Hot(unittest.TestCase): ...@@ -82,6 +82,20 @@ class T_CrossentropySoftmax1Hot(unittest.TestCase):
return crossentropy_softmax_1hot(a, y_idx)[0] return crossentropy_softmax_1hot(a, y_idx)[0]
utt.verify_grad(f, [numpy.random.rand(3,4)]) utt.verify_grad(f, [numpy.random.rand(3,4)])
class T_CrossentropySoftmax1HotWithBiasDx(unittest.TestCase):
    """Numerical gradient check for crossentropy_softmax_1hot_with_bias_dx."""

    def setUp(self):
        utt.seed_rng()

    def test0(self):
        def f(sm):
            # dy and y_idx are drawn once per call and treated as constants;
            # only sm is differentiated by verify_grad.
            return theano.tensor.nnet.crossentropy_softmax_1hot_with_bias_dx(
                numpy.random.rand(10),                          # gradient w.r.t. NLL
                sm,                                             # softmax output
                numpy.random.randint(low=0, high=5, size=10))   # class indices
        # Build a random softmax output whose rows each sum to one.
        softmax_output = numpy.random.rand(10, 5)
        softmax_output /= softmax_output.sum(axis=1).reshape(10, 1)
        utt.verify_grad(f, [softmax_output])
class T_prepend(unittest.TestCase): class T_prepend(unittest.TestCase):
def setUp(self): def setUp(self):
utt.seed_rng() utt.seed_rng()
......
Markdown format
0%
You are adding 0 people to this discussion. Please proceed with caution.
Please finish editing this comment first!
Register or sign in to comment