提交 a78e35e3 authored 作者: Olivier Delalleau's avatar Olivier Delalleau

Implemented gradient of CrossentropySoftmaxArgmax1HotWithBias when there is an…

Implemented gradient of CrossentropySoftmaxArgmax1HotWithBias when there is an incoming gradient on the softmax's output
上级 38515d4f
......@@ -565,13 +565,21 @@ class CrossentropySoftmaxArgmax1HotWithBias(gof.Op):
output_storage[1][0] = sm
output_storage[2][0] = am
# NOTE(review): this span is a web-scraped unified-diff hunk — the +/- gutter
# and all leading indentation were lost in extraction, so it interleaves the
# pre-commit and post-commit bodies of grad(). It is not runnable as-is.
# Python 2 tuple-parameter syntax: the Op's inputs (x, b, y_idx) and the
# gradients on its three outputs (g_nll, g_sm, g_am) arrive as tuples.
# Returns a gradient triple (dx, db, None): no gradient is defined w.r.t.
# the integer targets y_idx.
def grad(self, (x, b, y_idx), (g_nll, g_sm, g_am)):
# Presumably the removed (pre-commit) guard: any incoming gradient on the
# softmax or argmax outputs used to be unimplemented — TODO confirm against
# the repository diff.
if g_sm is not None or g_am is not None:
# Post-commit logic: only a gradient on the argmax output stays unsupported.
if g_am is not None:
raise NotImplementedError()
# Gradient through the NLL output: recompute sm, then use the dedicated
# Dx op; the bias gradient is the column-sum of dx.
nll, sm = crossentropy_softmax_1hot_with_bias(x, b, y_idx)
#dx = CrossentropySoftmax1HotWithBiasDx()(g_nll, sm, y_idx)
dx = crossentropy_softmax_1hot_with_bias_dx(g_nll, sm, y_idx)
db = tensor.sum(dx, axis = [0])
return dx, db, None
elif g_sm is not None:
# There is a gradient w.r.t. the softmax's output itself.
if g_nll is not None or g_am is not None:
raise NotImplementedError()
# Delegate to softmax_with_bias's own grad; append None for y_idx.
return softmax_with_bias.grad((x, b, ), (g_sm, )) + (None, )
else:
# There is a gradient w.r.t. the NLL.
assert g_nll is not None
nll, sm = crossentropy_softmax_1hot_with_bias(x, b, y_idx)
#dx = CrossentropySoftmax1HotWithBiasDx()(g_nll, sm, y_idx)
dx = crossentropy_softmax_1hot_with_bias_dx(g_nll, sm, y_idx)
db = tensor.sum(dx, axis = [0])
return dx, db, None
def c_headers(self):
    """Return the C/C++ headers required by this Op's generated C code."""
    required = ['<iostream>', '<cmath>']
    return required
......
......@@ -96,6 +96,27 @@ class T_CrossentropySoftmax1HotWithBiasDx(unittest.TestCase):
softmax_output /= softmax_output.sum(axis=1).reshape(10, 1)
utt.verify_grad(f, [softmax_output])
class T_CrossentropySoftmaxArgmax1HotWithBias(unittest.TestCase):
    """Gradient checks for the fused crossentropy/softmax/argmax op."""

    def setUp(self):
        utt.seed_rng()
        self.op = theano.tensor.nnet.crossentropy_softmax_argmax_1hot_with_bias

    def test0(self):
        n_classes = 5
        n_samples = 3

        # First test gradient when getting a gradient on the NLL output.
        def nll_output(x, b):
            y_idx = numpy.random.randint(low=0, high=n_classes,
                                         size=n_samples)
            return self.op(x, b, y_idx=y_idx)[0]
        utt.verify_grad(nll_output,
                        [numpy.random.rand(n_samples, n_classes),
                         numpy.random.rand(n_classes)])

        # Then test gradient when getting a gradient on the softmax output.
        def softmax_output(x, b):
            y_idx = numpy.random.randint(low=0, high=n_classes,
                                         size=n_samples)
            return self.op(x, b, y_idx=y_idx)[1]
        utt.verify_grad(softmax_output,
                        [numpy.random.rand(n_samples, n_classes),
                         numpy.random.rand(n_classes)])
class T_prepend(unittest.TestCase):
def setUp(self):
utt.seed_rng()
......
Markdown 格式
0%
您将要添加 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论