Merge pull request #3931 from adbrebs/h_softmax_speedup

Speed up h_softmax when full output is requested.

Merge pull request #3931 from adbrebs/h_softmax_speedup
e9c56c39 · Frédéric Bastien · 5a3526cb · 9a8bb9de · e9c56c39 · e9c56c39
--- a/theano/tensor/nnet/nnet.py
+++ b/theano/tensor/nnet/nnet.py
@@ -2293,14 +2293,8 @@ def h_softmax(x, batch_size, n_outputs, n_classes, n_outputs_per_class,
    if target is None:  # Computes the probabilities of all the outputs
-        class_ids = tensor.tile(
-            tensor.arange(n_classes, dtype="int32")[None, :], (batch_size, 1))
        # Second softmax that computes the output probabilities
-        activations = sparse_block_dot(
-            W2[None, :, :, :], x[:, None, :],
-            tensor.zeros((batch_size, 1), dtype='int32'), b2, class_ids)
+        activations = tensor.tensordot(x, W2, (1, 1)) + b2
        output_probs = theano.tensor.nnet.softmax(
            activations.reshape((-1, n_outputs_per_class)))
        output_probs = output_probs.reshape((batch_size, n_classes, -1))

--- a/theano/tensor/nnet/tests/test_nnet.py
+++ b/theano/tensor/nnet/tests/test_nnet.py
@@ -1614,6 +1614,14 @@ def test_h_softmax():
    #############
    x_mat = numpy.random.normal(size=(batch_size, input_size)).astype(floatX)
    y_mat = numpy.random.randint(0, output_size, batch_size).astype('int32')
-    assert(fun_output_tg(x_mat, y_mat).shape == (batch_size,))
+    tg_output = fun_output_tg(x_mat, y_mat)
-    assert(fun_output(x_mat).shape == (batch_size, output_size))
+    all_outputs = fun_output(x_mat)
+    assert(tg_output.shape == (batch_size,))
+    assert(all_outputs.shape == (batch_size, output_size))
+    # Verifies that the outputs computed by fun_output_tg are the same as those
+    # computed by fun_output.
+    utt.assert_allclose(
+            all_outputs[numpy.arange(0, batch_size), y_mat], tg_output)