Various tests for softmax with a vector (or sum of vectors) as input.

ad7937d2 · Pascal Lamblin · de4b6c60 · ad7937d2
--- a/theano/tensor/nnet/tests/test_nnet.py
+++ b/theano/tensor/nnet/tests/test_nnet.py
@@ -6,6 +6,7 @@ from theano import gof
 import numpy
 from theano.tests import unittest_tools as utt
 from theano.tensor.tests import test_basic as TT
+from theano import printing

 from theano.tensor.nnet import *

@@ -48,6 +49,10 @@ class T_Softmax(unittest.TestCase):

        xv = numpy.random.randn(6)
        assert numpy.allclose(f(xv), numpy.exp(xv) / numpy.exp(xv).sum())
+    def test_vector_grad(self):  # runs utt.verify_grad on softmax with a 1-d input
+        def f(a):
+            return softmax(a)
+        utt.verify_grad(f, [numpy.random.rand(4)])  # evaluation point: random vector of length 4


 class T_SoftmaxWithBias(unittest.TestCase):
@@ -88,6 +93,16 @@ class T_CrossentropySoftmax1Hot(unittest.TestCase):
        def f(a):
            return crossentropy_softmax_1hot(a, y_idx)[0]
        utt.verify_grad(f, [numpy.random.rand(3,4)])
+    def test_vector(self):  # runs utt.verify_grad on crossentropy_softmax_1hot fed from a single vector
+        y_idx = [3]  # a single target index
+        def f(a):
+            return crossentropy_softmax_1hot(T.shape_padleft(a), y_idx)[0]  # vector is passed through shape_padleft first; [0] keeps only the first output
+        utt.verify_grad(f, [numpy.random.rand(4)])  # evaluation point: random vector of length 4
+    def test_vectors(self):  # runs utt.verify_grad on crossentropy_softmax_1hot fed from a sum of two vectors
+        y_idx = [3]  # a single target index
+        def f(a, b):
+            return crossentropy_softmax_1hot(T.shape_padleft(a)+b, y_idx)[0]  # only `a` goes through shape_padleft; [0] keeps only the first output
+        utt.verify_grad(f, [numpy.random.rand(4), numpy.random.rand(4)])  # gradient is checked w.r.t. both vectors

 class T_CrossentropySoftmax1HotWithBiasDx(unittest.TestCase):
    def setUp(self):
@@ -202,6 +217,20 @@ class T_CrossentropyCategorical1Hot(unittest.TestCase):
        assert str(env.outputs[0].owner.op) == 'OutputGuard'
        assert env.outputs[0].owner.inputs[0].owner.op == crossentropy_softmax_argmax_1hot_with_bias

+    def test_softmax_optimizations_vector(self):  # checks the FAST_RUN rewrite of crossentropy_categorical_1hot(softmax(x)) for a vector x
+        x = tensor.vector('x')
+        one_of_n = tensor.lvector('one_of_n')
+        op = crossentropy_categorical_1hot
+        env = gof.Env(
+                [x, one_of_n],
+                [op(softmax(x), one_of_n)])
+        assert env.outputs[0].owner.op == op  # sanity check: before optimizing, the output is produced by `op` itself
+
+        theano.compile.mode.optdb.query(
+                theano.compile.mode.OPT_FAST_RUN).optimize(env)  # env is re-inspected below without reassignment, so it is modified in place
+        assert str(env.outputs[0].owner.op) == 'OutputGuard'  # the output node must now be an OutputGuard
+        assert env.outputs[0].owner.inputs[0].owner.op == crossentropy_softmax_argmax_1hot_with_bias  # ...whose input comes from the combined op
+
    def test_softmax_optimizations_w_bias(self):
        x = tensor.matrix('x')
        b = tensor.vector('b')
@@ -218,16 +247,75 @@ class T_CrossentropyCategorical1Hot(unittest.TestCase):
        print 'BEFORE'
        for node in env.toposort():
            print node.op
+        print printing.pprint(node.outputs[0])
        print '----'

        theano.compile.mode.optdb.query(
                theano.compile.mode.OPT_FAST_RUN).optimize(env)

+        print 'AFTER'
+        for node in env.toposort():
+            print node.op
+        print printing.pprint(node.outputs[0])
+        print '===='
        assert len(env.toposort()) == 2

        assert str(env.outputs[0].owner.op) == 'OutputGuard'
        assert env.outputs[0].owner.inputs[0].owner.op == crossentropy_softmax_argmax_1hot_with_bias

+    def test_softmax_optimizations_w_bias2(self):  # checks the FAST_RUN rewrite when softmax receives T.add(x, b, c)
+        x = tensor.matrix('x')
+        b = tensor.vector('b')
+        c = tensor.vector('c')
+        one_of_n = tensor.lvector('one_of_n')
+        op = crossentropy_categorical_1hot
+
+        env = gof.Env(
+                [x, b, c, one_of_n],
+                [op(softmax(T.add(x,b,c)), one_of_n)])  # a single three-argument add feeds the softmax
+        assert env.outputs[0].owner.op == op  # sanity check: before optimizing, the output is produced by `op` itself
+
+        print 'BEFORE'  # debug output: op of every node before optimization
+        for node in env.toposort():
+            print node.op
+        print '----'
+
+        theano.compile.mode.optdb.query(
+                theano.compile.mode.OPT_FAST_RUN).optimize(env)  # env is re-inspected below without reassignment, so it is modified in place
+
+        print 'AFTER'  # debug output: op of every node after optimization
+        for node in env.toposort():
+            print node.op
+        print '===='
+        assert len(env.toposort()) == 3  # exactly three nodes must remain
+
+        assert str(env.outputs[0].owner.op) == 'OutputGuard'  # the output node must now be an OutputGuard
+        assert env.outputs[0].owner.inputs[0].owner.op == crossentropy_softmax_argmax_1hot_with_bias  # ...whose input comes from the combined op
+
+    def test_softmax_optimizations_w_bias_vector(self):  # checks the FAST_RUN rewrite when softmax receives the sum of two vectors
+        x = tensor.vector('x')
+        b = tensor.vector('b')
+        one_of_n = tensor.lvector('one_of_n')
+        op = crossentropy_categorical_1hot
+        env = gof.Env(
+                [x, b, one_of_n],
+                [op(softmax(x+b), one_of_n)])
+        assert env.outputs[0].owner.op == op  # sanity check: before optimizing, the output is produced by `op` itself
+        print 'BEFORE'  # debug output: op of every node before optimization
+        for node in env.toposort():
+            print node.op
+        print printing.pprint(node.outputs[0])  # `node` is the loop variable left over from the loop above, i.e. the last node visited
+        print '----'
+
+        theano.compile.mode.optdb.query(
+                theano.compile.mode.OPT_FAST_RUN).optimize(env)  # env is re-inspected below without reassignment, so it is modified in place
+        print 'AFTER'  # debug output: op of every node after optimization
+        for node in env.toposort():
+            print node.op
+        print '===='
+        assert len(env.toposort()) == 3  # exactly three nodes must remain
+        assert str(env.outputs[0].owner.op) == 'OutputGuard'  # the output node must now be an OutputGuard
+        assert env.outputs[0].owner.inputs[0].owner.op == crossentropy_softmax_argmax_1hot_with_bias  # ...whose input comes from the combined op

    def test_softmax_grad_optimizations(self):
        x = tensor.matrix('x')
@@ -274,6 +362,49 @@ class T_CrossentropyCategorical1Hot(unittest.TestCase):
        assert not has_softmax
        assert not has_softmaxdx

+    def test_softmax_grad_optimizations_vector(self):  # checks which ops remain in the optimized gradient graph for a vector x
+        x = tensor.vector('x')
+        one_of_n = tensor.lvector('one_of_n')
+        op = crossentropy_categorical_1hot
+        xe = op(softmax(x), one_of_n)
+        sum_xe = tensor.sum(xe)
+        g_x = tensor.grad(sum_xe, x)  # gradient of the summed cross-entropy with respect to x
+        env = gof.Env(
+                [x, one_of_n],
+                [g_x])  # the graph under test has the gradient as its only output
+
+        print 'BEFORE'  # debug output: op and inputs of every node before optimization
+        for node in env.toposort():
+            print node.op, node.inputs
+        print '----'
+        theano.compile.mode.optdb.query(
+                theano.compile.mode.OPT_FAST_RUN).optimize(env)  # env is re-inspected below without reassignment, so it is modified in place
+
+        print 'AFTER'  # debug output: op and inputs of every node after optimization
+        for node in env.toposort():
+            print node.op, node.inputs
+
+        # the function has 9 ops because the dimshuffle and elemwise{second} aren't getting
+        # cleaned up as well as we'd like.  NOTE(review): no op count is asserted below -- confirm the "9" holds for vector input.
+        has_cx1hot = False  # the four flags record which ops are seen in the optimized graph
+        has_cx1hotdx = False
+        has_softmax = False
+        has_softmaxdx = False
+        for node in env.toposort():
+            if node.op == crossentropy_softmax_argmax_1hot_with_bias:
+                has_cx1hot = True
+            if node.op == crossentropy_softmax_1hot_with_bias_dx :
+                has_cx1hotdx = True
+            if node.op == softmax:
+                has_softmax = True
+            if node.op == softmax_grad:
+                has_softmaxdx = True
+
+        assert has_cx1hot  # the combined forward op must be present
+        assert has_cx1hotdx  # the combined gradient op must be present
+        assert not has_softmax  # no standalone softmax may remain
+        assert not has_softmaxdx  # no standalone softmax_grad may remain
+
    def test_get_rid_of_advanced_indexing_version_of_xent(self):
        verbose = 0
        if verbose:
@@ -408,6 +539,105 @@ class T_CrossentropyCategorical1Hot(unittest.TestCase):
                theano.printing.debugprint(g)
                raise

+    def test_optimize_xent_vector(self):  # compiles the indexed-softmax cross-entropy on a vector x and checks the optimized graphs
+        verbose = 0  # set to a true value to print the compiled graphs
+        mode = theano.compile.mode.get_default_mode()
+        if mode == theano.compile.mode.get_mode('FAST_COMPILE'):
+            mode = 'FAST_RUN'  # FAST_COMPILE is swapped for FAST_RUN before compiling
+        rng = numpy.random.RandomState(utt.fetch_seed())
+        x_val = rng.randn(5)
+        y_val = numpy.asarray([2])
+
+        x = T.dvector('x')
+        y = T.lvector('y')
+
+        def print_graph(func):  # debug helper: list the nodes of a compiled function
+            for i, node in enumerate(func.maker.env.toposort()):
+                print i, node
+            # Last node should be the output (i and node keep their final loop values here)
+            print i, printing.pprint(node.outputs[0])
+            print
+
+        ## Test that a softmax without bias, applied to a vector, is optimized correctly
+        bias_expressions = [
+                T.sum(-T.log(softmax(x)[T.arange(y.shape[0]), y])),
+                -T.sum(T.log(softmax(x)[T.arange(y.shape[0]), y]))]
+
+        for expr in bias_expressions:
+            f = theano.function([x,y], expr, mode=mode)
+            if verbose: print_graph(f)
+            try:
+                prev, last = f.maker.env.toposort()[-2:]  # unpacking fails unless the graph has at least two nodes
+                assert len(f.maker.env.toposort()) == 5
+                f(x_val, y_val)  # the compiled function must also run on concrete values
+            except:
+                theano.printing.debugprint(f)  # dump the graph for diagnosis, then re-raise the original error
+                raise
+
+            g = theano.function([x,y], T.grad(expr, x), mode=mode)
+            if verbose: print_graph(g)  # guarded like the call for f, so the test is silent when verbose is off
+            try:
+                ops = [node.op for node in g.maker.env.toposort()]
+                assert len(ops) == 4
+                assert crossentropy_softmax_1hot_with_bias_dx in ops
+                assert softmax in ops
+                assert softmax_grad not in ops
+                g(x_val, y_val)  # the compiled gradient must also run on concrete values
+            except:
+                theano.printing.debugprint(g)  # dump the graph for diagnosis, then re-raise the original error
+                raise
+
+    def test_optimize_xent_vector2(self):  # compiles the indexed-softmax cross-entropy on a sum of two vectors and checks the optimized graphs
+        verbose = 0  # set to a true value to print the compiled graphs
+        mode = theano.compile.mode.get_default_mode()
+        if mode == theano.compile.mode.get_mode('FAST_COMPILE'):
+            mode = 'FAST_RUN'  # FAST_COMPILE is swapped for FAST_RUN before compiling
+        rng = numpy.random.RandomState(utt.fetch_seed())
+        x_val = rng.randn(5)
+        b_val = rng.randn(5)
+        y_val = numpy.asarray([2])
+
+        x = T.dvector('x')
+        b = T.dvector('b')
+        y = T.lvector('y')
+
+        def print_graph(func):  # debug helper: list the nodes of a compiled function
+            for i, node in enumerate(func.maker.env.toposort()):
+                print i, node
+            # Last node should be the output (i and node keep their final loop values here)
+            print i, printing.pprint(node.outputs[0])
+            print
+
+        ## Test that a biased softmax is optimized correctly
+        bias_expressions = [
+                T.sum(-T.log(softmax(x+b)[T.arange(y.shape[0]), y])),
+                -T.sum(T.log(softmax(b+x)[T.arange(y.shape[0]), y])),
+                -T.sum(T.log(softmax(x+b))[T.arange(y.shape[0]), y]),
+                T.sum(-T.log(softmax(b+x))[T.arange(y.shape[0]), y])]
+
+        for expr in bias_expressions:
+            f = theano.function([x,b,y], expr, mode=mode)
+            if verbose: print_graph(f)
+            try:
+                prev, last = f.maker.env.toposort()[-2:]  # unpacking fails unless the graph has at least two nodes
+                assert len(f.maker.env.toposort()) == 3 # [big_op, sum, dim_shuffle]
+                f(x_val, b_val, y_val)  # the compiled function must also run on concrete values
+            except:
+                theano.printing.debugprint(f)  # dump the graph for diagnosis, then re-raise the original error
+                raise
+
+            g = theano.function([x,b,y], T.grad(expr, x), mode=mode)
+            if verbose: print_graph(g)  # guarded like the call for f, so the test is silent when verbose is off
+            try:
+                ops = [node.op for node in g.maker.env.toposort()]
+                assert len(ops) <= 6
+                assert crossentropy_softmax_1hot_with_bias_dx in ops
+                assert softmax_with_bias in ops
+                assert softmax_grad not in ops
+                g(x_val, b_val, y_val)  # the compiled gradient must also run on concrete values
+            except:
+                theano.printing.debugprint(g)  # dump the graph for diagnosis, then re-raise the original error
+                raise

    def test_scale_cost(self):
        # TODO: add the optimization in FAST_COMPILE?