Move test_softmax_grad_optimizations inside of T_CrossentropyCategorical1Hot
and add a new test, test_scale_cost.
383d965b · Pascal Lamblin · 5d367913 · 383d965b
--- a/theano/tensor/tests/test_nnet.py
+++ b/theano/tensor/tests/test_nnet.py
@@ -223,89 +223,13 @@ class T_CrossentropyCategorical1Hot(unittest.TestCase):
        assert not has_softmax
        assert not has_softmaxdx
-def test_argmax_pushdown():
+    def test_get_rid_of_advanced_indexing_version_of_xent(self):
-    x = tensor.dmatrix()
-    env = gof.Env(
-            [x],
-            [tensor.max(softmax(tensor.exp(tensor.tanh(sigmoid(x)))))])
-    theano.compile.mode.optdb.query(
-            theano.compile.mode.OPT_FAST_RUN).optimize(env)
-    #print 'AFTER'
-    #for node in env.toposort():
-        #print node.op
-    assert len(env.toposort()) == 2 # an output_guard is second
-    assert env.toposort()[0].op == tensor._max_and_argmax
-def test_argmax_pushdown_bias():
-    x = tensor.dmatrix()
-    b = tensor.dvector()
-    env = gof.Env(
-            [x,b],
-            [tensor.max(softmax_with_bias(x, b))])
-    theano.compile.mode.optdb.query(
-            theano.compile.mode.OPT_FAST_RUN).optimize(env)
-    print 'AFTER'
-    for node in env.toposort():
-        print node.op
-    assert len(env.toposort()) == 4
-    assert isinstance(env.toposort()[0].op, tensor.DimShuffle)
-    assert isinstance(env.toposort()[1].op, tensor.Elemwise)
-    assert isinstance(env.toposort()[2].op, tensor.MaxAndArgmax)
-    assert str(env.toposort()[3].op) == 'OutputGuard'
-def test_asymptotic_32():
-    """
-    This test makes sure that our functions behave sensibly when huge values are present
-    """
-    for dtype in 'float32', 'float64':
-        if dtype == 'float32':
-            x = tensor.fmatrix()
-            x2 = tensor.fvector()
-        else:
-            x = tensor.dmatrix()
-            x2 = tensor.dvector()
-        y = tensor.lvector()
-        c = categorical_crossentropy(softmax(x+x2), y)
-        f = theano.function([x,y,x2], [c.sum(), tensor.grad(c, x)])
-        if 0:
-            for i, n in enumerate( f.maker.env.toposort()):
-                print i, n
-        xval = numpy.zeros((5, 5), dtype=dtype)
-        x2val = numpy.zeros(5, dtype=xval.dtype)
-        for i in xrange(100):
-            cval, gxval =  f(xval, numpy.arange(5), x2val)
-            xval -= 100.3 * gxval
-            #print cval, gxval
-        assert cval == 0 # no problem going to zero error
-        #what about when x gets really big?
-        xval = numpy.zeros((5, 5), dtype=dtype)
-        x2val = numpy.zeros(5, dtype=xval.dtype)
-        for i in xrange(100):
-            cval, gxval =  f(xval, numpy.arange(5), x2val)
-            xval += 100000.3 * gxval
-            #print cval, gxval
-        assert cval > 61750000
-        assert gxval[0,0] == -1.0
-        assert gxval[0,1] == 0.25
-def test_get_rid_of_advanced_indexing_version_of_xent():
        verbose = 0
-    if 0: mode = 'DEBUG_MODE'
+        # TODO: add the optimization in FAST_COMPILE?
-    else: mode = 'FAST_RUN'
+        # In the mean time, run it as 'FAST_RUN' instead
+        mode = theano.compile.mode.get_default_mode()
+        if mode == 'FAST_COMPILE':
+            mode = 'FAST_RUN'
        rng = numpy.random.RandomState(utt.fetch_seed())
@@ -322,13 +246,15 @@ def test_get_rid_of_advanced_indexing_version_of_xent():
                print i, node
            # Last node should be the output
            print i, pprint(node.outputs[0])
+            print
        ## Basic case
        expressions = [
                T.sum(-T.log(softmax(x)[T.arange(y.shape[0]), y])),
                -T.sum(T.log(softmax(x)[T.arange(y.shape[0]), y])),
                -T.sum(T.log(softmax(x))[T.arange(y.shape[0]), y]),
-            T.sum(-T.log(softmax(x))[T.arange(y.shape[0]), y])]
+                T.sum(-T.log(softmax(x))[T.arange(y.shape[0]), y])
+                ]
        for expr in expressions:
            # Verify the optimizer worked on the expressions
@@ -397,6 +323,183 @@ def test_get_rid_of_advanced_indexing_version_of_xent():
            g(x_val, b_val, y_val)
+    def test_scale_cost(self):
+        # Checks that cross-entropy expressions multiplied by a scalar `a`
+        # compile to graphs that use the crossentropy_softmax_* ops, for the
+        # cost itself and for its gradient with respect to x (see
+        # validate_fn_graph and validate_grad_graph below).
+        # TODO: add the optimization in FAST_COMPILE?
+        # In the mean time, run it as 'FAST_RUN' instead
+        mode = theano.compile.mode.get_default_mode()
+        # NOTE(review): this compares the value returned by
+        # get_default_mode() with a string; if that value is a Mode object
+        # rather than a string, this branch presumably never fires -- confirm.
+        if mode == 'FAST_COMPILE':
+            mode = 'FAST_RUN'
+        rng = numpy.random.RandomState(utt.fetch_seed())
+        # Concrete input values fed to the compiled functions.
+        # b_val is not referenced again in this method.
+        x_val = rng.randn(3,5)
+        b_val = rng.randn(5)
+        y_val = numpy.asarray([2,4,1])
+        # Symbolic inputs. b is not referenced again in this method.
+        x = T.dmatrix('x')
+        b = T.dvector('b')
+        y = T.lvector('y')
+        a = T.dscalar('a')
+        # Debugging helper that prints every node of a compiled graph;
+        # it is not called anywhere in this method.
+        def print_graph(func):
+            for i, node in enumerate(func.maker.env.toposort()):
+                print i, node
+            # Last node should be the output
+            print i, pprint(node.outputs[0])
+        # Asserts that the compiled graph contains
+        # crossentropy_softmax_argmax_1hot_with_bias and no softmax node.
+        def validate_fn_graph(func):
+            # The graph of the function should not have softmax anymore
+            has_cx1hot = False
+            has_softmax = False
+            for node in func.maker.env.toposort():
+                if node.op == crossentropy_softmax_argmax_1hot_with_bias:
+                    has_cx1hot = True
+                if node.op == softmax:
+                    has_softmax = True
+            assert has_cx1hot
+            assert not has_softmax
+        # Asserts that the compiled gradient graph contains
+        # crossentropy_softmax_1hot_with_bias_dx and softmax, and no
+        # softmax_grad node.
+        def validate_grad_graph(func):
+            # The graph of the gradient should not have softmaxgrad anymore
+            has_cx1hotdx = False
+            has_softmax = False
+            has_softmaxdx = False
+            for node in func.maker.env.toposort():
+                if node.op == crossentropy_softmax_1hot_with_bias_dx:
+                    has_cx1hotdx = True
+                if node.op == softmax:
+                    has_softmax = True
+                if node.op == softmax_grad:
+                    has_softmaxdx = True
+            assert has_cx1hotdx
+            assert has_softmax
+            assert not has_softmaxdx
+        ## Cases to test
+        # Sixteen variants: T.sum versus T.mean, indexing softmax(x) before
+        # versus after taking the log, and the negation placed inside the
+        # reduction, on `a`, around the reduction, or omitted entirely.
+        expressions = [
+                a * T.sum(-T.log(softmax(x)[T.arange(y.shape[0]), y])),
+                -a * T.sum(T.log(softmax(x)[T.arange(y.shape[0]), y])),
+                a * (-T.sum(T.log(softmax(x)[T.arange(y.shape[0]), y]))),
+                a * T.sum(T.log(softmax(x)[T.arange(y.shape[0]), y])),
+                a * T.sum(-T.log(softmax(x))[T.arange(y.shape[0]), y]),
+                -a * T.sum(T.log(softmax(x))[T.arange(y.shape[0]), y]),
+                a * (-T.sum(T.log(softmax(x))[T.arange(y.shape[0]), y])),
+                a * T.sum(T.log(softmax(x))[T.arange(y.shape[0]), y]),
+                a * T.mean(-T.log(softmax(x)[T.arange(y.shape[0]), y])),
+                -a * T.mean(T.log(softmax(x)[T.arange(y.shape[0]), y])),
+                a * (-T.mean(T.log(softmax(x)[T.arange(y.shape[0]), y]))),
+                a * T.mean(T.log(softmax(x)[T.arange(y.shape[0]), y])),
+                a * T.mean(-T.log(softmax(x))[T.arange(y.shape[0]), y]),
+                -a * T.mean(T.log(softmax(x))[T.arange(y.shape[0]), y]),
+                a * (-T.mean(T.log(softmax(x))[T.arange(y.shape[0]), y])),
+                a * T.mean(T.log(softmax(x))[T.arange(y.shape[0]), y]),
+                ]
+        # For every expression: compile it, bound the number of nodes in the
+        # compiled graph, check which ops it contains, then run it once with
+        # a = 0.1 to make sure it executes.
+        for expr in expressions:
+            # Verify the optimizer worked on the expressions
+            f = theano.function([x,y,a], expr, mode=mode)
+            assert 5 <= len(f.maker.env.toposort()) <= 10
+            validate_fn_graph(f)
+            f(x_val, y_val, 0.1)
+            # Verify the gradient wrt x
+            g = theano.function([x,y,a], T.grad(expr, x), mode=mode)
+            assert 5 <= len(g.maker.env.toposort()) <= 12
+            validate_grad_graph(g)
+            g(x_val, y_val, 0.1)
+            # Verify the gradient when providing output gradient
+            # (the output gradient passed as g_cost is a * x.sum())
+            h = theano.function([x,y,a], T.grad(expr, x, g_cost=a*x.sum()), mode=mode)
+            assert 8 <= len(h.maker.env.toposort()) <= 17
+            validate_grad_graph(h)
+            h(x_val, y_val, 0.1)
+def test_argmax_pushdown():
+    # Builds the graph max(softmax(exp(tanh(sigmoid(x))))), applies the
+    # OPT_FAST_RUN optimizations to it, and asserts that only two nodes
+    # remain, the first of which is tensor._max_and_argmax.
+    x = tensor.dmatrix()
+    env = gof.Env(
+            [x],
+            [tensor.max(softmax(tensor.exp(tensor.tanh(sigmoid(x)))))])
+    # Optimize the env in place with the FAST_RUN optimization query.
+    theano.compile.mode.optdb.query(
+            theano.compile.mode.OPT_FAST_RUN).optimize(env)
+    # Disabled debugging output listing the ops left after optimization.
+    #print 'AFTER'
+    #for node in env.toposort():
+        #print node.op
+    assert len(env.toposort()) == 2 # an output_guard is second
+    assert env.toposort()[0].op == tensor._max_and_argmax
+def test_argmax_pushdown_bias():
+    # Builds the graph max(softmax_with_bias(x, b)), applies the
+    # OPT_FAST_RUN optimizations to it, and asserts that the optimized graph
+    # has exactly four nodes of the expected kinds, in order.
+    x = tensor.dmatrix()
+    b = tensor.dvector()
+    env = gof.Env(
+            [x,b],
+            [tensor.max(softmax_with_bias(x, b))])
+    # Optimize the env in place with the FAST_RUN optimization query.
+    theano.compile.mode.optdb.query(
+            theano.compile.mode.OPT_FAST_RUN).optimize(env)
+    # Print the op of every node left after optimization (debugging output).
+    print 'AFTER'
+    for node in env.toposort():
+        print node.op
+    # Expected node order: DimShuffle, Elemwise, MaxAndArgmax, OutputGuard.
+    assert len(env.toposort()) == 4
+    assert isinstance(env.toposort()[0].op, tensor.DimShuffle)
+    assert isinstance(env.toposort()[1].op, tensor.Elemwise)
+    assert isinstance(env.toposort()[2].op, tensor.MaxAndArgmax)
+    assert str(env.toposort()[3].op) == 'OutputGuard'
+def test_asymptotic_32():
+    """
+    This test makes sure that our functions behave sensibly when huge values are present
+    """
+    # Despite the "_32" in the name, the loop covers both float32 and float64.
+    for dtype in 'float32', 'float64':
+        # Pick matrix/vector variable types matching the dtype under test.
+        if dtype == 'float32':
+            x = tensor.fmatrix()
+            x2 = tensor.fvector()
+        else:
+            x = tensor.dmatrix()
+            x2 = tensor.dvector()
+        y = tensor.lvector()
+        # Cost is the categorical cross-entropy of softmax(x + x2) against y;
+        # the compiled function returns its sum and its gradient wrt x.
+        c = categorical_crossentropy(softmax(x+x2), y)
+        f = theano.function([x,y,x2], [c.sum(), tensor.grad(c, x)])
+        # Disabled debugging output listing the nodes of the compiled graph.
+        if 0:
+            for i, n in enumerate( f.maker.env.toposort()):
+                print i, n
+        # Phase 1: start from zeros and take 100 steps against the gradient
+        # (step size 100.3), with targets numpy.arange(5).
+        xval = numpy.zeros((5, 5), dtype=dtype)
+        x2val = numpy.zeros(5, dtype=xval.dtype)
+        for i in xrange(100):
+            cval, gxval =  f(xval, numpy.arange(5), x2val)
+            xval -= 100.3 * gxval
+            #print cval, gxval
+        assert cval == 0 # no problem going to zero error
+        #what about when x gets really big?
+        # Phase 2: restart from zeros and take 100 steps along the gradient
+        # (step size 100000.3), which makes the cost grow instead.
+        xval = numpy.zeros((5, 5), dtype=dtype)
+        x2val = numpy.zeros(5, dtype=xval.dtype)
+        for i in xrange(100):
+            cval, gxval =  f(xval, numpy.arange(5), x2val)
+            xval += 100000.3 * gxval
+            #print cval, gxval
+        # The cost and the gradient entries from the last call must match
+        # these exact values.
+        assert cval > 61750000
+        assert gxval[0,0] == -1.0
+        assert gxval[0,1] == 0.25
    #   hint - call the argmax push-down optimization first too