提交 b22ae136 authored 作者: Harm de Vries's avatar Harm de Vries

Some graph optimizations are not working

上级 4a2b55eb
...@@ -572,6 +572,10 @@ softmax_op = Softmax() ...@@ -572,6 +572,10 @@ softmax_op = Softmax()
def softmax_graph(c): def softmax_graph(c):
return tensor.exp(c) / tensor.exp(c).sum(axis=-1, keepdims=True) return tensor.exp(c) / tensor.exp(c).sum(axis=-1, keepdims=True)
def softmax(c):
    """Apply softmax to `c`, padding a 1-D input up to a 1-row matrix first.

    `softmax_graph` (and the underlying Softmax op) operates on 2-D input,
    so a vector is promoted via `tensor.shape_padleft` before dispatch.
    """
    padded = tensor.shape_padleft(c, n_ones=1) if c.ndim == 1 else c
    return softmax_graph(padded)
@opt.register_specialize('fast_compile_gpu') @opt.register_specialize('fast_compile_gpu')
@gof.local_optimizer([softmax_op]) @gof.local_optimizer([softmax_op])
...@@ -640,7 +644,7 @@ def softmax_simplifier(numerators, denominators): ...@@ -640,7 +644,7 @@ def softmax_simplifier(numerators, denominators):
if not numerator.type.dtype.startswith('float'): if not numerator.type.dtype.startswith('float'):
continue continue
if not numerator.type.broadcastable == (False, False): if numerator.ndim != 2:
continue continue
if numerator.owner and numerator.owner.op == tensor.exp: if numerator.owner and numerator.owner.op == tensor.exp:
x = numerator.owner.inputs[0] x = numerator.owner.inputs[0]
...@@ -667,6 +671,7 @@ def softmax_simplifier(numerators, denominators): ...@@ -667,6 +671,7 @@ def softmax_simplifier(numerators, denominators):
numerators.remove(numerator) numerators.remove(numerator)
denominators.remove(matching_denom) denominators.remove(matching_denom)
numerators.append(softmax_op(x)) numerators.append(softmax_op(x))
return numerators, denominators return numerators, denominators
opt.local_mul_canonizer.add_simplifier(softmax_simplifier, opt.local_mul_canonizer.add_simplifier(softmax_simplifier,
'softmax_simplifier') 'softmax_simplifier')
...@@ -728,7 +733,7 @@ if 0: ...@@ -728,7 +733,7 @@ if 0:
rest.append(add_in) rest.append(add_in)
# print 'maybe_ds =', maybe_ds # print 'maybe_ds =', maybe_ds
# if maybe_ds: # if maybe_ds:
# print 'maybe_ds.ndim =', maybe_ds.ndim, ', maybe_sm.ndim =', maybe_sm.ndim # print 'maybe_ds.ndim =', maybe_ds.ndim, ', maybe_sm.ndim =', maybe_sm.ndim
continue continue
if maybe_sm is mul_inputs[0]: if maybe_sm is mul_inputs[0]:
......
...@@ -21,7 +21,7 @@ from theano.tensor.nnet import (categorical_crossentropy, ...@@ -21,7 +21,7 @@ from theano.tensor.nnet import (categorical_crossentropy,
CrossentropySoftmaxArgmax1HotWithBias, CrossentropySoftmaxArgmax1HotWithBias,
CrossentropyCategorical1Hot, CrossentropyCategorical1Hot,
CrossentropyCategorical1HotGrad, CrossentropyCategorical1HotGrad,
sigmoid, softplus, Softmax, sigmoid, softplus, Softmax, softmax,
softmax_op, softmax_graph, SoftmaxWithBias, softmax_op, softmax_graph, SoftmaxWithBias,
softmax_grad, softmax_grad,
softmax_with_bias, SoftmaxGrad, softmax_with_bias, SoftmaxGrad,
...@@ -612,10 +612,10 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester): ...@@ -612,10 +612,10 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester):
# Basic case # Basic case
expressions = [ expressions = [
T.sum(-T.log(softmax_graph(x)[T.arange(y.shape[0]), y])), T.sum(-T.log(softmax(x)[T.arange(y.shape[0]), y])),
-T.sum(T.log(softmax_graph(x)[T.arange(y.shape[0]), y])), -T.sum(T.log(softmax(x)[T.arange(y.shape[0]), y])),
-T.sum(T.log(softmax_graph(x))[T.arange(y.shape[0]), y]), -T.sum(T.log(softmax(x))[T.arange(y.shape[0]), y]),
T.sum(-T.log(softmax_graph(x))[T.arange(y.shape[0]), y]) T.sum(-T.log(softmax(x))[T.arange(y.shape[0]), y])
] ]
for expr in expressions: for expr in expressions:
# Verify the optimizer worked on the expressions # Verify the optimizer worked on the expressions
...@@ -650,10 +650,10 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester): ...@@ -650,10 +650,10 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester):
# Test that a biased softmax is optimized correctly # Test that a biased softmax is optimized correctly
bias_expressions = [ bias_expressions = [
T.sum(-T.log(softmax_graph(x + b)[T.arange(y.shape[0]), y])), T.sum(-T.log(softmax(x + b)[T.arange(y.shape[0]), y])),
-T.sum(T.log(softmax_graph(b + x)[T.arange(y.shape[0]), y])), -T.sum(T.log(softmax(b + x)[T.arange(y.shape[0]), y])),
-T.sum(T.log(softmax_graph(x + b))[T.arange(y.shape[0]), y]), -T.sum(T.log(softmax(x + b))[T.arange(y.shape[0]), y]),
T.sum(-T.log(softmax_graph(b + x))[T.arange(y.shape[0]), y])] T.sum(-T.log(softmax(b + x))[T.arange(y.shape[0]), y])]
for expr in bias_expressions: for expr in bias_expressions:
f = theano.function([x, b, y], expr, mode=mode) f = theano.function([x, b, y], expr, mode=mode)
...@@ -683,10 +683,10 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester): ...@@ -683,10 +683,10 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester):
# Test that using "mean" instead of sum works, too # Test that using "mean" instead of sum works, too
mean_expressions = [ mean_expressions = [
T.mean(-T.log(softmax_graph(x)[T.arange(y.shape[0]), y])), T.mean(-T.log(softmax(x)[T.arange(y.shape[0]), y])),
-T.mean(T.log(softmax_graph(x)[T.arange(y.shape[0]), y])), -T.mean(T.log(softmax(x)[T.arange(y.shape[0]), y])),
-T.mean(T.log(softmax_graph(x))[T.arange(y.shape[0]), y]), -T.mean(T.log(softmax(x))[T.arange(y.shape[0]), y]),
T.mean(-T.log(softmax_graph(x))[T.arange(y.shape[0]), y])] T.mean(-T.log(softmax(x))[T.arange(y.shape[0]), y])]
for expr in mean_expressions: for expr in mean_expressions:
f = theano.function([x, y], expr, mode=mode) f = theano.function([x, y], expr, mode=mode)
...@@ -720,10 +720,10 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester): ...@@ -720,10 +720,10 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester):
raise raise
mean_bias_expressions = [ mean_bias_expressions = [
T.mean(-T.log(softmax_graph(x + b)[T.arange(y.shape[0]), y])), T.mean(-T.log(softmax(x + b)[T.arange(y.shape[0]), y])),
-T.mean(T.log(softmax_graph(b + x)[T.arange(y.shape[0]), y])), -T.mean(T.log(softmax(b + x)[T.arange(y.shape[0]), y])),
-T.mean(T.log(softmax_graph(x + b))[T.arange(y.shape[0]), y]), -T.mean(T.log(softmax(x + b))[T.arange(y.shape[0]), y]),
T.mean(-T.log(softmax_graph(b + x))[T.arange(y.shape[0]), y])] T.mean(-T.log(softmax(b + x))[T.arange(y.shape[0]), y])]
for expr in mean_bias_expressions: for expr in mean_bias_expressions:
f = theano.function([x, b, y], expr, mode=mode) f = theano.function([x, b, y], expr, mode=mode)
...@@ -764,10 +764,10 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester): ...@@ -764,10 +764,10 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester):
y = T.lvector('y') y = T.lvector('y')
yi = T.cast(y, 'int32') yi = T.cast(y, 'int32')
expressions = [ expressions = [
T.sum(-T.log(softmax_graph(x)[T.arange(yi.shape[0]), yi])), T.sum(-T.log(softmax(x)[T.arange(yi.shape[0]), yi])),
-T.sum(T.log(softmax_graph(x)[T.arange(yi.shape[0]), yi])), -T.sum(T.log(softmax(x)[T.arange(yi.shape[0]), yi])),
-T.sum(T.log(softmax_graph(x))[T.arange(yi.shape[0]), yi]), -T.sum(T.log(softmax(x))[T.arange(yi.shape[0]), yi]),
T.sum(-T.log(softmax_graph(x))[T.arange(yi.shape[0]), yi]) T.sum(-T.log(softmax(x))[T.arange(yi.shape[0]), yi])
] ]
for expr in expressions: for expr in expressions:
...@@ -815,8 +815,8 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester): ...@@ -815,8 +815,8 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester):
# Test that a biased softmax is optimized correctly # Test that a biased softmax is optimized correctly
bias_expressions = [ bias_expressions = [
T.sum(-T.log(softmax_graph(x)[T.arange(y.shape[0]), y])), T.sum(-T.log(softmax(x)[T.arange(y.shape[0]), y])),
-T.sum(T.log(softmax_graph(x)[T.arange(y.shape[0]), y]))] -T.sum(T.log(softmax(x)[T.arange(y.shape[0]), y]))]
for expr in bias_expressions: for expr in bias_expressions:
f = theano.function([x, y], expr, mode=mode) f = theano.function([x, y], expr, mode=mode)
...@@ -862,10 +862,10 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester): ...@@ -862,10 +862,10 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester):
# Test that a biased softmax is optimized correctly # Test that a biased softmax is optimized correctly
bias_expressions = [ bias_expressions = [
T.sum(-T.log(softmax_graph(x + b)[T.arange(y.shape[0]), y])), T.sum(-T.log(softmax(x + b)[T.arange(y.shape[0]), y])),
-T.sum(T.log(softmax_graph(b + x)[T.arange(y.shape[0]), y])), -T.sum(T.log(softmax(b + x)[T.arange(y.shape[0]), y])),
-T.sum(T.log(softmax_graph(x + b))[T.arange(y.shape[0]), y]), -T.sum(T.log(softmax(x + b))[T.arange(y.shape[0]), y]),
T.sum(-T.log(softmax_graph(b + x))[T.arange(y.shape[0]), y])] T.sum(-T.log(softmax(b + x))[T.arange(y.shape[0]), y])]
for expr in bias_expressions: for expr in bias_expressions:
f = theano.function([x, b, y], expr, mode=mode) f = theano.function([x, b, y], expr, mode=mode)
...@@ -923,10 +923,10 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester): ...@@ -923,10 +923,10 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester):
# Test that a biased softmax is optimized correctly # Test that a biased softmax is optimized correctly
bias_expressions = [ bias_expressions = [
T.sum(-T.log(softmax_graph(x + b)[T.arange(y.shape[0]), y])), T.sum(-T.log(softmax(x + b)[T.arange(y.shape[0]), y])),
-T.sum(T.log(softmax_graph(b + x)[T.arange(y.shape[0]), y])), -T.sum(T.log(softmax(b + x)[T.arange(y.shape[0]), y])),
-T.sum(T.log(softmax_graph(x + b))[T.arange(y.shape[0]), y]), -T.sum(T.log(softmax(x + b))[T.arange(y.shape[0]), y]),
T.sum(-T.log(softmax_graph(b + x))[T.arange(y.shape[0]), y])] T.sum(-T.log(softmax(b + x))[T.arange(y.shape[0]), y])]
for expr in bias_expressions: for expr in bias_expressions:
f = theano.function([x, b, y_], expr, mode=mode) f = theano.function([x, b, y_], expr, mode=mode)
...@@ -985,10 +985,10 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester): ...@@ -985,10 +985,10 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester):
# Test that a biased softmax is optimized correctly # Test that a biased softmax is optimized correctly
bias_expressions = [ bias_expressions = [
T.sum(-T.log(softmax_graph(x + b)[T.arange(y.shape[0]), y])), T.sum(-T.log(softmax(x + b)[T.arange(y.shape[0]), y])),
-T.sum(T.log(softmax_graph(b + x)[T.arange(y.shape[0]), y])), -T.sum(T.log(softmax(b + x)[T.arange(y.shape[0]), y])),
-T.sum(T.log(softmax_graph(x + b))[T.arange(y.shape[0]), y]), -T.sum(T.log(softmax(x + b))[T.arange(y.shape[0]), y]),
T.sum(-T.log(softmax_graph(b + x))[T.arange(y.shape[0]), y])] T.sum(-T.log(softmax(b + x))[T.arange(y.shape[0]), y])]
for expr in bias_expressions: for expr in bias_expressions:
f = theano.function([x, b, y_], expr, mode=mode) f = theano.function([x, b, y_], expr, mode=mode)
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论