Commit fe026b97 authored by Eric Larsen, committed by Frederic

test_nnet.py: layout cleanup

Parent 9c66685f
@@ -22,42 +22,53 @@ from theano.tensor.nnet import (categorical_crossentropy,
CrossentropyCategorical1Hot,
CrossentropyCategorical1HotGrad,
sigmoid, softplus,
Softmax, softmax, SoftmaxWithBias, softmax_grad,
Softmax, softmax, SoftmaxWithBias,
softmax_grad,
softmax_with_bias, SoftmaxGrad,
Prepend_scalar_constant_to_each_row,
Prepend_scalar_to_each_row)
from theano.tensor import dmatrix, dvector, lvector, dscalar
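
Editor's note (not part of this commit): a minimal NumPy sketch of the
semantics the tests below assume. softmax maps each row x_i to
exp(x_i) / sum_j exp(x_i[j]), so every output row sums to 1. The helper name
softmax_ref is the editor's own, not a name from this file.

    import numpy

    def softmax_ref(x):
        # Subtract the row max first: softmax is shift-invariant, so this
        # changes nothing but guards exp() against overflow.
        e = numpy.exp(x - x.max(axis=1).reshape(-1, 1))
        return e / e.sum(axis=1).reshape(-1, 1)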
class T_sigmoid(unittest.TestCase):
def setUp(self):
utt.seed_rng()
def test_elemwise(self):
utt.verify_grad(sigmoid, [numpy.random.rand(3,4)])
utt.verify_grad(sigmoid, [numpy.random.rand(3, 4)])
class T_softplus(unittest.TestCase):
def setUp(self):
utt.seed_rng()
def test_elemwise(self):
utt.verify_grad(softplus, [numpy.random.rand(3,4)])
utt.verify_grad(softplus, [numpy.random.rand(3, 4)])
class T_Softmax(utt.InferShapeTester):
def test0(self):
def f(a):
return softmax(a)[:,0]
utt.verify_grad(f, [numpy.random.rand(3,4)])
return softmax(a)[:, 0]
utt.verify_grad(f, [numpy.random.rand(3, 4)])
def test1(self):
def f(a):
return softmax(a)[:,1]
utt.verify_grad(f, [numpy.random.rand(3,4)])
return softmax(a)[:, 1]
utt.verify_grad(f, [numpy.random.rand(3, 4)])
def test2(self):
def f(a):
return softmax(a)[:,2]
utt.verify_grad(f, [numpy.random.rand(3,4)])
return softmax(a)[:, 2]
utt.verify_grad(f, [numpy.random.rand(3, 4)])
def test3(self):
def f(a):
return softmax(a)[:,3]
utt.verify_grad(f, [numpy.random.rand(3,4)])
return softmax(a)[:, 3]
utt.verify_grad(f, [numpy.random.rand(3, 4)])
def test_infer_shape(self):
admat = dmatrix()
@@ -71,6 +82,7 @@ class T_Softmax(utt.InferShapeTester):
xv = numpy.random.randn(6).astype(config.floatX)
assert numpy.allclose(f(xv), numpy.exp(xv) / numpy.exp(xv).sum())
def test_vector_grad(self):
def f(a):
return softmax(a)
@@ -78,46 +90,49 @@ class T_Softmax(utt.InferShapeTester):
class T_SoftmaxWithBias(utt.InferShapeTester):
def test0(self):
def f(a, b):
return softmax_with_bias(a, b)[:,0]
utt.verify_grad(f, [numpy.random.rand(3,4),
return softmax_with_bias(a, b)[:, 0]
utt.verify_grad(f, [numpy.random.rand(3, 4),
numpy.random.rand(4)])
def test1(self):
def f(a, b):
return softmax_with_bias(a, b)[:,1]
utt.verify_grad(f, [numpy.random.rand(3,4),
return softmax_with_bias(a, b)[:, 1]
utt.verify_grad(f, [numpy.random.rand(3, 4),
numpy.random.rand(4)])
def test2(self):
def f(a, b):
return softmax_with_bias(a, b)[:,2]
utt.verify_grad(f, [numpy.random.rand(3,4),
return softmax_with_bias(a, b)[:, 2]
utt.verify_grad(f, [numpy.random.rand(3, 4),
numpy.random.rand(4)])
def test3(self):
def f(a, b):
return softmax_with_bias(a, b)[:,3]
utt.verify_grad(f, [numpy.random.rand(3,4),
return softmax_with_bias(a, b)[:, 3]
utt.verify_grad(f, [numpy.random.rand(3, 4),
numpy.random.rand(4)])
def test_broadcast(self):
# Test that we do not raise an error during optimization for no good
# reason, since softmax_with_bias does not correctly support some/all
# broadcasted input patterns (see the editor's note after this test).
initial_W = numpy.asarray( [[0.1,0.1,0.1], \
[0.1,0.1,0.1], \
[0.1,0.1,0.1]], \
dtype = theano.config.floatX)
W = theano.shared(value = initial_W, name = 'W')
vbias=theano.shared(value=0.1, name='vbias') #0.01
hid=T.vector('hid')
initial_W = numpy.asarray([[0.1, 0.1, 0.1], \
[0.1, 0.1, 0.1], \
[0.1, 0.1, 0.1]], \
dtype=theano.config.floatX)
W = theano.shared(value=initial_W, name='W')
vbias = theano.shared(value=0.1, name='vbias') # 0.01
hid = T.vector('hid')
f = theano.function([hid],
T.nnet.softmax(T.dot(hid, W.T) + vbias))
ops = [node.op for node in f.maker.fgraph.toposort()]
assert softmax_with_bias not in ops
assert softmax in ops
f([0,1,0])
f([0, 1, 0])
#print f.maker.fgraph.toposort()
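
Editor's note, a hedged reading of the comment at the top of this test: vbias
is a 0-d shared scalar that broadcasts across every column of the dot product,
while softmax_with_bias expects a bias vector with one entry per column, so
the substitution would be invalid here; hence the assert that plain softmax
is used instead.

    # T.dot(hid, W.T) + vbias   -> vbias is a scalar, broadcast over 3 columns
    # softmax_with_bias(a, b)   -> b must be a 1-d vector, one entry per column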
def test_infer_shape(self):
@@ -125,13 +140,14 @@ class T_SoftmaxWithBias(utt.InferShapeTester):
advec = dvector()
admat_val = numpy.random.rand(3, 4)
advec_val = numpy.random.rand(4)
self._compile_and_check([admat, advec], [SoftmaxWithBias()(admat, advec)],
self._compile_and_check([admat, advec],
[SoftmaxWithBias()(admat, advec)],
[admat_val, advec_val], SoftmaxWithBias)
class T_SoftmaxGrad(utt.InferShapeTester):
def test_infer_shape(self):
def test_infer_shape(self):
admat = dmatrix()
bdmat = dmatrix()
admat_val = numpy.random.rand(3, 4)
@@ -140,48 +156,60 @@ class T_SoftmaxGrad(utt.InferShapeTester):
[admat_val, bdmat_val], SoftmaxGrad)
class T_CrossentropySoftmax1Hot(unittest.TestCase):
def setUp(self):
utt.seed_rng()
def test0(self):
y_idx = [0,1,3]
y_idx = [0, 1, 3]
def f(a, b):
return crossentropy_softmax_1hot_with_bias(a, b, y_idx)[0]
utt.verify_grad(f, [numpy.random.rand(3,4),
utt.verify_grad(f, [numpy.random.rand(3, 4),
numpy.random.rand(4)])
def test1(self):
y_idx = [0,1,3]
y_idx = [0, 1, 3]
def f(a):
return crossentropy_softmax_1hot(a, y_idx)[0]
utt.verify_grad(f, [numpy.random.rand(3,4)])
utt.verify_grad(f, [numpy.random.rand(3, 4)])
def test_vector(self):
y_idx = [3]
def f(a):
return crossentropy_softmax_1hot(T.shape_padleft(a), y_idx)[0]
utt.verify_grad(f, [numpy.random.rand(4)])
def test_vectors(self):
y_idx = [3]
def f(a, b):
return crossentropy_softmax_1hot(T.shape_padleft(a)+b, y_idx)[0]
return crossentropy_softmax_1hot(T.shape_padleft(a) + b, y_idx)[0]
utt.verify_grad(f, [numpy.random.rand(4), numpy.random.rand(4)])
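
Editor's note: what these ops compute, as a hedged sketch reusing the
softmax_ref helper from the earlier note (both names are the editor's). Per
row i, the first output is the negative log-likelihood of the integer target
y_idx[i]:

    # nll[i] = -numpy.log(softmax_ref(x + b)[i, y_idx[i]])
    # crossentropy_softmax_1hot_with_bias returns a (nll, softmax) pair, which
    # is why the tests above index [0] before calling verify_grad.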
class T_CrossentropySoftmax1HotWithBiasDx(utt.InferShapeTester):
def test0(self):
def f(sm):
return (theano.tensor.nnet.crossentropy_softmax_1hot_with_bias_dx(
numpy.random.rand(10), # Gradient w.r.t. NLL.
sm, # Softmax output.
numpy.random.randint(low=0, high=5, size=10))) # Class indices.
numpy.random.randint(low=0,
high=5, size=10))) # Class indices.
# Build a random softmax output whose rows sum to 1.
softmax_output = numpy.random.rand(10, 5)
softmax_output /= softmax_output.sum(axis=1).reshape(10, 1)
utt.verify_grad(f, [softmax_output])
def test1(self):
rng = numpy.random.RandomState(utt.fetch_seed())
softmax_output = rng.rand(10, 5)
softmax_output /= softmax_output.sum(axis=1).reshape(10, 1)
def f(dy):
return (theano.tensor.nnet.crossentropy_softmax_1hot_with_bias_dx(
dy,
@@ -208,15 +236,18 @@ class T_CrossentropySoftmaxArgmax1HotWithBias(utt.InferShapeTester):
def setUp(self):
super(T_CrossentropySoftmaxArgmax1HotWithBias, self).setUp()
self.op = theano.tensor.nnet.crossentropy_softmax_argmax_1hot_with_bias
def test0(self):
n_classes = 5
n_samples = 3
# First test gradient when getting a gradient on the NLL output.
def grad_on_nll(x, b):
return self.op(x, b, y_idx=numpy.random.randint(
low=0, high=n_classes, size=n_samples))[0]
utt.verify_grad(grad_on_nll, [numpy.random.rand(n_samples, n_classes),
numpy.random.rand(n_classes)])
# Then test gradient when getting a gradient on the softmax output.
def grad_on_softmax(x, b):
return self.op(x, b, y_idx=numpy.random.randint(
@@ -234,21 +265,21 @@ class T_CrossentropySoftmaxArgmax1HotWithBias(utt.InferShapeTester):
advec_val = rng.rand(5)
alvec_val = rng.randint(low=0, high=5, size=3)
self._compile_and_check([admat, advec, alvec],
CrossentropySoftmaxArgmax1HotWithBias()(admat, advec, alvec),
[admat_val, advec_val, alvec_val],
CrossentropySoftmaxArgmax1HotWithBias)
CrossentropySoftmaxArgmax1HotWithBias()(admat, advec, alvec),
[admat_val, advec_val, alvec_val],
CrossentropySoftmaxArgmax1HotWithBias)
class T_prepend(utt.InferShapeTester):
def test0(self):
x=tensor.matrix('x')
y=Prepend_scalar_constant_to_each_row(4.)(x)
f=theano.function([x],[y])
m=numpy.random.rand(3,5)
x = tensor.matrix('x')
y = Prepend_scalar_constant_to_each_row(4.)(x)
f = theano.function([x], [y])
m = numpy.random.rand(3, 5)
my = f(m)
self.assertTrue(my.shape == (3, 6), my.shape)
self.assertTrue(numpy.all( my[:,0] == 4.0))
self.assertTrue(numpy.all(my[:, 0] == 4.0))
def test_infer_shape(self):
admat = dmatrix()
@@ -262,15 +293,16 @@ class T_prepend(utt.InferShapeTester):
class T_prepend(utt.InferShapeTester):
def test0(self):
"""basic functionality"""
x=tensor.matrix('x')
y=Prepend_scalar_to_each_row()(5.,x)
f=theano.function([x],y)
m=numpy.ones((3,5),dtype="float32")
x = tensor.matrix('x')
y = Prepend_scalar_to_each_row()(5., x)
f = theano.function([x], y)
m = numpy.ones((3, 5), dtype="float32")
my = f(m)
self.assertTrue(my.shape == (3, 6))
self.assertTrue(numpy.all(my[:,0] == 5.0))
self.assertTrue(numpy.all(my[:, 0] == 5.0))
def test_infer_shape(self):
admat = dmatrix()
@@ -300,24 +332,20 @@ class T_CrossentropyCategorical1HotGrad(utt.InferShapeTester):
class T_CrossentropyCategorical1Hot(utt.InferShapeTester):
def test_grad(self):
x = tensor.matrix('x')
one_of_n = tensor.lvector('one_of_n')
op = crossentropy_categorical_1hot
xe = op(x, one_of_n)
f = theano.function([x, one_of_n], xe)
x_val = numpy.asarray([[.4, .6, .0], [.1, .8, .1]],
dtype=config.floatX)
xe_val = f(x_val, [0,1])
xe_val = f(x_val, [0, 1])
assert numpy.allclose(xe_val, -numpy.log([.4, .8]))
def oplike(x):
return op(x, [0,1])
return op(x, [0, 1])
tensor.verify_grad(oplike, [x_val], rng=numpy.random)
@@ -336,7 +364,6 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester):
x = tensor.matrix('x')
one_of_n = tensor.lvector('one_of_n')
op = crossentropy_categorical_1hot
xe = op(x, one_of_n)
fgraph = gof.FunctionGraph(
@@ -348,7 +375,8 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester):
theano.compile.mode.OPT_FAST_RUN).optimize(fgraph)
assert str(fgraph.outputs[0].owner.op) == 'OutputGuard'
assert fgraph.outputs[0].owner.inputs[0].owner.op == crossentropy_softmax_argmax_1hot_with_bias
assert (fgraph.outputs[0].owner.inputs[0].owner.op ==
crossentropy_softmax_argmax_1hot_with_bias)
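
Editor's note, a hedged sketch of the rewrite these optimization tests check.
After OPT_FAST_RUN, the two-node pattern

    crossentropy_categorical_1hot(softmax(x), one_of_n)

is collapsed into the single fused op

    crossentropy_softmax_argmax_1hot_with_bias(x, b, one_of_n)

(the zero-bias substitution is the editor's reading, not stated in this diff),
leaving only an OutputGuard on top; that is the chain the asserts walk through.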
def test_softmax_optimizations_vector(self):
x = tensor.vector('x')
@@ -362,19 +390,19 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester):
theano.compile.mode.optdb.query(
theano.compile.mode.OPT_FAST_RUN).optimize(fgraph)
assert str(fgraph.outputs[0].owner.op) == 'OutputGuard'
assert fgraph.outputs[0].owner.inputs[0].owner.op == crossentropy_softmax_argmax_1hot_with_bias
assert (fgraph.outputs[0].owner.inputs[0].owner.op ==
crossentropy_softmax_argmax_1hot_with_bias)
def test_softmax_optimizations_w_bias(self):
x = tensor.matrix('x')
b = tensor.vector('b')
one_of_n = tensor.lvector('one_of_n')
op = crossentropy_categorical_1hot
xe = op(x, one_of_n)
fgraph = gof.FunctionGraph(
[x, b, one_of_n],
[op(softmax(x+b), one_of_n)])
[op(softmax(x + b), one_of_n)])
assert fgraph.outputs[0].owner.op == op
#print 'BEFORE'
@@ -394,7 +422,8 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester):
assert len(fgraph.toposort()) == 2
assert str(fgraph.outputs[0].owner.op) == 'OutputGuard'
assert fgraph.outputs[0].owner.inputs[0].owner.op == crossentropy_softmax_argmax_1hot_with_bias
assert (fgraph.outputs[0].owner.inputs[0].owner.op ==
crossentropy_softmax_argmax_1hot_with_bias)
def test_softmax_optimizations_w_bias2(self):
x = tensor.matrix('x')
@@ -405,7 +434,7 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester):
fgraph = gof.FunctionGraph(
[x, b, c, one_of_n],
[op(softmax(T.add(x,b,c)), one_of_n)])
[op(softmax(T.add(x, b, c)), one_of_n)])
assert fgraph.outputs[0].owner.op == op
#print 'BEFORE'
@@ -423,7 +452,8 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester):
assert len(fgraph.toposort()) == 3
assert str(fgraph.outputs[0].owner.op) == 'OutputGuard'
assert fgraph.outputs[0].owner.inputs[0].owner.op == crossentropy_softmax_argmax_1hot_with_bias
assert (fgraph.outputs[0].owner.inputs[0].owner.op ==
crossentropy_softmax_argmax_1hot_with_bias)
def test_softmax_optimizations_w_bias_vector(self):
x = tensor.vector('x')
@@ -432,7 +462,7 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester):
op = crossentropy_categorical_1hot
fgraph = gof.FunctionGraph(
[x, b, one_of_n],
[op(softmax(x+b), one_of_n)])
[op(softmax(x + b), one_of_n)])
assert fgraph.outputs[0].owner.op == op
#print 'BEFORE'
#for node in fgraph.toposort():
@@ -448,15 +478,14 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester):
#print '===='
assert len(fgraph.toposort()) == 3
assert str(fgraph.outputs[0].owner.op) == 'OutputGuard'
assert fgraph.outputs[0].owner.inputs[0].owner.op == crossentropy_softmax_argmax_1hot_with_bias
assert (fgraph.outputs[0].owner.inputs[0].owner.op ==
crossentropy_softmax_argmax_1hot_with_bias)
def test_softmax_grad_optimizations(self):
x = tensor.matrix('x')
one_of_n = tensor.lvector('one_of_n')
op = crossentropy_categorical_1hot
xe = op(softmax(x), one_of_n)
sum_xe = tensor.sum(xe)
g_x = tensor.grad(sum_xe, x)
fgraph = gof.FunctionGraph(
@@ -474,8 +503,8 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester):
#for node in fgraph.toposort():
# print node.op, node.inputs
# the function has 9 ops because the dimshuffle and elemwise{second} aren't getting
# cleaned up as well as we'd like.
# the function has 9 ops because the dimshuffle and elemwise{second}
# aren't getting cleaned up as well as we'd like.
has_cx1hot = False
has_cx1hotdx = False
has_softmax = False
@@ -483,13 +512,12 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester):
for node in fgraph.toposort():
if node.op == crossentropy_softmax_argmax_1hot_with_bias:
has_cx1hot = True
if node.op == crossentropy_softmax_1hot_with_bias_dx :
if node.op == crossentropy_softmax_1hot_with_bias_dx:
has_cx1hotdx = True
if node.op == softmax:
has_softmax = True
if node.op == softmax_grad:
has_softmaxdx = True
assert has_cx1hot
assert has_cx1hotdx
assert not has_softmax
@@ -517,8 +545,8 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester):
#for node in fgraph.toposort():
# print node.op, node.inputs
# the function has 9 ops because the dimshuffle and elemwise{second} aren't getting
# cleaned up as well as we'd like.
# the function has 9 ops because the dimshuffle and elemwise{second}
# aren't getting cleaned up as well as we'd like.
has_cx1hot = False
has_cx1hotdx = False
has_softmax = False
@@ -526,13 +554,12 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester):
for node in fgraph.toposort():
if node.op == crossentropy_softmax_argmax_1hot_with_bias:
has_cx1hot = True
if node.op == crossentropy_softmax_1hot_with_bias_dx :
if node.op == crossentropy_softmax_1hot_with_bias_dx:
has_cx1hotdx = True
if node.op == softmax:
has_softmax = True
if node.op == softmax_grad:
has_softmaxdx = True
assert has_cx1hot
assert has_cx1hotdx
assert not has_softmax
@@ -547,17 +574,14 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester):
mode = theano.compile.mode.get_default_mode()
if mode == theano.compile.mode.get_mode('FAST_COMPILE'):
mode = 'FAST_RUN'
rng = numpy.random.RandomState(utt.fetch_seed())
x_val = rng.randn(3,5)
x_val = rng.randn(3, 5)
b_val = rng.randn(5)
y_val = numpy.asarray([2,4,1])
y_val = numpy.asarray([2, 4, 1])
x = T.dmatrix('x')
b = T.dvector('b')
y = T.lvector('y')
## Basic case
expressions = [
T.sum(-T.log(softmax(x)[T.arange(y.shape[0]), y])),
@@ -565,10 +589,10 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester):
-T.sum(T.log(softmax(x))[T.arange(y.shape[0]), y]),
T.sum(-T.log(softmax(x))[T.arange(y.shape[0]), y])
]
for expr in expressions:
# Verify the optimizer worked on the expressions
f = theano.function([x,y], expr, mode=mode)
f = theano.function([x, y], expr, mode=mode)
if verbose:
theano.printing.debugprint(f)
try:
@@ -579,7 +603,7 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester):
raise
# Also verify the gradient wrt x
g = theano.function([x,y], T.grad(expr, x), mode=mode)
g = theano.function([x, y], T.grad(expr, x), mode=mode)
if verbose:
theano.printing.debugprint(g)
try:
@@ -591,23 +615,22 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester):
## Test that a biased softmax is optimized correctly
bias_expressions = [
T.sum(-T.log(softmax(x+b)[T.arange(y.shape[0]), y])),
-T.sum(T.log(softmax(b+x)[T.arange(y.shape[0]), y])),
-T.sum(T.log(softmax(x+b))[T.arange(y.shape[0]), y]),
T.sum(-T.log(softmax(b+x))[T.arange(y.shape[0]), y])]
T.sum(-T.log(softmax(x + b)[T.arange(y.shape[0]), y])),
-T.sum(T.log(softmax(b + x)[T.arange(y.shape[0]), y])),
-T.sum(T.log(softmax(x + b))[T.arange(y.shape[0]), y]),
T.sum(-T.log(softmax(b + x))[T.arange(y.shape[0]), y])]
for expr in bias_expressions:
f = theano.function([x,b,y], expr, mode=mode)
f = theano.function([x, b, y], expr, mode=mode)
if verbose:
theano.printing.debugprint(f)
try:
assert len(f.maker.fgraph.toposort()) == 2 # [big_op, sum]
assert len(f.maker.fgraph.toposort()) == 2 # [big_op, sum]
f(x_val, b_val, y_val)
except Exception:
theano.printing.debugprint(f)
raise
g = theano.function([x,b,y], T.grad(expr, x), mode=mode)
g = theano.function([x, b, y], T.grad(expr, x), mode=mode)
if verbose:
theano.printing.debugprint(g)
try:
@@ -625,7 +648,7 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester):
T.mean(-T.log(softmax(x))[T.arange(y.shape[0]), y])]
for expr in mean_expressions:
f = theano.function([x,y], expr, mode=mode)
f = theano.function([x, y], expr, mode=mode)
if verbose:
theano.printing.debugprint(f)
try:
@@ -635,11 +658,12 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester):
theano.printing.debugprint(f)
raise
g = theano.function([x,y], T.grad(expr, x), mode=mode)
g = theano.function([x, y], T.grad(expr, x), mode=mode)
if verbose:
theano.printing.debugprint(g)
try:
assert len(g.maker.fgraph.toposort()) in (6,7) #there's an extra dimshuffle in there
assert len(g.maker.fgraph.toposort()) in (6, 7)
#there's an extra dimshuffle in there
# but I can't think of a good rule to get rid of it
g(x_val, y_val)
except Exception:
@@ -647,13 +671,13 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester):
raise
mean_bias_expressions = [
T.mean(-T.log(softmax(x+b)[T.arange(y.shape[0]), y])),
-T.mean(T.log(softmax(b+x)[T.arange(y.shape[0]), y])),
-T.mean(T.log(softmax(x+b))[T.arange(y.shape[0]), y]),
T.mean(-T.log(softmax(b+x))[T.arange(y.shape[0]), y])]
T.mean(-T.log(softmax(x + b)[T.arange(y.shape[0]), y])),
-T.mean(T.log(softmax(b + x)[T.arange(y.shape[0]), y])),
-T.mean(T.log(softmax(x + b))[T.arange(y.shape[0]), y]),
T.mean(-T.log(softmax(b + x))[T.arange(y.shape[0]), y])]
for expr in mean_bias_expressions:
f = theano.function([x,b,y], expr, mode=mode)
f = theano.function([x, b, y], expr, mode=mode)
if verbose:
theano.printing.debugprint(f)
try:
@@ -661,12 +685,11 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester):
except Exception:
theano.printing.debugprint(f)
raise
g = theano.function([x,b,y], T.grad(expr, x), mode=mode)
g = theano.function([x, b, y], T.grad(expr, x), mode=mode)
if verbose:
theano.printing.debugprint(g)
try:
assert len(g.maker.fgraph.toposort()) in (6,7)
assert len(g.maker.fgraph.toposort()) in (6, 7)
g(x_val, b_val, y_val)
except Exception:
theano.printing.debugprint(g)
@@ -678,15 +701,13 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester):
if mode == theano.compile.mode.get_mode('FAST_COMPILE'):
mode = 'FAST_RUN'
rng = numpy.random.RandomState(utt.fetch_seed())
x_val = rng.randn(3,5)
x_val = rng.randn(3, 5)
b_val = rng.randn(5)
y_val = numpy.asarray([2,4,1], dtype='int64')
y_val = numpy.asarray([2, 4, 1], dtype='int64')
x = T.dmatrix('x')
b = T.dvector('b')
y = T.lvector('y')
yi = T.cast(y, 'int32')
expressions = [
T.sum(-T.log(softmax(x)[T.arange(yi.shape[0]), yi])),
-T.sum(T.log(softmax(x)[T.arange(yi.shape[0]), yi])),
@@ -696,7 +717,7 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester):
for expr in expressions:
# Verify the optimizer worked on the expressions
f = theano.function([x,y], expr, mode=mode)
f = theano.function([x, y], expr, mode=mode)
if verbose:
theano.printing.debugprint(f)
try:
@@ -707,7 +728,7 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester):
raise
# Also verify the gradient wrt x
g = theano.function([x,y], T.grad(expr, x), mode=mode)
g = theano.function([x, y], T.grad(expr, x), mode=mode)
if verbose:
theano.printing.debugprint(g)
try:
@@ -717,7 +738,6 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester):
theano.printing.debugprint(g)
raise
def test_optimize_xent_vector(self):
verbose = 0
mode = theano.compile.mode.get_default_mode()
@@ -743,8 +763,9 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester):
-T.sum(T.log(softmax(x)[T.arange(y.shape[0]), y]))]
for expr in bias_expressions:
f = theano.function([x,y], expr, mode=mode)
if verbose: print_graph(f)
f = theano.function([x, y], expr, mode=mode)
if verbose:
print_graph(f)
try:
prev, last = f.maker.fgraph.toposort()[-2:]
assert len(f.maker.fgraph.toposort()) == 5
@@ -752,7 +773,6 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester):
except Exception:
theano.printing.debugprint(f)
raise
g = theano.function([x,y], T.grad(expr, x), mode=mode)
print_graph(g)
try:
@@ -789,17 +809,19 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester):
## Test that a biased softmax is optimized correctly
bias_expressions = [
T.sum(-T.log(softmax(x+b)[T.arange(y.shape[0]), y])),
-T.sum(T.log(softmax(b+x)[T.arange(y.shape[0]), y])),
-T.sum(T.log(softmax(x+b))[T.arange(y.shape[0]), y]),
T.sum(-T.log(softmax(b+x))[T.arange(y.shape[0]), y])]
T.sum(-T.log(softmax(x + b)[T.arange(y.shape[0]), y])),
-T.sum(T.log(softmax(b + x)[T.arange(y.shape[0]), y])),
-T.sum(T.log(softmax(x + b))[T.arange(y.shape[0]), y]),
T.sum(-T.log(softmax(b + x))[T.arange(y.shape[0]), y])]
for expr in bias_expressions:
f = theano.function([x,b,y], expr, mode=mode)
if verbose: print_graph(f)
f = theano.function([x, b, y], expr, mode=mode)
if verbose:
print_graph(f)
try:
prev, last = f.maker.fgraph.toposort()[-2:]
assert len(f.maker.fgraph.toposort()) == 3 # [big_op, sum, dim_shuffle]
assert len(f.maker.fgraph.toposort()) == 3
# [big_op, sum, dim_shuffle]
f(x_val, b_val, y_val)
except Exception:
theano.printing.debugprint(f)
@@ -808,7 +830,7 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester):
backup = config.warn.sum_div_dimshuffle_bug
config.warn.sum_div_dimshuffle_bug = False
try:
g = theano.function([x,b,y], T.grad(expr, x), mode=mode)
g = theano.function([x, b, y], T.grad(expr, x), mode=mode)
finally:
config.warn.sum_div_dimshuffle_bug = backup
@@ -830,13 +852,10 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester):
mode = theano.compile.mode.get_default_mode()
if mode == theano.compile.mode.get_mode('FAST_COMPILE'):
mode = 'FAST_RUN'
rng = numpy.random.RandomState(utt.fetch_seed())
x_val = rng.randn(3,5)
x_val = rng.randn(3, 5)
b_val = rng.randn(5)
y_val = numpy.asarray([2,4,1])
y_val = numpy.asarray([2, 4, 1])
x = T.dmatrix('x')
b = T.dvector('b')
y = T.lvector('y')
@@ -878,7 +897,6 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester):
assert has_softmax
assert not has_softmaxdx
## Cases to test
expressions = [
a * T.sum(-T.log(softmax(x)[T.arange(y.shape[0]), y])),
@@ -904,7 +922,7 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester):
for expr in expressions:
# Verify the optimizer worked on the expressions
f = theano.function([x,y,a], expr, mode=mode)
f = theano.function([x, y, a], expr, mode=mode)
try:
assert 5 <= len(f.maker.fgraph.toposort()) <= 10
validate_fn_graph(f)
@@ -914,7 +932,7 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester):
raise
# Verify the gradient wrt x
g = theano.function([x,y,a], T.grad(expr, x), mode=mode)
g = theano.function([x, y, a], T.grad(expr, x), mode=mode)
try:
assert 5 <= len(g.maker.fgraph.toposort()) <= 12
validate_grad_graph(g)
@@ -924,7 +942,8 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester):
raise
# Verify the gradient when providing output gradient
h = theano.function([x,y,a], T.grad(expr, x, g_cost=a*x.sum()), mode=mode)
h = theano.function([x, y, a],
T.grad(expr, x, g_cost=a * x.sum()), mode=mode)
try:
assert 8 <= len(h.maker.fgraph.toposort()) <= 17
validate_grad_graph(h)
@@ -944,14 +963,13 @@ def test_argmax_pushdown():
fgraph = gof.FunctionGraph(
[x],
[out])
theano.compile.mode.optdb.query(
theano.compile.mode.OPT_FAST_RUN).optimize(fgraph)
#print 'AFTER'
#for node in fgraph.toposort():
#print node.op
assert len(fgraph.toposort()) == 2 # an output_guard is second
assert len(fgraph.toposort()) == 2 # an output_guard is second
assert fgraph.toposort()[0].op == tensor.basic._max_and_argmax
assert str(fgraph.toposort()[1].op) == 'OutputGuard'
x = tensor.dmatrix()
@@ -981,14 +999,13 @@ def test_argmax_pushdown():
assert isinstance(fgraph.toposort()[2].op.scalar_op, theano.scalar.Maximum)
assert str(fgraph.toposort()[3].op) == 'OutputGuard'
def test_argmax_pushdown_bias():
x = tensor.dmatrix()
b = tensor.dvector()
out = tensor.argmax(softmax_with_bias(x, b), axis=-1)
fgraph = gof.FunctionGraph(
[x,b],
[x, b],
[out])
theano.compile.mode.optdb.query(
@@ -1005,10 +1022,9 @@ def test_argmax_pushdown_bias():
x = tensor.dmatrix()
b = tensor.dvector()
out = tensor.max_and_argmax(softmax_with_bias(x, b), axis=-1)[0]
fgraph = gof.FunctionGraph(
[x,b],
[x, b],
[out])
backup = config.warn.argmax_pushdown_bug
@@ -1028,13 +1044,15 @@ def test_argmax_pushdown_bias():
assert isinstance(fgraph.toposort()[1].op.scalar_op, theano.scalar.Maximum)
assert str(fgraph.toposort()[2].op) == 'OutputGuard'
def test_asymptotic_32():
"""
This test makes sure that our functions behave sensibly when huge values are present
This test makes sure that our functions behave sensibly when
huge values are present
"""
#TODO: consider adding the optimization of crossentropy into the current mode for the
# purpose of running this test
#TODO: consider adding the optimization of crossentropy into the current
# mode for the purpose of running this test
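    # Editor's note (hedged, none of this text is in the diff): why huge
    # values are survivable at all. -log(softmax(x)[y]) == logsumexp(x) - x[y],
    # and logsumexp is numerically stable once the row max m is factored out:
    #     m = x.max()
    #     lse = m + numpy.log(numpy.exp(x - m).sum())
    # so even very large entries of x yield finite costs and gradients.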
for dtype in 'float32', 'float64':
if dtype == 'float32':
@@ -1045,20 +1063,21 @@ def test_asymptotic_32():
x2 = tensor.dvector()
y = tensor.lvector()
c = categorical_crossentropy(softmax(x+x2), y)
f = theano.function([x,y,x2], [c.sum(), tensor.grad(c.sum(), x)], mode='FAST_RUN')
c = categorical_crossentropy(softmax(x + x2), y)
f = theano.function([x, y, x2], [c.sum(),
tensor.grad(c.sum(), x)], mode='FAST_RUN')
if 0:
for i, n in enumerate( f.maker.fgraph.toposort()):
for i, n in enumerate(f.maker.fgraph.toposort()):
print i, n
xval = numpy.zeros((5, 5), dtype=dtype)
x2val = numpy.zeros(5, dtype=xval.dtype)
for i in xrange(100):
cval, gxval = f(xval, numpy.arange(5), x2val)
cval, gxval = f(xval, numpy.arange(5), x2val)
xval -= 100.3 * gxval
#print cval, gxval
assert cval == 0 # no problem going to zero error
assert cval == 0 # no problem going to zero error
#what about when x gets really big?
@@ -1066,56 +1085,55 @@ def test_asymptotic_32():
x2val = numpy.zeros(5, dtype=xval.dtype)
for i in xrange(100):
cval, gxval = f(xval, numpy.arange(5), x2val)
cval, gxval = f(xval, numpy.arange(5), x2val)
xval += 100000.3 * gxval
#print cval, gxval
assert cval > 61750000
assert gxval[0,0] == -1.0
assert gxval[0,1] == 0.25
assert gxval[0, 0] == -1.0
assert gxval[0, 1] == 0.25
class Test_softmax_opt:
# Test that expressions of softmax in terms of exponentiated things divided by row sums
# are replaced by softmax expressions.
# Test that expressions of softmax in terms of exponentiated things
# divided by row sums are replaced by softmax expressions.
#
# Softmax_grad isn't that interesting as an Op, but it's the signature we look for when
# trying to insert CrossEntropySoftmax... grad. So for now, we add softmax_grad to graphs.
# In future, we may modify the CrossEntropySoftmax...grad to look for the more basic
# pattern.
# Softmax_grad isn't that interesting as an Op, but it has the signature
# we look for when trying to insert CrossEntropySoftmax... grad. So for now,
# we add softmax_grad to graphs. In the future, we may modify the
# CrossEntropySoftmax...grad to look for the more basic pattern.
#
def setUp(self):
utt.seed_rng()
self.rng = numpy.random.RandomState(utt.fetch_seed())
self.mode=theano.compile.mode.get_default_mode()
self.mode=self.mode.including('canonicalize')
self.mode = theano.compile.mode.get_default_mode()
self.mode = self.mode.including('canonicalize')
def test_basic(self):
c = T.matrix()
p_y = T.exp(c) / T.exp(c).sum(axis=1).dimshuffle(0,'x')
p_y = T.exp(c) / T.exp(c).sum(axis=1).dimshuffle(0, 'x')
# test that function contains softmax and no div.
f = theano.function([c],p_y, mode=self.mode)
f = theano.function([c], p_y, mode=self.mode)
f_ops = [n.op for n in f.maker.fgraph.toposort()]
#print '--- f ='
#printing.debugprint(f)
#print '==='
assert len(f_ops) == 1
assert softmax in f_ops
f(self.rng.rand(3,4).astype(config.floatX))
f(self.rng.rand(3, 4).astype(config.floatX))
def test_grad(self):
c = T.matrix()
p_y = T.exp(c) / T.exp(c).sum(axis=1).dimshuffle(0,'x')
p_y = T.exp(c) / T.exp(c).sum(axis=1).dimshuffle(0, 'x')
# test that function contains softmax and softmaxgrad
w = T.matrix()
backup = config.warn.sum_div_dimshuffle_bug
config.warn.sum_div_dimshuffle_bug = False
try:
g = theano.function([c,w],T.grad((p_y*w).sum(), c))
g = theano.function([c, w], T.grad((p_y * w).sum(), c))
finally:
config.warn.sum_div_dimshuffle_bug = backup
g_ops = [n.op for n in g.maker.fgraph.toposort()]
@@ -1127,7 +1145,7 @@ class Test_softmax_opt:
assert len(g_ops) == 2
assert softmax in g_ops
assert softmax_grad in g_ops
g(self.rng.rand(3,4), self.rng.uniform(.5, 1, (3,4)))
g(self.rng.rand(3, 4), self.rng.uniform(.5, 1, (3, 4)))
def test_transpose_basic(self):
# this should be a transposed softmax
@@ -1135,14 +1153,14 @@ class Test_softmax_opt:
p_y = T.exp(c) / T.exp(c).sum(axis=0)
# test that function contains softmax and no div.
f = theano.function([c],p_y)
f = theano.function([c], p_y)
#printing.debugprint(f)
# test that function contains softmax and no div.
backup = config.warn.sum_div_dimshuffle_bug
config.warn.sum_div_dimshuffle_bug = False
try:
g = theano.function([c],T.grad(p_y.sum(), c))
g = theano.function([c], T.grad(p_y.sum(), c))
finally:
config.warn.sum_div_dimshuffle_bug = backup
#printing.debugprint(g)
@@ -1169,15 +1187,5 @@ class Test_softmax_opt:
# REPEAT 3 CASES in presence of log(softmax) with the advanced indexing etc.
#if __name__ == '__main__':
# unittest.main()
if __name__ == '__main__':
t = T_CrossentropyCategorical1HotGrad('setUp')
t.setUp()
t.test_infer_shape()
unittest.main()