a lot of pep8

7885e618 · Ian Goodfellow · c7d06ac9 · 7885e618 · 7885e618 · 7885e618
--- a/theano/tensor/nnet/sigm.py
+++ b/theano/tensor/nnet/sigm.py
@@ -42,7 +42,7 @@ class ScalarSigmoid(scalar.UnaryScalarOp):
        assert rval.type.dtype.find('float') != -1
-        return [ rval ]
+        return [rval]
    def c_code(self, node, name, inp, out, sub):
        x, = inp
@@ -69,7 +69,7 @@ sigmoid = elemwise.Elemwise(scalar_sigmoid, name='sigmoid')
 sigmoid_inplace = elemwise.Elemwise(
        ScalarSigmoid(scalar.transfer_type(0)),
-        inplace_pattern={0:0},
+        inplace_pattern={0: 0},
        name='sigmoid_inplace',
        )
@@ -84,12 +84,15 @@ class ScalarSoftplus(scalar.UnaryScalarOp):
        if x > 30.0:
            return x
        return numpy.log1p(numpy.exp(x))
    def impl(self, x):
        return ScalarSoftplus.static_impl(x)
    def grad(self, inp, grads):
        x, = inp
        gz, = grads
        return [gz * scalar_sigmoid(x)]
    def c_code(self, node, name, inp, out, sub):
        x, = inp
        z, = out
@@ -103,27 +106,29 @@ class ScalarSoftplus(scalar.UnaryScalarOp):
            return """%(z)s = %(x)s < -745.0 ? 0.0 : %(x)s > 16.0 ? %(x)s : log1p(exp(%(x)s));""" % locals()
        else:
            raise NotImplementedError('only floatingpoint is implemented')
    def c_code_cache_version(self):
        v = super(ScalarSoftplus, self).c_code_cache_version()
        if v:
            return (2,) + v
        else:
            return v
-scalar_softplus = ScalarSoftplus(scalar.upgrade_to_float, name='scalar_softplus')
+scalar_softplus = ScalarSoftplus(scalar.upgrade_to_float,
+                                 name='scalar_softplus')
 softplus = elemwise.Elemwise(scalar_softplus, name='softplus')
 pprint.assign(softplus, printing.FunctionPrinter('softplus'))
 def _skip_mul_1(r):
    if r.owner and r.owner.op == tensor.mul:
-        not_is_1 = [i for i in r.owner.inputs if not _is_1(i) ]
+        not_is_1 = [i for i in r.owner.inputs if not _is_1(i)]
-        if len(not_is_1)==1:
+        if len(not_is_1) == 1:
            return not_is_1[0]
 logsigm_to_softplus = gof.PatternSub(
    (tensor.log, (sigmoid, 'x')),
    (tensor.neg, (softplus, (tensor.neg, 'x'))),
-    allow_multiple_clients = True,
+    allow_multiple_clients=True,
    skip_identities_fn=_skip_mul_1)
@@ -139,21 +144,22 @@ def _is_1(expr):
 log1msigm_to_softplus = gof.PatternSub(
    (tensor.log,
        (tensor.sub,
-            dict(pattern='y', constraint = _is_1),
+            dict(pattern='y', constraint=_is_1),
            (sigmoid, 'x'))),
    (tensor.neg, (softplus, 'x')),
-    allow_multiple_clients = True,
+    allow_multiple_clients=True,
    skip_identities_fn=_skip_mul_1)
 log1pexp_to_softplus = gof.PatternSub(
    (tensor.log1p,
     (tensor.exp, 'x')),
    (softplus, 'x'),
-    allow_multiple_clients = True)
+    allow_multiple_clients=True)
+opt.register_stabilize(logsigm_to_softplus, name='logsigm_to_softplus')
+opt.register_stabilize(log1msigm_to_softplus, name='log1msigm_to_softplus')
+opt.register_stabilize(log1pexp_to_softplus, name='log1pexp_to_softplus')
-opt.register_stabilize(logsigm_to_softplus, name = 'logsigm_to_softplus')
-opt.register_stabilize(log1msigm_to_softplus, name = 'log1msigm_to_softplus')
-opt.register_stabilize(log1pexp_to_softplus, name = 'log1pexp_to_softplus')
 def is_1pexp(t):
    """
@@ -247,7 +253,7 @@ def partition_num_or_denom(r, f):
        else:
            neg_t, f_t = f_t
            f_terms.append(f_t)
-            neg ^= neg_t #bit flip if neg_t is true
+            neg ^= neg_t  # bit flip if neg_t is true
    return f_terms, rest, neg
@@ -299,7 +305,8 @@ def local_exp_over_1_plus_exp(node):
        #find all the exp() terms in the numerator
        num, denom = node.inputs
        num_exp_x, num_rest, num_neg = partition_num_or_denom(num, is_exp)
-        denom_1pexp, denom_rest, denom_neg = partition_num_or_denom(denom, is_1pexp)
+        denom_1pexp, denom_rest, denom_neg = partition_num_or_denom(
+            denom, is_1pexp)
        sigmoids = []
        for t in denom_1pexp:
@@ -311,7 +318,7 @@ def local_exp_over_1_plus_exp(node):
                # case: 1/(1+exp(x))
                sigmoids.append(sigmoid(-t))
-        if not sigmoids: # we didn't find any.  abort
+        if not sigmoids:  # we didn't find any.  abort
            return
        # put the new numerator together
        new_num = sigmoids + [tensor.exp(t) for t in num_exp_x] + num_rest
@@ -330,6 +337,7 @@ def local_exp_over_1_plus_exp(node):
        else:
            return [new_num / tensor.mul(*denom_rest)]
 def parse_mul_tree(root):
    """
    Parse a tree of multiplications starting at the given root.
@@ -512,7 +520,7 @@ def perform_sigm_times_exp(tree, exp_x=None, exp_minus_x=None, sigm_x=None,
        sigm_minus_x = []
    if full_tree is None:
        full_tree = tree
-    if False: # Debug code.
+    if False:  # Debug code.
        print '<perform_sigm_times_exp>'
        print '  full_tree   = %s' % full_tree
        print '  tree        = %s' % tree
@@ -621,10 +629,13 @@ def local_inv_1_plus_exp(node):
                if nonconsts[0].owner and nonconsts[0].owner.op == tensor.exp:
                    if scalars and numpy.allclose(numpy.sum(scalars), 1):
                        return opt._fill_chain(
-                                sigmoid(tensor.neg(nonconsts[0].owner.inputs[0])),
+                                sigmoid(
+                                    tensor.neg(nonconsts[0].owner.inputs[0])),
                                scalar_inputs)
 # Registration is below, and conditional.
 @gof.local_optimizer([tensor.sub])
 def local_1msigmoid(node):
    """
@@ -633,7 +644,7 @@ def local_1msigmoid(node):
    if node.op == tensor.sub:
        sub_l, sub_r = node.inputs
        if len(sub_r.clients) > 1:
-            return # graph is using both sigm and 1-sigm
+            return  # graph is using both sigm and 1-sigm
        if sub_r.owner and sub_r.owner.op == sigmoid:
            try:
                val_l = opt.get_constant_value(sub_l)
@@ -686,13 +697,14 @@ if 0:
                        assert t0.owner.op == div
                        t0top, t0bot = t0.owner.inputs
                        t1top, t1bot = t1.owner.inputs
-                        rval.append(div(mul(*(t0top+t1top)), mul(*(t0bot+t1bot))))
+                        rval.append(div(mul(*(
+                            t0top + t1top)), mul(*(t0bot + t1bot))))
                        if len(rval) > 100:
                            # This loop can be exponentially long.
                            # aborting
                            return []
-        elif len(node.outputs)>1:
+        elif len(node.outputs) > 1:
            return []
        else:
            return [node.outputs[0]]
--- a/theano/tensor/randomstreams.py
+++ b/theano/tensor/randomstreams.py
@@ -136,7 +136,7 @@ class RandomStreams(Component, raw_random.RandomStreamsBase):
    """
-    def __init__(self, seed=None, no_warn = False):
+    def __init__(self, seed=None, no_warn=False):
        """:type seed: None or int
        :param seed: a default seed to initialize the RandomState
@@ -146,7 +146,7 @@ class RandomStreams(Component, raw_random.RandomStreamsBase):
        """
        if not no_warn:
            deprecation_warning()
-        super(RandomStreams, self).__init__(no_warn = True)
+        super(RandomStreams, self).__init__(no_warn=True)
        self.random_state_variables = []
        self.default_instance_seed = seed

--- a/theano/tensor/tests/test_basic.py
+++ b/theano/tensor/tests/test_basic.py
--- a/theano/tensor/tests/test_elemwise.py
+++ b/theano/tensor/tests/test_elemwise.py
@@ -47,7 +47,8 @@ class test_DimShuffle(unittest_tools.InferShapeTester):
            #test that DimShuffle.infer_shape work correctly
            x = TensorType('float64', ib)('x')
            e = DimShuffle(ib, shuffle)(x)
-            f = copy(linker).accept(FunctionGraph([x], [e.shape])).make_function()
+            f = copy(linker).accept(FunctionGraph([x], [e.
+                shape])).make_function()
            assert all(f(numpy.ones(xsh))) == all(zsh)
        # Test when we drop a axis that is not broadcastable
@@ -125,7 +126,8 @@ class test_Broadcast(unittest.TestCase):
                x = TensorType('float64', [(entry == 1) for entry in xsh])('x')
                y = TensorType('float64', [(entry == 1) for entry in ysh])('y')
                e = Elemwise(scalar.add)(x, y)
-                f = copy(linker).accept(FunctionGraph([x, y], [e.shape])).make_function()
+                f = copy(linker).accept(FunctionGraph([x,
+                     y], [e.shape])).make_function()
                assert tuple(f(xv, yv)) == tuple(zv.shape)
    def with_linker_inplace(self, linker):
@@ -154,7 +156,8 @@ class test_Broadcast(unittest.TestCase):
                x = TensorType('float64', [(entry == 1) for entry in xsh])('x')
                y = TensorType('float64', [(entry == 1) for entry in ysh])('y')
                e = Elemwise(scalar.Add(scalar.transfer_type(0)), {0: 0})(x, y)
-                f = copy(linker).accept(FunctionGraph([x, y], [e.shape])).make_function()
+                f = copy(linker).accept(FunctionGraph([x,
+                     y], [e.shape])).make_function()
                xv = numpy.asarray(numpy.random.rand(*xsh))
                yv = numpy.asarray(numpy.random.rand(*ysh))
                zv = xv + yv
@@ -349,7 +352,8 @@ class test_CAReduce(unittest_tools.InferShapeTester):
                    e = tensor_op(x, axis=tosum)
                if tosum is None:
                    tosum = range(len(xsh))
-                f = copy(linker).accept(FunctionGraph([x], [e.shape])).make_function()
+                f = copy(linker).accept(FunctionGraph([x],
+                     [e.shape])).make_function()
                if not(scalar_op in [scalar.maximum, scalar.minimum] and
                       ((xsh == () or numpy.prod(xsh) == 0))):
                    assert all(f(xv) == zv.shape)
@@ -459,7 +463,8 @@ class test_Prod(unittest.TestCase):
        # including zeros, as the case with zeros is important
        # (and special cases: 1 zero in the row, more than 1 zero in the row)
-        x_val = numpy.asarray([[1,2,3],[4,5,6],[7,8,9]], dtype='float32')
+        x_val = numpy.asarray([[1, 2, 3], [4, 5, 6], [7, 8, 9]],
+             dtype='float32')
        x = theano.tensor.dmatrix()
        # now with verify_grad
        unittest_tools.verify_grad(Prod(axis=1), [x_val], mode=self.mode)
@@ -471,26 +476,28 @@ class test_Prod(unittest.TestCase):
        unittest_tools.verify_grad(fn, [x_val], mode=self.mode)
    def test_verify_grad_with_zeros(self):
        # including zeros, as the case with zeros is important
        # (and special cases: 1 zero in the row, more than 1 zero in the row)
-        x_val = numpy.asarray([[1.,2.,3.],[0.,5.,6.],[0.,0.,9.]], dtype='float32')
+        x_val = numpy.asarray([[1., 2., 3.], [0., 5., 6.], [0., 0., 9.]],
+             dtype='float32')
        x = theano.tensor.dmatrix()
        # sanity check
        x2 = theano.tensor.dmatrix()
        p = Prod(axis=1)(x)
        p2 = Prod(axis=1)(x2)
-        fn = theano.function([x,x2],[p-p2], mode=self.mode)
+        fn = theano.function([x, x2], [p - p2], mode=self.mode)
        #print "hand computed diff for each row"
-        x2_val = numpy.asarray([[1., 2., 3.003], [0.003,5.,6], [0.,0.,9.01]])
+        x2_val = numpy.asarray([[1., 2., 3.003], [0.003, 5., 6], [
+            0., 0., 9.01]])
        #print fn(x_val, x2_val)
-        fn2 = theano.function([x],[theano.tensor.grad(p.sum(),x)], mode=self.mode)
+        fn2 = theano.function([x], [theano.tensor.grad(p.sum(), x)],
+             mode=self.mode)
        #print "real grad"
        #print fn2(x_val)
-        fn3 = theano.function([x],[p], mode=self.mode)
+        fn3 = theano.function([x], [p], mode=self.mode)
-        assert numpy.allclose(fn3(x_val), [6.,0.,0.])
+        assert numpy.allclose(fn3(x_val), [6., 0., 0.])
        # now with verify_grad
        unittest_tools.verify_grad(Prod(axis=1), [x_val], mode=self.mode)
@@ -511,10 +518,10 @@ class test_Prod(unittest.TestCase):
    def test_prod_without_zeros(self):
        x = theano.tensor.dmatrix()
-        x_val = numpy.array([[1,2,3],[0,5,6],[0,0,9]], dtype='float32')
+        x_val = numpy.array([[1, 2, 3], [0, 5, 6], [0, 0, 9]], dtype='float32')
        pwz = ProdWithoutZeros(axis=1)(x)
        fn = theano.function([x], pwz, mode=self.mode)
-        assert numpy.allclose(fn(x_val), [6,30,9])
+        assert numpy.allclose(fn(x_val), [6, 30, 9])
        pwz_a0 = ProdWithoutZeros(axis=0)(x)
        fn_a0 = theano.function([x], pwz_a0, mode=self.mode)
@@ -522,25 +529,30 @@ class test_Prod(unittest.TestCase):
    def test_other_grad_tests(self):
        x = theano.tensor.dmatrix()
-        x_val1 = numpy.array([[1,2,3],[0,5,6],[0,0,9]], dtype='float32')
+        x_val1 = numpy.array([[1, 2, 3], [0, 5, 6], [0, 0, 9]],
-        x_val2 = numpy.array([[1,2,0],[0,5,6],[7,8,9],[9,10,0]], dtype='float32')
+             dtype='float32')
+        x_val2 = numpy.array([[1, 2, 0], [0, 5, 6], [7, 8, 9], [9, 10, 0]],
+             dtype='float32')
        rng = rng = numpy.random.RandomState(43)
        p = Prod(axis=1)
        grad_p = theano.tensor.grad(p(x).sum(), x)
        grad_fn = theano.function([x], grad_p, mode=self.mode)
-        assert numpy.allclose(grad_fn(x_val1), [[6.,3.,2.],[30.,0.,0.],[0.,0.,0.]])
+        assert numpy.allclose(grad_fn(x_val1), [[6., 3., 2.], [30., 0.,
-        assert numpy.allclose(grad_fn(x_val2), [[0., 0., 2.], [30., 0., 0.], [72., 63., 56.], [0., 0., 90.]])
+            0.], [0., 0., 0.]])
+        assert numpy.allclose(grad_fn(x_val2), [[0., 0., 2.], [30.,
+             0., 0.], [72., 63., 56.], [0., 0., 90.]])
        p_axis0 = Prod(axis=0)
        grad_p_axis0 = theano.tensor.grad(p_axis0(x).sum(), x)
        grad_fn_axis0 = theano.function([x], grad_p_axis0, mode=self.mode)
-        assert numpy.allclose(grad_fn_axis0(x_val2), [[0., 400., 0.],[63., 160., 0.], [0., 100., 0.], [0., 80., 0.]])
+        assert numpy.allclose(grad_fn_axis0(x_val2), [[0., 400.,
+             0.], [63., 160., 0.], [0., 100., 0.], [0., 80., 0.]])
        tensor.verify_grad(p, [x_val1], rng=rng, mode=self.mode)
    def test_mul_without_zeros_zeros(self):
-        a = numpy.zeros((3,3))
+        a = numpy.zeros((3, 3))
        x = theano.tensor.dmatrix()
@@ -655,6 +667,7 @@ class T_sum_dtype(unittest.TestCase):
                idx += 1
 class T_mean_dtype(unittest.TestCase):
    def test_mean_default_dtype(self):
        """
@@ -710,6 +723,7 @@ class T_mean_dtype(unittest.TestCase):
                idx += 1
 class T_prod_dtype(unittest.TestCase):
    def test_prod_default_dtype(self):
        """
@@ -761,6 +775,7 @@ class T_prod_dtype(unittest.TestCase):
                idx += 1
 class T_prod_without_zeros_dtype(unittest.TestCase):
    def test_prod_without_zeros_default_dtype(self):
        """
@@ -844,11 +859,8 @@ if __name__ == '__main__':
 """
 if __name__ == '__main__':
    t = TestElemwise('setUp')
    t.setUp()
    t.test_infer_shape()
--- a/theano/tensor/tests/test_naacl09.py
+++ b/theano/tensor/tests/test_naacl09.py
@@ -12,15 +12,19 @@ import sys
 from theano.tests import unittest_tools
 from numpy.testing.noseclasses import KnownFailureTest
 def cross_entropy(target, output, axis=1):
    """
    @todo: This is essentially duplicated as tensor.nnet.binary_crossentropy
    @warning: OUTPUT and TARGET are reversed in tensor.nnet.binary_crossentropy
    """
    return -T.mean(target * T.log(output) + (1 - target) * T.log(1 - output), axis=axis)
 def quadratic(target, output, axis=1):
    return T.mean(T.sqr(target - output), axis=axis)
 class QuadraticDenoisingAA(module.Module):
    """Quadratic de-noising Auto-encoder
@@ -35,15 +39,15 @@ class QuadraticDenoisingAA(module.Module):
    """
    def __init__(self,
-            input = None,
+            input=None,
 #            regularize = False,
-            tie_weights = False,
+            tie_weights=False,
-            n_quadratic_filters = 1,
+            n_quadratic_filters=1,
-            _w1 = None,
+            _w1=None,
-            _w2 = None,
+            _w2=None,
-            _b1 = None,
+            _b1=None,
-            _b2 = None,
+            _b2=None,
-            _qfilters = None,
+            _qfilters=None,
            activation_function=NN.sigmoid,
            reconstruction_cost_function=cross_entropy):
        """
@@ -83,7 +87,8 @@ class QuadraticDenoisingAA(module.Module):
        # PARAMETERS
        if _qfilters is None:
            #self.qfilters = [theano.Member(T.dmatrix('q%i'%i)) for i in xrange(n_quadratic_filters)]
-            self.qfilters = [(T.dmatrix('q%i'%i)) for i in xrange(n_quadratic_filters)]
+            self.qfilters = [(T.dmatrix('q%i' % i))
+                 for i in xrange(n_quadratic_filters)]
        else:
            #self.qfilters = [theano.Member(q) for q in _qfilters]
            self.qfilters = [(q) for q in _qfilters]
@@ -91,7 +96,8 @@ class QuadraticDenoisingAA(module.Module):
        #self.w1 = theano.Member(T.matrix('w1')) if _w1 is None else theano.Member(_w1)
        if _w1 is None:
            self.w1 = (T.matrix('w1'))
-        else: self.w1 = (_w1)
+        else:
+            self.w1 = (_w1)
        if _w2 is None:
            if not tie_weights:
                #self.w2 = theano.Member(T.matrix())
@@ -104,30 +110,30 @@ class QuadraticDenoisingAA(module.Module):
        #self.b1 = theano.Member(T.vector('b1')) if _b1 is None else theano.Member(_b1)
        if _b1 is None:
            self.b1 = (T.vector('b1'))
-        else: self.b1 = (_b1)
+        else:
+            self.b1 = (_b1)
        #self.b2 = theano.Member(T.vector('b2')) if _b2 is None else theano.Member(_b2)
        if _b2 is None:
            self.b2 = (T.vector('b2'))
-        else: self.b2 = (_b2)
+        else:
+            self.b2 = (_b2)
 #        # REGULARIZATION COST
 #        self.regularization = self.build_regularization()
        ### NOISELESS ###
        # HIDDEN LAYER
        def _act(x):
            if len(self.qfilters) > 0:
                qsum = 10e-10   # helps to control the gradient in the square-root below
                for qf in self.qfilters:
-                    qsum = qsum + T.dot(x, qf)**2
+                    qsum = qsum + T.dot(x, qf) ** 2
                return T.dot(x, self.w1) + self.b1 + T.sqrt(qsum)
            else:
                return T.dot(x, self.w1) + self.b1
-        self.hidden_activation = _act(self.input) #noise-free hidden
+        self.hidden_activation = _act(self.input)  # noise-free hidden
        self.hidden = self.hid_activation_function(self.hidden_activation)
@@ -144,7 +150,6 @@ class QuadraticDenoisingAA(module.Module):
 #        if self.regularize:
 #            self.cost = self.cost + self.regularization
        ### WITH NOISE ###
        self.corrupted_input = self.build_corrupted_input()
@@ -165,7 +170,6 @@ class QuadraticDenoisingAA(module.Module):
 #        if self.regularize:
 #            self.ncost = self.ncost + self.regularization
        # GRADIENTS AND UPDATES
        if self.tie_weights:
            self.params = [self.w1, self.b1, self.b2] + self.qfilters
@@ -173,7 +177,8 @@ class QuadraticDenoisingAA(module.Module):
            self.params = [self.w1, self.w2, self.b1, self.b2] + self.qfilters
        gradients = T.grad(self.ncost, self.params)
-        updates = dict((p, p - self.lr * g) for p, g in zip(self.params, gradients))
+        updates = dict((p, p - self.lr * g) for p, g in zip(self.
+            params, gradients))
        # INTERFACE METHODS
        #self.update = theano.Method(self.input, self.ncost, updates)
@@ -192,16 +197,17 @@ class QuadraticDenoisingAA(module.Module):
        filter's initial range)
        """
        if (input_size is None) ^ (hidden_size is None):
-            raise ValueError("Must specify input_size and hidden_size or neither.")
+            raise ValueError(
+                "Must specify input_size and hidden_size or neither.")
        super(QuadraticDenoisingAA, self)._instance_initialize(obj, {})
        obj.random.initialize()
        R = N.random.RandomState(unittest_tools.fetch_seed(seed))
        if input_size is not None:
            sz = (input_size, hidden_size)
-            inf = 1/N.sqrt(input_size)
+            inf = 1 / N.sqrt(input_size)
-            hif = 1/N.sqrt(hidden_size)
+            hif = 1 / N.sqrt(hidden_size)
-            obj.w1 = N.asarray(R.uniform(size = sz, low = -inf, high = inf),
+            obj.w1 = N.asarray(R.uniform(size=sz, low=-inf, high=inf),
                    dtype=config.floatX)
            if not self.tie_weights:
                obj.w2 = N.asarray(
@@ -257,14 +263,17 @@ class SigmoidXEQuadraticDenoisingAA(QuadraticDenoisingAA):
    def _instance_initialize(self, obj, input_size, hidden_size, noise_level, seed, lr, qfilter_relscale):
 #        obj.l2_coef = 0.0
        obj.noise_level = N.asarray(noise_level, dtype=config.floatX)
-        super(SigmoidXEQuadraticDenoisingAA, self)._instance_initialize(obj, input_size, hidden_size, seed, lr, qfilter_relscale)
+        super(SigmoidXEQuadraticDenoisingAA, self)._instance_initialize(
+            obj, input_size, hidden_size, seed, lr, qfilter_relscale)
 QDAA = SigmoidXEQuadraticDenoisingAA
 class Loss01(object):
    def loss_01(self, x, targ):
        return N.mean(self.classify(x) != targ)
 class Module_Nclass(module.FancyModule):
    def _instance_initialize(mod_self, self, n_in, n_out, lr, seed):
        #self.component is the LogisticRegressionTemplate instance that built this guy.
@@ -280,29 +289,34 @@ class Module_Nclass(module.FancyModule):
        self.output_dimension = n_out
    def __init__(self, x=None, targ=None, w=None, b=None, lr=None, regularize=False):
-        super(Module_Nclass, self).__init__() #boilerplate
+        super(Module_Nclass, self).__init__()  # boilerplate
        #self.x = module.Member(x) if x is not None else T.matrix('input')
        if x is not None:
            self.x = (x)
-        else: self.x = T.matrix('input')
+        else:
+            self.x = T.matrix('input')
        #self.targ = module.Member(targ) if targ is not None else T.lvector()
        if targ is not None:
            self.targ = (targ)
-        else: self.targ = T.lvector()
+        else:
+            self.targ = T.lvector()
        #self.w = module.Member(w) if w is not None else module.Member(T.dmatrix())
        if w is not None:
            self.w = (w)
-        else: self.w = (T.dmatrix())
+        else:
+            self.w = (T.dmatrix())
        #self.b = module.Member(b) if b is not None else module.Member(T.dvector())
        if b is not None:
            self.b = (b)
-        else: self.b = (T.dvector())
+        else:
+            self.b = (T.dvector())
        #self.lr = module.Member(lr) if lr is not None else module.Member(T.dscalar())
        if lr is not None:
            self.lr = (lr)
-        else: self.lr = (T.dscalar())
+        else:
+            self.lr = (T.dscalar())
        self.params = [p for p in [self.w, self.b] if p.owner is None]
@@ -341,13 +355,14 @@ class Module_Nclass(module.FancyModule):
            #self.update = module.Method([self.input, self.targ], sum_xent,
                    #updates = dict((p, p - self.lr * g) for p, g in zip(self.params, gparams)))
 class ConvolutionalMLP(module.FancyModule):
    def __init__(self,
            window_size,
            n_quadratic_filters,
            activation_function,
            reconstruction_cost_function,
-            tie_weights = False,
+            tie_weights=False,
 #            _input,
 #            _targ
            ):
@@ -362,9 +377,9 @@ class ConvolutionalMLP(module.FancyModule):
        self.input_representations = []
        self.input_representations.append(QDAA(
                            input=self.inputs[0],
-                            tie_weights = tie_weights,
+                            tie_weights=tie_weights,
-                            n_quadratic_filters = n_quadratic_filters,
+                            n_quadratic_filters=n_quadratic_filters,
-                            activation_function = activation_function,
+                            activation_function=activation_function,
                            reconstruction_cost_function = reconstruction_cost_function
                        )
        )
@@ -373,9 +388,9 @@ class ConvolutionalMLP(module.FancyModule):
            self.input_representations.append(
                            QDAA(
                                input=i,
-                                tie_weights = tie_weights,
+                                tie_weights=tie_weights,
-                                n_quadratic_filters = n_quadratic_filters,
+                                n_quadratic_filters=n_quadratic_filters,
-                                activation_function = activation_function,
+                                activation_function=activation_function,
                                reconstruction_cost_function = reconstruction_cost_function,
                                _w1 = self.input_representations[0].w1,
                                _w2 = self.input_representations[0].w2,
@@ -384,14 +399,16 @@ class ConvolutionalMLP(module.FancyModule):
                                _qfilters = self.input_representations[0].qfilters
                            )
            )
-            assert self.input_representations[-1].w1 is self.input_representations[0].w1
+            assert (self.input_representations[-1].w1
+                    is self.input_representations[0].w1)
-        self.input_representation = T.concatenate([i.hidden for i in self.input_representations], axis=1)
+        self.input_representation = T.concatenate(
+            [i.hidden for i in self.input_representations], axis=1)
        self.hidden = QDAA(
-                        input = self.input_representation,
+                        input=self.input_representation,
-                        tie_weights = tie_weights,
+                        tie_weights=tie_weights,
-                        n_quadratic_filters = n_quadratic_filters,
+                        n_quadratic_filters=n_quadratic_filters,
-                        activation_function = activation_function,
+                        activation_function=activation_function,
                        reconstruction_cost_function = reconstruction_cost_function
                    )
        self.output = Module_Nclass(x=self.hidden.hidden, targ=self.targ)
@@ -408,11 +425,13 @@ class ConvolutionalMLP(module.FancyModule):
                        self.hidden.b1,
                        self.hidden.b2
                        ] + self.hidden.qfilters
-        input_pretraining_cost = sum(i.ncost for i in self.input_representations)
+        input_pretraining_cost = sum(i.ncost for i in self.
+            input_representations)
        hidden_pretraining_cost = self.hidden.ncost
        input_pretraining_gradients = T.grad(input_pretraining_cost,
                input_pretraining_params)
-        hidden_pretraining_gradients = T.grad(hidden_pretraining_cost, hidden_pretraining_params)
+        hidden_pretraining_gradients = T.grad(
+            hidden_pretraining_cost, hidden_pretraining_params)
        pretraining_updates = \
                dict((p, p - self.lr * g) for p, g in \
                zip(input_pretraining_params, input_pretraining_gradients) \
@@ -428,8 +447,10 @@ class ConvolutionalMLP(module.FancyModule):
                        [self.output.w, self.output.b]
        finetuning_cost = self.output.cost
        finetuning_gradients = T.grad(finetuning_cost, finetuning_params)
-        finetuning_updates = dict((p, p - self.lr * g) for p, g in zip(finetuning_params, finetuning_gradients))
+        finetuning_updates = dict((p, p - self.lr * g) for p,
-        self.finetuning_update = module.Method(self.inputs + [self.targ], self.output.cost, finetuning_updates)
+             g in zip(finetuning_params, finetuning_gradients))
+        self.finetuning_update = module.Method(self.inputs + [self.
+            targ], self.output.cost, finetuning_updates)
        #self.validate = module.Method(self.inputs + [self.targ], [self.output.cost, self.output.argmax, self.output.max_pr])
        #self.softmax_output = module.Method(self.inputs, self.output.softmax_unsupervised)
@@ -447,8 +468,10 @@ class ConvolutionalMLP(module.FancyModule):
 #        for layer in obj.layers:
 #            if layer.lr is None:
 #                layer.lr = lr
-        assert self.input_representations[-1] is not self.input_representations[0]
+        assert (self.input_representations[-1]
-        assert self.input_representations[-1].w1 is self.input_representations[0].w1
+                is not self.input_representations[0])
+        assert (self.input_representations[-1].w1
+                is self.input_representations[0].w1)
        for i in self.input_representations:
 #            i.initialize(input_size=self.input_size, hidden_size=self.input_representation_size, seed=R.random_integers(2**30), noise_level=noise_level, qfilter_relscale=qfilter_relscale)
@@ -465,13 +488,16 @@ class ConvolutionalMLP(module.FancyModule):
            assert (i.w2 == self.input_representations[0].w2).all()
            assert (i.b1 == self.input_representations[0].b1).all()
            assert (i.b2 == self.input_representations[0].b2).all()
-            assert N.all((a==b).all() for a, b in zip(i.qfilters, self.input_representations[0].qfilters))
+            assert N.all((a == b).all() for a, b in zip(i.
+                qfilters, self.input_representations[0].qfilters))
        self.hidden.initialize(input_size=(len(self.inputs) * self.input_representation_size),
                hidden_size=self.hidden_representation_size, noise_level=noise_level,
                seed=int(R.random_integers(2**30)), lr=lr, qfilter_relscale=qfilter_relscale)
-        self.output.initialize(n_in=self.hidden_representation_size, n_out=self.output_size, lr=lr, seed=R.random_integers(2**30))
+        self.output.initialize(n_in=self.hidden_representation_size,
+            n_out=self.output_size, lr=lr, seed=R.random_integers(2**30))
 def create(window_size=3,
        input_dimension=9,
@@ -488,22 +514,24 @@ def create(window_size=3,
    activation_function = T.tanh
    architecture = ConvolutionalMLP( \
-                window_size = window_size,
+                window_size=window_size,
-                n_quadratic_filters = n_quadratic_filters,
+                n_quadratic_filters=n_quadratic_filters,
-                activation_function = activation_function,
+                activation_function=activation_function,
-                reconstruction_cost_function = quadratic,
+                reconstruction_cost_function=quadratic,
-                tie_weights = False
+                tie_weights=False
            )
    backup = config.warn.sum_div_dimshuffle_bug
    config.warn.sum_div_dimshuffle_bug = False
    try:
-        model = architecture.make(input_size=input_dimension, input_representation_size=token_representation_size, hidden_representation_size=concatenated_representation_size, output_size=output_vocabsize, lr=lr, seed=seed, noise_level=noise_level, qfilter_relscale=qfilter_relscale, mode=compile_mode)
+        model = architecture.make(input_size=input_dimension,
+             input_representation_size=token_representation_size, hidden_representation_size=concatenated_representation_size, output_size=output_vocabsize, lr=lr, seed=seed, noise_level=noise_level, qfilter_relscale=qfilter_relscale, mode=compile_mode)
    finally:
        config.warn.sum_div_dimshuffle_bug = backup
    return model
-def create_realistic(window_size=3,#7,
+def create_realistic(window_size=3,  # 7,
        input_dimension=200,
        output_vocabsize=23,
        n_quadratic_filters=2,
@@ -518,15 +546,17 @@ def create_realistic(window_size=3,#7,
    activation_function = T.tanh
    architecture = ConvolutionalMLP( \
-                window_size = window_size,
+                window_size=window_size,
-                n_quadratic_filters = n_quadratic_filters,
+                n_quadratic_filters=n_quadratic_filters,
-                activation_function = activation_function,
+                activation_function=activation_function,
-                reconstruction_cost_function = quadratic,
+                reconstruction_cost_function=quadratic,
-                tie_weights = False
+                tie_weights=False
            )
-    model = architecture.make(input_size=input_dimension, input_representation_size=token_representation_size, hidden_representation_size=concatenated_representation_size, output_size=output_vocabsize, lr=lr, seed=seed, noise_level=noise_level, qfilter_relscale=qfilter_relscale, mode=compile_mode)
+    model = architecture.make(input_size=input_dimension,
+         input_representation_size=token_representation_size, hidden_representation_size=concatenated_representation_size, output_size=output_vocabsize, lr=lr, seed=seed, noise_level=noise_level, qfilter_relscale=qfilter_relscale, mode=compile_mode)
    return model
 def test_naacl_model(iters_per_unsup=3, iters_per_sup=3,
        optimizer=None, realistic=False):
    #print "BUILDING MODEL"
@@ -535,11 +565,12 @@ def test_naacl_model(iters_per_unsup=3, iters_per_sup=3,
    if optimizer:
        mode = theano.Mode(linker='c|py', optimizer=optimizer)
-    else: mode = get_default_mode()
+    else:
+        mode = get_default_mode()
    if mode.__class__.__name__ == 'DebugMode':
-        iters_per_unsup=1
+        iters_per_unsup = 1
-        iters_per_sup =1
+        iters_per_sup = 1
    if realistic:
        m = create_realistic(compile_mode=mode)
@@ -552,7 +583,8 @@ def test_naacl_model(iters_per_unsup=3, iters_per_sup=3,
    for i, node in enumerate(m.pretraining_update.maker.fgraph.toposort()):
        idx_of_node[node] = i
        if False and i > -1:
-            print '   ', i, node, [(ii, idx_of_node.get(ii.owner, 'IN')) for ii in node.inputs]
+            print '   ', i, node, [(ii, idx_of_node.get(ii.
+                owner, 'IN')) for ii in node.inputs]
        prog_str.append(str(node))
    #print input_pretraining_gradients[4].owner.inputs
    #print input_pretraining_gradients[4].owner.inputs[1].owner.inputs
@@ -562,8 +594,8 @@ def test_naacl_model(iters_per_unsup=3, iters_per_sup=3,
    rng = N.random.RandomState(unittest_tools.fetch_seed(23904))
-    inputs = [rng.rand(10,m.input_size) for i in 1,2,3]
+    inputs = [rng.rand(10, m.input_size) for i in 1, 2, 3]
-    targets = N.asarray([0,3,4,2,3,4,4,2,1,0])
+    targets = N.asarray([0, 3, 4, 2, 3, 4, 4, 2, 1, 0])
    #print inputs
    #print 'UNSUPERVISED PHASE'
@@ -584,8 +616,8 @@ def test_naacl_model(iters_per_unsup=3, iters_per_sup=3,
        s0, s1 = [str(j) for j in m.pretraining_update(*inputs)]
        #print 'huh?', i, iters_per_unsup, iters_per_unsup * (i+1), s0, s1
    if iters_per_unsup == 3:
-        assert s0.startswith('0.927793')#'0.403044')
+        assert s0.startswith('0.927793')  # '0.403044')
-        assert s1.startswith('0.068035')#'0.074898')
+        assert s1.startswith('0.068035')  # '0.074898')
    #print 'UNSUPERVISED took %.3fs'%(time.time() - t)
    #print 'FINETUNING GRAPH'
@@ -601,6 +633,7 @@ def test_naacl_model(iters_per_unsup=3, iters_per_sup=3,
        assert 19.7042 < s0f and s0f < 19.7043
    #print 'SUPERVISED took %.3fs'%( time.time() - t)
 def jtest_main():
    from theano import gof
    JTEST = theano.compile.mode.optdb.query(*sys.argv[2:])
@@ -609,13 +642,17 @@ def jtest_main():
    optimizer = eval(sys.argv[1])
    test_naacl_model(optimizer, 10, 10, realistic=False)
 def real_main():
    test_naacl_model()
 def profile_main():
    # This is the main function for profiling
    # We've renamed our original main() above to real_main()
-    import cProfile, pstats, StringIO
+    import cProfile
+    import pstats
+    import StringIO
    prof = cProfile.Profile()
    prof = prof.runctx("real_main()", globals(), locals())
    stream = StringIO.StringIO()

--- a/theano/tensor/tests/test_opt.py
+++ b/theano/tensor/tests/test_opt.py
--- a/theano/tests/test_gradient.py
+++ b/theano/tests/test_gradient.py
@@ -26,6 +26,7 @@ class testgrad_sources_inputs(unittest.TestCase):
                inputs = [theano.tensor.vector()]
                outputs = [theano.tensor.vector()]
                return gof.Apply(self, inputs, outputs)
            def grad(self, inp, grads):
                x, = inp
                gz, = grads
@@ -43,14 +44,15 @@ class testgrad_sources_inputs(unittest.TestCase):
            def make_node(self, *inputs):
                outputs = [theano.tensor.vector()]
                return gof.Apply(self, inputs, outputs)
            def grad(self, inputs, grads):
-                return [ inputs[0].zeros_like() ]
+                return [inputs[0].zeros_like()]
        i = theano.tensor.vector()
        j = theano.tensor.vector()
        a1 = retOne().make_node(i)
        g = grad_sources_inputs([(a1.out, one)], None)
-        a2 = retOne().make_node(i,j)
+        a2 = retOne().make_node(i, j)
        try:
            g = grad_sources_inputs([(a2.out, one)], None)
        except ValueError, e:
@@ -60,11 +62,13 @@ class testgrad_sources_inputs(unittest.TestCase):
    def test_1in_1out(self):
        """Test grad is called correctly for a 1-to-1 op"""
        gval = theano.tensor.matrix()
        class O(gof.op.Op):
            def make_node(self):
                inputs = [theano.tensor.matrix()]
                outputs = [theano.tensor.matrix()]
                return gof.Apply(self, inputs, outputs)
            def grad(self, inp, grads):
                return gval,
        a1 = O().make_node()
@@ -74,11 +78,13 @@ class testgrad_sources_inputs(unittest.TestCase):
    def test_1in_Nout(self):
        """Test grad is called correctly for a 1-to-many op"""
        gval = theano.tensor.matrix()
        class O(gof.op.Op):
            def make_node(self):
                inputs = [theano.tensor.matrix()]
-                outputs = [theano.tensor.scalar(),theano.tensor.scalar()]
+                outputs = [theano.tensor.scalar(), theano.tensor.scalar()]
                return gof.Apply(self, inputs, outputs)
            def grad(self, inp, grads):
                x, = inp
                gz1, gz2 = grads
@@ -91,11 +97,13 @@ class testgrad_sources_inputs(unittest.TestCase):
        """Test grad is called correctly for a many-to-1 op"""
        gval0 = theano.tensor.scalar()
        gval1 = theano.tensor.scalar()
        class O(gof.op.Op):
            def make_node(self):
                inputs = [theano.tensor.scalar(), theano.tensor.scalar()]
                outputs = [theano.tensor.matrix()]
                return gof.Apply(self, inputs, outputs)
            def grad(self, inp, grads):
                x0, x1 = inp
                gz, = grads
@@ -109,11 +117,13 @@ class testgrad_sources_inputs(unittest.TestCase):
        """Test grad is called correctly for a many-to-many op"""
        gval0 = theano.tensor.matrix()
        gval1 = theano.tensor.matrix()
        class O(gof.op.Op):
            def make_node(self):
-                inputs = [theano.tensor.matrix(),theano.tensor.matrix()]
+                inputs = [theano.tensor.matrix(), theano.tensor.matrix()]
-                outputs = [theano.tensor.matrix(),theano.tensor.matrix()]
+                outputs = [theano.tensor.matrix(), theano.tensor.matrix()]
                return gof.Apply(self, inputs, outputs)
            def grad(self, inp, grads):
                return gval0, gval1
        a1 = O().make_node()
@@ -126,9 +136,11 @@ class testgrad_sources_inputs(unittest.TestCase):
        class O(gof.op.Op):
            def __init__(self, tst):
                self.tst = tst
            def make_node(self, *inputs):
-                outputs = [theano.tensor.matrix(),theano.tensor.matrix()]
+                outputs = [theano.tensor.matrix(), theano.tensor.matrix()]
                return gof.Apply(self, inputs, outputs)
            def grad(self, inputs, g_out):
                return [one]
        i = theano.tensor.matrix()
@@ -136,26 +148,29 @@ class testgrad_sources_inputs(unittest.TestCase):
        g = grad_sources_inputs([(a1.outputs[0], one)], None)
        self.assertTrue(g[i] is one)
 def test_unimplemented_grad_func():
    # tests that function compilation catches unimplemented grads in the graph
    a = theano.tensor.vector()
    b = theano.gradient.grad_not_implemented(theano.tensor.add, 0, a)
    try:
-        f = theano.function([a], b, on_unused_input = 'ignore')
+        f = theano.function([a], b, on_unused_input='ignore')
        assert 0
    except TypeError:
        pass
 def test_undefined_grad_func():
    #tests that function compilation catches undefined grads in the graph
    a = theano.tensor.vector()
    b = theano.gradient.grad_undefined(theano.tensor.add, 0, a)
    try:
-        f = theano.function([a],b, on_unused_input = 'ignore')
+        f = theano.function([a], b, on_unused_input='ignore')
        assert 0
    except TypeError:
        pass
 def test_unimplemented_grad_grad():
    #tests that unimplemented grads are caught in the grad method
@@ -164,132 +179,137 @@ def test_unimplemented_grad_grad():
            return gof.Apply(self, [x], [x.type()])
        def grad(self, inputs, output_grads):
-            return [ theano.gradient.grad_not_implemented(self, 0, inputs[0]) ]
+            return [theano.gradient.grad_not_implemented(self, 0, inputs[0])]
    a = theano.tensor.scalar()
    b = DummyOp()(a)
    try:
-        g = theano.gradient.grad(b,a)
+        g = theano.gradient.grad(b, a)
        assert False
    except TypeError:
        pass
 def test_undefined_grad_grad():
    #tests that undefined grads are caught in the grad method
    V = theano.tensor.TensorType(dtype=config.floatX,
-            broadcastable = (False,False,False,False,False))()
+            broadcastable=(False, False, False, False, False))()
    W = theano.tensor.TensorType(dtype=config.floatX,
-            broadcastable = (False, False, False, False, False))()
+            broadcastable=(False, False, False, False, False))()
    b = theano.tensor.vector()
    d = theano.tensor.ivector()
-    Z = conv3D(V,W,b,d)
+    Z = conv3D(V, W, b, d)
    try:
-        g = theano.gradient.grad(Z.sum(),d)
+        g = theano.gradient.grad(Z.sum(), d)
        assert False
    except TypeError:
        pass
 def test_grad_name():
    A = theano.tensor.matrix('A')
    x = theano.tensor.vector('x')
-    f = theano.tensor.dot(x,theano.tensor.dot(A,x))
+    f = theano.tensor.dot(x, theano.tensor.dot(A, x))
    f.name = 'f'
-    g = theano.tensor.grad(f,x)
+    g = theano.tensor.grad(f, x)
    assert g.name == '(df/dx)'
 def test_grad_duplicate_input():
    #test that the grad works when a variable
    #appears in more than one place in a node's input list
    def output(x):
-        return (x*x)
+        return (x * x)
-    rng = np.random.RandomState([2012,8,28])
+    rng = np.random.RandomState([2012, 8, 28])
    vx = rng.randn(2)
-    theano.tests.unittest_tools.verify_grad(output,[vx])
+    theano.tests.unittest_tools.verify_grad(output, [vx])
 def test_grad_quadratic():
    #test the gradient on a tiny graph
-    def cost(x,A):
+    def cost(x, A):
-        return theano.tensor.dot(x,theano.tensor.dot(A,x))
+        return theano.tensor.dot(x, theano.tensor.dot(A, x))
-    rng = np.random.RandomState([2012,8,28])
+    rng = np.random.RandomState([2012, 8, 28])
    vx = rng.randn(2)
-    vA = rng.randn(2,2)
+    vA = rng.randn(2, 2)
-    theano.tests.unittest_tools.verify_grad(cost,[vx,vA])
+    theano.tests.unittest_tools.verify_grad(cost, [vx, vA])
 def test_grad_quadratic_vector():
    #test the gradient on a small graph
-    def output(x,A):
+    def output(x, A):
-        return theano.tensor.dot(x*x,A)
+        return theano.tensor.dot(x * x, A)
-    rng = np.random.RandomState([2012,8,28])
+    rng = np.random.RandomState([2012, 8, 28])
    vx = rng.randn(2)
-    vA = rng.randn(2,2)
+    vA = rng.randn(2, 2)
-    theano.tests.unittest_tools.verify_grad(output,[vx,vA])
+    theano.tests.unittest_tools.verify_grad(output, [vx, vA])
 def test_grad_cubic():
    #test the gradient on a bigger graph
-    def cost(x,A):
+    def cost(x, A):
-        return theano.tensor.dot(x*x,theano.tensor.dot(A,x))
+        return theano.tensor.dot(x * x, theano.tensor.dot(A, x))
-    rng = np.random.RandomState([2012,8,28])
+    rng = np.random.RandomState([2012, 8, 28])
    vx = rng.randn(2)
-    vA = rng.randn(2,2)
+    vA = rng.randn(2, 2)
+    theano.tests.unittest_tools.verify_grad(cost, [vx, vA])
-    theano.tests.unittest_tools.verify_grad(cost,[vx,vA])
 def test_grad_grad_quadratic():
    #test the gradient on a graph constructed using the gradient
-    def output(x,A):
+    def output(x, A):
-        orig_cost = theano.tensor.dot(x,theano.tensor.dot(A,x))
+        orig_cost = theano.tensor.dot(x, theano.tensor.dot(A, x))
        return theano.gradient.grad(orig_cost, x)
-    rng = np.random.RandomState([2012,8,28])
+    rng = np.random.RandomState([2012, 8, 28])
    vx = rng.randn(2)
-    vA = rng.randn(2,2)
+    vA = rng.randn(2, 2)
+    theano.tests.unittest_tools.verify_grad(output, [vx, vA])
-    theano.tests.unittest_tools.verify_grad(output,[vx,vA])
 def test_grad_grad_cubic():
    #test the gradient on a bigger graph constructed using the gradient
-    def output(x,A):
+    def output(x, A):
-        orig_cost = theano.tensor.dot(x*x,theano.tensor.dot(A,x))
+        orig_cost = theano.tensor.dot(x * x, theano.tensor.dot(A, x))
        return theano.gradient.grad(orig_cost, x)
-    rng = np.random.RandomState([2012,8,28])
+    rng = np.random.RandomState([2012, 8, 28])
    vx = rng.randn(2)
-    vA = rng.randn(2,2)
+    vA = rng.randn(2, 2)
+    theano.tests.unittest_tools.verify_grad(output, [vx, vA])
-    theano.tests.unittest_tools.verify_grad(output,[vx,vA])
 def test_grad_int():
@@ -300,11 +320,11 @@ def test_grad_int():
    b = theano.tensor.vector()
    def make_grad_func(X):
-        Z = theano.tensor.dot(X,W) + b
+        Z = theano.tensor.dot(X, W) + b
        H = theano.tensor.nnet.sigmoid(Z)
        cost = H.sum()
-        g = gradient.grad(cost,X)
+        g = gradient.grad(cost, X)
-        return theano.function([X,W,b],g, on_unused_input = 'ignore')
+        return theano.function([X, W, b], g, on_unused_input='ignore')
    int_func = make_grad_func(theano.tensor.imatrix())
    #we have to use float64 as the float type to get the results to match
@@ -314,17 +334,17 @@ def test_grad_int():
    m = 5
    d = 3
    n = 4
-    rng = np.random.RandomState([2012,9,5])
+    rng = np.random.RandomState([2012, 9, 5])
    int_type = theano.tensor.imatrix().dtype
    float_type = 'float64'
-    X = np.cast[int_type](rng.randn(m,d) * 127.)
+    X = np.cast[int_type](rng.randn(m, d) * 127.)
-    W = np.cast[W.dtype](rng.randn(d,n))
+    W = np.cast[W.dtype](rng.randn(d, n))
    b = np.cast[b.dtype](rng.randn(n))
-    int_result = int_func(X,W,b)
+    int_result = int_func(X, W, b)
-    float_result = float_func(np.cast[float_type](X),W,b)
+    float_result = float_func(np.cast[float_type](X), W, b)
    assert np.allclose(int_result, float_result)
@@ -333,23 +353,23 @@ def test_grad_disconnected():
    #tests corner cases of gradient for shape and alloc
-    x = theano.tensor.vector(name = 'x')
+    x = theano.tensor.vector(name='x')
    total = x.sum()
    total.name = 'total'
    num_elements = x.shape[0]
    num_elements.name = 'num_elements'
-    silly_vector = theano.tensor.alloc( total / num_elements, num_elements)
+    silly_vector = theano.tensor.alloc(total / num_elements, num_elements)
    silly_vector.name = 'silly_vector'
    cost = silly_vector.sum()
    cost.name = 'cost'
    #note that cost simplifies to be the same as "total"
-    g = gradient.grad(cost, x, add_names = False)
+    g = gradient.grad(cost, x, add_names=False)
    #we still need to pass in x because it determines the shape of the output
-    f = theano.function([x],g)
+    f = theano.function([x], g)
-    rng = np.random.RandomState([2012,9,5])
+    rng = np.random.RandomState([2012, 9, 5])
    x = np.cast[x.dtype](rng.randn(3))
    g = f(x)
-    assert np.allclose(g,np.ones(x.shape,dtype=x.dtype))
+    assert np.allclose(g, np.ones(x.shape, dtype=x.dtype))
 def test_disconnected_nan():
@@ -361,27 +381,27 @@ def test_disconnected_nan():
    class Op1(theano.gof.Op):
        def make_node(self, x):
            return theano.Apply(self, inputs=[x],
-                    outputs = [ x.type(), theano.tensor.scalar() ])
+                    outputs=[x.type(), theano.tensor.scalar()])
        def connection_pattern(self, node):
            return [[True, False]]
        def grad(self, inputs, output_grads):
-            return [ inputs[0].zeros_like()  ]
+            return [inputs[0].zeros_like()]
    # Op2 has two inputs, f and g
    # Its gradient with respect to g is not defined
    class Op2(theano.gof.Op):
        def make_node(self, f, g):
-            return theano.Apply(self, inputs=[f,g],
+            return theano.Apply(self, inputs=[f, g],
-                    outputs = [ theano.tensor.scalar() ])
+                    outputs=[theano.tensor.scalar()])
        def grad(self, inputs, output_grads):
-            return [ inputs[0].zeros_like(), NullType()() ]
+            return [inputs[0].zeros_like(), NullType()()]
    x = theano.tensor.vector()
    f, g = Op1()(x)
-    cost = Op2()(f,g)
+    cost = Op2()(f, g)
    # cost is differentiable wrt x
    # but we can't tell that without using Op1's connection pattern
@@ -394,7 +414,6 @@ def test_disconnected_nan():
    # connection_pattern functionality worked correctly
 def test_sum_disconnected():
    # Tests that we can add DisconnectedType to other terms correctly
@@ -402,7 +421,7 @@ def test_sum_disconnected():
    y = x * 2.
    z = x + 1.
    cost = y + z
-    theano.tensor.grad(cost, x, consider_constant=[y,z])
+    theano.tensor.grad(cost, x, consider_constant=[y, z])
    # In an earlier version of theano, the above line would have failed
    # while trying to add two DisconnectedTypes

--- a/theano/tests/test_rop.py
+++ b/theano/tests/test_rop.py
@@ -47,7 +47,7 @@ class BreakRop(Op):
        out[0] = x
    def grad(self, inp, grads):
-        return [ grad_undefined(self, 0, inp[0]) ]
+        return [grad_undefined(self, 0, inp[0])]
    def R_op(self, inputs, eval_points):
        return [None]
@@ -330,21 +330,21 @@ class test_RopLop(RopLop_checker):
        m_ = tensor.matrix('m_')
        v_ = tensor.vector('v_')
-        mval = self.rng.uniform(size=(3,7)).astype(theano.config.floatX)
+        mval = self.rng.uniform(size=(3, 7)).astype(theano.config.floatX)
        vval = self.rng.uniform(size=(7,)).astype(theano.config.floatX)
-        m_val = self.rng.uniform(size=(3,7)).astype(theano.config.floatX)
+        m_val = self.rng.uniform(size=(3, 7)).astype(theano.config.floatX)
        v_val = self.rng.uniform(size=(7,)).astype(theano.config.floatX)
-        rop_out1 = tensor.Rop([m, v, m+v], [m, v], [m_, v_])
+        rop_out1 = tensor.Rop([m, v, m + v], [m, v], [m_, v_])
        assert isinstance(rop_out1, list)
        assert len(rop_out1) == 3
-        rop_out2 = tensor.Rop((m, v, m+v), [m, v], [m_, v_])
+        rop_out2 = tensor.Rop((m, v, m + v), [m, v], [m_, v_])
        assert isinstance(rop_out2, tuple)
        assert len(rop_out2) == 3
-        lop_out1 = tensor.Lop([m, v, m+v], (m, v), [m_, v_])
+        lop_out1 = tensor.Lop([m, v, m + v], (m, v), [m_, v_])
        assert isinstance(lop_out1, tuple)
        assert len(lop_out1) == 2
-        lop_out2 = tensor.Lop((m, v, m+v), [m, v], [m_, v_])
+        lop_out2 = tensor.Lop((m, v, m + v), [m, v], [m_, v_])
        assert isinstance(lop_out2, list)
        assert len(lop_out2) == 2
@@ -353,4 +353,3 @@ class test_RopLop(RopLop_checker):
            all_outs.extend(o)
        f = theano.function([m, v, m_, v_], all_outs)
        f(mval, vval, m_val, v_val)