Merge pull request #3074 from harlouci/flake8_v2

flake8

Merge pull request #3074 from harlouci/flake8_v2
bd11e130 · Frédéric Bastien · cb08bc11 · fc6d2310 · bd11e130 · bd11e130
--- a/theano/tensor/basic.py
+++ b/theano/tensor/basic.py
 """A `Type` and `Op` classes to work with numpy.ndarrays symbolically."""
-__docformat__ = "restructuredtext en"
 import sys
 import warnings
@@ -29,7 +27,6 @@ from theano.printing import pprint, min_informative_str
 # For history
 from theano.compile import Rebroadcast, Shape, shape
 # We use these exceptions as well.
 import theano.scalar.sharedvar
 from theano.gradient import grad_undefined
@@ -42,6 +39,8 @@ from theano.tensor.elemwise import Elemwise, DimShuffle, CAReduce, Sum
 import logging
 _logger = logging.getLogger("theano.tensor.basic")
+__docformat__ = "restructuredtext en"
 # This is needed as we will hide it later
 python_complex = complex
 python_any = any
@@ -620,8 +619,8 @@ def get_scalar_constant_value(orig_v, elemwise=True,
                    ret = [[None]]
                    v.owner.op.perform(v.owner, const, ret)
                    return ret[0][0]
-            elif (isinstance(v.owner.op, theano.tensor.subtensor.Subtensor)
+            elif (isinstance(v.owner.op, theano.tensor.subtensor.Subtensor) and
-                  and v.ndim == 0):
+                  v.ndim == 0):
                if isinstance(v.owner.inputs[0], TensorConstant):
                    cdata = tuple(v.owner.op.get_constant_idx(v.owner.inputs))
                    try:
@@ -1090,7 +1089,7 @@ scalar_from_tensor = ScalarFromTensor()
 # to be removed as we get the epydoc routine-documenting thing going
-#-JB 20080924
+# -JB 20080924
 def _conversion(real_value, name):
    __oplist_tag(real_value, 'casting')
    real_value.__module__ = 'tensor.basic'
@@ -1235,8 +1234,8 @@ class MaxAndArgmax(Op):
                raise TypeError(
                    "MaxAndArgmax needs a constant axis. Got %s" % axis)
            else:
-                assert (axis.dtype.startswith("int")
+                assert (axis.dtype.startswith("int") or
-                        or axis.dtype.startswith("uint"))
+                        axis.dtype.startswith("uint"))
                axis = int(axis.data)
        # we make the axis all positive to make the infer_shape work
        # with negative axis
@@ -1373,13 +1372,13 @@ class MaxAndArgmax(Op):
        # Lebesgue measure, the result may be interpreted as weak gradient.
        # @note: This function should work correctly for L{vector}s.
-#        (x, y), (gz, gw)
+        # (x, y), (gz, gw)
-#        gz*dz/dx + gw*dw/dx, gz*dz/dy + gw*dw/dy
+        # gz*dz/dx + gw*dw/dx, gz*dz/dy + gw*dw/dy
-#        gMax * dMax/dx + gArgMax * dArgMax/dx,
+        # gMax * dMax/dx + gArgMax * dArgMax/dx,
-#                           gMax * dMax/daxis + gArgMax * dArgMax/daxis
+        # gMax * dMax/daxis + gArgMax * dArgMax/daxis
-#       g_max has one less dimension than x, so you need to complete
+        # g_max has one less dimension than x, so you need to complete
-#        g_max to x's shape when axis=0 the broadcasting mechanism
+        # g_max to x's shape when axis=0 the broadcasting mechanism
-#        does it automatically
+        # does it automatically
        x, axis = inp
        g_max, g_max_idx = grads
@@ -2078,7 +2077,7 @@ def chi2sf(x, k):
 # numpy.real(float32) return a view on the inputs.
-#@_scal_elemwise_with_nfunc('real', 1, 1)
+# @_scal_elemwise_with_nfunc('real', 1, 1)
 @_scal_elemwise
 def real(z):
    """Return real component of complex-valued tensor `z`"""
@@ -2116,7 +2115,7 @@ def complex_from_polar(abs, angle):
 # fill, _fill_inplace = _elemwise(scal.second, 'fill',
-    #"""fill WRITEME (elemwise)""")
+# """fill WRITEME (elemwise)""")
 @_scal_elemwise
 def second(a, b):
    """Create a matrix by filling the shape of a with b"""
@@ -3540,8 +3539,8 @@ class Join(Op):
        dtypes = [x.type.dtype for x in as_tensor_variable_args]
        out_dtype = scal.upcast(*dtypes)
-        output_maker = lambda bcastable: tensor(dtype=out_dtype,
+        def output_maker(bcastable):
-                                                broadcastable=bcastable)
+            return tensor(dtype=out_dtype, broadcastable=bcastable)
        return self._make_node_internal(
            axis, tensors, as_tensor_variable_args, output_maker)
@@ -4361,8 +4360,7 @@ class Tile(Op):
    def make_node(self, x, reps):
        warnings.warn((
-            "Tile op is deprecated, use tile function instead."),
+            "Tile op is deprecated, use tile function instead."), stacklevel=3)
-                      stacklevel=3)
        x = as_tensor_variable(x)
        reps = as_tensor_variable(reps)
        return gof.Apply(self, [x, reps], [tensor(x.type.dtype, [False] *
@@ -4427,8 +4425,9 @@ def tile(x, reps, ndim=None):
    except TypeError:
        raise ValueError("reps must be iterable")
    if not numpy.all([isinstance(r, integer_types) or
-        (isinstance(r, TensorVariable) and
+                      (isinstance(r, TensorVariable) and
-            r.dtype in ["int8", "int16", "int32", "int64"]) for r in reps]):
+                      r.dtype in ["int8", "int16", "int32", "int64"])
+                      for r in reps]):
        raise ValueError("elements of reps must be scalars of integer dtype")
    elif len(reps) != x.ndim:
        raise ValueError("len(reps) != x.ndim not currently supported")
@@ -4442,10 +4441,10 @@ def tile(x, reps, ndim=None):
    shape = [x.shape[i] for i in xrange(ndim)]
    alloc_shape = reps + shape
    y = alloc(x, *alloc_shape)
-    shuffle_ind = numpy.arange(ndim*2).reshape(2, ndim)
+    shuffle_ind = numpy.arange(ndim * 2).reshape(2, ndim)
    shuffle_ind = shuffle_ind.transpose().flatten()
    y = y.dimshuffle(*shuffle_ind)
-    new_shapes = [sh*reps[i] for i, sh in enumerate(shape)]
+    new_shapes = [sh * reps[i] for i, sh in enumerate(shape)]
    y = y.reshape(new_shapes)
    return y
@@ -4493,12 +4492,12 @@ class ARange(Op):
        def upcast(var):
            if ('int' in var.dtype and
-                # We do not want to cast uint64 to int64 as this can
+                    # We do not want to cast uint64 to int64 as this can
-                # loose information. If we upcast uint64 with int64,
+                    # lose information. If we upcast uint64 with int64,
-                # this give float64. This is safer then checking for
+                    # this gives float64. This is safer than checking for
-                # uint64 in case we support [u]int128 or other in the
+                    # uint64 in case we support [u]int128 or other in the
-                # future.
+                    # future.
-                scal.upcast(var.dtype, 'int64') == 'int64'):
+                    scal.upcast(var.dtype, 'int64') == 'int64'):
                return cast(var, 'int64')
            return var
@@ -4512,8 +4511,8 @@ class ARange(Op):
        else:
            stop = upcast(stop)
            start = upcast(start)
-            return [(maximum(cast(ceil(cast((stop - start), 'float64')
+            return [(maximum(cast(ceil(cast((stop - start), 'float64') / step),
-                                       / step), 'int64'), 0),)]
+                    'int64'), 0),)]
    def perform(self, node, inp, out_):
        start, stop, step = inp
@@ -4742,8 +4741,8 @@ class PermuteRowElements(Op):
        # the gradient over these axes, but keep the dimension (as
        # broadcastable)
        broadcasted_dims = [dim for dim in xrange(gz.type.ndim)
-                            if x.type.broadcastable[dim]
+                            if x.type.broadcastable[dim] and
-                            and not gz.type.broadcastable[dim]]
+                            not gz.type.broadcastable[dim]]
        gx = Sum(axis=broadcasted_dims)(gx)
        # Sum(...) removed the dimensions in broadcasted_dims,
@@ -4876,17 +4875,17 @@ class Dot(Op):
            xgrad = gz * y
            ygrad = gz * x
-        #x is vector, y is matrix, grad is vector
+        # x is vector, y is matrix, grad is vector
        elif xdim == 1 and ydim == 2:
            xgrad = dot(gz, y.T)
            ygrad = outer(x.T, gz)
-        #x is matrix, y is vector, grad is vector
+        # x is matrix, y is vector, grad is vector
        elif xdim == 2 and ydim == 1:
            xgrad = outer(gz, y.T)
            ygrad = dot(x.T, gz)
-        #x is matrix, y is matrix, grad is matrix
+        # x is matrix, y is matrix, grad is matrix
        elif xdim == ydim == 2:
            xgrad = dot(gz, y.T)
            ygrad = dot(x.T, gz)
@@ -4958,8 +4957,8 @@ class Dot(Op):
                if eval_point_values[i] is not None and \
                   input_values[i].shape != eval_point_values[i].shape:
                    raise ValueError(
-                        'input ' + str(i) + ' and eval_point ' + str(i)
+                        'input ' + str(i) + ' and eval_point ' + str(i) +
-                        + ' to Dot.R_op should have the same shape, but '
+                        ' to Dot.R_op should have the same shape, but '
                        'their shapes are %s and %s, respectively' % (
                            str(input_values[i].shape),
                            str(eval_point_values[i].shape)))
@@ -5230,8 +5229,8 @@ def tensordot(a, b, axes=2):
                             'equal to b.ndim (b.ndim=%i, max(axes[1])=%i).' %
                             (b.ndim, numpy.max(numpy.array(b_axes))))
-        a_order = (tuple(x for x in tuple(xrange(a.ndim)) if x not in a_axes)
+        a_order = (tuple(x for x in tuple(xrange(a.ndim)) if x not in a_axes) +
-                   + a_axes)
+                   a_axes)
        b_order = (b_axes + tuple(x
                                  for x in tuple(xrange(b.ndim))
                                  if x not in b_axes))
@@ -5528,8 +5527,8 @@ class Choose(Op):
            # dimensions for the output
            l = []
            for sh1, sh2, b1 in zip(shapes[0],
-                                        shapes[1][1:],
+                                    shapes[1][1:],
-                                        node.inputs[0].broadcastable):
+                                    node.inputs[0].broadcastable):
                if b1:
                    l.append(sh2)
                else:
@@ -5635,7 +5634,7 @@ class AllocEmpty(gof.Op):
            out[0] = numpy.empty(sh, dtype=self.dtype)
    def c_code(self, node, name, inputs, out_, sub):
-        dtype = "NPY_"+self.dtype.upper()
+        dtype = "NPY_" + self.dtype.upper()
        out, = out_
        fail = sub['fail']
        shps = inputs

--- a/theano/tensor/blas.py
+++ b/theano/tensor/blas.py
@@ -266,7 +266,7 @@ SOMEPATH/Canopy_64bit/User/lib/python2.7/site-packages/numpy/distutils/system_in
            # Using "conda install mkl" will install both, as well as
            # optimized versions of numpy and scipy.
            try:
-                import mkl
+                import mkl #noqa
            except ImportError as e:
                _logger.info('Conda mkl is not available: %s', e)
            else:
@@ -1599,11 +1599,11 @@ class GemmOptimizer(Optimizer):
                        )
                        did_something = True
                        nb_replacement += 1
-                    except InconsistencyError as e:
+                    except InconsistencyError:
                        # TODO: retry other applications of gemm (see comment
                        # in _gemm_from_node)
                        nb_inconsistency_replace += 1
-                    except ReplacementDidntRemovedError as e:
+                    except ReplacementDidntRemovedError:
                        nb_replacement_didn_t_remove += 1
                        self.warned = True
        fgraph.remove_feature(u)

--- a/theano/tensor/elemwise.py
+++ b/theano/tensor/elemwise.py
@@ -11,7 +11,7 @@ from six import iteritems
 from six.moves import xrange
 from theano.gof import Apply, Op, OpenMPOp
 from theano import scalar
-from theano.scalar import Scalar, get_scalar_type
+from theano.scalar import get_scalar_type
 from theano.printing import pprint
 from theano.tensor.utils import hash_from_dict
 from theano.gradient import DisconnectedType
@@ -50,7 +50,7 @@ def TensorConstant(*inputs, **kwargs):
 ##################
-### DimShuffle ###
+#   DimShuffle   #
 ##################
 class DimShuffle(Op):
@@ -139,8 +139,8 @@ class DimShuffle(Op):
                    raise TypeError("DimShuffle indices must be python ints.")
                if j >= len(input_broadcastable):
                    raise ValueError(("new_order[%d] is %d, but the input "
-                        "only has %d axes.") %
+                                      "only has %d axes.") %
-                        (i, j, len(input_broadcastable)))
+                                     (i, j, len(input_broadcastable)))
                if j in new_order[(i + 1):]:
                    raise ValueError("The same input dimension may not appear "
                                     "twice in the list of output dimensions",
@@ -207,7 +207,7 @@ class DimShuffle(Op):
                ob.append(ib[value])
        output = TensorType(dtype=input.type.dtype,
-                        broadcastable=ob).make_variable()
+                            broadcastable=ob).make_variable()
        return Apply(self, [input], [output])
@@ -219,12 +219,11 @@ class DimShuffle(Op):
            and self.input_broadcastable == other.input_broadcastable
    def _rehash(self):
-        self._hashval = (
+        self._hashval = (hash(type(self).__name__) ^
-                hash(type(self).__name__)
+                         hash(type(self).__module__) ^
-                ^ hash(type(self).__module__)
+                         hash(self.inplace) ^
-                ^ hash(self.inplace)
+                         hash(self.new_order) ^
-                ^ hash(self.new_order)
+                         hash(self.input_broadcastable))
-                ^ hash(self.input_broadcastable))
    def __hash__(self):
        return self._hashval
@@ -232,7 +231,7 @@ class DimShuffle(Op):
    def __str__(self):
        if self.inplace:
            return "InplaceDimShuffle{%s}" % ",".join(str(x)
-                    for x in self.new_order)
+                                                      for x in self.new_order)
        else:
            return "DimShuffle{%s}" % ",".join(str(x) for x in self.new_order)
@@ -286,7 +285,8 @@ class DimShuffle(Op):
        nd_out = len(self.new_order)
        check_input_nd = [('if (PyArray_NDIM(%(input)s) != ' + str(nd_in) + ')'
-                '{PyErr_SetString(PyExc_NotImplementedError, "input nd"); %(fail)s;}')]
+                           '{PyErr_SetString(PyExc_NotImplementedError, '
+                           '"input nd"); %(fail)s;}')]
        clear_output = ['if (%(res)s) {Py_XDECREF(%(res)s);}']
@@ -296,8 +296,10 @@ class DimShuffle(Op):
            get_base = [
                '{ PyArrayObject * %(basename)s = %(input)s', 'Py_INCREF((PyObject*)%(basename)s)']
        else:
-            get_base = [('{ PyArrayObject * %(basename)s = (PyArrayObject*)PyArray_FromAny((PyObject*)%(input)s, NULL,'
+            get_base = [('{ PyArrayObject * %(basename)s = '
-                    '0, 0, NPY_ARRAY_ALIGNED|NPY_ARRAY_ENSURECOPY, NULL)')]
+                         '(PyArrayObject*)PyArray_FromAny((PyObject*)%(input)s,'
+                         ' NULL, 0, 0, NPY_ARRAY_ALIGNED|NPY_ARRAY_ENSURECOPY,'
+                         ' NULL)')]
        shape_statements = ['npy_intp dimensions[%i]' % nd_out]
        for i, o in enumerate(self.new_order):
@@ -312,9 +314,12 @@ class DimShuffle(Op):
        # set the strides of the non-broadcasted dimensions
        for i, o in enumerate(self.new_order):
            if o != 'x':
-                strides_statements += [('strides[' + str(i)
+                strides_statements += [('strides[' + str(i) +
-                     + '] = PyArray_DIMS(%(basename)s)[' + str(o)
+                                        '] = PyArray_DIMS(%(basename)s)[' +
-                     + '] == 1? 0 : PyArray_STRIDES(%(basename)s)[' + str(o) + ']')]
+                                        str(o) +
+                                        '] == 1? 0 : '
+                                        'PyArray_STRIDES(%(basename)s)[' +
+                                        str(o) + ']')]
            else:
                strides_statements += [('strides[' + str(i) + '] = 0')]
@@ -360,12 +365,12 @@ PyArray_SetBaseObject(%(res)s, (PyObject*)%(basename)s);
 """
            '}']
-        full_code = statements(check_input_nd
+        full_code = statements(check_input_nd +
-                + clear_output
+                               clear_output +
-                + get_base
+                               get_base +
-                + shape_statements
+                               shape_statements +
-                + strides_statements
+                               strides_statements +
-                + close_bracket)
+                               close_bracket)
        if 0:
            print('C_CODE')
@@ -408,7 +413,7 @@ PyArray_SetBaseObject(%(res)s, (PyObject*)%(basename)s);
 class DimShufflePrinter:
    def __p(self, new_order, pstate, r):
-        if new_order != () and  new_order[0] == 'x':
+        if new_order != () and new_order[0] == 'x':
            return "%s" % self.__p(new_order[1:], pstate, r)
 #            return "[%s]" % self.__p(new_order[1:], pstate, r)
        if list(new_order) == list(range(r.type.ndim)):
@@ -416,7 +421,7 @@ class DimShufflePrinter:
        if list(new_order) == list(reversed(range(r.type.ndim))):
            return "%s.T" % pstate.pprinter.process(r)
        return "DimShuffle{%s}(%s)" % (", ".join(map(str, new_order)),
-                pstate.pprinter.process(r))
+                                       pstate.pprinter.process(r))
    def process(self, r, pstate):
        if r.owner is None:
@@ -428,11 +433,11 @@ class DimShufflePrinter:
            raise TypeError("Can only print DimShuffle.")
 pprint.assign(lambda pstate, r: r.owner and isinstance(r.owner.op, DimShuffle),
-        DimShufflePrinter())
+              DimShufflePrinter())
 ################
-### Elemwise ###
+#   Elemwise   #
 ################
 class Elemwise(OpenMPOp):
@@ -496,7 +501,7 @@ class Elemwise(OpenMPOp):
            self.nfunc = getattr(numpy, nfunc_spec[0])
        elif scalar_op.nin > 0:
            self.ufunc = numpy.frompyfunc(scalar_op.impl, scalar_op.nin,
-                    scalar_op.nout)
+                                          scalar_op.nout)
        # precompute the hash of this node
        self._rehash()
@@ -518,7 +523,8 @@ class Elemwise(OpenMPOp):
            self.nfunc = getattr(numpy, self.nfunc_spec[0])
        elif self.scalar_op.nin > 0:
            self.ufunc = numpy.frompyfunc(self.scalar_op.impl,
-                    self.scalar_op.nin, self.scalar_op.nout)
+                                          self.scalar_op.nin,
+                                          self.scalar_op.nout)
        self._rehash()
    def make_node(self, *inputs):
@@ -557,15 +563,16 @@ class Elemwise(OpenMPOp):
        # it is multiplied by nout because Elemwise supports multiple outputs
        # (nout of them)
        out_broadcastables = [[all(bcast)
-            for bcast in izip(*[input.type.broadcastable
+                               for bcast in
-                for input in inputs])]] * shadow.nout
+                               izip(*[input.type.broadcastable
+                                      for input in inputs])]] * shadow.nout
        # inplace_pattern maps output idx -> input idx
        inplace_pattern = self.inplace_pattern
        if inplace_pattern:
            for overwriter, overwritten in iteritems(inplace_pattern):
                for ob, ib in izip(out_broadcastables[overwriter],
-                                  inputs[overwritten].type.broadcastable):
+                                   inputs[overwritten].type.broadcastable):
                    if ib and not ob:
                        raise ValueError(
                            "Operation cannot be done inplace on an input "
@@ -579,8 +586,8 @@ class Elemwise(OpenMPOp):
                ([i.type.dtype for i in inputs], out_dtypes, inplace_pattern)))
        outputs = [TensorType(dtype=dtype, broadcastable=broadcastable)()
-            for dtype, broadcastable in izip(out_dtypes, out_broadcastables)
+                   for dtype, broadcastable in izip(out_dtypes,
-            ]
+                                                    out_broadcastables)]
        return Apply(self, inputs, outputs)
    def __eq__(self, other):
@@ -589,8 +596,8 @@ class Elemwise(OpenMPOp):
            other_items = list(other.inplace_pattern.items())
            items.sort()
            other_items.sort()
-            rval = ((self.scalar_op == other.scalar_op)
+            rval = ((self.scalar_op == other.scalar_op) and
-                    and (items == other_items))
+                    (items == other_items))
            return rval
        return False
@@ -628,7 +635,7 @@ class Elemwise(OpenMPOp):
            rop_out = None
            for jdx, (inp, eval_point) in enumerate(izip(inputs,
-                                                        eval_points)):
+                                                    eval_points)):
                # if None, then we can just ignore this branch ..
                # what we do is to assume that for any non-differentiable
                # branch, the gradient is actually 0, which I think is not
@@ -668,7 +675,7 @@ class Elemwise(OpenMPOp):
        # to the gradient.grad method when the outputs have
        # some integer and some floating point outputs
        if False in [str(out.type.dtype).find('int') == -1
-                for out in outs]:
+                     for out in outs]:
            # For integer output, return value may
            # only be zero or undefined
            # We don't bother with trying to check
@@ -699,7 +706,7 @@ class Elemwise(OpenMPOp):
            # we can sum over them
            # todo: only count dimensions that were effectively broadcasted
            to_sum = [j for j, bcast in enumerate(ipt.type.broadcastable)
-                    if bcast]
+                      if bcast]
            if to_sum:
                shuffle = []
@@ -714,7 +721,7 @@ class Elemwise(OpenMPOp):
                # close for
                sr = Sum(axis=to_sum)(rval[i])
                sr = sr.dimshuffle(shuffle)
-                #sr = DimShuffle(sr.type.broadcastable, shuffle)(sr)
+                # sr = DimShuffle(sr.type.broadcastable, shuffle)(sr)
                rval[i] = sr
            # close if
        # close for
@@ -747,7 +754,7 @@ class Elemwise(OpenMPOp):
        if not isinstance(scalar_igrads, (list, tuple)):
            raise TypeError('%s.grad returned %s instead of list or tuple' %
-                    (str(self.scalar_op), str(type(scalar_igrads))))
+                            (str(self.scalar_op), str(type(scalar_igrads))))
        nd = len(inputs[0].type.broadcastable)  # this is the same for everyone
@@ -787,9 +794,8 @@ class Elemwise(OpenMPOp):
            # should be disabled.
            super(Elemwise, self).perform(node, inputs, output_storage)
-        maxsize = max(len(input.shape) for input in inputs)
        for dims in izip(*[list(zip(input.shape, sinput.type.broadcastable))
-                          for input, sinput in zip(inputs, node.inputs)]):
+                           for input, sinput in zip(inputs, node.inputs)]):
            if max(d for d, b in dims) != 1 and (1, False) in dims:
                # yes there may be more compact ways to write this code,
                # but please maintain python 2.4 compatibility
@@ -1115,7 +1121,7 @@ class Elemwise(OpenMPOp):
        # use it! The scalar_op need to check the broadcast flag himself.
        if (all([o.ndim >= 1 for o in node.outputs]) and
            # Don't use the contig code for broadcasted scalar.
-            not all(node.outputs[0].broadcastable)):
+                not all(node.outputs[0].broadcastable)):
            contig = None
            try:
                contig = self.scalar_op.c_code_contiguous(
@@ -1192,19 +1198,20 @@ class Elemwise(OpenMPOp):
        return self.scalar_op.c_support_code()
    def c_support_code_apply(self, node, nodename):
-        support_code = self.scalar_op.c_support_code_apply(node,
+        support_code = self.scalar_op.c_support_code_apply(node, nodename +
-                nodename + '_scalar_')
+                                                           '_scalar_')
        return support_code
    def c_code_cache_version_apply(self, node):
        version = [12]  # the version corresponding to the c code in this Op
        # now we insert versions for the ops on which we depend...
-        scalar_node = Apply(self.scalar_op,
+        scalar_node = Apply(
-                [get_scalar_type(dtype=input.type.dtype).make_variable()
+            self.scalar_op,
-                 for input in node.inputs],
+            [get_scalar_type(dtype=input.type.dtype).make_variable()
-                [get_scalar_type(dtype=output.type.dtype).make_variable()
+             for input in node.inputs],
-                 for output in node.outputs])
+            [get_scalar_type(dtype=output.type.dtype).make_variable()
+             for output in node.outputs])
        version.append(self.scalar_op.c_code_cache_version_apply(scalar_node))
        for i in node.inputs + node.outputs:
            version.append(get_scalar_type(dtype=i.type.dtype).c_code_cache_version())
@@ -1233,7 +1240,7 @@ class Elemwise(OpenMPOp):
 ################
-### CAReduce ###
+#   CAReduce   #
 ################
 class CAReduce(Op):
@@ -1325,8 +1332,8 @@ class CAReduce(Op):
        if self.axis is not None:
            for axis in self.axis:
-                if (axis >= input.type.ndim
+                if (axis >= input.type.ndim or
-                        or (axis < 0 and abs(axis) > input.type.ndim)):
+                        (axis < 0 and abs(axis) > input.type.ndim)):
                    raise ValueError((
                        'Not enough dimensions on %s to reduce on axis %s'
                        % (input, axis)))
@@ -1366,9 +1373,9 @@ class CAReduce(Op):
        self.set_ufunc(self.scalar_op)
    def __eq__(self, other):
-        return (type(self) == type(other)
+        return (type(self) == type(other) and
-                and self.scalar_op == other.scalar_op
+                self.scalar_op == other.scalar_op and
-                and self.axis == other.axis)
+                self.axis == other.axis)
    def __hash__(self):
        if self.axis is None:
@@ -1420,13 +1427,13 @@ class CAReduce(Op):
                    # was built with "frompyfunc". We need to find out if we
                    # are in one of these cases (only "object" is supported in
                    # the output).
-                    if ((self.ufunc.ntypes == 1)
+                    if ((self.ufunc.ntypes == 1) and
-                            and (self.ufunc.types[0][-1] == 'O')):
+                            (self.ufunc.types[0][-1] == 'O')):
                        variable = self.ufunc.reduce(variable, dimension,
-                                dtype='object')
+                                                     dtype='object')
                    else:
                        variable = self.ufunc.reduce(variable, dimension,
-                                dtype=acc_dtype)
+                                                     dtype=acc_dtype)
            variable = numpy.asarray(variable)
            if numpy.may_share_memory(variable, input):
@@ -1434,7 +1441,7 @@ class CAReduce(Op):
                # We don't want this.
                variable = variable.copy()
            output[0] = theano._asarray(variable,
-                    dtype=node.outputs[0].type.dtype)
+                                        dtype=node.outputs[0].type.dtype)
        else:
            # Force a copy
            output[0] = numpy.array(variable, copy=True,
@@ -1568,27 +1575,25 @@ for(int i=0;i<PyArray_NDIM(%(iname)s);i++){
                   """ % locals()
        else:
            raise TypeError(
-                    "The CAReduce.scalar_op must have an identity field.")
+                "The CAReduce.scalar_op must have an identity field.")
-        task0_decl = (
+        task0_decl = ("%(dtype)s& %(name)s_i = *%(name)s_iter;\n"
-                "%(dtype)s& %(name)s_i = *%(name)s_iter;\n"
+                      "%(name)s_i = %(identity)s;"
-                "%(name)s_i = %(identity)s;"
+                      % dict(dtype=adtype, name=aname, identity=identity))
-                % dict(dtype=adtype, name=aname, identity=identity))
        task1_decl = ("%(dtype)s& %(name)s_i = *%(name)s_iter;\n"
-                % dict(dtype=idtype, name=inames[0]))
+                      % dict(dtype=idtype, name=inames[0]))
        task1_code = self.scalar_op.c_code(
-                Apply(
+            Apply(self.scalar_op,
-                    self.scalar_op,
+                  [get_scalar_type(dtype=input.type.dtype).make_variable()
-                    [get_scalar_type(dtype=input.type.dtype).make_variable()
+                   for input in (node.inputs * 2)],
-                        for input in (node.inputs * 2)],
+                  [get_scalar_type(dtype=output.type.dtype).make_variable()
-                    [get_scalar_type(dtype=output.type.dtype).make_variable()
+                   for input in node.outputs]),
-                        for input in node.outputs]),
+            None,
-                None,
+            ["%s_i" % aname, "%s_i" % inames[0]],
-                ["%s_i" % aname, "%s_i" % inames[0]],
+            ["%s_i" % aname],
-                ["%s_i" % aname],
+            sub)
-                sub)
        code1 = """
        {
            %(task1_decl)s
@@ -1600,11 +1605,10 @@ for(int i=0;i<PyArray_NDIM(%(iname)s);i++){
            if len(axis) == 1:
                all_code = [("", "")] * nnested + [(task0_decl, code1), ""]
            else:
-                all_code = (
+                all_code = ([("", "")] * nnested +
-                        [("", "")] * nnested
+                            [(task0_decl, "")] +
-                        + [(task0_decl, "")]
+                            [("", "")] * (len(axis) - 2) +
-                        + [("", "")] * (len(axis) - 2)
+                            [("", code1), ""])
-                        + [("", code1), ""])
        else:
            all_code = [task0_decl + code1]
        loop = cgen.make_loop_careduce(
@@ -1632,11 +1636,12 @@ for(int i=0;i<PyArray_NDIM(%(iname)s);i++){
        version = [5]  # the version corresponding to the c code in this Op
        # now we insert versions for the ops on which we depend...
-        scalar_node = Apply(self.scalar_op,
+        scalar_node = Apply(
-                [get_scalar_type(dtype=input.type.dtype).make_variable()
+            self.scalar_op,
-                 for input in node.inputs],
+            [get_scalar_type(dtype=input.type.dtype).make_variable()
-                [get_scalar_type(dtype=output.type.dtype).make_variable()
+             for input in node.inputs],
-                 for output in node.outputs])
+            [get_scalar_type(dtype=output.type.dtype).make_variable()
+             for output in node.outputs])
        version.append(self.scalar_op.c_code_cache_version_apply(scalar_node))
        for i in node.inputs + node.outputs:
            version.append(get_scalar_type(dtype=i.type.dtype).c_code_cache_version())
@@ -1760,9 +1765,9 @@ class CAReduceDtype(CAReduce):
        self.acc_dtype = acc_dtype
    def __eq__(self, other):
-        return (CAReduce.__eq__(self, other)
+        return (CAReduce.__eq__(self, other) and
-                and self.dtype == other.dtype
+                self.dtype == other.dtype and
-                and self.acc_dtype == other.acc_dtype)
+                self.acc_dtype == other.acc_dtype)
    def __hash__(self):
        return CAReduce.__hash__(self) ^ hash((self.dtype, self.acc_dtype))
@@ -1968,8 +1973,8 @@ class Prod(CAReduceDtype):
            self.no_zeros_in_input = False
    def __eq__(self, other):
-        return (CAReduceDtype.__eq__(self, other)
+        return (CAReduceDtype.__eq__(self, other) and
-                and self.no_zeros_in_input == other.no_zeros_in_input)
+                self.no_zeros_in_input == other.no_zeros_in_input)
    def __hash__(self):
        return (CAReduceDtype.__hash__(self) ^
@@ -2124,25 +2129,26 @@ class MulWithoutZeros(scalar.BinaryScalarOp):
    def c_code(self, node, name, inp, out, sub):
        x, y = inp
        z, = out
-        return (("%(z)s = ((%(x)s == 0) ? (%(y)s) : "
+        return (("%(z)s = ((%(x)s == 0) ? (%(y)s) : " +
-                 + "((%(y)s == 0) ? (%(x)s) : ((%(y)s)*(%(x)s))) );")
+                "((%(y)s == 0) ? (%(x)s) : ((%(y)s)*(%(x)s))) );")
                % locals())
    def c_code_cache_version(self):
        return (1,)
-mul_without_zeros = MulWithoutZeros(scalar.upcast_out,
+mul_without_zeros = MulWithoutZeros(scalar.upcast_out, name='mul_without_zeros')
-        name='mul_without_zeros')
 class ProdWithoutZeros(CAReduceDtype):
    def __init__(self, axis=None, dtype=None, acc_dtype=None):
        CAReduceDtype.__init__(self, mul_without_zeros, axis=axis,
                               dtype=dtype, acc_dtype=acc_dtype)
    def grad(self, inp, grads):
+        """Return the `grad_not_implemented` result for the only input."""
        a, = inp
-        a_grad = theano.gradient.grad_not_implemented(self, 0, a,
+        # Adjacent string literals are joined with no separator, so the
+        # first sentence must carry its own trailing space.
+        a_grad = theano.gradient.grad_not_implemented(
-                "2nd derivatives of `product(a)` is not currently supported." 
+            self, 0, a,
-                "If `a` is guarenteed to contains no zeros, use `product(a, no_zeros_in_input=True)`."
+            "2nd derivatives of `product(a)` is not currently supported. "
-                )
+            "If `a` is guaranteed to contain no zeros, use "
+            "`product(a, no_zeros_in_input=True)`.")
        return [a_grad]
--- a/theano/tensor/inplace.py
+++ b/theano/tensor/inplace.py
@@ -28,7 +28,6 @@ def _scal_inplace(symbol):
    def chk(pstate, r):
        if not r.owner:
            return False
-        op = r.owner.op
        return r.owner.op == rval
    pprint.assign(chk, printing.FunctionPrinter(symbolname.replace('_inplace', '=')))

--- a/theano/tensor/opt.py
+++ b/theano/tensor/opt.py
@@ -6,8 +6,6 @@ from __future__ import print_function
 # TODO: 0*x -> 0
 import logging
-_logger = logging.getLogger('theano.tensor.opt')
 import itertools
 import operator
 import sys
@@ -34,12 +32,10 @@ from theano.tensor.subtensor import (get_idx_list, get_canonical_form_slice,
                                     Subtensor, IncSubtensor, make_constant,
                                     AdvancedIncSubtensor1,
                                     AdvancedIncSubtensor,
-                                     AdvancedSubtensor,
                                     AdvancedSubtensor1,
                                     advanced_subtensor,
                                     advanced_subtensor1,
-                                     advanced_inc_subtensor1,
+                                     advanced_inc_subtensor1)
-                                     inc_subtensor)
 from theano import scalar
 from theano.scalar import basic
 from theano.tensor import basic as T
@@ -56,6 +52,8 @@ from theano.gof import toolbox
 from theano.tensor.basic import get_scalar_constant_value, ShapeError, NotScalarConstantError
 from six import StringIO
+_logger = logging.getLogger('theano.tensor.opt')
 theano.configparser.AddConfigVar('on_shape_error',
                                 "warn: print a warning and use the default"
                                 " value. raise: raise an error",
@@ -165,23 +163,24 @@ def broadcast_like(value, template, fgraph, dtype=None):
    # the template may have 1s in its shape without being broadcastable
    if rval.broadcastable != template.broadcastable:
        rval = T.unbroadcast(rval, *[i for i in xrange(rval.ndim)
-                                     if rval.broadcastable[i]
+                                     if rval.broadcastable[i] and
-            and not template.broadcastable[i]])
+                                     not template.broadcastable[i]])
    assert rval.type.dtype == dtype
    if rval.type.broadcastable != template.broadcastable:
        raise AssertionError("rval.type.broadcastable is " +
-                str(rval.type.broadcastable) +
+                             str(rval.type.broadcastable) +
-                " but template.broadcastable is" +
+                             " but template.broadcastable is" +
-                str(template.broadcastable))
+                             str(template.broadcastable))
    return rval
-theano.configparser.AddConfigVar('tensor.insert_inplace_optimizer_validate_nb',
+theano.configparser.AddConfigVar(
-        "-1: auto, if graph have less then 500 nodes 1, else 10",
+    'tensor.insert_inplace_optimizer_validate_nb',
-        theano.configparser.IntParam(-1),
+    "-1: auto, if graph have less then 500 nodes 1, else 10",
-        in_c_key=False)
+    theano.configparser.IntParam(-1),
+    in_c_key=False)
 def inplace_elemwise_optimizer_op(OP):
@@ -251,11 +250,10 @@ def inplace_elemwise_optimizer_op(OP):
            # target.
            # Remove here as faster.
            candidate_inputs = [i for i in xrange(len(node.inputs))
-                                if i not in baseline.values() \
+                                if i not in baseline.values() and
-                                    and not isinstance(node.inputs[i],
+                                not isinstance(node.inputs[i], Constant) and
-                                                       Constant)\
+                                not fgraph.destroyers(node.inputs[i]) and
-                                    and not fgraph.destroyers(node.inputs[i])\
+                                node.inputs[i] not in protected_inputs]
-                                    and node.inputs[i] not in protected_inputs]
            verbose = False
@@ -265,7 +263,7 @@ def inplace_elemwise_optimizer_op(OP):
                for candidate_input in candidate_inputs:
                    # remove inputs that don't have the same dtype as the output
                    if node.inputs[candidate_input].type != node.outputs[
-                        candidate_output].type:
+                            candidate_output].type:
                        continue
                    inplace_pattern = dict(baseline)
@@ -274,20 +272,20 @@ def inplace_elemwise_optimizer_op(OP):
                        if hasattr(op.scalar_op, "make_new_inplace"):
                            new_scal = op.scalar_op.make_new_inplace(
                                scalar.transfer_type(
-                                    *[inplace_pattern.get(i, None) \
+                                    *[inplace_pattern.get(i, None)
-                                          for i in xrange(len(node.outputs))]))
+                                      for i in xrange(len(node.outputs))]))
                        else:
                            new_scal = op.scalar_op.__class__(
                                scalar.transfer_type(
-                                    *[inplace_pattern.get(i, None) \
+                                    *[inplace_pattern.get(i, None)
-                                          for i in xrange(len(node.outputs))]))
+                                      for i in xrange(len(node.outputs))]))
                        new_outputs = OP(new_scal, inplace_pattern)(
-                                *node.inputs, **dict(return_list=True))
+                            *node.inputs, **dict(return_list=True))
                        new_node = new_outputs[0].owner
                        for r, new_r in zip(node.outputs, new_outputs):
                            fgraph.replace(r, new_r,
-                                        reason="inplace_elemwise_optimizer")
+                                           reason="inplace_elemwise_optimizer")
                        nb_change_no_validate += 1
                        if nb_change_no_validate >= check_each_change:
                            fgraph.validate()
@@ -295,9 +293,9 @@ def inplace_elemwise_optimizer_op(OP):
                            nb_change_no_validate = 0
                    except (ValueError, TypeError, InconsistencyError) as e:
                        if check_each_change != 1 and not raised_warning:
-                            print((
+                            print(("Some inplace optimization was not "
-                                    "Some inplace optimization was not "
+                                   "performed due to unexpected error:"),
-                                    "performed due to unexpected error:"), file=sys.stderr)
+                                  file=sys.stderr)
                            print(e, file=sys.stderr)
                            raised_warning = True
                        fgraph.revert(chk)
@@ -313,7 +311,8 @@ def inplace_elemwise_optimizer_op(OP):
            except Exception:
                if not raised_warning:
                    print(("Some inplace optimization was not "
-                                          "performed due to unexpected error"), file=sys.stderr)
+                           "performed due to unexpected error"),
+                          file=sys.stderr)
                fgraph.revert(chk)
    return inplace_elemwise_optimizer
@@ -381,8 +380,8 @@ def register_specialize_device(lopt, *tags, **kwargs):
 # Register merge_optimizer as a global opt during canonicalize
-compile.optdb['canonicalize'].register(
+compile.optdb['canonicalize'].register('canon_merge', merge_optimizer,
-        'canon_merge', merge_optimizer, 'fast_run', final_opt=True)
+                                       'fast_run', final_opt=True)
 #####################
@@ -512,11 +511,10 @@ def local_lift_transpose_through_dot(node):
    inplace.  The newly-introduced transpositions are not inplace, this will
    be taken care of in a later optimization phase.
    """
-    if not (isinstance(node.op, T.DimShuffle)
+    if not (isinstance(node.op, T.DimShuffle) and node.op.new_order == (1, 0)):
-            and node.op.new_order == (1, 0)):
        return False
-    if not (node.inputs[0].owner
+    if not (node.inputs[0].owner and
-            and isinstance(node.inputs[0].owner.op, T.Dot)):
+            isinstance(node.inputs[0].owner.op, T.Dot)):
        return False
    x, y = node.inputs[0].owner.inputs
@@ -601,22 +599,19 @@ class MakeVector(T.Op):
    def make_node(self, *inputs):
        inputs = list(map(T.as_tensor_variable, inputs))
-        if not all(a.type == inputs[0].type for a in inputs) or (
+        if (not all(a.type == inputs[0].type for a in inputs) or
-            len(inputs) > 0 and inputs[0].dtype != self.dtype):
+                (len(inputs) > 0 and inputs[0].dtype != self.dtype)):
-            dtype = theano.scalar.upcast(self.dtype,
+            dtype = theano.scalar.upcast(self.dtype, *[i.dtype for i in inputs])
-                                         *[i.dtype for i in inputs])
            # upcast the input to the determined dtype,
            # but don't downcast anything
            assert dtype == self.dtype, (
-                    "The upcast of the inputs to MakeVector should match the "
+                "The upcast of the inputs to MakeVector should match the "
-                    "dtype given in __init__.")
+                "dtype given in __init__.")
            if not all(self.dtype == T.cast(i, dtype=dtype).dtype
                       for i in inputs):
                raise TypeError("MakeVector.make_node expected inputs"
-                                " upcastable to %s. got %s" % (
+                                " upcastable to %s. got %s" %
-                        self.dtype,
+                                (self.dtype, str([i.dtype for i in inputs])))
-                        str([i.dtype for i in inputs])
-                        ))
            inputs = [T.cast(i, dtype=dtype) for i in inputs]
        assert all(self.dtype == a.dtype for a in inputs)
        assert all(a.ndim == 0 for a in inputs)
@@ -625,11 +620,9 @@ class MakeVector(T.Op):
            dtype = inputs[0].type.dtype
        else:
            dtype = self.dtype
-        #bcastable = (len(inputs) == 1)
+        # bcastable = (len(inputs) == 1)
        bcastable = False
-        otype = T.TensorType(
+        otype = T.TensorType(broadcastable=(bcastable,), dtype=dtype)
-                broadcastable=(bcastable,),
-                dtype=dtype)
        return T.Apply(self, inputs, [otype()])
    def __str__(self):
@@ -700,13 +693,14 @@ class MakeVectorPrinter:
        if r.owner is None:
            raise TypeError("Can only print make_vector.")
        elif isinstance(r.owner.op, MakeVector):
-            return "[%s]" % ", ".join(pstate.pprinter.process(
+            return "[%s]" % ", ".join(
-                    input, pstate.clone(precedence=1000)) for input
+                pstate.pprinter.process(input, pstate.clone(precedence=1000))
-                                      in r.owner.inputs)
+                for input in r.owner.inputs)
        else:
            raise TypeError("Can only print make_vector.")
-T.pprint.assign(lambda pstate, r: r.owner and isinstance(
-        r.owner.op, MakeVector), MakeVectorPrinter())
+T.pprint.assign(lambda pstate, r: r.owner and
+                isinstance(r.owner.op, MakeVector), MakeVectorPrinter())
 class ShapeFeature(object):
@@ -843,8 +837,8 @@ class ShapeFeature(object):
            # by always returning the same object to represent 1
            return self.lscalar_one
        if (type(s_i) in integer_types or
-            isinstance(s_i, numpy.integer) or
+                isinstance(s_i, numpy.integer) or
-            (isinstance(s_i, numpy.ndarray) and s_i.ndim == 0)):
+                (isinstance(s_i, numpy.ndarray) and s_i.ndim == 0)):
            # this shape is a constant
            assert s_i >= 0
            return T.constant(s_i, dtype='int64')
@@ -859,9 +853,9 @@ class ShapeFeature(object):
        # s_i is x.shape[i], we change it to Shape_i.
        if (s_i.owner and
-            isinstance(s_i.owner.op, Subtensor) and
+                isinstance(s_i.owner.op, Subtensor) and
-            s_i.owner.inputs[0].owner and
+                s_i.owner.inputs[0].owner and
-            isinstance(s_i.owner.inputs[0].owner.op, T.Shape)):
+                isinstance(s_i.owner.inputs[0].owner.op, T.Shape)):
            assert s_i.ndim == 0
            assert len(s_i.owner.op.idx_list) == 1
@@ -883,7 +877,7 @@ class ShapeFeature(object):
            return s_i
        else:
            raise TypeError('Unsupported shape element',
-                    s_i, type(s_i), getattr(s_i, 'type', None))
+                            s_i, type(s_i), getattr(s_i, 'type', None))
    def set_shape(self, r, s):
        """Assign the shape `s` to previously un-shaped variable `r`.
@@ -910,7 +904,7 @@ class ShapeFeature(object):
            shape_vars = []
            for i in xrange(r.ndim):
                if (hasattr(r.type, 'broadcastable') and
-                    r.type.broadcastable[i]):
+                        r.type.broadcastable[i]):
                    shape_vars.append(self.lscalar_one)
                else:
                    shape_vars.append(self.unpack(s[i]))
@@ -947,8 +941,8 @@ class ShapeFeature(object):
            self.set_shape(r, other_shape)
            return
        if (other_r.owner and r.owner and
-            other_r.owner.inputs == r.owner.inputs and
+                other_r.owner.inputs == r.owner.inputs and
-            other_r.owner.op == r.owner.op):
+                other_r.owner.op == r.owner.op):
            # We are doing a merge. So the 2 shapes graph will be the
            # same.  This is only a speed optimization to call
            # ancestors() less frequently.
@@ -957,10 +951,10 @@ class ShapeFeature(object):
        # Merge other_shape with r_shape, giving the priority to other_shape
        merged_shape = []
        for i, ps in enumerate(other_shape):
-            if (ps.owner
+            if (ps.owner and
-                    and isinstance(getattr(ps.owner, 'op', None), Shape_i)
+                    isinstance(getattr(ps.owner, 'op', None), Shape_i) and
-                    and ps.owner.op.i == i
+                    ps.owner.op.i == i and
-                    and ps.owner.inputs[0] in (r, other_r)):
+                    ps.owner.inputs[0] in (r, other_r)):
                # If other_shape[i] is uninformative, use r_shape[i].
                # For now, we consider 2 cases of uninformative other_shape[i]:
                #  - Shape_i(i)(other_r);
@@ -1084,11 +1078,11 @@ class ShapeFeature(object):
                                                       r in node.inputs])
        except NotImplementedError as e:
            raise NotImplementedError(
-                    'Code called by infer_shape failed raising a '
+                'Code called by infer_shape failed raising a '
-                    'NotImplementedError. Raising NotImplementedError to '
+                'NotImplementedError. Raising NotImplementedError to '
-                    'indicate that a shape cannot be computed is no longer '
+                'indicate that a shape cannot be computed is no longer '
-                    'supported, and one should now use tensor.ShapeError '
+                'supported, and one should now use tensor.ShapeError '
-                    'instead. The original exception message is: %s' % e)
+                'instead. The original exception message is: %s' % e)
        except Exception as e:
            msg = ('Failed to infer_shape from Op %s.\nInput shapes: '
                   '%s\nException encountered during infer_shape: '
@@ -1108,10 +1102,10 @@ class ShapeFeature(object):
        if len(o_shapes) != len(node.outputs):
            raise Exception(
                ('The infer_shape method for the Op "%s" returned a list ' +
-                'with the wrong number of element: len(o_shapes) = %d ' +
+                 'with the wrong number of element: len(o_shapes) = %d ' +
-                ' != len(node.outputs) = %d') % (str(node.op),
+                 ' != len(node.outputs) = %d') % (str(node.op),
-                                                len(o_shapes),
+                                                  len(o_shapes),
-                                                len(node.outputs)))
+                                                  len(node.outputs)))
        # Ensure shapes are in 'int64'. This is to make sure the assert
        # found in the `local_useless_subtensor` optimization does not fail.
@@ -1173,9 +1167,9 @@ class ShapeFeature(object):
                    # with the InputToGpuOptimizer optimizer.
                    continue
                if (repl.owner and
-                    repl.owner.inputs[0] is shpnode.inputs[0] and
+                        repl.owner.inputs[0] is shpnode.inputs[0] and
-                    isinstance(repl.owner.op, Shape_i) and
+                        isinstance(repl.owner.op, Shape_i) and
-                    repl.owner.op.i == shpnode.op.i):
+                        repl.owner.op.i == shpnode.op.i):
                    # The replacement is a shape_i of the same
                    # input. So no need to do this equivalent
                    # replacement.
@@ -1239,7 +1233,7 @@ class ShapeFeature(object):
            if not dx.owner or not dy.owner:
                return False
            if (not isinstance(dx.owner.op, Shape_i) or
-                not isinstance(dy.owner.op, Shape_i)):
+                    not isinstance(dy.owner.op, Shape_i)):
                return False
            opx = dx.owner.op
            opy = dy.owner.op
@@ -1310,10 +1304,9 @@ def local_fill_to_alloc(node):
            return
            # TODO: cut out un-necessary dimshuffles of v
-        assert rval[0].type == node.outputs[0].type, ('rval', rval[0].type,
+        assert rval[0].type == node.outputs[0].type, (
-                'orig', node.outputs[0].type,
+            'rval', rval[0].type, 'orig', node.outputs[0].type, 'node',
-                'node', node,
+            node,)  # theano.printing.debugprint(node.outputs[0], file='str'))
-                )  # theano.printing.debugprint(node.outputs[0], file='str'))
        return rval
@@ -1404,7 +1397,7 @@ def local_subtensor_make_vector(node):
        try:
            idx, = node.op.idx_list
        except Exception:
-            #'how can you have multiple indexes into a shape?'
+            # 'how can you have multiple indexes into a shape?'
            raise
        if isinstance(idx, (scalar.Scalar, T.TensorType)):
@@ -1467,13 +1460,13 @@ def local_useless_elemwise(node):
    if isinstance(node.op, T.Elemwise):
        if node.op.scalar_op == theano.scalar.eq and len(node.inputs) == 2:
            if node.inputs[0] == node.inputs[1]:
-            # it is the same var in the graph. That will always be true
+                # it is the same var in the graph. That will always be true
                return [T.fill(node.inputs[0],
                               T.constant(1.0,
                                          dtype=node.outputs[0].type.dtype))]
        elif node.op.scalar_op == theano.scalar.neq and len(node.inputs) == 2:
            if node.inputs[0] == node.inputs[1]:
-            # it is the same var in the graph. That will always be false
+                # it is the same var in the graph. That will always be false
                return [T.fill(node.inputs[0],
                               T.constant(0.0,
                                          dtype=node.outputs[0].type.dtype))]
@@ -1482,8 +1475,8 @@ def local_useless_elemwise(node):
        elif node.op.scalar_op == theano.scalar.add and len(node.inputs) == 1:
            return [node.inputs[0]]
-        elif (node.op.scalar_op == theano.scalar.identity
+        elif (node.op.scalar_op == theano.scalar.identity and
-            and len(node.inputs) == 1):
+              len(node.inputs) == 1):
            return [node.inputs[0]]
@@ -1513,12 +1506,12 @@ def local_cast_cast(node):
          and the first cast cause an upcast.
    """
    if (not isinstance(node.op, T.Elemwise) or
-        not isinstance(node.op.scalar_op, scalar.Cast)):
+            not isinstance(node.op.scalar_op, scalar.Cast)):
        return
    x = node.inputs[0]
    if (not x.owner or
-        not isinstance(x.owner.op, T.Elemwise) or
+            not isinstance(x.owner.op, T.Elemwise) or
-        not isinstance(x.owner.op.scalar_op, scalar.Cast)):
+            not isinstance(x.owner.op.scalar_op, scalar.Cast)):
        return
    if node.op.scalar_op.o_type == x.owner.op.scalar_op.o_type:
        return [x]
@@ -1738,7 +1731,7 @@ def local_elemwise_alloc_op(ElemwiseOP, AllocOP, DimShuffleOP):
        # The broadcast pattern of the ouptut must match the broadcast
        # pattern of at least one of the inputs.
        if not any([i.type.broadcastable ==
-            node.outputs[0].type.broadcastable for i in node.inputs]):
+                    node.outputs[0].type.broadcastable for i in node.inputs]):
            return False
        def dimshuffled_alloc(i):
@@ -1749,10 +1742,8 @@ def local_elemwise_alloc_op(ElemwiseOP, AllocOP, DimShuffleOP):
        # At least one input must have an owner that is either a AllocOP or a
        # DimShuffleOP with an owner that is a AllocOP -- otherwise there is
        # nothing to optimize.
-        if not any([i.owner
+        if not any([i.owner and (isinstance(i.owner.op, AllocOP) or
-                    and (isinstance(i.owner.op, AllocOP) or
+                                 dimshuffled_alloc(i)) for i in node.inputs]):
-                         dimshuffled_alloc(i))
-                    for i in node.inputs]):
            return False
        # Search for input that we can use as a baseline for the dimensions.
@@ -1761,9 +1752,8 @@ def local_elemwise_alloc_op(ElemwiseOP, AllocOP, DimShuffleOP):
            if i.type.broadcastable == node.outputs[0].type.broadcastable:
                # Prefer an input that is not a AllocOP nor a DimShuffleOP of a
                # AllocOP so that all allocs can be optimized.
-                if not (i.owner
+                if not (i.owner and (isinstance(i.owner.op, AllocOP) or
-                        and (isinstance(i.owner.op, AllocOP)
+                        dimshuffled_alloc(i))):
-                             or dimshuffled_alloc(i))):
                    assert_op_idx = idx
                    break
@@ -1773,8 +1763,8 @@ def local_elemwise_alloc_op(ElemwiseOP, AllocOP, DimShuffleOP):
            # there is more than one then do all but one.  number of
            # inputs with alloc or dimshuffle alloc
            l2 = [i for i in node.inputs
-                  if (i.owner and (isinstance(i.owner.op, AllocOP)
+                  if (i.owner and (isinstance(i.owner.op, AllocOP) or
-                             or dimshuffled_alloc(i)))]
+                      dimshuffled_alloc(i)))]
            # If only 1 alloc or dimshuffle alloc, it is the one we
            # will use for the shape. So no alloc would be removed.
            if len(l2) > 1:
@@ -1794,14 +1784,13 @@ def local_elemwise_alloc_op(ElemwiseOP, AllocOP, DimShuffleOP):
        same_shape = node.fgraph.shape_feature.same_shape
        for i in node.inputs:
            # Remove alloc
-            if (i.owner and isinstance(i.owner.op, AllocOP)
+            if (i.owner and isinstance(i.owner.op, AllocOP) and
-                and i.owner.inputs[0].type != i.owner.outputs[0].type):
+                    i.owner.inputs[0].type != i.owner.outputs[0].type):
                # when i.owner.inputs[0].type == i.owner.outputs[0].type we
                # will remove that alloc later
                assert i.type.ndim == cmp_op.ndim
-                if (theano.config.experimental.local_alloc_elemwise_assert
+                if (theano.config.experimental.local_alloc_elemwise_assert and
-                    and not same_shape(i, cmp_op)):
+                        not same_shape(i, cmp_op)):
                    assert_op = assert_(assert_op,
                                        *[T.eq(i.shape[idx], cmp_op.shape[idx])
                                          for idx in xrange(i.type.ndim)
@@ -1891,7 +1880,7 @@ def local_upcast_elemwise_constant_inputs(node):
        scalar_op = node.op.scalar_op
        # print "aa", scalar_op.output_types_preference
        if (getattr(scalar_op, 'output_types_preference', None)
-            in (T.scal.upgrade_to_float, T.scal.upcast_out)):
+                in (T.scal.upgrade_to_float, T.scal.upcast_out)):
            # this is the kind of op that we can screw with the input
            # dtypes by upcasting explicitly
            output_dtype = node.outputs[0].type.dtype
@@ -1910,11 +1899,12 @@ def local_upcast_elemwise_constant_inputs(node):
                        else:
                            if shape_i is None:
                                return
-                            new_inputs.append(T.alloc(T.cast(cval_i,
+                            new_inputs.append(
-                                                             output_dtype),
+                                T.alloc(T.cast(cval_i, output_dtype),
-                                *[shape_i(d)(i) for d in xrange(i.ndim)]))
+                                        *[shape_i(d)(i)
-                            #print >> sys.stderr, "AAA",
+                                          for d in xrange(i.ndim)]))
-                            #*[Shape_i(d)(i) for d in xrange(i.ndim)]
+                            # print >> sys.stderr, "AAA",
+                            # *[Shape_i(d)(i) for d in xrange(i.ndim)]
                    except NotScalarConstantError:
                        # for the case of a non-scalar
                        if isinstance(i, T.TensorConstant):
@@ -1958,7 +1948,7 @@ def local_useless_inc_subtensor(node):
        except NotScalarConstantError:
            return
    if (node.inputs[0].ndim != node.inputs[1].ndim or
-        node.inputs[0].broadcastable != node.inputs[1].broadcastable):
+            node.inputs[0].broadcastable != node.inputs[1].broadcastable):
        # FB: I didn't check if this case can happen, but this opt
        # don't support it.
        return
@@ -1994,16 +1984,16 @@ def local_set_to_inc_subtensor(node):
    AdvancedIncSubtensor1(x, other, ilist, set_instead_of_inc=False)
    """
    if (isinstance(node.op, AdvancedIncSubtensor1) and
-        node.op.set_instead_of_inc == True and
+            node.op.set_instead_of_inc and
-        node.inputs[1].owner and
+            node.inputs[1].owner and
-        isinstance(node.inputs[1].owner.op, Elemwise) and
+            isinstance(node.inputs[1].owner.op, Elemwise) and
-        isinstance(node.inputs[1].owner.op.scalar_op, scalar.Add)):
+            isinstance(node.inputs[1].owner.op.scalar_op, scalar.Add)):
        addn = node.inputs[1].owner
        subn = None
        other = None
        if (addn.inputs[0].owner and
-            isinstance(addn.inputs[0].owner.op, AdvancedSubtensor1)):
+                isinstance(addn.inputs[0].owner.op, AdvancedSubtensor1)):
            subn = addn.inputs[0].owner
            other = addn.inputs[1]
        elif (addn.inputs[1].owner and
@@ -2013,7 +2003,7 @@ def local_set_to_inc_subtensor(node):
        else:
            return
        if (subn.inputs[1] != node.inputs[2] or
-            subn.inputs[0] != node.inputs[0]):
+                subn.inputs[0] != node.inputs[0]):
            return
        return [advanced_inc_subtensor1(node.inputs[0], other, node.inputs[2])]
@@ -2030,9 +2020,9 @@ def local_useless_slice(node):
        last_slice = len(slices)
        for s in slices[::-1]:
            # check if slice and then check slice indices
-            if (isinstance(s, slice) and s.start is None and s.stop is None
+            if (isinstance(s, slice) and s.start is None and s.stop is None and
-                and (s.step is None or T.extract_constant(s.step) == 1)):
+                    (s.step is None or T.extract_constant(s.step) == 1)):
-                    last_slice -= 1
+                last_slice -= 1
            else:
                break
        # check if we removed something
@@ -2098,11 +2088,10 @@ def local_useless_subtensor(node):
                # the same underlying variable.
                if (length_pos_shape_i.owner and
                        isinstance(length_pos_shape_i.owner.op,
-                            T.ScalarFromTensor)):
+                                   T.ScalarFromTensor)):
                    length_pos_shape_i = length_pos_shape_i.owner.inputs[0]
                elif (length_pos.owner and
-                        isinstance(length_pos.owner.op,
+                      isinstance(length_pos.owner.op, T.TensorFromScalar)):
-                            T.TensorFromScalar)):
                    length_pos = length_pos.owner.inputs[0]
                else:
                    # We did not find underlying variables of the same type
@@ -2322,8 +2311,8 @@ def merge_two_slices(slice1, len1, slice2, len2):
        pn_stop = sl1.start + (sl2.start - 1) * sl1.step
        pn_stop = T.switch(T.and_(T.lt(pn_stop, 0),
                                  T.gt(flen, 0)),
-                            -len1 - 1,
+                           -len1 - 1,
-                            T.minimum(pn_stop, sl1.stop))
+                           T.minimum(pn_stop, sl1.stop))
        pn_start = sl1.start + (sl2.stop - 1) * sl1.step
        pn_start = T.minimum(pn_start, sl1.stop)
        pn_start = T.maximum(pn_start, 0)
@@ -2345,9 +2334,8 @@ def merge_two_slices(slice1, len1, slice2, len2):
                                  pp_start))
        stop = T.switch(T.lt(reverse2 * reverse1, 0),
-                         T.switch(T.lt(reverse1, 0), np_stop, pn_stop),
+                        T.switch(T.lt(reverse1, 0), np_stop, pn_stop),
-                         T.switch(T.lt(reverse1, 0), nn_stop, pp_stop
+                        T.switch(T.lt(reverse1, 0), nn_stop, pp_stop))
-                                 ))
        step = T.switch(T.lt(reverse2 * reverse1, 0), n_step, p_step)
        start = T.switch(T.le(flen, 0), 0, start)
@@ -2463,7 +2451,7 @@ def local_subtensor_of_alloc(node):
            # We check that the corresponding val dimensions was
            # not a broadcasted dimensions.
            if (val.type.ndim > (i - n_added_dims) and
-                val.type.broadcastable[i - n_added_dims]):
+                    val.type.broadcastable[i - n_added_dims]):
                val_slices.append(slice(None))
            else:
                val_slices.append(sl)
@@ -2496,8 +2484,8 @@ def local_subtensor_of_alloc(node):
        rval[0] = theano.tensor.unbroadcast(
            rval[0],
            *[i for i, (b1, b2) in enumerate(zip(rval[0].broadcastable,
-                                                node.outputs[0].broadcastable))
+                                                 node.outputs[0].broadcastable))
-             if b1 and not b2])
+              if b1 and not b2])
    return rval
@@ -2518,7 +2506,7 @@ def local_subtensor_of_dot(node):
    if not isinstance(node.op, Subtensor):
        return
    if (not node.inputs[0].owner or
-        not isinstance(node.inputs[0].owner.op, T.Dot)):
+            not isinstance(node.inputs[0].owner.op, T.Dot)):
        return
    # If there is other node that use the outputs of the dot
    # We don't want to compute twice the sub part.
@@ -2540,7 +2528,8 @@ def local_subtensor_of_dot(node):
    # We skip this if b.ndim = 1, since then we just want b_sub = b, not b_sub = b[:]
    # (dot also handles b.ndim < 2 as a special case)
    if b.ndim > 1 and len(b_indices) >= b.ndim - 1:
-        b_indices = b_indices[:b.ndim-2] + (slice(None, None, None),) + b_indices[b.ndim-2:]
+        b_indices = (b_indices[:b.ndim - 2] +
+                     (slice(None, None, None),) + b_indices[b.ndim - 2:])
    a_sub = a.__getitem__(tuple(a_indices))
    b_sub = b.__getitem__(tuple(b_indices)) if b_indices else b
@@ -2583,14 +2572,13 @@ def local_IncSubtensor_serialize(node):
    """
    def movable(i):
        # Return True iff this is a incsubtensor that we can move
-        return i.owner \
+        return (i.owner and
-                and isinstance(i.owner.op, (IncSubtensor,
+                isinstance(i.owner.op, (IncSubtensor,
-                                            AdvancedIncSubtensor1,
+                                        AdvancedIncSubtensor1,
-                                            AdvancedIncSubtensor,
+                                        AdvancedIncSubtensor,)) and
-                                        )) \
+                i.type == o_type and
-                and i.type == o_type \
+                len(i.clients) == 1 and
-                and len(i.clients) == 1 \
+                not i.owner.op.set_instead_of_inc)
-                and not i.owner.op.set_instead_of_inc
    if node.op == T.add:
        o_type = node.outputs[0].type
@@ -2598,8 +2586,8 @@ def local_IncSubtensor_serialize(node):
        movable_inputs = [i for i in node.inputs if movable(i)]
        if movable_inputs:
-            new_inputs = [i for i in node.inputs if not movable(i)] \
+            new_inputs = ([i for i in node.inputs if not movable(i)] +
-                    + [mi.owner.inputs[0] for mi in movable_inputs]
+                          [mi.owner.inputs[0] for mi in movable_inputs])
            new_add = T.add(*new_inputs)
            # stack up the new incsubtensors
@@ -2638,9 +2626,10 @@ def local_inplace_setsubtensor(node):
        return [new_node]
    return False
 compile.optdb.register('local_inplace_setsubtensor',
-                       TopoOptimizer(local_inplace_setsubtensor,
+                       TopoOptimizer(
-    failure_callback=TopoOptimizer.warn_inplace), 60,
+                           local_inplace_setsubtensor,
-                       'fast_run', 'inplace')  # DEBUG
+                           failure_callback=TopoOptimizer.warn_inplace),
+                       60, 'fast_run', 'inplace')  # DEBUG
 @gof.local_optimizer([AdvancedIncSubtensor1], inplace=True)
@@ -2653,8 +2642,8 @@ def local_inplace_incsubtensor1(node):
    return False
 compile.optdb.register('local_inplace_incsubtensor1',
                       TopoOptimizer(
-        local_inplace_incsubtensor1,
+                           local_inplace_incsubtensor1,
-        failure_callback=TopoOptimizer.warn_inplace),
+                           failure_callback=TopoOptimizer.warn_inplace),
                       60, 'fast_run', 'inplace')  # DEBUG
@@ -2671,7 +2660,7 @@ def local_incsubtensor_of_zeros(node):
    if (isinstance(node.op, (IncSubtensor,
                             AdvancedIncSubtensor,
                             AdvancedIncSubtensor1)) and
-        not node.op.set_instead_of_inc):
+            not node.op.set_instead_of_inc):
        x = node.inputs[0]
        y = node.inputs[1]
        replace = False
@@ -2713,8 +2702,8 @@ def local_setsubtensor_of_constants(node):
            pass
        if (replace_x is not None and
-            replace_y is not None and
+                replace_y is not None and
-            replace_x == replace_y):
+                replace_x == replace_y):
            return [x]
        else:
            return False
@@ -2738,7 +2727,7 @@ def local_adv_sub1_adv_inc_sub1(node):
        return
    inp = node.inputs[0]
    if (not inp.owner or
-        not isinstance(inp.owner.op, AdvancedIncSubtensor1)):
+            not isinstance(inp.owner.op, AdvancedIncSubtensor1)):
        return
    idx = node.inputs[1]
    idx2 = inp.owner.inputs[2]
@@ -2747,13 +2736,13 @@ def local_adv_sub1_adv_inc_sub1(node):
    if idx is not idx2:
        return
    if (not inp.owner.op.set_instead_of_inc and
-        T.extract_constant(x) != 0):
+            T.extract_constant(x) != 0):
        return
-    cond = [T.all(T.and_(T.lt(idx, x.shape[0]),
+    cond = [T.all(T.and_(T.lt(idx, x.shape[0]), T.ge(idx, -x.shape[0])))]
-                        T.ge(idx, -x.shape[0])))]
    if not node.fgraph.shape_feature.same_shape(idx, y, 0, 0):
        cond.append(T.eq(idx.shape[0], y.shape[0]))
-    y = Assert("Bad indexing or shapes in a AdvancedIncSubtensor1 that was optimized away")(y, *cond)
+    y = Assert("Bad indexing or shapes in a AdvancedIncSubtensor1 "
+               "that was optimized away")(y, *cond)
    if y.dtype == node.outputs[0].dtype:
        return [y]
@@ -2828,33 +2817,34 @@ def local_useless_inc_subtensor_alloc(node):
            # Build `z_broad` explicitly to include extra implicit dimensions.
            z_broad = ((True,) * (xi.ndim - z.ndim) + z.broadcastable)
-            cond = [# The shapes of `y` and `xi` must either agree or `y` may
+            cond = [
-                    # also have shape equal to 1 which may be treated as a
+                # The shapes of `y` and `xi` must either agree or `y` may
-                    # broadcastable dimension by the subtensor op.
+                # also have shape equal to 1 which may be treated as a
-                    T.or_(T.eq(y.shape[k], 1), T.eq(y.shape[k], xi.shape[k]))
+                # broadcastable dimension by the subtensor op.
-                    # Loop over all dimensions.
+                T.or_(T.eq(y.shape[k], 1), T.eq(y.shape[k], xi.shape[k]))
-                    for k in xrange(xi.ndim)
+                # Loop over all dimensions.
-                    # We need to check the above shapes, if
+                for k in xrange(xi.ndim)
-                    # * the pre-alloc increment `z` is broadcastable in
+                # We need to check the above shapes, if
-                    #   dimension `k` (if it isn't, then the shapes of `z` and
+                # * the pre-alloc increment `z` is broadcastable in
-                    #   `y` are the same by the definition of the `Alloc` op in
+                #   dimension `k` (if it isn't, then the shapes of `z` and
-                    #   this dimension and replacing `y` by `z` will not hide a
+                #   `y` are the same by the definition of the `Alloc` op in
-                    #   shape error), and
+                #   this dimension and replacing `y` by `z` will not hide a
-                    # * `xi` and `y` do not have the same shape in dimension
+                #   shape error), and
-                    #   `k` or we cannot infer the shape statically (if the
+                # * `xi` and `y` do not have the same shape in dimension
-                    #   shapes of `xi` and `y` are not the same, then replacing
+                #   `k` or we cannot infer the shape statically (if the
-                    #   `y` by `z` will hide the shape error of `y`), and
+                #   shapes of `xi` and `y` are not the same, then replacing
-                    # * the shape of `y` is not equal to 1 or we cannot infer
+                #   `y` by `z` will hide the shape error of `y`), and
-                    #   the shape statically (if the shape of `y` is equal to
+                # * the shape of `y` is not equal to 1 or we cannot infer
-                    #   1, then `y` is broadcasted by the inc_subtensor op
+                #   the shape statically (if the shape of `y` is equal to
-                    #   internally, so the shapes of `xi` and `y` do not need
+                #   1, then `y` is broadcasted by the inc_subtensor op
-                    #   to match in dimension `k`; else we need to check at
+                #   internally, so the shapes of `xi` and `y` do not need
-                    #   runtime that the shape of `y` is either 1 or the same
+                #   to match in dimension `k`; else we need to check at
-                    #   as `xi` or otherwise replacing `y` by `z` will hide a
+                #   runtime that the shape of `y` is either 1 or the same
-                    #   shape error).
+                #   as `xi` or otherwise replacing `y` by `z` will hide a
-                    if (z_broad[k] and
+                #   shape error).
-                        not same_shape(xi, y, dim_x=k, dim_y=k) and
+                if (z_broad[k] and
-                        shape_of[y][k] != 1)]
+                    not same_shape(xi, y, dim_x=k, dim_y=k) and
+                    shape_of[y][k] != 1)]
            if len(cond) > 0:
                msg = '`x[i]` and `y` do not have the same shape.'
@@ -2916,7 +2906,7 @@ def local_rebroadcast_lift(node):
        # compilation phase.
        if hasattr(input, 'clients') and len(input.clients) == 1:
            rval = inode.op.make_node(T.Rebroadcast(*list(op.axis.items()))(
-                    inode.inputs[0])).outputs
+                inode.inputs[0])).outputs
            return rval
    if inode and isinstance(inode.op, T.Rebroadcast):
        # the "axis" specification in the outer Rebroadcast overrides
@@ -3031,11 +3021,11 @@ def local_join_make_vector(node):
    for idx in xrange(2, len(node.inputs)):
        inp = node.inputs[idx]
        if (inp.owner and
-            isinstance(inp.owner.op, MakeVector) and
+                isinstance(inp.owner.op, MakeVector) and
-            new_inputs[-1].owner and
+                new_inputs[-1].owner and
-            isinstance(new_inputs[-1].owner.op, MakeVector) and
+                isinstance(new_inputs[-1].owner.op, MakeVector) and
-            # MakeVector have a dtype parameter
+                # MakeVector has a dtype parameter
-            inp.owner.op == new_inputs[-1].owner.op):
+                inp.owner.op == new_inputs[-1].owner.op):
            inps = new_inputs[-1].owner.inputs + inp.owner.inputs
            new_inputs[-1] = inp.owner.op(*inps)
        else:
@@ -3059,7 +3049,7 @@ def local_remove_switch_const_cond(node):
               if cond is constant and cond != 0: left
    """
    if (isinstance(node.op, T.Elemwise) and
-        isinstance(node.op.scalar_op, scalar.basic.Switch)):
+            isinstance(node.op.scalar_op, scalar.basic.Switch)):
        cond = T.extract_constant(node.inputs[0], elemwise=False)
        if type(cond) is numpy.ndarray and cond.ndim == 0:
            if cond == 0:
@@ -3241,9 +3231,9 @@ def local_flatten_lift(node):
    nnet/sigm.py:log1msigm_to_softplus to get applied when there is a flatten.
    """
    if (isinstance(node.op, T.Flatten) and
-        node.inputs[0].owner and
+            node.inputs[0].owner and
-        isinstance(node.inputs[0].owner.op, T.Elemwise) and
+            isinstance(node.inputs[0].owner.op, T.Elemwise) and
-        len(node.inputs[0].owner.inputs) == 1):
+            len(node.inputs[0].owner.inputs) == 1):
        f = node.op(node.inputs[0].owner.inputs[0])
        e = node.inputs[0].owner.op(f)
        return [e]
@@ -3290,9 +3280,9 @@ def local_reshape_lift(node):
    nnet/sigm.py:log1msigm_to_softplus to get applied when there is a reshape.
    """
    if (isinstance(node.op, T.Reshape) and
-        node.inputs[0].owner and
+            node.inputs[0].owner and
-        isinstance(node.inputs[0].owner.op, T.Elemwise) and
+            isinstance(node.inputs[0].owner.op, T.Elemwise) and
-        len(node.inputs[0].owner.inputs) == 1):
+            len(node.inputs[0].owner.inputs) == 1):
        r = node.op(node.inputs[0].owner.inputs[0], node.inputs[1])
        e = node.inputs[0].owner.op(r)
        # In rare case the original broadcast was (False, True), but
@@ -3539,7 +3529,7 @@ class Canonizer(gof.LocalOptimizer):
                return [input], []
        if input.owner is None or input.owner.op not in [
-            self.main, self.inverse, self.reciprocal]:
+                self.main, self.inverse, self.reciprocal]:
            if input.owner and isinstance(input.owner.op, T.DimShuffle):
                # If input is a DimShuffle of some input which does
                # something like this:
@@ -3552,9 +3542,9 @@ class Canonizer(gof.LocalOptimizer):
                # the num/denum of its input
                dsn = input.owner    # dimshuffle node
                dsop = dsn.op        # dimshuffle op
-                dsi0 = dsn.inputs[0]  # the first input of the
-                                      # dimshuffle i.e. the ndarray to
+                # the first input of the dimshuffle i.e. the ndarray to redim
-                                      # redim
+                dsi0 = dsn.inputs[0]
                # The compatible order is a DimShuffle "new_order" of the form:
                # ('x', ..., 'x', 0, 1, 2, ..., dimshuffle_input.type.ndim)
@@ -3566,9 +3556,9 @@ class Canonizer(gof.LocalOptimizer):
                # different numbers of dimensions (hence why we can
                # discard its information - we know we can retrieve it
                # later on).
-                compatible_order = ('x',) * (input.type.ndim
+                compatible_order = (('x',) *
-                                             - dsi0.type.ndim) + tuple(
+                                    (input.type.ndim - dsi0.type.ndim) +
-                    range(dsi0.type.ndim))
+                                    tuple(range(dsi0.type.ndim)))
                if dsop.new_order == compatible_order:
                    # If the "new_order" is the one we recognize,
                    # we return the num_denum of the dimshuffled input.
@@ -3815,9 +3805,9 @@ class Canonizer(gof.LocalOptimizer):
        new = self.merge_num_denum(num, denum)
        if new.type.dtype != out.type.dtype:
-            #new = T.fill(out, new)
+            # new = T.fill(out, new)
            elem_op = T.Elemwise(scalar.Identity(scalar.specific_out(
-                        getattr(scalar, out.type.dtype))))
+                getattr(scalar, out.type.dtype))))
            new = elem_op(new)
        assert (new.type == out.type) == (not (new.type != out.type))
@@ -3833,12 +3823,12 @@ class Canonizer(gof.LocalOptimizer):
        else:
            _logger.warning(' '.join(('CANONIZE FAILED: new, out = ',
                                      new, ',', out, 'types',
-                new.type, ',', out.type)))
+                                      new.type, ',', out.type)))
            return False
    def __str__(self):
        return getattr(self, 'name', 'Canonizer(%s, %s, %s)' % (
-                self.main, self.inverse, self.reciprocal))
+            self.main, self.inverse, self.reciprocal))
 def mul_calculate(num, denum, aslist=False, out_type=None):
@@ -3872,7 +3862,7 @@ register_canonicalize(local_mul_canonizer, name='local_mul_canonizer')
 def local_neg_to_mul(node):
    if node.op == T.neg:
        return [T.mul(numpy.array(-1, dtype=node.inputs[0].dtype),
-            node.inputs[0])]
+                node.inputs[0])]
 register_canonicalize(local_neg_to_mul)
@@ -3924,10 +3914,10 @@ def local_elemwise_sub_zeros(node):
    """
    Elemwise{sub}(X,X) -> zeros_like(X)
    """
-    if (isinstance(node.op, T.Elemwise)
+    if (isinstance(node.op, T.Elemwise) and
-        and node.op.scalar_op.nin == 2
+            node.op.scalar_op.nin == 2 and
-        and node.op.scalar_op == scalar.sub
+            node.op.scalar_op == scalar.sub and
-        and node.inputs[0] == node.inputs[1]):
+            node.inputs[0] == node.inputs[1]):
        return [T.zeros_like(node.inputs[0])]
@@ -4013,9 +4003,8 @@ def local_sum_div_dimshuffle(node):
                                         ' to False.')
                        new_denom = T.DimShuffle(
-                                    thing_dimshuffled.type.broadcastable,
+                            thing_dimshuffled.type.broadcastable,
-                                    new_new_order
+                            new_new_order)(thing_dimshuffled)
-                                    )(thing_dimshuffled)
                    return [T.true_div(node.op(numerator), new_denom)]
                # else:
                #    print 'incompatible dims:', axis, new_order
@@ -4052,8 +4041,9 @@ def local_op_of_op(node):
        # We manipulate the graph so this is done to make sure the opt
        # doesn't affect other computations.
        if len(node_inps.clients) == 1:
-            if (node_inps.owner and (isinstance(node_inps.owner.op, T.elemwise.Prod)
+            if (node_inps.owner and
-                    or isinstance(node_inps.owner.op, T.elemwise.Sum))):
+                    (isinstance(node_inps.owner.op, T.elemwise.Prod) or
+                     isinstance(node_inps.owner.op, T.elemwise.Sum))):
                # check to see either the inner or outer prod is doing a
                # product over all axis, in which case we can remove it
@@ -4074,7 +4064,6 @@ def local_op_of_op(node):
                assert len(newaxis) == len(list(node_inps.owner.op.axis) +
                                           list(node.op.axis))
                # The old bugged logic. We keep it there to generate a warning
                # when we generated bad code.
                alldims = list(range(node_inps.owner.inputs[0].type.ndim))
@@ -4087,20 +4076,20 @@ def local_op_of_op(node):
                               if i not in alldims]
                if (theano.config.warn.sum_sum_bug and
-                    newaxis != newaxis_old and
+                        newaxis != newaxis_old and
-                    len(newaxis) == len(newaxis_old)):
+                        len(newaxis) == len(newaxis_old)):
                    _logger.warn(
-                            "WARNING (YOUR CURRENT CODE IS FINE): Theano "
+                        "WARNING (YOUR CURRENT CODE IS FINE): Theano "
-                            "versions between version 9923a40c7b7a and August "
+                        "versions between version 9923a40c7b7a and August "
-                            "2nd, 2010 generated bugged code in this case. "
+                        "2nd, 2010 generated bugged code in this case. "
-                            "This happens when there are two consecutive sums "
+                        "This happens when there are two consecutive sums "
-                            "in the graph and the intermediate sum is not "
+                        "in the graph and the intermediate sum is not "
-                            "used elsewhere in the code. Some safeguard "
+                        "used elsewhere in the code. Some safeguard "
-                            "removed some bad code, but not in all cases. You "
+                        "removed some bad code, but not in all cases. You "
-                            "are in one such case. To disable this warning "
+                        "are in one such case. To disable this warning "
-                            "(that you can safely ignore since this bug has "
+                        "(that you can safely ignore since this bug has "
-                            "been fixed) set the theano flag "
+                        "been fixed) set the theano flag "
-                            "`warn.sum_sum_bug` to False.")
+                        "`warn.sum_sum_bug` to False.")
                combined = opt_type(newaxis, dtype=out_dtype)
                return [combined(node_inps.owner.inputs[0])]
@@ -4126,9 +4115,8 @@ def local_reduce_join(node):
    """
    if (isinstance(node.op, T.CAReduce) and
-        node.inputs[0].owner and
+            node.inputs[0].owner and
-        isinstance(node.inputs[0].owner.op, T.Join)):
+            isinstance(node.inputs[0].owner.op, T.Join)):
        join = node.inputs[0].owner
        if T.extract_constant(join.inputs[0]) != 0:
            return
@@ -4149,7 +4137,8 @@ def local_reduce_join(node):
            if not inp:
                return
            if (not isinstance(inp.op, DimShuffle) or
-                inp.op.new_order != ('x',) + tuple(range(inp.inputs[0].ndim))):
+                    inp.op.new_order != ('x',) +
+                    tuple(range(inp.inputs[0].ndim))):
                return
            new_inp.append(inp.inputs[0])
        ret = Elemwise(node.op.scalar_op)(*new_inp)
@@ -4174,8 +4163,7 @@ def local_reduce_join(node):
                    'optimization, that modified the pattern '
                    '"Reduce{scalar.op}(Join(axis=0, a, b), axis=0)", '
                    'did not check the reduction axis. So if the '
-                    'reduction axis was not 0, you got a wrong answer.'
+                    'reduction axis was not 0, you got a wrong answer.'))
-                    ))
            return
        # We add the new check late to don't add extra warning.
@@ -4204,7 +4192,7 @@ def local_cut_useless_reduce(node):
 # theano/tensor/tests/test_opt.py:T_local_reduce.test_local_reduce_broadcast_some_0
 # see gh-790 issue.
 #
-#@register_canonicalize
+# @register_canonicalize
 @register_uncanonicalize
 @register_specialize
 @gof.local_optimizer(ALL_REDUCE)
@@ -4258,7 +4246,7 @@ def local_opt_alloc(node):
            input = node_inps.owner.inputs[0]
            shapes = node_inps.owner.inputs[1:]
            if (node.op.axis is None or
-                node.op.axis == tuple(range(input.ndim))):
+                    node.op.axis == tuple(range(input.ndim))):
                try:
                    val = get_scalar_constant_value(input)
                    assert val.size == 1
@@ -4346,7 +4334,7 @@ register_canonicalize(local_mul_zero)
 @gof.local_optimizer([T.true_div])
 def local_div_to_inv(node):
    if node.op == T.true_div and N.all(
-        local_mul_canonizer.get_constant(node.inputs[0]) == 1.0):
+            local_mul_canonizer.get_constant(node.inputs[0]) == 1.0):
        out = node.outputs[0]
        new_out = T.inv(local_mul_canonizer.merge_num_denum(node.inputs[1:],
                                                            []))
@@ -4501,7 +4489,8 @@ def local_pow_specialize_device(node):
                if abs(y) > 2:
                    # We fuse all the pow together here to make
                    # compilation faster
-                    rval1 = Elemwise(theano.scalar.Composite(
+                    rval1 = Elemwise(
+                        theano.scalar.Composite(
                            [pow2_scal[0]], [rval1_scal])).make_node(xsym)
                if y < 0:
                    rval = [T.inv(rval1)]
@@ -4566,8 +4555,8 @@ def local_mul_specialize(node):
                else:
                    # The next case would cause a replace by an equivalent case.
                    if (neg and
-                        nb_neg_node == 0 and
+                            nb_neg_node == 0 and
-                        nb_cst == 1):
+                            nb_cst == 1):
                        return
                    elif neg:
                        # Don't add an extra neg node as we can't
@@ -4640,8 +4629,8 @@ def check_for_x_over_absX(numerators, denominators):
    # TODO: this function should dig/search through dimshuffles
    # This won't catch a dimshuffled absolute value
    for den in list(denominators):
-        if (den.owner and den.owner.op == T.abs_
+        if (den.owner and den.owner.op == T.abs_ and
-            and den.owner.inputs[0] in numerators):
+                den.owner.inputs[0] in numerators):
            if den.owner.inputs[0].type.dtype.startswith('complex'):
                # TODO: Make an Op that projects a complex number to
                #      have unit length but projects 0 to 0.  That
@@ -4715,8 +4704,8 @@ def local_log1p(node):
    if node.op == T.log:
        log_arg, = node.inputs
        if log_arg.owner and log_arg.owner.op == T.add:
-            scalars, scalar_inputs, nonconsts = \
+            scalars, scalar_inputs, nonconsts = scalarconsts_rest(
-                    scalarconsts_rest(log_arg.owner.inputs)
+                log_arg.owner.inputs)
            # scalar_inputs are potentially dimshuffled and fill'd scalars
            if scalars and numpy.allclose(numpy.sum(scalars), 1):
                if not nonconsts:
@@ -4748,7 +4737,7 @@ def local_log_add(node):
            if len(zi) != 2:
                # -- upgrading Maximum to handle multiple inputs wasn't trivial
                #    TODO
-                #raise NotImplementedError()
+                # raise NotImplementedError()
                return
            pre_exp = [x.owner.inputs[0] for x in zi
                       if x.owner and x.owner.op == T.exp]
@@ -4945,8 +4934,7 @@ def constant_folding(node):
        storage_map[o] = [None]
        compute_map[o] = [False]
    if (hasattr(node.op, 'python_constant_folding') and
-        node.op.python_constant_folding(node)):
+            node.op.python_constant_folding(node)):
        old_value = getattr(node.op, '_op_use_c_code', False)
        try:
            node.op._op_use_c_code = False
@@ -5037,9 +5025,9 @@ register_specialize(local_one_minus_erf)
 local_one_minus_erf2 = gof.PatternSub((T.add,
                                      1,
                                      (T.mul, -1, (T.erf, 'x'))),
-                                     (T.erfc, 'x'),
+                                      (T.erfc, 'x'),
-                                     allow_multiple_clients=True,
+                                      allow_multiple_clients=True,
-                                     name='local_one_minus_erf2')
+                                      name='local_one_minus_erf2')
 register_canonicalize(local_one_minus_erf2)
 register_stabilize(local_one_minus_erf2)
 register_specialize(local_one_minus_erf2)
@@ -5058,7 +5046,7 @@ register_canonicalize(local_one_plus_neg_erf)
 register_stabilize(local_one_plus_neg_erf)
 register_specialize(local_one_plus_neg_erf)
-#(-1)+erf(x) => -erfc(x) don't need erf(x)+(-1) as the canonicalize
+# (-1)+erf(x) => -erfc(x) don't need erf(x)+(-1) as the canonicalize
 # will put the -1 as the first argument.
 local_erf_minus_one = gof.PatternSub((T.add,
                                      dict(pattern='y', constraint=_is_minus1),
@@ -5124,7 +5112,7 @@ register_canonicalize(local_one_add_neg_erfc)
 register_stabilize(local_one_add_neg_erfc)
 register_specialize(local_one_add_neg_erfc)
-#(-1)+erfc(-x)=>erf(x)
+# (-1)+erfc(-x)=>erf(x)
 local_erf_neg_minus_one = gof.PatternSub((T.add,
                                          dict(pattern='y', constraint=_is_minus1),
                                          (T.erfc, (T.neg, 'x'))),
@@ -5137,7 +5125,7 @@ register_canonicalize(local_erf_neg_minus_one)
 register_stabilize(local_erf_neg_minus_one)
 register_specialize(local_erf_neg_minus_one)
-#(-1)+erfc(-1*x)=>erf(x)
+# (-1)+erfc(-1*x)=>erf(x)
 local_erf_neg_minus_one2 = gof.PatternSub((T.add,
                                           dict(pattern='y', constraint=_is_minus1),
                                           (T.erfc, (T.mul, -1, 'x'))),
@@ -5176,8 +5164,8 @@ def local_log_erfc(node):
    x = node.inputs[0].owner.inputs[0]
    stab_value = (-x ** 2 - T.log(x) - .5 * T.log(numpy.pi) +
-                   T.log(1 - 1 / (2 * x ** 2) + 3 / (4 * x ** 4)
+                  T.log(1 - 1 / (2 * x ** 2) + 3 / (4 * x ** 4) -
-                         - 15 / (8 * x ** 6)))
+                  15 / (8 * x ** 6)))
    if (node.outputs[0].dtype == 'float32' or
            node.outputs[0].dtype == 'float16'):
@@ -5191,8 +5179,8 @@ def local_log_erfc(node):
 # Stability optimization of the grad of log(erfc(x))
-#([y*]exp(-(x**2)))/erfc(x) # The y* is optional
+# ([y*]exp(-(x**2)))/erfc(x) # The y* is optional
-#([y*]exp(x**2))/erfc(-x) => [y*](when x>threashold,
+# ([y*]exp(x**2))/erfc(-x) => [y*](when x>threshold,
 #                            sqrt(pi)*-x/(1-1/(2*x**2)+3/(4*x**4)-15/(8*x**6)))
 # for float64: threshold=26.63 see at the end of the fct for the explaination
 # for float32: threshold=9.3 see at the end of the fct for the explaination
@@ -5226,8 +5214,8 @@ def local_grad_log_erfc_neg(node):
        if mul.owner.inputs[0].owner or len(mul.owner.inputs) != 2:
            return False
        y = mul.owner.inputs[0]
-        if (not mul.owner.inputs[1].owner
+        if (not mul.owner.inputs[1].owner or
-            or mul.owner.inputs[1].owner.op != T.exp):
+                mul.owner.inputs[1].owner.op != T.exp):
            return False
        exp = mul.owner.inputs[1]
@@ -5236,8 +5224,8 @@ def local_grad_log_erfc_neg(node):
    if exp.owner.inputs[0].owner.op == T.neg:
        neg = exp.owner.inputs[0]
-        if (not neg.owner.inputs[0].owner
+        if (not neg.owner.inputs[0].owner or
-            or neg.owner.inputs[0].owner.op != T.sqr):
+                neg.owner.inputs[0].owner.op != T.sqr):
            return False
        sqr = neg.owner.inputs[0]
        x = sqr.owner.inputs[0]
@@ -5279,8 +5267,8 @@ def local_grad_log_erfc_neg(node):
            return False
        if len(mul_neg.owner.inputs) == 2:
-            if (not mul_neg.owner.inputs[1].owner
+            if (not mul_neg.owner.inputs[1].owner or
-                or mul_neg.owner.inputs[1].owner.op != T.sqr):
+                    mul_neg.owner.inputs[1].owner.op != T.sqr):
                return False
            sqr = mul_neg.owner.inputs[1]
            x = sqr.owner.inputs[0]
@@ -5292,8 +5280,8 @@ def local_grad_log_erfc_neg(node):
            return False
        if cst2 != -1:
-            if (not erfc_x.owner or erfc_x.owner.op != T.mul
+            if (not erfc_x.owner or erfc_x.owner.op != T.mul or
-                or len(erfc_x.owner.inputs) != 2):
+                    len(erfc_x.owner.inputs) != 2):
                # todo implement that case
                return False
            if erfc_x.owner.inputs[1] is not mul_neg.owner.inputs[1]:
@@ -5324,12 +5312,12 @@ def local_grad_log_erfc_neg(node):
    # aaron value
    stab_value = (x * T.pow(1 - 1 / (2 * (x ** 2)) +
-                            3 / (4 * (x ** 4)) - 15 / (8 * (x ** 6)), -1)
+                  3 / (4 * (x ** 4)) - 15 / (8 * (x ** 6)), -1) *
-                  * T.cast(T.sqrt(numpy.pi), dtype=x.dtype))
+                  T.cast(T.sqrt(numpy.pi), dtype=x.dtype))
    if x.dtype == 'float32' or x.dtype == 'float16':
        threshold = 9.3
-        #threshold = 10.1
+        # threshold = 10.1
    elif x.dtype == 'float64':
        threshold = 26.641747557
    ret = T.switch(x < threshold, true_div_no_mul, stab_value) * y
@@ -5531,6 +5519,7 @@ def local_elemwise_fusion_op(OP, max_input_fct=lambda node: 32,
    if maker is None:
        def maker(node, scalar_op):
            return OP(scalar_op)
    def local_fuse(node):
        """
        As part of specialization, we fuse two consecutive elemwise Ops of the
@@ -5598,13 +5587,13 @@ def local_elemwise_fusion_op(OP, max_input_fct=lambda node: 32,
            # If a variable is used as multiple into to the same node,
            # we still want to fusion. So we take the set.
            if (i.owner and
-                isinstance(i.owner.op, OP) and
+                    isinstance(i.owner.op, OP) and
-                len(set([n for n, idx in i.clients])) == 1 and
+                    len(set([n for n, idx in i.clients])) == 1 and
-                # Do not merge elemwise that don't have the same
+                    # Do not merge elemwise that don't have the same
-                # broadcastable pattern to don't redo duplicate
+                    # broadcastable pattern, to avoid redoing duplicate
-                # computation due to broadcast.
+                    # computation due to broadcast.
-                i.owner.outputs[0].broadcastable == node.outputs[0].broadcastable):
+                    i.owner.outputs[0].broadcastable ==
+                    node.outputs[0].broadcastable):
                do_fusion = True
                try:
                    tmp_s_input = []
@@ -5840,14 +5829,14 @@ def local_add_mul_fusion(node):
    """
    if (not isinstance(node.op, Elemwise) or
-        not isinstance(node.op.scalar_op, (scalar.Add, scalar.Mul))):
+            not isinstance(node.op.scalar_op, (scalar.Add, scalar.Mul))):
        return False
    s_op = node.op.scalar_op.__class__
    for inp in node.inputs:
        if (inp.owner and
-            isinstance(inp.owner.op, Elemwise) and
+                isinstance(inp.owner.op, Elemwise) and
-            isinstance(inp.owner.op.scalar_op, s_op)):
+                isinstance(inp.owner.op.scalar_op, s_op)):
            l = list(node.inputs)
            l.remove(inp)
            return [node.op(*(l + inp.owner.inputs))]
@@ -5882,13 +5871,15 @@ else:
 # just returns the input, it should be removed from the graph to
 # make sure all possible optimizations can be applied.
 register_canonicalize(gof.OpRemove(theano.gradient.consider_constant_),
-    'fast_compile', 'fast_run', name='remove_consider_constant')
+                      'fast_compile', 'fast_run',
+                      name='remove_consider_constant')
 register_canonicalize(gof.OpRemove(theano.gradient.zero_grad_),
-    'fast_compile', 'fast_run', name='remove_zero_grad')
+                      'fast_compile', 'fast_run', name='remove_zero_grad')
 register_canonicalize(gof.OpRemove(theano.gradient.disconnected_grad_),
-    'fast_compile', 'fast_run', name='remove_disconnected_grad')
+                      'fast_compile', 'fast_run',
+                      name='remove_disconnected_grad')
 @register_canonicalize

--- a/theano/tensor/raw_random.py
+++ b/theano/tensor/raw_random.py
 """Define random number Type (`RandomStateType`) and Op (`RandomFunction`)."""
 from __future__ import print_function
-__docformat__ = "restructuredtext en"
 import sys
 from copy import copy
@@ -15,6 +15,8 @@ from theano import gof
 from six import string_types
 from theano.compile import optdb
+__docformat__ = "restructuredtext en"
 class RandomStateType(gof.Type):
    """A Type wrapper for numpy.random.RandomState
@@ -85,13 +87,13 @@ class RandomStateType(gof.Type):
 # Register RandomStateType's C code for ViewOp.
 theano.compile.register_view_op_c_code(
-        RandomStateType,
+    RandomStateType,
-        """
+    """
-        Py_XDECREF(%(oname)s);
+    Py_XDECREF(%(oname)s);
-        %(oname)s = %(iname)s;
+    %(oname)s = %(iname)s;
-        Py_XINCREF(%(oname)s);
+    Py_XINCREF(%(oname)s);
-        """,
+    """,
-        1)
+    1)
 random_state_type = RandomStateType()
@@ -135,9 +137,8 @@ class RandomFunction(gof.Op):
            and self.ndim_added == other.ndim_added
    def __hash__(self):
-        return hash(type(self)) ^ hash(self.fn) \
+        return (hash(type(self)) ^ hash(self.fn) ^ hash(self.outtype) ^
-                ^ hash(self.outtype)  \
+                hash(self.inplace) ^ hash(self.ndim_added))
-                ^ hash(self.inplace) ^ hash(self.ndim_added)
    def __getstate__(self):
        return self.state
@@ -233,7 +234,6 @@ class RandomFunction(gof.Op):
        # copy of r if self.inplace is False
        r, shape, args = inputs[0], inputs[1], inputs[2:]
        assert type(r) == numpy.random.RandomState, (type(r), r)
-        r_orig = r
        # If shape == [], that means no shape is enforced, and numpy is
        # trusted to draw the appropriate number of samples, numpy uses
@@ -245,16 +245,16 @@ class RandomFunction(gof.Op):
            shape = tuple(shape)
        if (shape is not None and
-            self.outtype.ndim != len(shape) + self.ndim_added):
+                self.outtype.ndim != len(shape) + self.ndim_added):
            raise ValueError('Shape mismatch: self.outtype.ndim (%i) !='
                             ' len(shape) (%i) + self.ndim_added (%i)'
-                            % (self.outtype.ndim, len(shape), self.ndim_added))
+                             % (self.outtype.ndim, len(shape), self.ndim_added))
        if not self.inplace:
            r = copy(r)
        rout[0] = r
        rval = self.fn(r, *(args + [shape]))
-        if not isinstance(rval, numpy.ndarray) \
+        if (not isinstance(rval, numpy.ndarray) or
-               or str(rval.dtype) != node.outputs[1].type.dtype:
+                str(rval.dtype) != node.outputs[1].type.dtype):
            rval = theano._asarray(rval, dtype=node.outputs[1].type.dtype)
        # When shape is None, numpy has a tendency to unexpectedly
@@ -288,7 +288,7 @@ class RandomFunction(gof.Op):
    def grad(self, inputs, outputs):
        return [theano.gradient.grad_undefined(self, k, inp,
-                        'No gradient defined through raw random numbers op')
+                'No gradient defined through raw random numbers op')
                for k, inp in enumerate(inputs)]
    def R_op(self, inputs, eval_points):
@@ -325,8 +325,8 @@ def _infer_ndim_bcast(ndim, shape, *args):
        else:
            if shape_ndim != ndim:
                raise ValueError('ndim should be equal to len(shape), but\n',
-                            'ndim = %s, len(shape) = %s, shape = %s'
+                                 'ndim = %s, len(shape) = %s, shape = %s'
-                            % (ndim, shape_ndim, shape))
+                                 % (ndim, shape_ndim, shape))
        bcast = []
        pre_v_shape = []
@@ -353,7 +353,8 @@ def _infer_ndim_bcast(ndim, shape, *args):
                                break
                    else:
                        if n_a_i == 0:
-                            raise ValueError(('Auto-shape of -1 must overlap'
+                            raise ValueError((
+                                'Auto-shape of -1 must overlap'
                                'with the shape of one of the broadcastable'
                                'inputs'))
                        else:
@@ -373,7 +374,7 @@ def _infer_ndim_bcast(ndim, shape, *args):
        # but we need to know ndim
        if not args:
            raise TypeError(('_infer_ndim_bcast cannot infer shape without'
-                ' either shape or args'))
+                             ' either shape or args'))
        template = reduce(lambda a, b: a + b, args)
        v_shape = template.shape
        bcast = template.broadcastable
@@ -463,7 +464,7 @@ def uniform(random_state, size=None, low=0.0, high=1.0, ndim=None, dtype=None):
        dtype = tensor.scal.upcast(theano.config.floatX, low.dtype, high.dtype)
    ndim, size, bcast = _infer_ndim_bcast(ndim, size, low, high)
    op = RandomFunction('uniform',
-            tensor.TensorType(dtype=dtype, broadcastable=bcast))
+                        tensor.TensorType(dtype=dtype, broadcastable=bcast))
    return op(random_state, size, low, high)
@@ -487,7 +488,7 @@ def normal(random_state, size=None, avg=0.0, std=1.0, ndim=None, dtype=None):
        dtype = tensor.scal.upcast(theano.config.floatX, avg.dtype, std.dtype)
    ndim, size, bcast = _infer_ndim_bcast(ndim, size, avg, std)
    op = RandomFunction('normal',
-            tensor.TensorType(dtype=dtype, broadcastable=bcast))
+                        tensor.TensorType(dtype=dtype, broadcastable=bcast))
    return op(random_state, size, avg, std)
@@ -517,7 +518,8 @@ def binomial(random_state, size=None, n=1, p=0.5, ndim=None,
        #          p=numpy.asarray([.1, .2, .3], dtype='float64'))
        n = tensor.cast(n, 'int32')
    op = RandomFunction('binomial',
-            tensor.TensorType(dtype=dtype, broadcastable=(False,) * ndim))
+                        tensor.TensorType(dtype=dtype,
+                                          broadcastable=(False,) * ndim))
    return op(random_state, size, n, p)
@@ -583,7 +585,7 @@ def random_integers(random_state, size=None, low=0, high=1, ndim=None,
    high = tensor.as_tensor_variable(high)
    ndim, size, bcast = _infer_ndim_bcast(ndim, size, low, high)
    op = RandomFunction(random_integers_helper,
-            tensor.TensorType(dtype=dtype, broadcastable=bcast))
+                        tensor.TensorType(dtype=dtype, broadcastable=bcast))
    return op(random_state, size, low, high)
@@ -719,8 +721,9 @@ def permutation(random_state, size=None, n=1, ndim=None, dtype='int64'):
        ndim, size, bcast = _infer_ndim_bcast(ndim, size)
    # print "NDIM", ndim, size
    op = RandomFunction(permutation_helper,
-            tensor.TensorType(dtype=dtype, broadcastable=bcast + (False,)),
+                        tensor.TensorType(dtype=dtype,
-            ndim_added=1)
+                                          broadcastable=bcast + (False,)),
+                        ndim_added=1)
    return op(random_state, size, n)
@@ -738,14 +741,11 @@ def multinomial_helper(random_state, n, pvals, size):
        ndim = len(size)
    else:
        ndim = max(n.ndim, pvals.ndim - 1)
-    out_ndim = ndim + 1
    # broadcast n to ndim dimensions and pvals to ndim+1
    if n.ndim > ndim:
-        raise ValueError(
+        raise ValueError('n.ndim (%i) should not be larger than len(size) (%i)'
-            'n.ndim (%i) should not be larger than len(size) (%i)'
+                         % (n.ndim, ndim), n, size)
-            % (n.ndim, ndim),
-                n, size)
    if n.ndim < ndim:
        n = n.reshape((1,) * (ndim - n.ndim) + n.shape)
@@ -788,7 +788,7 @@ def multinomial_helper(random_state, n, pvals, size):
            # because mtrand.pyx has a ValueError that will trigger if
            # sum(pvals[:-1]) > 1.0
            pvi = pvi * (1.0 - 5e-5)
-            #pvi = pvi * .9
+            # pvi = pvi * .9
            pisum = numpy.sum(pvi)
        elif pvi[-1] < 5e-5:  # will this even work?
            pvi = pvi * (1.0 - 5e-5)
@@ -859,8 +859,9 @@ def multinomial(random_state, size=None, n=1, pvals=[0.5, 0.5],
    ndim, size, bcast = _infer_ndim_bcast(ndim, size, n, tmp)
    bcast = bcast + (pvals.type.broadcastable[-1],)
    op = RandomFunction(multinomial_helper,
-            tensor.TensorType(dtype=dtype, broadcastable=bcast),
+                        tensor.TensorType(dtype=dtype,
-            ndim_added=1)
+                                          broadcastable=bcast),
+                        ndim_added=1)
    return op(random_state, size, n, pvals)

--- a/theano/tensor/shared_randomstreams.py
+++ b/theano/tensor/shared_randomstreams.py
 """Define RandomStreams, providing random number variables for Theano
 graphs.
 """
-__docformat__ = "restructuredtext en"
 import copy
 import numpy
 from theano.compile.sharedvalue import (SharedVariable, shared_constructor,
                                        shared)
 from theano.tensor import raw_random
+__docformat__ = "restructuredtext en"
 class RandomStateSharedVariable(SharedVariable):
    pass
@@ -77,7 +79,7 @@ class RandomStreams(raw_random.RandomStreamsBase):
        for old_r, new_r in self.state_updates:
            old_r_seed = seedgen.randint(2 ** 30)
            old_r.set_value(numpy.random.RandomState(int(old_r_seed)),
-                    borrow=True)
+                            borrow=True)
    def __getitem__(self, item):
        """Retrieve the numpy RandomState instance associated with a

--- a/theano/tensor/sharedvar.py
+++ b/theano/tensor/sharedvar.py
@@ -41,10 +41,10 @@ def tensor_constructor(value, name=None, strict=False, allow_downcast=None,
        broadcastable = (False,) * len(value.shape)
    type = TensorType(value.dtype, broadcastable=broadcastable)
    return TensorSharedVariable(type=type,
-            value=numpy.array(value, copy=(not borrow)),
+                                value=numpy.array(value, copy=(not borrow)),
-            name=name,
+                                name=name,
-            strict=strict,
+                                strict=strict,
-            allow_downcast=allow_downcast)
+                                allow_downcast=allow_downcast)
 # TensorSharedVariable brings in the tensor operators, is not ideal, but works
@@ -85,8 +85,10 @@ def scalar_constructor(value, name=None, strict=False, allow_downcast=None,
        # Do not pass the dtype to asarray because we want this to fail if
        # strict is True and the types do not match.
        rval = ScalarSharedVariable(type=tensor_type,
-                value=numpy.array(value, copy=True),
+                                    value=numpy.array(value, copy=True),
-                name=name, strict=strict, allow_downcast=allow_downcast)
+                                    name=name,
+                                    strict=strict,
+                                    allow_downcast=allow_downcast)
        return rval
    except Exception:
        traceback.print_exc()

--- a/theano/tensor/slinalg.py
+++ b/theano/tensor/slinalg.py
 import logging
-logger = logging.getLogger(__name__)
-import numpy
 import warnings
 from six.moves import xrange
-from theano.gof import Op, Apply
+import numpy
-from theano.tensor import as_tensor_variable, dot, DimShuffle, Dot
-from theano.tensor.blas import Dot22
-from theano import tensor
-import theano.tensor
-from theano.tensor.opt import (register_stabilize,
-        register_specialize, register_canonicalize)
-from theano.gof import local_optimizer
-from theano.gof.opt import Optimizer
-from theano.gradient import DisconnectedType
 try:
    import scipy.linalg
@@ -24,6 +11,13 @@ except ImportError:
    # some ops (e.g. Cholesky, Solve, A_Xinv_b) won't work
    imported_scipy = False
+from theano import tensor
+import theano.tensor
+from theano.tensor import as_tensor_variable
+from theano.gof import Op, Apply
+logger = logging.getLogger(__name__)
 MATRIX_STRUCTURES = (
    'general',
    'symmetric',
@@ -123,7 +117,6 @@ class CholeskyGrad(Op):
                F[k, k] /= (2 * L[k, k])
        else:
            F = numpy.triu(dz)
-            M = N - 1
            for k in xrange(N - 1, -1, -1):
                for j in xrange(k + 1, N):
                    for i in xrange(j, N):
@@ -182,7 +175,7 @@ class Solve(Op):
        else:
            rval = scipy.linalg.solve(A, b)
        output_storage[0][0] = rval
    # computes shape of x where x = inv(A) * b
    def infer_shape(self, node, shapes):
        Ashape, Bshape = shapes

--- a/theano/tensor/sort.py
+++ b/theano/tensor/sort.py
@@ -28,7 +28,7 @@ class SortOp(theano.Op):
    def make_node(self, input, axis=-1):
        input = theano.tensor.as_tensor_variable(input)
        if (axis is None or
-            (isinstance(axis, theano.Constant) and axis.data is None)):
+                (isinstance(axis, theano.Constant) and axis.data is None)):
            axis = theano.Constant(theano.gof.generic, None)
            # axis=None flattens the array before sorting
            out_type = tensor(dtype=input.dtype, broadcastable=[False])
@@ -45,7 +45,7 @@ class SortOp(theano.Op):
    def infer_shape(self, node, inputs_shapes):
        if (isinstance(node.inputs[1], theano.Constant) and
-            node.inputs[1].data is None):
+                node.inputs[1].data is None):
            # That means axis = None,
            # So the array is flattened before being sorted
            return [(mul(*inputs_shapes[0]),)]
@@ -64,16 +64,17 @@ class SortOp(theano.Op):
            " matrix (and axis is None or 0) and tensor3")
        if a.ndim == 1:
            idx = argsort(*inputs, kind=self.kind, order=self.order)
-#            rev_idx = numpy.where(idx[None, :]==numpy.arange(5)[:,None])[1]
+            # rev_idx = numpy.where(idx[None, :]==numpy.arange(5)[:,None])[1]
            rev_idx = theano.tensor.eq(idx[None, :],
                                       arange(a.shape[0])[:, None]).nonzero()[1]
            inp_grad = output_grads[0][rev_idx]
        elif a.ndim == 2:
            if (axis is None or
-                (isinstance(axis, theano.Constant) and axis.data is None)):
+                    (isinstance(axis, theano.Constant) and axis.data is None)):
                idx = argsort(*inputs, kind=self.kind, order=self.order)
-                rev_idx = theano.tensor.eq(idx[None, :],
+                rev_idx = theano.tensor.eq(
-                                           arange(a.shape[0]*a.shape[1])[:, None]).nonzero()[1]
+                    idx[None, :],
+                    arange(a.shape[0] * a.shape[1])[:, None]).nonzero()[1]
                inp_grad = output_grads[0][rev_idx].reshape(a.shape)
            elif (axis == 0 or
                  (isinstance(axis, theano.Constant) and axis.data == 0)):
@@ -85,7 +86,7 @@ class SortOp(theano.Op):
                indices = self.__get_argsort_indices(a, axis)
                inp_grad = output_grads[0][indices[0], indices[1], indices[2]]
            elif (axis is None or
-                (isinstance(axis, theano.Constant) and axis.data is None)):
+                    (isinstance(axis, theano.Constant) and axis.data is None)):
                rev_idx = self.__get_argsort_indices(a, axis)
                inp_grad = output_grads[0][rev_idx].reshape(a.shape)
        axis_grad = theano.gradient.grad_undefined(
@@ -103,13 +104,13 @@ class SortOp(theano.Op):
          list of lenght len(a.shape) otherwise
        """
-        # The goal is to get gradient wrt input from gradient 
+        # The goal is to get gradient wrt input from gradient
        # wrt sort(input, axis)
        idx = argsort(a, axis, kind=self.kind, order=self.order)
-        # rev_idx is the reverse of previous argsort operation 
+        # rev_idx is the reverse of previous argsort operation
-        rev_idx = argsort(idx, axis, kind=self.kind, order=self.order) 
+        rev_idx = argsort(idx, axis, kind=self.kind, order=self.order)
        if (axis is None or
-            (isinstance(axis, theano.Constant) and axis.data is None)):
+                (isinstance(axis, theano.Constant) and axis.data is None)):
            return rev_idx
        indices = []
        if axis.data >= 0:
@@ -120,7 +121,7 @@ class SortOp(theano.Op):
            if i == axis_data:
                indices.append(rev_idx)
            else:
-                index_shape = [1] * a.ndim 
+                index_shape = [1] * a.ndim
                index_shape[i] = a.shape[i]
                # it's a way to emulate numpy.ogrid[0: a.shape[0], 0: a.shape[1], 0: a.shape[2]]
                indices.append(theano.tensor.arange(a.shape[i]).reshape(index_shape))
@@ -178,28 +179,27 @@ class ArgSortOp(theano.Op):
        return hash(type(self)) ^ hash(self.order) ^ hash(self.kind)
    def __str__(self):
-        return (self.__class__.__name__
+        return (self.__class__.__name__ +
-                + "{%s, %s}" % (self.kind, str(self.order)))
+                "{%s, %s}" % (self.kind, str(self.order)))
    def make_node(self, input, axis=-1):
        input = theano.tensor.as_tensor_variable(input)
        if (axis is None or
-            (isinstance(axis, theano.Constant) and axis.data is None)):
+                (isinstance(axis, theano.Constant) and axis.data is None)):
            axis = theano.Constant(theano.gof.generic, None)
            bcast = [False]
        else:
            axis = theano.tensor.as_tensor_variable(axis)
            bcast = input.type.broadcastable
-        return theano.Apply(self, [input, axis],
+        return theano.Apply(self, [input, axis], [theano.tensor.TensorType(
-            [theano.tensor.TensorType(dtype="int64", broadcastable=bcast)()])
+            dtype="int64", broadcastable=bcast)()])
    def perform(self, node, inputs, output_storage):
        a = inputs[0]
        axis = inputs[1]
        z = output_storage[0]
-        z[0] = theano._asarray(
+        z[0] = theano._asarray(np.argsort(a, axis, self.kind, self.order),
-                np.argsort(a, axis, self.kind, self.order),
+                               dtype=node.outputs[0].dtype)
-                dtype=node.outputs[0].dtype)
    def infer_shape(self, node, inputs_shapes):
        if (isinstance(node.inputs[1], theano.Constant) and

--- a/theano/tensor/subtensor.py
+++ b/theano/tensor/subtensor.py
 from copy import copy
-import os
 import sys
 from textwrap import dedent
 import warnings
 import logging
-_logger = logging.getLogger("theano.tensor.subtensor")
 import numpy
 from six.moves import xrange
@@ -32,6 +30,7 @@ if config.cxx:
    except ImportError:
        pass
+_logger = logging.getLogger("theano.tensor.subtensor")
 # Do a lazy import of the sparse module
 sparse_module_ref = None
@@ -336,9 +335,9 @@ class Subtensor(Op):
                        theano.tensor.wscalar, theano.tensor.bscalar]
        invalid_tensor_types = [theano.tensor.fscalar, theano.tensor.dscalar,
                                theano.tensor.cscalar, theano.tensor.zscalar]
-        if (isinstance(entry, gof.Variable)
+        if (isinstance(entry, gof.Variable) and
-                and (entry.type in invalid_scal_types
+            (entry.type in invalid_scal_types or
-                     or entry.type in invalid_tensor_types)):
+             entry.type in invalid_tensor_types)):
            raise TypeError("Expected an integer")
        if isinstance(entry, gof.Variable) and entry.type in scal_types:
@@ -346,13 +345,13 @@ class Subtensor(Op):
        elif isinstance(entry, gof.Type) and entry in scal_types:
            return entry
-        if (isinstance(entry, gof.Variable)
+        if (isinstance(entry, gof.Variable) and
-                and entry.type in tensor_types
+                entry.type in tensor_types and
-                and numpy.all(entry.type.broadcastable)):
+                numpy.all(entry.type.broadcastable)):
            return scal.get_scalar_type(entry.type.dtype)
-        elif (isinstance(entry, gof.Type)
+        elif (isinstance(entry, gof.Type) and
-                and entry in tensor_types
+              entry in tensor_types and
-                and numpy.all(entry.broadcastable)):
+              numpy.all(entry.broadcastable)):
            return scal.get_scalar_type(entry.dtype)
        elif slice_ok and isinstance(entry, slice):
            a = entry.start
@@ -425,8 +424,9 @@ class Subtensor(Op):
                             conv(val.step))
            else:
                try:
-                    return get_scalar_constant_value(val,
+                    return get_scalar_constant_value(
-                            only_process_constants=only_process_constants)
+                        val,
+                        only_process_constants=only_process_constants)
                except theano.tensor.NotScalarConstantError:
                    if allow_partial:
                        return val
@@ -477,8 +477,8 @@ class Subtensor(Op):
                    % (input.type, expected_type))
        # infer the broadcasting pattern
-        padded = (self.get_constant_idx((None,)+inputs, allow_partial=True)
+        padded = (self.get_constant_idx((None,) + inputs, allow_partial=True) +
-                  + [slice(None, None, None)] * (x.type.ndim - len(idx_list)))
+                  [slice(None, None, None)] * (x.type.ndim - len(idx_list)))
        broadcastable = []
        for i, (p, bc) in enumerate(izip(padded, x.type.broadcastable)):
            if isinstance(p, slice):
@@ -528,9 +528,9 @@ class Subtensor(Op):
            if isinstance(idx, slice):
                # If it is the default (None, None, None) slice, or a variant,
                # the shape will be xl
-                if ((idx.start in [None, 0])
+                if ((idx.start in [None, 0]) and
-                    and (idx.stop in [None, sys.maxsize])
+                        (idx.stop in [None, sys.maxsize]) and
-                    and (idx.step is None or idx.step == 1)):
+                        (idx.step is None or idx.step == 1)):
                    outshp.append(xl)
                else:
                    cnf = get_canonical_form_slice(idx, xl)[0]
@@ -556,8 +556,7 @@ class Subtensor(Op):
            first = x.zeros_like().astype(theano.config.floatX)
        else:
            first = IncSubtensor(self.idx_list)(x.zeros_like(), gz, *rest)
-        return ([first]
+        return ([first] + [DisconnectedType()()] * len(rest))
-                + [DisconnectedType()()] * len(rest))
    def connection_pattern(self, node):
@@ -1034,8 +1033,7 @@ def inc_subtensor(x, y, inplace=False, set_instead_of_inc=False,
    dim_offset = x.ndim - y.ndim
    for dim in xrange(y.ndim):
-        if (x.broadcastable[dim + dim_offset]
+        if (x.broadcastable[dim + dim_offset] and not y.broadcastable[dim]):
-                and not y.broadcastable[dim]):
            # It is acceptable to try to increment a subtensor with a
            # broadcastable dim with a tensor that is not broadcastable
            # on that dimension. However, its length must then be 1.
@@ -2133,9 +2131,9 @@ class AdvancedIncSubtensor(Op):
        return hash((type(self), self.inplace, self.set_instead_of_inc))
    def __eq__(self, other):
-        return (type(self) == type(other)
+        return (type(self) == type(other) and
-                and self.inplace == other.inplace
+                self.inplace == other.inplace and
-                and self.set_instead_of_inc == other.set_instead_of_inc)
+                self.set_instead_of_inc == other.set_instead_of_inc)
    def __str__(self):
        return "%s{%s, %s}" % (self.__class__.__name__,

--- a/theano/tensor/utils.py
+++ b/theano/tensor/utils.py
@@ -79,11 +79,11 @@ def shape_of_variables(fgraph, input_shapes):
    if not hasattr(fgraph, 'shape_feature'):
        fgraph.attach_feature(theano.tensor.opt.ShapeFeature())
-    input_dims  = [dimension for inp in fgraph.inputs
+    input_dims = [dimension for inp in fgraph.inputs
-                             for dimension in fgraph.shape_feature.shape_of[inp]]
+                  for dimension in fgraph.shape_feature.shape_of[inp]]
    output_dims = [dimension for shape in fgraph.shape_feature.shape_of.values()
-                             for dimension in shape]
+                   for dimension in shape]
    compute_shapes = theano.function(input_dims, output_dims)
@@ -93,8 +93,8 @@ def shape_of_variables(fgraph, input_shapes):
            " interface changed. Now by default, it clones the graph it receives."
            " To have the old behavior, give it this new parameter `clone=False`.")
-    numeric_input_dims  = [dim for inp in fgraph.inputs
+    numeric_input_dims = [dim for inp in fgraph.inputs
-                               for dim in input_shapes[inp]]
+                          for dim in input_shapes[inp]]
    numeric_output_dims = compute_shapes(*numeric_input_dims)
    sym_to_num_dict = dict(izip(output_dims, numeric_output_dims))

--- a/theano/tensor/var.py
+++ b/theano/tensor/var.py
 import copy
-import pdb
-import sys
 import traceback as tb
 import warnings
@@ -41,9 +39,9 @@ class _tensor_py_operators:
    # CASTS
    # REMOVED THESE BECAUSE PYTHON appears to require __int__ to return
    # an int. -JB 20081112
-    #def __int__(self): return convert_to_int32(self)
+    # def __int__(self): return convert_to_int32(self)
-    #def __float__(self): return convert_to_float64(self)
+    # def __float__(self): return convert_to_float64(self)
-    #def __complex__(self): return convert_to_complex128(self)
+    # def __complex__(self): return convert_to_complex128(self)
    # COMPARISONS
    _is_nonzero = True
@@ -68,7 +66,6 @@ class _tensor_py_operators:
        rval._is_nonzero = False
        return rval
    def __nonzero__(self):
        # Python 2.x
        return self.__bool__()
@@ -215,7 +212,7 @@ class _tensor_py_operators:
    # DO NOT USE THESE BECAUSE INPLACE OPS SHOULD BE INSERTED
    # BY OPTIMIZATIONS ONLY
-    ## ARITHMETIC - INPLACE
+    # ARITHMETIC - INPLACE
    # def __iadd__(self, other):
    #    return _add_inplace(self, other)
    # def __isub__(self, other):
@@ -642,7 +639,8 @@ class TensorVariable(_tensor_py_operators, Variable):
            elif config.warn_float64 == "raise":
                raise Exception(msg)
            elif config.warn_float64 == 'pdb':
-                import pdb; pdb.set_trace()
+                import pdb
+                pdb.set_trace()
 TensorType.Variable = TensorVariable
@@ -744,8 +742,8 @@ class TensorConstant(_tensor_py_operators, Constant):
    def __init__(self, type, data, name=None):
        Constant.__init__(self, type, data, name)
        if (isinstance(data, numpy.ndarray) and
-            data.ndim > 0 and
+                data.ndim > 0 and
-            len(numpy.unique(data)) == 1):
+                len(numpy.unique(data)) == 1):
            self.tag.unique_value = numpy.unique(data)[0]
        else:
            self.tag.unique_value = None

--- a/theano/tensor/xlogx.py
+++ b/theano/tensor/xlogx.py
@@ -13,12 +13,15 @@ class XlogX(scalar.UnaryScalarOp):
        if x == 0.0:
            return 0.0
        return x * numpy.log(x)
    def impl(self, x):
        return XlogX.st_impl(x)
    def grad(self, inputs, grads):
        x, = inputs
        gz, = grads
        return [gz * (1 + scalar.log(x))]
    def c_code(self, node, name, inputs, outputs, sub):
        x, = inputs
        z, = outputs
@@ -28,7 +31,8 @@ class XlogX(scalar.UnaryScalarOp):
                ? 0.0
                : %(x)s * log(%(x)s);""" % locals()
        raise NotImplementedError('only floatingpoint is implemented')
-scalar_xlogx  = XlogX(scalar.upgrade_to_float, name='scalar_xlogx')
+scalar_xlogx = XlogX(scalar.upgrade_to_float, name='scalar_xlogx')
 xlogx = Elemwise(scalar_xlogx, name='xlogx')
@@ -41,12 +45,15 @@ class XlogY0(scalar.BinaryScalarOp):
        if x == 0.0:
            return 0.0
        return x * numpy.log(y)
    def impl(self, x, y):
        return XlogY0.st_impl(x, y)
    def grad(self, inputs, grads):
        x, y = inputs
        gz, = grads
        return [gz * scalar.log(y), gz * x / y]
    def c_code(self, node, name, inputs, outputs, sub):
        x, y = inputs
        z, = outputs
@@ -56,5 +63,6 @@ class XlogY0(scalar.BinaryScalarOp):
                ? 0.0
                : %(x)s * log(%(y)s);""" % locals()
        raise NotImplementedError('only floatingpoint is implemented')
-scalar_xlogy0  = XlogY0(scalar.upgrade_to_float, name='scalar_xlogy0')
+scalar_xlogy0 = XlogY0(scalar.upgrade_to_float, name='scalar_xlogy0')
 xlogy0 = Elemwise(scalar_xlogy0, name='xlogy0')
--- a/theano/tests/test_flake8.py
+++ b/theano/tests/test_flake8.py
@@ -57,30 +57,17 @@ whitelist_flake8 = [
    "typed_list/tests/test_type.py",
    "typed_list/tests/test_opt.py",
    "typed_list/tests/test_basic.py",
-    "tensor/var.py",
-    "tensor/sharedvar.py",
-    "tensor/inplace.py",
-    "tensor/slinalg.py",
-    "tensor/shared_randomstreams.py",
-    "tensor/subtensor.py",
-    "tensor/elemwise.py",
-    "tensor/xlogx.py",
    "tensor/blas_headers.py",
-    "tensor/utils.py",
    "tensor/type.py",
    "tensor/fourier.py",
-    "tensor/sort.py",
    "tensor/__init__.py",
    "tensor/opt_uncanonicalize.py",
-    "tensor/opt.py",
    "tensor/blas.py",
    "tensor/extra_ops.py",
    "tensor/nlinalg.py",
    "tensor/blas_c.py",
    "tensor/elemwise_cgen.py",
-    "tensor/raw_random.py",
    "tensor/blas_scipy.py",
-    "tensor/basic.py",
    "tensor/tests/test_subtensor.py",
    "tensor/tests/test_utils.py",
    "tensor/tests/test_nlinalg.py",