Commit 0a89437c authored by Frédéric Bastien, committed by GitHub

Merge pull request #5185 from lamblin/fix_debugmode

Fix remaining tests in debugmode
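A recurring pattern in this fix is to keep DebugMode's checks enabled while tolerating NaNs that optimizations legitimately remove, by temporarily swapping TensorType.values_eq_approx (see the function_remove_nan helper added near the end of this diff). A minimal sketch of that pattern, assuming Theano is installed; the helper name call_ignoring_nan is hypothetical:

from theano.tensor import TensorType
from theano.tensor.type import values_eq_approx_remove_nan

def call_ignoring_nan(f, *args, **kwargs):
    # Hypothetical helper sketching the pattern used below: make
    # DebugMode's value comparison treat NaNs as equal while f runs,
    # then restore the original comparator even if f raises.
    old_values_eq_approx = staticmethod(TensorType.values_eq_approx)
    TensorType.values_eq_approx = staticmethod(values_eq_approx_remove_nan)
    try:
        return f(*args, **kwargs)
    finally:
        TensorType.values_eq_approx = old_values_eq_approx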
from __future__ import absolute_import, print_function, division
# PENDING REWRITE OF tensor_opt.py
import copy
import logging
import pickle
import os
import sys
import time
@@ -13,8 +11,6 @@ import numpy
from six.moves import xrange
from nose.plugins.skip import SkipTest
from nose.tools import assert_raises, assert_true
from numpy.testing import dec
from numpy.testing.noseclasses import KnownFailureTest
import theano
import theano.scalar as scal
@@ -43,15 +39,14 @@ from theano.tensor.opt import (
Assert,
MakeVector,
make_vector,
local_expm1,
local_canonicalize_alloc
)
from theano import tensor
from theano import tensor as T
from theano.tensor import scalar, iscalar, lscalar, fscalar, dscalar
from theano.tensor import vector, ivector, lvector, fvector, dvector
from theano.tensor import matrix, imatrix, lmatrix, fmatrix, dmatrix, tensor3
from theano.tensor import scalars, vectors, matrices, fmatrices, dmatrices
from theano.tensor import vector, lvector, fvector, dvector
from theano.tensor import matrix, fmatrix, dmatrix, tensor3
from theano.tensor import vectors, matrices, fmatrices, dmatrices
from theano.tensor import (
AdvancedSubtensor,
AdvancedSubtensor1,
@@ -67,9 +62,8 @@ from theano.tensor import (
tile
)
from theano.tensor.elemwise import DimShuffle
from theano.tensor.type import values_eq_approx_remove_nan
from theano.tests import unittest_tools as utt
from theano.compile.mode import optdb
from theano.compile import Mode
from theano.gof.opt import check_stack_trace, out2in
from nose.plugins.attrib import attr
@@ -78,7 +72,6 @@ if mode_opt == 'FAST_COMPILE':
mode_opt = 'FAST_RUN'
mode_opt = theano.compile.mode.get_mode(mode_opt)
ds = lambda x, y: DimShuffle(x.type.broadcastable, y)(x)
dimshuffle_lift = out2in(local_dimshuffle_lift)
_optimizer_stabilize = gof.Query(include=['fast_run'])
@@ -93,6 +86,10 @@ _optimizer_fast_run = gof.Query(include=['fast_run'])
_optimizer_fast_run = compile.optdb.query(_optimizer_fast_run)
def ds(x, y):
return DimShuffle(x.type.broadcastable, y)(x)
def optimize(g, level='fast_run'):
if level == 'fast_run':
_optimizer_fast_run.optimize(g)
@@ -137,8 +134,8 @@ class test_dimshuffle_lift(unittest.TestCase):
x, y, z = inputs()
e = ds(ds(ds(x, (0, 'x', 1)), (2, 0, 'x', 1)), (1, 0))
g = FunctionGraph([x], [e])
self.assertTrue(str(g) == "[InplaceDimShuffle{1,0}(InplaceDimShuffle{2,0,x,1}"
"(InplaceDimShuffle{0,x,1}(x)))]",
self.assertTrue(str(g) == ("[InplaceDimShuffle{1,0}(InplaceDimShuffle{2,0,x,1}"
"(InplaceDimShuffle{0,x,1}(x)))]"),
str(g))
dimshuffle_lift.optimize(g)
self.assertTrue(str(g) == "[x]", str(g))
@@ -258,7 +255,7 @@ def test_local_useless_dimshuffle_in_reshape():
h = FunctionGraph([mat], [reshape_dimshuffle_mat2])
str_h = str(h)
useless_dimshuffle_in_reshape.optimize(h)
assert_true(str(h) == str(h))
assert_true(str(h) == str_h)
def test_add_canonizer_problem0():
@@ -268,6 +265,7 @@ def test_add_canonizer_problem0():
r = segment_labels * 5
f = function([label], r)
f(3)
class test_greedy_distribute(unittest.TestCase):
@@ -299,8 +297,8 @@ class test_greedy_distribute(unittest.TestCase):
eps = scalar('eps')
s = scalar('s')
#r = theano.tensor.mul(theano.tensor.fill(x, 2.*a), x/a , (y+z) , a)
#r = theano.tensor.mul((x/a+y) , a, z)
# r = theano.tensor.mul(theano.tensor.fill(x, 2.*a), x/a , (y+z) , a)
# r = theano.tensor.mul((x/a+y) , a, z)
r = tensor.mul(s - 1,
eps + x / s,
eps + y / s,
@@ -325,16 +323,16 @@ class test_canonize(unittest.TestCase):
def test_muldiv(self):
x, y, z = matrices('xyz')
a, b, c, d = matrices('abcd')
# e = (2.0 * x) / (2.0 * y)
# e = (2.0 * x) / (4.0 * y)
# e = x / (y / z)
# e = (x * y) / x
# e = (x / y) * (y / z) * (z / x)
# e = (a / b) * (b / c) * (c / d)
# e = (a * b) / (b * c) / (c * d)
# e = 2 * x / 2
# e = x / y / x
# e = (x / x) * (y / y)
# e = (2.0 * x) / (2.0 * y)
# e = (2.0 * x) / (4.0 * y)
# e = x / (y / z)
# e = (x * y) / x
# e = (x / y) * (y / z) * (z / x)
# e = (a / b) * (b / c) * (c / d)
# e = (a * b) / (b * c) / (c * d)
# e = 2 * x / 2
# e = x / y / x
# e = (x / x) * (y / y)
e = (-1 * x) / y / (-2 * z)
g = FunctionGraph([x, y, z, a, b, c, d], [e])
print(pprint(g.outputs[0]))
@@ -354,60 +352,60 @@ class test_canonize(unittest.TestCase):
shp = (5, 5)
fx, fy, fz = fmatrices('xyz')
dx, dy, dz = dmatrices('xyz')
fv = fvector('r').dimshuffle('x', 0)
dv = dvector('s').dimshuffle('x', 0)
# fv = fvector('r').dimshuffle('x', 0)
# dv = dvector('s').dimshuffle('x', 0)
fxv = theano._asarray(numpy.random.rand(*shp), dtype='float32')
fyv = theano._asarray(numpy.random.rand(*shp), dtype='float32')
fzv = theano._asarray(numpy.random.rand(*shp), dtype='float32')
fvv = theano._asarray(numpy.random.rand(shp[0]), dtype='float32').reshape(1, shp[0])
dxv = theano._asarray(numpy.random.rand(*shp), dtype='float64')
dyv = theano._asarray(numpy.random.rand(*shp), dtype='float64')
dzv = theano._asarray(numpy.random.rand(*shp), dtype='float64')
dvv = theano._asarray(numpy.random.rand(shp[0]), dtype='float64').reshape(1, shp[0])
# fvv = theano._asarray(numpy.random.rand(shp[0]), dtype='float32').reshape(1, shp[0])
# dxv = theano._asarray(numpy.random.rand(*shp), dtype='float64')
# dyv = theano._asarray(numpy.random.rand(*shp), dtype='float64')
# dzv = theano._asarray(numpy.random.rand(*shp), dtype='float64')
# dvv = theano._asarray(numpy.random.rand(shp[0]), dtype='float64').reshape(1, shp[0])
cases = [
(fx + fy, (fx, fy), (fxv, fyv), 1, 'float32'),
(fx * fy, (fx, fy), (fxv, fyv), 1, 'float32'),
# (fx+fy+fz,(fx,fy,fz),(fxv,fyv,fzv),1,'float32'),
# (dx+dy+dz,(dx,dy,dz),(dxv,dyv,dzv),1,'float64'),
# (fx*fy*fz,(fx,fy,fz),(fxv,fyv,fzv),1,'float32'),
# (dx*dy*dz,(dx,dy,dz),(dxv,dyv,dzv),1,'float64'),
# (fx*fy*(fx+fy+fz),(fx,fy,fz),(fxv,fyv,fzv),2,'float32'),
# (dx*dy*(dx+dy+dz),(dx,dy,dz),(dxv,dyv,dzv),2,'float64'),
# (fx*fy*(fx+fy+dz),(fx,fy,dz),(dxv,dyv,dzv),2,'float64'),#check mixed type add
# (dz*fy*(fx+fy),(fx,fy,dz),(dxv,dyv,dzv),2,'float64'),#check mixed type mul
# (fx+fy+fz,(fx,fy,fz),(fxv,fyv,fzv),1,'float32'),
# (dx+dy+dz,(dx,dy,dz),(dxv,dyv,dzv),1,'float64'),
# (fx*fy*fz,(fx,fy,fz),(fxv,fyv,fzv),1,'float32'),
# (dx*dy*dz,(dx,dy,dz),(dxv,dyv,dzv),1,'float64'),
# (fx*fy*(fx+fy+fz),(fx,fy,fz),(fxv,fyv,fzv),2,'float32'),
# (dx*dy*(dx+dy+dz),(dx,dy,dz),(dxv,dyv,dzv),2,'float64'),
# (fx*fy*(fx+fy+dz),(fx,fy,dz),(dxv,dyv,dzv),2,'float64'), # check mixed type add
# (dz*fy*(fx+fy),(fx,fy,dz),(dxv,dyv,dzv),2,'float64'), # check mixed type mul
# check with dimshuffle of constant
(fx + fy + fz + 2, (fx, fy, fz), (fxv, fyv, fzv), 1, {'custom':
'float32', 'numpy+floatX': config.floatX, 'numpy': 'float64'}),
(fx * fy * fz * 2, (fx, fy, fz), (fxv, fyv, fzv), 1, {'custom':
'float32', 'numpy+floatX': config.floatX, 'numpy': 'float64'}),
# (2+fx+fy+fz,(fx,fy,fz),(fxv,fyv,fzv),1,'float32'),
# (2*fx*fy*fz,(fx,fy,fz),(fxv,fyv,fzv),1,'float32'),
(2 + fx + fy + fz + 2, (fx, fy, fz), (fxv, fyv, fzv), 1, {
'custom': 'float32', 'numpy+floatX': config.floatX, 'numpy': 'float64'}),
(2 * fx * fy * fz * 2, (fx, fy, fz), (fxv, fyv, fzv), 1, {
'custom': 'float32', 'numpy+floatX': config.floatX, 'numpy': 'float64'}),
# (fx*fy*2*(fx+fy+fz),(fx,fy,fz),(fxv,fyv,fzv),2,'float32'),
# (fx*fy*(2+fx+fy+fz),(fx,fy,fz),(fxv,fyv,fzv),2,'float32'),
(fx * fy * 2 * (fx + fy + fz+2), (fx, fy, fz), (fxv, fyv, fzv), 2, {
'custom': 'float32', 'numpy+floatX': config.floatX, 'numpy': 'float64'}),
(fx + fy + fz + 2, (fx, fy, fz), (fxv, fyv, fzv), 1,
{'custom': 'float32', 'numpy+floatX': config.floatX, 'numpy': 'float64'}),
(fx * fy * fz * 2, (fx, fy, fz), (fxv, fyv, fzv), 1,
{'custom': 'float32', 'numpy+floatX': config.floatX, 'numpy': 'float64'}),
# (2+fx+fy+fz,(fx,fy,fz),(fxv,fyv,fzv),1,'float32'),
# (2*fx*fy*fz,(fx,fy,fz),(fxv,fyv,fzv),1,'float32'),
(2 + fx + fy + fz + 2, (fx, fy, fz), (fxv, fyv, fzv), 1,
{'custom': 'float32', 'numpy+floatX': config.floatX, 'numpy': 'float64'}),
(2 * fx * fy * fz * 2, (fx, fy, fz), (fxv, fyv, fzv), 1,
{'custom': 'float32', 'numpy+floatX': config.floatX, 'numpy': 'float64'}),
# (fx*fy*2*(fx+fy+fz),(fx,fy,fz),(fxv,fyv,fzv),2,'float32'),
# (fx*fy*(2+fx+fy+fz),(fx,fy,fz),(fxv,fyv,fzv),2,'float32'),
(fx * fy * 2 * (fx + fy + fz + 2), (fx, fy, fz), (fxv, fyv, fzv), 2,
{'custom': 'float32', 'numpy+floatX': config.floatX, 'numpy': 'float64'}),
# check with broadcast of row
# (fx+fy+fz+fv,(fx,fy,fz,fv),(fxv,fyv,fzv,fvv),1,'float32'),
# (fx*fy*fz*fv,(fx,fy,fz,fv),(fxv,fyv,fzv,fvv),1,'float32'),
# (fv+fx+fy+fz,(fx,fy,fz,fv),(fxv,fyv,fzv,fvv),1,'float32'),
# (fv*fx*fy*fz,(fx,fy,fz,fv),(fxv,fyv,fzv,fvv),1,'float32'),
# (fx*fy*fv*(fx+fy+fz),(fx,fy,fz,fv),(fxv,fyv,fzv,fvv),2,'float32'),
# (fx*fy*(fv+fx+fy+fz),(fx,fy,fz,fv),(fxv,fyv,fzv,fvv),2,'float32'),
# (fx*fy*fv*(fv+fx+fy+fz),(fx,fy,fz,fv),(fxv,fyv,fzv,fvv),2,'float32'),
# (dx+dy+dz+dv,(dx,dy,dz,dv),(dxv,dyv,dzv,dvv),1,'float64'),
# (dx*dy*dz*dv,(dx,dy,dz,dv),(dxv,dyv,dzv,dvv),1,'float64'),
# (dv+dx+dy+dz,(dx,dy,dz,dv),(dxv,dyv,dzv,dvv),1,'float64'),
# (dv*dx*dy*dz,(dx,dy,dz,dv),(dxv,dyv,dzv,dvv),1,'float64'),
# (dx*dy*dv*(dx+dy+dz),(dx,dy,dz,dv),(dxv,dyv,dzv,dvv),2,'float64'),
# (dx*dy*(dv+dx+dy+dz),(dx,dy,dz,dv),(dxv,dyv,dzv,dvv),2,'float64'),
# (dx*dy*dv*(dv+dx+dy+dz),(dx,dy,dz,dv),(dxv,dyv,dzv,dvv),2,'float64'),
# (fx+fy+fz+fv,(fx,fy,fz,fv),(fxv,fyv,fzv,fvv),1,'float32'),
# (fx*fy*fz*fv,(fx,fy,fz,fv),(fxv,fyv,fzv,fvv),1,'float32'),
# (fv+fx+fy+fz,(fx,fy,fz,fv),(fxv,fyv,fzv,fvv),1,'float32'),
# (fv*fx*fy*fz,(fx,fy,fz,fv),(fxv,fyv,fzv,fvv),1,'float32'),
# (fx*fy*fv*(fx+fy+fz),(fx,fy,fz,fv),(fxv,fyv,fzv,fvv),2,'float32'),
# (fx*fy*(fv+fx+fy+fz),(fx,fy,fz,fv),(fxv,fyv,fzv,fvv),2,'float32'),
# (fx*fy*fv*(fv+fx+fy+fz),(fx,fy,fz,fv),(fxv,fyv,fzv,fvv),2,'float32'),
# (dx+dy+dz+dv,(dx,dy,dz,dv),(dxv,dyv,dzv,dvv),1,'float64'),
# (dx*dy*dz*dv,(dx,dy,dz,dv),(dxv,dyv,dzv,dvv),1,'float64'),
# (dv+dx+dy+dz,(dx,dy,dz,dv),(dxv,dyv,dzv,dvv),1,'float64'),
# (dv*dx*dy*dz,(dx,dy,dz,dv),(dxv,dyv,dzv,dvv),1,'float64'),
# (dx*dy*dv*(dx+dy+dz),(dx,dy,dz,dv),(dxv,dyv,dzv,dvv),2,'float64'),
# (dx*dy*(dv+dx+dy+dz),(dx,dy,dz,dv),(dxv,dyv,dzv,dvv),2,'float64'),
# (dx*dy*dv*(dv+dx+dy+dz),(dx,dy,dz,dv),(dxv,dyv,dzv,dvv),2,'float64'),
] # [10:11]
# print cases
# print cases
# We must be sure that the Canonizer is working, but that we don't have other
# optimisations that could hide bugs in the Canonizer, such as local_elemwise_fusion
@@ -456,61 +454,38 @@ class test_canonize(unittest.TestCase):
(dx + dy + dz, (dx, dy, dz), (dxv, dyv, dzv), 1, 'float64'),
(fx * fy * fz, (fx, fy, fz), (fxv, fyv, fzv), 1, 'float32'),
(dx * dy * dz, (dx, dy, dz), (dxv, dyv, dzv), 1, 'float64'),
(fx * fy * (fx + fy + fz), (fx, fy, fz), (fxv, fyv,
fzv), 2, 'float32'),
(dx * dy * (dx + dy + dz), (dx, dy, dz), (dxv, dyv,
dzv), 2, 'float64'),
(fx * fy * (fx + fy + dz), (fx, fy, dz), (dxv, dyv, dzv), 2,
'float64'), # check mixed type add
(dz * fy * (fx + fy), (fx, fy, dz), (dxv, dyv, dzv), 2,
'float64'), # check mixed type mul
(fx * fy * (fx + fy + fz), (fx, fy, fz), (fxv, fyv, fzv), 2, 'float32'),
(dx * dy * (dx + dy + dz), (dx, dy, dz), (dxv, dyv, dzv), 2, 'float64'),
(fx * fy * (fx + fy + dz), (fx, fy, dz), (dxv, dyv, dzv), 2, 'float64'), # check mixed type add
(dz * fy * (fx + fy), (fx, fy, dz), (dxv, dyv, dzv), 2, 'float64'), # check mixed type mul
# check with dimshuffle of constant
(fx + fy + fz + 2, (fx, fy, fz), (fxv, fyv, fzv), 1, 'float32'),
(fx * fy * fz * 2, (fx, fy, fz), (fxv, fyv, fzv), 1, 'float32'),
(2 + fx + fy + fz, (fx, fy, fz), (fxv, fyv, fzv), 1, 'float32'),
(2 * fx * fy * fz, (fx, fy, fz), (fxv, fyv, fzv), 1, 'float32'),
(2 + fx + fy + fz + 2, (fx, fy, fz), (fxv, fyv,
fzv), 1, 'float32'),
(2 * fx * fy * fz * 2, (fx, fy, fz), (fxv, fyv,
fzv), 1, 'float32'),
(fx * fy * 2 * (fx+fy+fz), (fx, fy, fz), (fxv, fyv,
fzv), 2, 'float32'),
(fx*fy*(2+fx+fy+fz), (fx, fy, fz), (fxv, fyv, fzv), 2, 'float32'),
(fx*fy*2*(fx+fy+fz+2), (fx, fy, fz), (fxv, fyv,
fzv), 2, 'float32'),
(2 + fx + fy + fz + 2, (fx, fy, fz), (fxv, fyv, fzv), 1, 'float32'),
(2 * fx * fy * fz * 2, (fx, fy, fz), (fxv, fyv, fzv), 1, 'float32'),
(fx * fy * 2 * (fx + fy + fz), (fx, fy, fz), (fxv, fyv, fzv), 2, 'float32'),
(fx * fy * (2 + fx + fy + fz), (fx, fy, fz), (fxv, fyv, fzv), 2, 'float32'),
(fx * fy * 2 * (fx + fy + fz + 2), (fx, fy, fz), (fxv, fyv, fzv), 2, 'float32'),
# check with broadcast of row
(fx+fy+fz+fv, (fx, fy, fz, fv), (fxv, fyv, fzv,
fvv), 1, 'float32'),
(fx*fy*fz*fv, (fx, fy, fz, fv), (fxv, fyv, fzv,
fvv), 1, 'float32'),
(fv+fx+fy+fz, (fx, fy, fz, fv), (fxv, fyv, fzv,
fvv), 1, 'float32'),
(fv*fx*fy*fz, (fx, fy, fz, fv), (fxv, fyv, fzv,
fvv), 1, 'float32'),
(fx*fy*fv*(fx+fy+fz), (fx, fy, fz, fv), (fxv, fyv,
fzv, fvv), 2, 'float32'),
(fx*fy*(fv+fx+fy+fz), (fx, fy, fz, fv), (fxv, fyv,
fzv, fvv), 2, 'float32'),
(fx*fy*fv*(fv+fx+fy+fz), (fx, fy, fz, fv), (fxv, fyv, fzv,
fvv), 2, 'float32'),
(dx+dy+dz+dv, (dx, dy, dz, dv), (dxv, dyv, dzv,
dvv), 1, 'float64'),
(dx*dy*dz*dv, (dx, dy, dz, dv), (dxv, dyv, dzv,
dvv), 1, 'float64'),
(dv+dx+dy+dz, (dx, dy, dz, dv), (dxv, dyv, dzv,
dvv), 1, 'float64'),
(dv*dx*dy*dz, (dx, dy, dz, dv), (dxv, dyv, dzv,
dvv), 1, 'float64'),
(dx*dy*dv*(dx+dy+dz), (dx, dy, dz, dv), (dxv, dyv,
dzv, dvv), 2, 'float64'),
(dx*dy*(dv+dx+dy+dz), (dx, dy, dz, dv), (dxv, dyv,
dzv, dvv), 2, 'float64'),
(dx*dy*dv*(dv+dx+dy+dz), (dx, dy, dz, dv), (dxv, dyv, dzv,
dvv), 2, 'float64'),
(fx + fy + fz + fv, (fx, fy, fz, fv), (fxv, fyv, fzv, fvv), 1, 'float32'),
(fx * fy * fz * fv, (fx, fy, fz, fv), (fxv, fyv, fzv, fvv), 1, 'float32'),
(fv + fx + fy + fz, (fx, fy, fz, fv), (fxv, fyv, fzv, fvv), 1, 'float32'),
(fv * fx * fy * fz, (fx, fy, fz, fv), (fxv, fyv, fzv, fvv), 1, 'float32'),
(fx * fy * fv * (fx + fy + fz), (fx, fy, fz, fv), (fxv, fyv, fzv, fvv), 2, 'float32'),
(fx * fy * (fv + fx + fy + fz), (fx, fy, fz, fv), (fxv, fyv, fzv, fvv), 2, 'float32'),
(fx * fy * fv * (fv + fx + fy + fz), (fx, fy, fz, fv), (fxv, fyv, fzv, fvv), 2, 'float32'),
(dx + dy + dz + dv, (dx, dy, dz, dv), (dxv, dyv, dzv, dvv), 1, 'float64'),
(dx * dy * dz * dv, (dx, dy, dz, dv), (dxv, dyv, dzv, dvv), 1, 'float64'),
(dv + dx + dy + dz, (dx, dy, dz, dv), (dxv, dyv, dzv, dvv), 1, 'float64'),
(dv * dx * dy * dz, (dx, dy, dz, dv), (dxv, dyv, dzv, dvv), 1, 'float64'),
(dx * dy * dv * (dx + dy + dz), (dx, dy, dz, dv), (dxv, dyv, dzv, dvv), 2, 'float64'),
(dx * dy * (dv + dx + dy + dz), (dx, dy, dz, dv), (dxv, dyv, dzv, dvv), 2, 'float64'),
(dx * dy * dv * (dv + dx + dy + dz), (dx, dy, dz, dv), (dxv, dyv, dzv, dvv), 2, 'float64'),
] # [10:11]
# print cases
# print cases
# We must be sure that the Canonizer is working, but that we don't have other
# optimisations that could hide bugs in the Canonizer, such as local_elemwise_fusion
@@ -567,11 +542,11 @@ class test_canonize(unittest.TestCase):
'local_elemwise_fusion')
mode = mode.__class__(linker=mode.linker, optimizer=opt)
# test x / x -> 1
for id, (g, sym_inputs, val_inputs, out_dtype) in enumerate([(fx/fx, [fx], [fxv], 'float32'),
(dx/dx, [dx], [dxv], 'float64'),
(fv/fv, [fv], [fvv], 'float32'),
(dv/dv, [dv], [dvv], 'float64'),
]):
for id, (g, sym_inputs, val_inputs, out_dtype) in enumerate([
(fx / fx, [fx], [fxv], 'float32'),
(dx / dx, [dx], [dxv], 'float64'),
(fv / fv, [fv], [fvv], 'float32'),
(dv / dv, [dv], [dvv], 'float64')]):
f = compile.function(list(sym_inputs), g,
mode=mode)
out = f(*val_inputs)
@@ -590,14 +565,14 @@ class test_canonize(unittest.TestCase):
# test (x * y) / x -> y
for id, (g, sym_inputs, val_inputs, nb_elemwise, out_dtype) in enumerate([
((dx*dy)/dx, [dx, dy], [dxv, dyv], 0, 'float64'),
((fx*fy)/fx, [fx, fy], [fxv, fyv], 0, 'float32'),
((dv*dy)/dv, [dv, dy], [dvv, dyv], 0, 'float64'),
((fv*fy)/fv, [fv, fy], [fvv, fyv], 0, 'float32'),
# must broadcast as their is a dimshuffle in the computation
((dx*dv)/dx, [dx, dv], [dxv, dvv], 1, 'float64'),
((dx * dy) / dx, [dx, dy], [dxv, dyv], 0, 'float64'),
((fx * fy) / fx, [fx, fy], [fxv, fyv], 0, 'float32'),
((dv * dy) / dv, [dv, dy], [dvv, dyv], 0, 'float64'),
((fv * fy) / fv, [fv, fy], [fvv, fyv], 0, 'float32'),
# must broadcast as there is a dimshuffle in the computation
((dx * dv) / dx, [dx, dv], [dxv, dvv], 1, 'float64'),
# topo: [Elemwise{second,no_inplace}(x, <TensorType(float64, row)>)]
((fx*fv)/fx, [fx, fv], [fxv, fvv], 1, 'float32')
((fx * fv) / fx, [fx, fv], [fxv, fvv], 1, 'float32')
# topo: [Elemwise{second,no_inplace}(x, <TensorType(float32, row)>)]
]):
f = compile.function(list(sym_inputs), g,
@@ -613,19 +588,17 @@ class test_canonize(unittest.TestCase):
# test x / y / x -> 1 / y
for id, (g, sym_inputs, val_inputs, nb_elemwise, out_dtype) in enumerate([
((dx/dy)/dx, [dx, dy], [dxv, dyv], 1, 'float64'),
((fx/fy)/fx, [fx, fy], [fxv, fyv], 1, 'float32'),
((dv/dy)/dv, [dv, dy], [dvv, dyv], 1, 'float64'),
((fv/fy)/fv, [fv, fy], [fvv, fyv], 1, 'float32'),
((dx / dy) / dx, [dx, dy], [dxv, dyv], 1, 'float64'),
((fx / fy) / fx, [fx, fy], [fxv, fyv], 1, 'float32'),
((dv / dy) / dv, [dv, dy], [dvv, dyv], 1, 'float64'),
((fv / fy) / fv, [fv, fy], [fvv, fyv], 1, 'float32'),
# must broadcast as there is a dimshuffle in the computation
((dx/dv)/dx, [dx, dv], [dxv, dvv], 1, 'float64'),
# topo: [Shape_i, Shape_i, Elemwise{inv,no_inplace}(<TensorType(float64, row)>), Alloc]
((fx/fv)/fx, [fx, fv], [fxv, fvv], 1, 'float32'),
# topo:[Shape_i, Shape_i, Elemwise{inv,no_inplace}(<TensorType(float32, row)>), Alloc]
((dx / dv) / dx, [dx, dv], [dxv, dvv], 1, 'float64'),
# topo: [Shape_i, Shape_i, Elemwise{inv,no_inplace}(<TensorType(float64, row)>), Alloc]
((fx / fv) / fx, [fx, fv], [fxv, fvv], 1, 'float32'),
# topo: [Shape_i, Shape_i, Elemwise{inv,no_inplace}(<TensorType(float32, row)>), Alloc]
]):
f = compile.function(list(sym_inputs), g,
mode=mode)
f = compile.function(list(sym_inputs), g, mode=mode)
out = f(*val_inputs)
utt.assert_allclose(out, (1 / val_inputs[1]))
topo = f.maker.fgraph.toposort()
@@ -649,58 +622,50 @@ class test_canonize(unittest.TestCase):
((dx / dy) * (dy / dz) * (dz / dv), [dx, dy, dz, dv], [dxv, dyv, dzv, dvv], 'float64'),
((fx / fy) * (fy / fz) * (fz / fv), [fx, fy, fz, fv], [fxv, fyv, fzv, fvv], 'float32'),
]):
f = compile.function(list(sym_inputs), g,
mode=mode)
f = compile.function(list(sym_inputs), g, mode=mode)
out = f(*val_inputs)
utt.assert_allclose(out, (val_inputs[0] / val_inputs[3]))
topo = f.maker.fgraph.toposort()
assert len(topo) == 1
assert isinstance(topo[0].op, (T.Elemwise, ))
assert isinstance(topo[0].op.scalar_op,
theano.scalar.basic.TrueDiv)
assert isinstance(topo[0].op.scalar_op, theano.scalar.basic.TrueDiv)
assert len(topo[0].inputs) == 2
assert(out_dtype == out.dtype)
# test (2.0 * x) / (4.0 * y) -> (0.5 * x) / y
for id, (g, sym_inputs, val_inputs, out_dtype) in enumerate([
(((2.0*dx)/(4.0*dy)), [dx, dy], [dxv, dyv], 'float64'),
(((2.0*fx)/(4.0*fy)), [fx, fy], [fxv, fyv], {'custom': 'float32', 'numpy+floatX': config.floatX, 'numpy': 'float64'}),
(((2.0*dv)/(4.0*dy)), [dv, dy], [dvv, dyv], 'float64'),
(((2.0*fv)/(4.0*fy)), [fv, fy], [fvv, fyv], {'custom': 'float32', 'numpy+floatX': config.floatX, 'numpy': 'float64'}),
(((2.0*dx)/(4.0*dv)), [dx, dv], [dxv, dvv], 'float64'),
(((2.0*fx)/(4.0*fv)), [fx, fv], [fxv, fvv], {'custom': 'float32', 'numpy+floatX': config.floatX, 'numpy': 'float64'}),
(((2.0 * dx) / (4.0 * dy)), [dx, dy], [dxv, dyv], 'float64'),
(((2.0 * fx) / (4.0 * fy)), [fx, fy], [fxv, fyv], {'custom': 'float32', 'numpy+floatX': config.floatX, 'numpy': 'float64'}),
(((2.0 * dv) / (4.0 * dy)), [dv, dy], [dvv, dyv], 'float64'),
(((2.0 * fv) / (4.0 * fy)), [fv, fy], [fvv, fyv], {'custom': 'float32', 'numpy+floatX': config.floatX, 'numpy': 'float64'}),
(((2.0 * dx) / (4.0 * dv)), [dx, dv], [dxv, dvv], 'float64'),
(((2.0 * fx) / (4.0 * fv)), [fx, fv], [fxv, fvv], {'custom': 'float32', 'numpy+floatX': config.floatX, 'numpy': 'float64'}),
]):
if isinstance(out_dtype, dict):
out_dtype = out_dtype[config.cast_policy]
f = compile.function(list(sym_inputs), g,
mode=mode)
f = compile.function(list(sym_inputs), g, mode=mode)
out = f(*val_inputs)
utt.assert_allclose(out, (0.5 *
val_inputs[0] / val_inputs[1]))
utt.assert_allclose(out, (0.5 * val_inputs[0] / val_inputs[1]))
topo = f.maker.fgraph.toposort()
assert len(topo) == 2
assert isinstance(topo[0].op, (T.Elemwise, ))
assert isinstance(topo[0].op.scalar_op,
theano.scalar.basic.Mul)
assert isinstance(topo[0].op.scalar_op, theano.scalar.basic.Mul)
assert len(topo[0].inputs) == 2
assert isinstance(topo[1].op, (T.Elemwise, ))
assert isinstance(topo[1].op.scalar_op,
theano.scalar.basic.TrueDiv)
assert isinstance(topo[1].op.scalar_op, theano.scalar.basic.TrueDiv)
assert len(topo[1].inputs) == 2
assert(out_dtype == out.dtype)
# test 2 * x / 2 -> x
for id, (g, sym_inputs, val_inputs, out_dtype) in enumerate([
((2*dx)/2, [dx], [dxv], 'float64'),
((2*fx)/2, [fx], [fxv], {'custom': 'float32', 'numpy+floatX': config.floatX, 'numpy': 'float64'}),
((2*dv)/2, [dv], [dvv], 'float64'),
((2*fv)/2, [fv], [fvv], {'custom': 'float32', 'numpy+floatX': config.floatX, 'numpy': 'float64'}),
((2 * dx) / 2, [dx], [dxv], 'float64'),
((2 * fx) / 2, [fx], [fxv], {'custom': 'float32', 'numpy+floatX': config.floatX, 'numpy': 'float64'}),
((2 * dv) / 2, [dv], [dvv], 'float64'),
((2 * fv) / 2, [fv], [fvv], {'custom': 'float32', 'numpy+floatX': config.floatX, 'numpy': 'float64'}),
]):
if isinstance(out_dtype, dict):
out_dtype = out_dtype[config.cast_policy]
f = compile.function(list(sym_inputs), g,
mode=mode)
f = compile.function(list(sym_inputs), g, mode=mode)
out = f(*val_inputs)
utt.assert_allclose(out, val_inputs[0])
topo = f.maker.fgraph.toposort()
@@ -710,15 +675,14 @@ class test_canonize(unittest.TestCase):
# test x / abs(x) -> sign(x)
for id, (g, sym_inputs, val_inputs, out_dtype) in enumerate([
(dx/abs(dx), [dx], [0.5-dxv], 'float64'),
(fx/abs(fx), [fx], [0.5-fxv], 'float32'),
(dx/abs(dx), [dx], [0.1*dxv], 'float64'),
(fx/abs(fx), [fx], [0.1*fxv], 'float32'),
(dv/abs(dv), [dv], [0.5-dvv], 'float64'),
(fv/abs(fv), [fv], [0.5-fvv], 'float32'),
(dx / abs(dx), [dx], [0.5 - dxv], 'float64'),
(fx / abs(fx), [fx], [0.5 - fxv], 'float32'),
(dx / abs(dx), [dx], [0.1 * dxv], 'float64'),
(fx / abs(fx), [fx], [0.1 * fxv], 'float32'),
(dv / abs(dv), [dv], [0.5 - dvv], 'float64'),
(fv / abs(fv), [fv], [0.5 - fvv], 'float32'),
]):
f = compile.function(list(sym_inputs), g,
mode=mode)
f = compile.function(list(sym_inputs), g, mode=mode)
out = f(*val_inputs)
assert numpy.all(numpy.isfinite(out))
utt.assert_allclose(out, numpy.sign(val_inputs[0]))
@@ -755,7 +719,7 @@ class test_canonize(unittest.TestCase):
"""
x = T.dscalar()
a = T.abs_(x)
# a = T.abs_(x)
if theano.config.mode == 'FAST_COMPILE':
mode = theano.compile.mode.get_mode('FAST_RUN').excluding(
@@ -803,49 +767,43 @@ class test_canonize(unittest.TestCase):
dxv = theano._asarray(numpy.random.rand(*shp), dtype='float32')
dyv = theano._asarray(numpy.random.rand(*shp), dtype='float32')
dzv = theano._asarray(numpy.random.rand(*shp), dtype='float32')
fvv = theano._asarray(numpy.random.rand(shp[0]), dtype='float32').reshape(1, shp[0])
# fvv = theano._asarray(numpy.random.rand(shp[0]), dtype='float32').reshape(1, shp[0])
# We must be sure that the Canonizer is working, but that we don't have other
# optimisations that could hide bugs in the Canonizer, such as local_elemwise_fusion
mode = compile.mode.get_default_mode()
opt = gof.Query(["canonicalize"])
opt = opt.excluding(
'local_elemwise_fusion')
opt = opt.excluding('local_elemwise_fusion')
mode = mode.__class__(linker=mode.linker, optimizer=opt)
# test fail!
# test fail!
# test x / y / z -> x / (y * z)
for (g, sym_inputs, val_inputs, out_dtype) in [
((dx/dy)/dz, [dx, dy, dz], [dxv, dyv, dzv], 'float64'),
((fx/fy)/fz, [fx, fy, fz], [fxv, fyv, fzv], 'float32')
((dx / dy) / dz, [dx, dy, dz], [dxv, dyv, dzv], 'float64'),
((fx / fy) / fz, [fx, fy, fz], [fxv, fyv, fzv], 'float32')
]:
f = compile.function(list(sym_inputs), g,
mode=mode)
f = compile.function(list(sym_inputs), g, mode=mode)
out = f(*val_inputs)
utt.assert_allclose(out, val_inputs[0] /
val_inputs[1] / val_inputs[2])
utt.assert_allclose(out, val_inputs[0] / val_inputs[1] / val_inputs[2])
topo = f.maker.fgraph.toposort()
assert len(topo) == 2
assert isinstance(topo[0].op, (T.Elemwise, ))
assert isinstance(topo[0].op.scalar_op,
theano.scalar.basic.Inv)
assert isinstance(topo[0].op.scalar_op, theano.scalar.basic.Inv)
assert len(topo[0].inputs) == 1
assert(out_dtype == out.dtype)
# test x / (y / z) -> (x * z) / y
for (g, sym_inputs, val_inputs, out_dtype) in [
(dx/(dy/dz), [dx, dy, dz], [dxv, dyv, dzv], 'float64'),
(fx/(fy/fz), [fx, fy, fz], [fxv, fyv, fzv], 'float32')
(dx / (dy / dz), [dx, dy, dz], [dxv, dyv, dzv], 'float64'),
(fx / (fy / fz), [fx, fy, fz], [fxv, fyv, fzv], 'float32')
]:
f = compile.function(list(sym_inputs), g,
mode=mode)
out = f(*val_inputs)
utt.assert_allclose(out, val_inputs[0] / (
val_inputs[1] / val_inputs[2]))
utt.assert_allclose(out, val_inputs[0] / (val_inputs[1] / val_inputs[2]))
topo = f.maker.fgraph.toposort()
assert len(topo) == 2
assert isinstance(topo[0].op, (T.Elemwise, ))
assert isinstance(topo[0].op.scalar_op,
theano.scalar.basic.Inv)
assert isinstance(topo[0].op.scalar_op, theano.scalar.basic.Inv)
assert len(topo[0].inputs) == 1
assert(out_dtype == out.dtype)
@@ -867,7 +825,7 @@ class test_canonize(unittest.TestCase):
logging.getLogger('theano.gof.opt').addHandler(handler)
try:
x = vector()
f = theano.function([x], x + numpy.nan)
theano.function([x], x + numpy.nan)
finally:
logging.getLogger('theano.gof.opt').removeHandler(handler)
# Ideally this test would only catch the maxed out equilibrium
@@ -959,7 +917,6 @@ class test_fusion(unittest.TestCase):
"""
# TODO: disable the canonizer?
def my_init(shp, dtype='float64', num=0):
#ret = theano._asarray(numpy.random.rand(*shp),dtype=dtype)
ret = numpy.zeros(shp, dtype=dtype) + num
return ret
fw, fx, fy, fz = [theano.tensor.tensor(dtype='float32',
@@ -1007,144 +964,133 @@ class test_fusion(unittest.TestCase):
fwv + fxv + fyv + fzv, 'float32'),
(fw + (fx + (fy + fz)), (fw, fx, fy, fz), (fwv, fxv, fyv, fzv), 1,
fwv + fxv + fyv + fzv, 'float32'),
((fw+fx)+(fy+fz), (fw, fx, fy, fz), (fwv, fxv, fyv, fzv), 1,
fwv+fxv+fyv+fzv, 'float32'), # 10
(fw*fx*fy*fz, (fw, fx, fy, fz), (fwv, fxv, fyv, fzv), 1,
((fw + fx) + (fy + fz), (fw, fx, fy, fz), (fwv, fxv, fyv, fzv), 1,
fwv + fxv + fyv + fzv, 'float32'), # 10
(fw * fx * fy * fz, (fw, fx, fy, fz), (fwv, fxv, fyv, fzv), 1,
fwv * fxv * fyv * fzv, 'float32'),
(fw+fx*fy*fz, (fw, fx, fy, fz), (fwv, fxv, fyv, fzv), 1,
(fw + fx * fy * fz, (fw, fx, fy, fz), (fwv, fxv, fyv, fzv), 1,
fwv + fxv * fyv * fzv, 'float32'),
(fx+fy*fz*fx, (fx, fy, fz), (fxv, fyv, fzv), 1,
(fx + fy * fz * fx, (fx, fy, fz), (fxv, fyv, fzv), 1,
fxv + fyv * fzv * fxv, 'float32'),
(fx*fy+fz+fy, (fx, fy, fz), (fxv, fyv, fzv), 1,
(fx * fy + fz + fy, (fx, fy, fz), (fxv, fyv, fzv), 1,
fxv * fyv + fzv + fyv, 'float32'),
(fx*fy*fz*fw+fx+fy+fz+fw, (fw, fx, fy, fz), (fwv, fxv,
fyv, fzv), 1, fxv*fyv*fzv*fwv+fxv+fyv+fzv+fwv, 'float32'), # 15
(fx * fy * fz * fw + fx + fy + fz + fw, (fw, fx, fy, fz), (fwv, fxv, fyv, fzv), 1,
fxv * fyv * fzv * fwv + fxv + fyv + fzv + fwv, 'float32'), # 15
# test with constant
((fw+fx)+(fy+fz) + 2., (fw, fx, fy, fz), (fwv, fxv, fyv, fzv),
1, fwv+fxv+fyv+fzv+2, 'float32'),
(((fw+fx)+2.+fy)+fz, (fw, fx, fy, fz), (fwv, fxv, fyv, fzv),
1, fwv+fxv+fyv+fzv+2, 'float32'),
((fw+(fx+2.+fy))+fz, (fw, fx, fy, fz), (fwv, fxv, fyv, fzv),
1, fwv+fxv+fyv+fzv+2, 'float32'),
((fw+(fx+fy)+2+fz), (fw, fx, fy, fz), (fwv, fxv, fyv, fzv),
1, fwv+fxv+fyv+fzv+2, 'float32'),
(fw+(fx+(fy+fz)+2.), (fw, fx, fy, fz), (fwv, fxv, fyv, fzv),
1, fwv+fxv+fyv+fzv+2, 'float32'), # 20
(2+(fw+fx)+(fy+fz), (fw, fx, fy, fz), (fwv, fxv, fyv, fzv),
1, fwv+fxv+fyv+fzv+2, 'float32'),
((fw + fx) + (fy + fz) + 2., (fw, fx, fy, fz), (fwv, fxv, fyv, fzv),
1, fwv + fxv + fyv + fzv + 2, 'float32'),
(((fw + fx) + 2. + fy) + fz, (fw, fx, fy, fz), (fwv, fxv, fyv, fzv),
1, fwv + fxv + fyv + fzv + 2, 'float32'),
((fw + (fx + 2. + fy)) + fz, (fw, fx, fy, fz), (fwv, fxv, fyv, fzv),
1, fwv + fxv + fyv + fzv + 2, 'float32'),
((fw + (fx + fy) + 2 + fz), (fw, fx, fy, fz), (fwv, fxv, fyv, fzv),
1, fwv + fxv + fyv + fzv + 2, 'float32'),
(fw + (fx + (fy + fz) + 2.), (fw, fx, fy, fz), (fwv, fxv, fyv, fzv),
1, fwv + fxv + fyv + fzv + 2, 'float32'), # 20
(2 + (fw + fx) + (fy + fz), (fw, fx, fy, fz), (fwv, fxv, fyv, fzv),
1, fwv + fxv + fyv + fzv + 2, 'float32'),
# mix float32 and float64
(2+(dw+fx)+(fy+fz), (dw, fx, fy, fz), (dwv, fxv, fyv, fzv),
1, dwv+fxv+fyv+fzv+2, 'float64'),
(2+(fw+dw)+(fy+fz), (fw, dw, fy, fz), (fwv, dwv, fyv, fzv),
1, fwv+dwv+fyv+fzv+2, 'float64'),
(2+(fw+fx)+(dw+fz), (fw, fx, dw, fz), (fwv, fxv, dwv, fzv),
1, fwv+fxv+dwv+fzv+2, 'float64'),
(2+(fw+fx)+(fy+dw), (fw, fx, fy, dw), (fwv, fxv, fyv, dwv),
1, fwv+fxv+fyv+dwv+2, 'float64'), # 25
(2 + (dw + fx) + (fy + fz), (dw, fx, fy, fz), (dwv, fxv, fyv, fzv),
1, dwv + fxv + fyv + fzv + 2, 'float64'),
(2 + (fw + dw) + (fy + fz), (fw, dw, fy, fz), (fwv, dwv, fyv, fzv),
1, fwv + dwv + fyv + fzv + 2, 'float64'),
(2 + (fw + fx) + (dw + fz), (fw, fx, dw, fz), (fwv, fxv, dwv, fzv),
1, fwv + fxv + dwv + fzv + 2, 'float64'),
(2 + (fw + fx) + (fy + dw), (fw, fx, fy, dw), (fwv, fxv, fyv, dwv),
1, fwv + fxv + fyv + dwv + 2, 'float64'), # 25
# test when there is an op other than elemwise.
# the expected output for the next test:
# (Pdb) p f.maker.fgraph.toposort()
#[Elemwise{add,no_inplace}(w, x), Sum(Elemwise{add,no_inplace}.0), InplaceDimShuffle{x,x}(Sum.0), Elemwise{Composite{_impls=[<function <lambda> at 0x2c5c8c0>], nin=4, _c_code={
# npy_float32 V%(id)s_tmp1;
# V%(id)s_tmp1 = %(i2)s + %(i3)s;
# npy_float32 V%(id)s_tmp2;
# V%(id)s_tmp2 = %(i0)s + %(i1)s;
#%(o0)s = V%(id)s_tmp2 + V%(id)s_tmp1;
#}
#, nout=1, fgraph=[add(add(<float32>, <float32>), add(<float32>, <float32>))]}}(InplaceDimShuffle{x,x}.0, Elemwise{add,no_inplace}.0, y, z)]
((fwx.sum())+(fwx)+(fy+fz), (fw, fx, fy, fz), (fwv, fxv,
fyv, fzv), 4, (fwv+fxv).sum()+fwv+fxv+fyv+fzv, 'float32'),
((fwx.sum()) + (fwx) + (fy + fz), (fw, fx, fy, fz), (fwv, fxv, fyv, fzv),
4, (fwv + fxv).sum() + fwv + fxv + fyv + fzv, 'float32'),
# test other elemwise op
(fx+fy+tensor.cos(fz), (fx, fy, fz), (fxv, fyv, fzv), 1,
fxv+fyv+numpy.cos(fzv), 'float32'),
(fx+fy+tensor.cosh(fz), (fx, fy, fz), (fxv, fyv, fzv), 1,
fxv+fyv+numpy.cosh(fzv), 'float32'),
(fx+fy+abs(fz), (fx, fy, fz), (fxv, fyv, fzv), 1, fxv+fyv +
(fx + fy + tensor.cos(fz), (fx, fy, fz), (fxv, fyv, fzv), 1,
fxv + fyv + numpy.cos(fzv), 'float32'),
(fx + fy + tensor.cosh(fz), (fx, fy, fz), (fxv, fyv, fzv), 1,
fxv + fyv + numpy.cosh(fzv), 'float32'),
(fx + fy + abs(fz), (fx, fy, fz), (fxv, fyv, fzv), 1, fxv + fyv +
numpy.absolute(fzv), 'float32'),
(ix+iy+abs(iz), (ix, iy, iz), (ixv, iyv, izv), 1, ixv+iyv +
(ix + iy + abs(iz), (ix, iy, iz), (ixv, iyv, izv), 1, ixv + iyv +
numpy.absolute(izv), 'int32'), # 30
(fx+fy+theano.tensor.log(fz), (fx, fy, fz), (fxv, fyv,
fzv), 1, fxv+fyv+numpy.log(fzv), 'float32'),
(fx+fy+theano.tensor.log2(fz), (fx, fy, fz), (fxv, fyv,
fzv), 1, fxv+fyv+numpy.log2(fzv), 'float32'),
(fx+fy+theano.tensor.log10(fz), (fx, fy, fz), (fxv, fyv,
fzv), 1, fxv+fyv+numpy.log10(fzv), 'float32'),
(fx+fy**fz, (fx, fy, fz), (fxv, fyv, fzv), 1, fxv+fyv**fzv,
(fx + fy + theano.tensor.log(fz), (fx, fy, fz), (fxv, fyv, fzv),
1, fxv + fyv + numpy.log(fzv), 'float32'),
(fx + fy + theano.tensor.log2(fz), (fx, fy, fz), (fxv, fyv, fzv),
1, fxv + fyv + numpy.log2(fzv), 'float32'),
(fx + fy + theano.tensor.log10(fz), (fx, fy, fz), (fxv, fyv, fzv),
1, fxv + fyv + numpy.log10(fzv), 'float32'),
(fx + fy ** fz, (fx, fy, fz), (fxv, fyv, fzv), 1, fxv + fyv ** fzv,
'float32'), # pow
(fx+fy+theano.tensor.exp(fz), (fx, fy, fz), (fxv, fyv,
fzv), 1, fxv+fyv+numpy.exp(fzv), 'float32'), # 35
(fx-fy-fz, (fx, fy, fz), (fxv, fyv, fzv), 1, fxv-fyv-fzv, 'float32'),
(fx-(fy/fz), (fx, fy, fz), (fxv, fyv, fzv), 1, fxv-(fyv/fzv), 'float32'),
(fx-theano.tensor.true_div(fy, 2), (fx, fy), (fxv, fyv),
1, fxv-(fyv/2), 'float32'),
(fx-theano.tensor.true_div(fy, fz), (fx, fy, fz), (fxv,
fyv, fzv), 1, fxv-(fyv/fzv), 'float32'),
(fx-theano.tensor.int_div(ix*100, iy*1000), (fx, ix,
iy), (fxv, ixv, iyv), 1, fxv-((ixv*100)//(iyv*1000)), {'custom': 'float64', 'numpy+floatX': config.floatX, 'numpy': 'float64'}), # 40
(fx-(fy/2), (fx, fy), (fxv, fyv), 1, fxv-(fyv/2), 'float32'),
(fx-(fy%fz), (fx, fy, fz), (fxv, fyv, fzv), 1, fxv-(fyv%fzv), 'float32'),
(fx-(fy > fz), (fx, fy, fz), (fxv, fyv, fzv), 1, fxv-(fyv > fzv), 'float32'),
(fx-(fy >= fz), (fx, fy, fz), (fxv, fyv, fzv), 1, fxv-(fyv >= fzv), 'float32'),
(fx-(fy < fz), (fx, fy, fz), (fxv, fyv, fzv), 1, fxv-(fyv < fzv), 'float32'), # 45
(fx-(fy <= fz), (fx, fy, fz), (fxv, fyv, fzv), 1, fxv-(fyv <= fzv), 'float32'),
(fx-T.eq(fy, fz), (fx, fy, fz), (fxv, fyv, fzv), 1, fxv-(
fyv == fzv), 'float32'),
(fx-T.neq(fy, fz), (fx, fy, fz), (fxv, fyv, fzv), 1, fxv-(
(fx + fy + theano.tensor.exp(fz), (fx, fy, fz), (fxv, fyv, fzv),
1, fxv + fyv + numpy.exp(fzv), 'float32'), # 35
(fx - fy - fz, (fx, fy, fz), (fxv, fyv, fzv), 1, fxv - fyv - fzv, 'float32'),
(fx - (fy / fz), (fx, fy, fz), (fxv, fyv, fzv), 1, fxv - (fyv / fzv), 'float32'),
(fx - theano.tensor.true_div(fy, 2), (fx, fy), (fxv, fyv),
1, fxv - (fyv / 2), 'float32'),
(fx - theano.tensor.true_div(fy, fz), (fx, fy, fz), (fxv, fyv, fzv),
1, fxv - (fyv / fzv), 'float32'),
(fx - theano.tensor.int_div(ix * 100, iy * 1000), (fx, ix, iy), (fxv, ixv, iyv),
1, fxv - ((ixv * 100) // (iyv * 1000)), {'custom': 'float64', 'numpy+floatX': config.floatX, 'numpy': 'float64'}), # 40
(fx - (fy / 2), (fx, fy), (fxv, fyv), 1, fxv - (fyv / 2), 'float32'),
(fx - (fy % fz), (fx, fy, fz), (fxv, fyv, fzv), 1, fxv - (fyv % fzv), 'float32'),
(fx - (fy > fz), (fx, fy, fz), (fxv, fyv, fzv), 1, fxv - (fyv > fzv), 'float32'),
(fx - (fy >= fz), (fx, fy, fz), (fxv, fyv, fzv), 1, fxv - (fyv >= fzv), 'float32'),
(fx - (fy < fz), (fx, fy, fz), (fxv, fyv, fzv), 1, fxv - (fyv < fzv), 'float32'), # 45
(fx - (fy <= fz), (fx, fy, fz), (fxv, fyv, fzv), 1, fxv - (fyv <= fzv), 'float32'),
(fx - T.eq(fy, fz), (fx, fy, fz), (fxv, fyv, fzv), 1,
fxv - (fyv == fzv), 'float32'),
(fx - T.neq(fy, fz), (fx, fy, fz), (fxv, fyv, fzv), 1, fxv - (
fyv != fzv), 'float32'),
(fx-fy+tensor.tan(fz), (fx, fy, fz), (fxv, fyv, fzv), 1,
fxv-fyv+numpy.tan(fzv), 'float32'),
(fx-fy+tensor.tanh(fz), (fx, fy, fz), (fxv, fyv, fzv), 1,
fxv-fyv+numpy.tanh(fzv), 'float32'), # 50
(fx-fy+tensor.sin(fz), (fx, fy, fz), (fxv, fyv, fzv), 1,
fxv-fyv+numpy.sin(fzv), 'float32'),
(fx-fy+tensor.sinh(fz), (fx, fy, fz), (fxv, fyv, fzv), 1,
fxv-fyv+numpy.sinh(fzv), 'float32'),
(fx-fy+theano.tensor.sqr(fz), (fx, fy, fz), (fxv, fyv,
fzv), 1, fxv-fyv+(fzv*fzv), 'float32'),
(fx-fy+theano.tensor.sqrt(fz), (fx, fy, fz), (fxv, fyv,
fzv), 1, fxv-fyv+numpy.sqrt(fzv), 'float32'),
(fx-fy+theano.tensor.inv(fz), (fx, fy, fz), (fxv, fyv,
fzv), 1, fxv-fyv+(1/fzv), 'float32'), # 55
(fx-fy+theano.tensor.neg(fz), (fx, fy, fz), (fxv, fyv,
fzv), 1, fxv-fyv+(-fzv), 'float32'),
(fx-fy+theano.tensor.round(fz), (fx, fy, fz), (fxv, fyv,
fzv), 1, fxv-fyv+numpy.round(fzv), 'float32'),
(ix-iy+theano.tensor.iround(fz), (ix, iy, fz), (ixv,
iyv, fzv), 1, ixv-iyv+numpy.round(fzv), 'int64'),
(fx - fy + tensor.tan(fz), (fx, fy, fz), (fxv, fyv, fzv), 1,
fxv - fyv + numpy.tan(fzv), 'float32'),
(fx - fy + tensor.tanh(fz), (fx, fy, fz), (fxv, fyv, fzv), 1,
fxv - fyv + numpy.tanh(fzv), 'float32'), # 50
(fx - fy + tensor.sin(fz), (fx, fy, fz), (fxv, fyv, fzv), 1,
fxv - fyv + numpy.sin(fzv), 'float32'),
(fx - fy + tensor.sinh(fz), (fx, fy, fz), (fxv, fyv, fzv), 1,
fxv - fyv + numpy.sinh(fzv), 'float32'),
(fx - fy + theano.tensor.sqr(fz), (fx, fy, fz), (fxv, fyv, fzv), 1,
fxv - fyv + (fzv * fzv), 'float32'),
(fx - fy + theano.tensor.sqrt(fz), (fx, fy, fz), (fxv, fyv, fzv), 1,
fxv - fyv + numpy.sqrt(fzv), 'float32'),
(fx - fy + theano.tensor.inv(fz), (fx, fy, fz), (fxv, fyv, fzv), 1,
fxv - fyv + (1 / fzv), 'float32'), # 55
(fx - fy + theano.tensor.neg(fz), (fx, fy, fz), (fxv, fyv, fzv), 1,
fxv - fyv + (-fzv), 'float32'),
(fx - fy + theano.tensor.round(fz), (fx, fy, fz), (fxv, fyv, fzv), 1,
fxv - fyv + numpy.round(fzv), 'float32'),
(ix - iy + theano.tensor.iround(fz), (ix, iy, fz), (ixv, iyv, fzv), 1,
ixv - iyv + numpy.round(fzv), 'int64'),
# Bit op
(fx-theano.tensor.or_(iy, iz), (fx, iy, iz), (fxv, iyv,
izv), 1, fxv-(iyv|izv), {'custom': 'float64', 'numpy+floatX': config.floatX, 'numpy': 'float64'}),
(fx-theano.tensor.xor(iy, iz), (fx, iy, iz), (fxv, iyv,
izv), 1, fxv-(iyv^izv), {'custom': 'float64', 'numpy+floatX': config.floatX, 'numpy': 'float64'}), # 60
(fx-theano.tensor.and_(iy, iz), (fx, iy, iz), (fxv, iyv,
izv), 1, fxv-(iyv&izv), {'custom': 'float64', 'numpy+floatX': config.floatX, 'numpy': 'float64'}),
(fx-theano.tensor.invert(iy), (fx, iy), (fxv, iyv), 1,
fxv-(~iyv), {'custom': 'float64', 'numpy+floatX': config.floatX, 'numpy': 'float64'}),
(fx-theano.tensor.cast(fy, dtype='float64'), (fx, fy), (fxv, fyv), 1,
fxv-numpy.asarray(fyv, 'float64'), 'float64'),
(theano.tensor.pow(fx*fy+fz, fx*fy), (fx, fy, fz), (fxv,
fyv, fzv), 1, numpy.power(fxv*fyv+fzv, fxv*fyv), 'float32'),
(fv+fy**fz, (fv, fy, fz), (fvv, fyv, fzv), 2, fvv+fyv**fzv,
'float32'), # fused with a dimshuffle #65
(fv-fy+tensor.tanh(fz), (fv, fy, fz), (fvv, fyv, fzv), 2,
fvv-fyv+numpy.tanh(fzv), 'float32'), # fused with a dimshuffle
(fx - theano.tensor.or_(iy, iz), (fx, iy, iz), (fxv, iyv, izv), 1,
fxv - (iyv | izv), {'custom': 'float64', 'numpy+floatX': config.floatX, 'numpy': 'float64'}),
(fx - theano.tensor.xor(iy, iz), (fx, iy, iz), (fxv, iyv, izv), 1,
fxv - (iyv ^ izv), {'custom': 'float64', 'numpy+floatX': config.floatX, 'numpy': 'float64'}), # 60
(fx - theano.tensor.and_(iy, iz), (fx, iy, iz), (fxv, iyv, izv), 1,
fxv - (iyv & izv), {'custom': 'float64', 'numpy+floatX': config.floatX, 'numpy': 'float64'}),
(fx - theano.tensor.invert(iy), (fx, iy), (fxv, iyv), 1,
fxv - (~iyv), {'custom': 'float64', 'numpy+floatX': config.floatX, 'numpy': 'float64'}),
(fx - theano.tensor.cast(fy, dtype='float64'), (fx, fy), (fxv, fyv), 1,
fxv - numpy.asarray(fyv, 'float64'), 'float64'),
(theano.tensor.pow(fx * fy + fz, fx * fy), (fx, fy, fz), (fxv, fyv, fzv), 1,
numpy.power(fxv * fyv + fzv, fxv * fyv), 'float32'),
(fv + fy ** fz, (fv, fy, fz), (fvv, fyv, fzv), 2, fvv + fyv ** fzv, 'float32'), # fused with a dimshuffle #65
(fv - fy + tensor.tanh(fz), (fv, fy, fz), (fvv, fyv, fzv), 2,
fvv - fyv + numpy.tanh(fzv), 'float32'), # fused with a dimshuffle
# Cases where the same input is reused many times.
(theano.tensor.mul(fx, fx, fx, fx), (fx,), (fxv,), 1, fxv *
fxv*fxv*fxv, 'float32'),
fxv * fxv * fxv, 'float32'),
(theano.tensor.mul(fx, ftanx, ftanx), (fx,), (fxv,), 1,
fxv*numpy.tan(fxv)*numpy.tan(fxv), 'float32'),
fxv * numpy.tan(fxv) * numpy.tan(fxv), 'float32'),
(theano.tensor.mul(fx, ftanx, ftanx, fx), (fx,), (fxv,),
1, fxv*numpy.tan(fxv)*numpy.tan(fxv)*fxv, 'float32'),
(theano.tensor.mul(ftanx, ftanx, fx+fy), (fx, fy), (fxv,
fyv), 1, numpy.tan(fxv)*numpy.tan(fxv)*(fxv+fyv), 'float32'), # 70
1, fxv * numpy.tan(fxv) * numpy.tan(fxv) * fxv, 'float32'),
(theano.tensor.mul(ftanx, ftanx, fx + fy), (fx, fy), (fxv, fyv),
1, numpy.tan(fxv) * numpy.tan(fxv) * (fxv + fyv), 'float32'), # 70
# Cases with different broadcast patterns. They should not
# be merged, as this would duplicate computation.
# The graph should have 2 elemwise and 1 dimshuffle
(fx*theano.tensor.sin(fs), (fx, fs), (fxv,
fsv), 3, fxv*numpy.sin(fsv), 'float32'),
(fx * theano.tensor.sin(fs), (fx, fs), (fxv, fsv), 3,
fxv * numpy.sin(fsv), 'float32'),
]
if slice:
cases = cases[slice]
@@ -1192,15 +1138,14 @@ class test_fusion(unittest.TestCase):
print(val_inputs)
print(out)
print(answer * nb_repeat)
#assert 0
topo = f.maker.fgraph.toposort()
if gpu:
import theano.sandbox.cuda as cuda
topo_ = [x for x in topo if not isinstance(
x.op, (cuda.basic_ops.GpuFromHost, cuda.basic_ops.HostFromGpu))]
gpu_ = [x for x in topo if isinstance(x.op,
cuda.basic_ops.GpuFromHost)]
gpu_ = [x for x in topo
if isinstance(x.op, cuda.basic_ops.GpuFromHost)]
if not len(gpu_) == len(sym_inputs):
fail2.append((id, gpu_, sym_inputs))
else:
@@ -1344,8 +1289,8 @@ class test_fusion(unittest.TestCase):
shp = (3000, 3000)
shp = (1000, 1000)
nb_repeat = 50
# linker=gof.CLinker
# linker=gof.OpWiseCLinker
# linker=gof.CLinker
# linker=gof.OpWiseCLinker
mode1 = copy.copy(compile.get_default_mode())
mode1._optimizer = mode1._optimizer.including('local_elemwise_fusion')
@@ -1368,7 +1313,7 @@ class test_fusion(unittest.TestCase):
print("times2/times1")
print(d)
print("min", d.min(), "argmin", d.argmin(), "max", d.max(), \
print("min", d.min(), "argmin", d.argmin(), "max", d.max(),
"mean", d.mean(), "std", d.std())
def test_fusion_inplace(self):
@@ -1389,8 +1334,8 @@ class test_fusion(unittest.TestCase):
def speed_fusion_gpu(self):
import theano.sandbox.cuda as cuda
self.speed_fusion(shared_fn=cuda.
float32_shared_constructor, gpu=True, s=slice(0, 15))
self.speed_fusion(shared_fn=cuda.float32_shared_constructor,
gpu=True, s=slice(0, 15))
def speed_log_exp(self):
s = slice(31, 36)
@@ -1428,13 +1373,15 @@ class test_fusion(unittest.TestCase):
gc.collect()
gc.collect()
gc.collect()
# print 'v1',v1
v1 = weakref.ref(v)
v1 = weakref.ref(v) # noqa
pdb.set_trace()
# no memory leak
# f = orig_function([compile.In(fx),compile.In(variable=fy, value=None)],
# [fy+fx],mode=mode)#no memory leak
f = orig_function([compile.In(fx), compile.In(variable=fy, value=v)],
[fy + fx], mode=mode) # memory leak
# [fy+fx],mode=mode)
# memory leak
f = orig_function( # noqa
[compile.In(fx), compile.In(variable=fy, value=v)],
[fy + fx], mode=mode)
del v
gc.collect()
gc.collect()
@@ -1471,8 +1418,7 @@ class test_fusion(unittest.TestCase):
for x in ndl:
cmp = not isinstance(x, list)
if not cmp and x:
cmp = x[0].__class__.__name__ != \
'array_converter'
cmp = (x[0].__class__.__name__ != 'array_converter')
if cmp:
cmp = x[0] != 'Option'
if cmp:
@@ -1568,7 +1514,8 @@ class TestCompositeCodegen(unittest.TestCase):
y = self.times_2(self.x)
z = self.times_3(y)
f = theano.function([self.x], cuda.gpu_from_host(z),
f = theano.function(
[self.x], cuda.gpu_from_host(z),
mode=theano.compile.mode.get_default_mode().including('gpu'))
topo = f.maker.fgraph.toposort()
if config.mode != "FAST_COMPILE":
@@ -1607,8 +1554,8 @@ def test_log1p():
assert [node.op for node in f.maker.fgraph.toposort()][3:] == [
T.log1p, tensor.alloc]
f = function([x, y], T.log(2 + (x) - tensor.fill(y, 1.0)), mode=m)
assert [node.op for node in f.maker.fgraph.toposort()][3:] \
== [T.log1p, tensor.alloc]
assert ([node.op for node in f.maker.fgraph.toposort()][3:] ==
[T.log1p, tensor.alloc])
f([1e-7, 10], [[0, 0], [0, 0]]) # debugmode will verify values
@@ -1712,6 +1659,7 @@ def test_local_useless_slice():
assert check_stack_trace(f_opt_check, ops_to_check=Subtensor)
assert check_stack_trace(f_opt_check_apply, ops_to_check=Subtensor)
def test_local_useless_inc_subtensor():
x = tensor.matrix('x')
y = tensor.matrix('y')
@@ -2119,8 +2067,8 @@ class test_local_subtensor_lift(unittest.TestCase):
assert isinstance(prog[3].op.scalar_op, theano.scalar.
Composite) # Composite{add,add}
assert len(prog) == 4
f([[0, 1], [2, 3]], 4, [[4, 5], [6, 7]])
# let debugmode test something
f([[0, 1], [2, 3]], 4, [[4, 5], [6, 7]])
def test2(self):
# as 1, but take a slice
@@ -2140,8 +2088,8 @@ class test_local_subtensor_lift(unittest.TestCase):
assert isinstance(prog[3].op.scalar_op, theano.scalar.
Composite) # Composite{add,add}
assert len(prog) == 4
f([[0, 1], [2, 3]], 4, [[4, 5], [6, 7]])
# let debugmode test something
f([[0, 1], [2, 3]], 4, [[4, 5], [6, 7]])
def test3(self):
# basic test that the optimization does work with broadcasting
@@ -2270,7 +2218,6 @@ class test_local_subtensor_lift(unittest.TestCase):
newz = tensor.Rebroadcast((3, True))(z)
assert newz.broadcastable == (False, False, False, True)
out = newz[:, 3, 0]
f4 = function([z], newz[:, 3, 0], mode=mode_opt)
# Check stacktrace was copied over correctly after opt was applied
self.assertTrue(check_stack_trace(f4, ops_to_check=[
@@ -2323,7 +2270,7 @@ class test_local_subtensor_merge(unittest.TestCase):
f = function([x, y], x[y::][-1], mode=mode_opt)
g = function([x, y], x[y::][-1],
mode=mode_opt.excluding('local_subtensor_merge'))
#theano.printing.debugprint(f, print_type=True)
# theano.printing.debugprint(f, print_type=True)
# Check stacktrace was copied over correctly after opt was applied
self.assertTrue(check_stack_trace(f, ops_to_check=Subtensor))
@@ -2358,7 +2305,7 @@ class test_local_subtensor_merge(unittest.TestCase):
# Check stacktrace was copied over correctly after opt was applied
self.assertTrue(check_stack_trace(f, ops_to_check=Subtensor))
#theano.printing.debugprint(f, print_type=True)
# theano.printing.debugprint(f, print_type=True)
topo = f.maker.fgraph.toposort()
# print [t for t in topo if isinstance(t.op, tensor.Subtensor)]
assert len([t for t in topo
@@ -2384,7 +2331,7 @@ class test_local_subtensor_merge(unittest.TestCase):
f = function([x, y], x[::-1][y], mode=mode_opt)
g = function([x, y], x[::-1][y],
mode=mode_opt.excluding('local_subtensor_merge'))
#theano.printing.debugprint(f, print_type=True)
# theano.printing.debugprint(f, print_type=True)
# Check stacktrace was copied over correctly after opt was applied
self.assertTrue(check_stack_trace(f, ops_to_check=Subtensor))
@@ -2414,7 +2361,7 @@ class test_local_subtensor_merge(unittest.TestCase):
# Check stacktrace was copied over correctly after opt was applied
self.assertTrue(check_stack_trace(f, ops_to_check=Subtensor))
#theano.printing.debugprint(f, print_type=True)
# theano.printing.debugprint(f, print_type=True)
topo = f.maker.fgraph.toposort()
# print [t for t in topo if isinstance(t.op, tensor.Subtensor)]
assert len([t for t in topo
@@ -2435,7 +2382,7 @@ class test_local_subtensor_merge(unittest.TestCase):
# Check stacktrace was copied over correctly after opt was applied
self.assertTrue(check_stack_trace(f, ops_to_check=Subtensor))
#theano.printing.debugprint(f, print_type=True)
# theano.printing.debugprint(f, print_type=True)
topo = f.maker.fgraph.toposort()
# print [t for t in topo if isinstance(t.op, tensor.Subtensor)]
@@ -2457,9 +2404,9 @@ class test_local_subtensor_merge(unittest.TestCase):
f = function([x], x[idx1:][:idx2], mode=mode_opt)
# Check stacktrace was copied over correctly after opt was applied
self.assertTrue(check_stack_trace(f, ops_to_check='all'))
self.assertTrue(check_stack_trace(f, ops_to_check=Subtensor))
#theano.printing.debugprint(f, print_type=True)
# theano.printing.debugprint(f, print_type=True)
topo = f.maker.fgraph.toposort()
# print [t for t in topo if isinstance(t.op, tensor.Subtensor)]
assert len([t for t in topo
@@ -2481,7 +2428,7 @@ class test_local_subtensor_merge(unittest.TestCase):
# Check stacktrace was copied over correctly after opt was applied
self.assertTrue(check_stack_trace(f, ops_to_check=Subtensor))
#theano.printing.debugprint(f, print_type=True)
# theano.printing.debugprint(f, print_type=True)
topo = f.maker.fgraph.toposort()
# print [t for t in topo if isinstance(t.op, tensor.Subtensor)]
@@ -2513,7 +2460,6 @@ class test_local_subtensor_merge(unittest.TestCase):
# Check stacktrace was copied over correctly after opt was applied
self.assertTrue(check_stack_trace(f, ops_to_check=Subtensor))
x_val = self.rng.uniform(size=shape).astype(config.floatX)
f(x_val)
@@ -2533,7 +2479,7 @@ class test_local_subtensor_merge(unittest.TestCase):
# Check stacktrace was copied over correctly after opt was applied
self.assertTrue(check_stack_trace(f, ops_to_check=Subtensor))
#theano.printing.debugprint(f, print_type=True)
# theano.printing.debugprint(f, print_type=True)
topo = f.maker.fgraph.toposort()
# print [t for t in topo if isinstance(t.op, tensor.Subtensor)]
@@ -2562,7 +2508,7 @@ class test_local_subtensor_merge(unittest.TestCase):
for s2 in s2r:
f(x_val, b1, e1, s1, b2, e2, s2)
def test_const4(self):
def test_const5(self):
# Bug reported by Razvan
data = numpy.asarray(numpy.arange(8),
dtype=theano.config.floatX)
@@ -2575,7 +2521,7 @@ class test_local_subtensor_merge(unittest.TestCase):
val = fun(data)
assert val == data[7:1:-1][0]
def test_const5(self):
def test_const6(self):
# Bug reported by Graham
data = self.rng.uniform(size=(8, 8, 8)).astype(theano.config.floatX)
x = theano.tensor.tensor3('x')
@@ -2621,7 +2567,7 @@ class test_local_subtensor_merge(unittest.TestCase):
# Check stacktrace was copied over correctly after opt was applied
self.assertTrue(check_stack_trace(f, ops_to_check=Subtensor))
#theano.printing.debugprint(f, print_type=True)
# theano.printing.debugprint(f, print_type=True)
topo = f.maker.fgraph.toposort()
# print [t for t in topo if isinstance(t.op, tensor.Subtensor)]
@@ -2877,7 +2823,6 @@ class test_local_adv_sub1_adv_inc_sub1(unittest.TestCase):
dx = numpy.random.rand(4, 5).astype(config.floatX)
dy = numpy.random.rand(2, 5).astype(config.floatX)
didx = numpy.asarray([1, 3], "int32")
# set_subtensor
inc = tensor.set_subtensor(x[idx], y)
@@ -2924,8 +2869,8 @@ class Test_alloc_zero(unittest.TestCase):
y0 = tensor.zeros_like(y)
z = tensor.set_subtensor(x0[:4], y0)
f = theano.function([x, y], z, mode=self.mode)
assert numpy.all([not isinstance(n.op, tensor.IncSubtensor) for n in
f.maker.fgraph.toposort()])
assert numpy.all([not isinstance(n.op, tensor.IncSubtensor)
for n in f.maker.fgraph.toposort()])
def test_setsubtensor_allocs1(self):
y = tensor.matrix()
@@ -2934,8 +2879,8 @@ class Test_alloc_zero(unittest.TestCase):
y0 = tensor.zeros_like(y)
z = tensor.set_subtensor(x0[:4], y0)
f = theano.function([y], z, mode=self.mode)
assert numpy.all([not isinstance(n.op, tensor.IncSubtensor) for n in
f.maker.fgraph.toposort()])
assert numpy.all([not isinstance(n.op, tensor.IncSubtensor)
for n in f.maker.fgraph.toposort()])
def test_setsubtensor_allocs1t(self):
y = tensor.matrix()
@@ -2944,8 +2889,8 @@ class Test_alloc_zero(unittest.TestCase):
y0 = tensor.zeros_like(y)
z = tensor.set_subtensor(x0[:4], y0.T)
f = theano.function([y], z, mode=mode_opt)
assert numpy.all([not isinstance(n.op, tensor.IncSubtensor) for n in
f.maker.fgraph.toposort()])
assert numpy.all([not isinstance(n.op, tensor.IncSubtensor)
for n in f.maker.fgraph.toposort()])
def test_setsubtensor_allocs2(self):
x = tensor.matrix()
@@ -2954,8 +2899,8 @@ class Test_alloc_zero(unittest.TestCase):
x0 = tensor.zeros_like(x)
z = tensor.set_subtensor(x0[:4], y0)
f = theano.function([x], z, mode=self.mode)
assert numpy.all([not isinstance(x.op, tensor.IncSubtensor) for x in
f.maker.fgraph.toposort()])
assert numpy.all([not isinstance(n.op, tensor.IncSubtensor)
for n in f.maker.fgraph.toposort()])
def test_incsubtensor_allocs0(self):
x = tensor.matrix()
@@ -2963,8 +2908,8 @@ class Test_alloc_zero(unittest.TestCase):
y0 = tensor.zeros_like(y)
z = tensor.inc_subtensor(x[:4], y0)
f = theano.function([x, y], z, mode=self.mode)
assert numpy.all([not isinstance(n.op, tensor.IncSubtensor) for n in
f.maker.fgraph.toposort()])
assert numpy.all([not isinstance(n.op, tensor.IncSubtensor)
for n in f.maker.fgraph.toposort()])
def test_incsubtensor_allocs0t(self):
x = tensor.matrix()
@@ -2972,8 +2917,8 @@ class Test_alloc_zero(unittest.TestCase):
y0 = tensor.zeros_like(y)
z = tensor.inc_subtensor(x[:4], y0.T)
f = theano.function([x, y], z, mode=mode_opt)
assert numpy.all([not isinstance(n.op, tensor.IncSubtensor) for n in
f.maker.fgraph.toposort()])
assert numpy.all([not isinstance(n.op, tensor.IncSubtensor)
for n in f.maker.fgraph.toposort()])
def test_incsubtensor_allocs1(self):
x = tensor.matrix()
@@ -2981,8 +2926,8 @@ class Test_alloc_zero(unittest.TestCase):
dtype=config.floatX))
z = tensor.inc_subtensor(x[:4], y0)
f = theano.function([x], z, mode=self.mode)
assert numpy.all([not isinstance(x.op, tensor.IncSubtensor) for x in
f.maker.fgraph.toposort()])
assert numpy.all([not isinstance(n.op, tensor.IncSubtensor)
for n in f.maker.fgraph.toposort()])
def test_advancedincsubtensor1_allocs0(self):
x = tensor.matrix()
@@ -3116,6 +3061,7 @@ def test_local_IncSubtensor_serialize():
tensor.IncSubtensor, tensor.AdvancedIncSubtensor,
tensor.AdvancedIncSubtensor1])
def test_local_set_to_inc_subtensor():
v = theano.tensor.fmatrix()
s = v[[2, 1]]
@@ -3590,8 +3536,6 @@ class Test_local_useless_elemwise_comparison(unittest.TestCase):
x_val = 10
assert f(x_val) == x_val
#def assert_returns
def test_inequality_with_self(self):
x = T.scalar('x', dtype=config.floatX)
mode = theano.compile.get_default_mode().including('local_useless_elemwise_comparison')
@@ -3616,7 +3560,8 @@ class Test_local_useless_elemwise_comparison(unittest.TestCase):
def test_shape_inequality_with_self(self):
x = T.vector('x', dtype=config.floatX)
mode = theano.compile.get_default_mode().including('local_useless_elemwise_comparison',
mode = theano.compile.get_default_mode().including(
'local_useless_elemwise_comparison',
'local_shape_to_shape_i',
'local_track_shape_i',
'local_subtensor_make_vector')
@@ -3649,22 +3594,23 @@ class Test_local_useless_elemwise_comparison(unittest.TestCase):
assert f(x_val) == 0
f = theano.function([x], T.minimum([0, 0], x.shape[0]), mode=mode)
# This case isn't optimized.
# self.assert_eqs_const(f, 0)
# self.assert_eqs_const(f, 0)
utt.assert_allclose(f(x_val), [0, 0])
def test_shape_add_inequality(self):
x = T.vector('x', dtype=config.floatX)
mode = theano.compile.get_default_mode().including('local_useless_elemwise_comparison',
mode = theano.compile.get_default_mode().including(
'local_useless_elemwise_comparison',
'local_shape_to_shape_i',
'local_track_shape_i',
'local_subtensor_make_vector')
y = T.vector('y', dtype=config.floatX)
f = theano.function([x, y], T.lt(x.shape[0]+y.shape[0], 0), mode=mode)
f = theano.function([x, y], T.lt(x.shape[0] + y.shape[0], 0), mode=mode)
self.assert_eqs_const(f, 0)
f = theano.function([x, y], T.ge(x.shape[0]+y.shape[0], 0), mode=mode)
f = theano.function([x, y], T.ge(x.shape[0] + y.shape[0], 0), mode=mode)
self.assert_eqs_const(f, 1)
def test_equality_shapes(self):
@@ -3755,8 +3701,8 @@ class Test_local_canonicalize_alloc(unittest.TestCase):
f = function([], a, mode=mode_opt)
# The optimization should then be applied, and remove Alloc
assert ([node.op for node in f.maker.fgraph.toposort()]
== [deep_copy_op])
assert ([node.op for node in f.maker.fgraph.toposort()] ==
[deep_copy_op])
# In DebugMode, the shape mismatch should be detected
if isinstance(mode_opt, compile.DebugMode):
@@ -3786,7 +3732,7 @@ class Test_local_canonicalize_alloc(unittest.TestCase):
mode = mode_opt.excluding('local_canonicalize_alloc')
x = tensor.matrix('x')
y = tensor.tile(x, (1,)*2)
y = tensor.tile(x, (1,) * 2)
f = function([x], [y], mode=mode)
op_classes = [node.op.__class__ for node in f.maker.fgraph.toposort()]
@@ -3896,7 +3842,6 @@ class Test_local_useless_inc_subtensor_alloc(unittest.TestCase):
self.assertTrue(check_stack_trace(f1, ops_to_check=tensor.AdvancedIncSubtensor))
self.assertTrue(check_stack_trace(f2, ops_to_check=tensor.AdvancedIncSubtensor))
def test_advanced_inc_subtensor1(self):
if tensor.inplace_increment is None:
raise SkipTest('NumPy version >= 1.8 not available')
@@ -4271,6 +4216,7 @@ class test_assert(utt.InferShapeTester):
self._compile_and_check([admat, adscal, bdscal], [out],
[admat_val, adscal_val, bdscal_val], Assert)
def test_local_mul_specialize():
mode = theano.config.mode
if mode == 'FAST_COMPILE':
@@ -4607,7 +4553,6 @@ class T_func_inverse(unittest.TestCase):
mode = theano.compile.get_default_mode()
self.mode = mode.including('local_func_inv')
def assert_func_pair_optimized(self, func1, func2, data,
should_copy=True, is_complex=False):
"""
@@ -4643,7 +4588,7 @@ class T_func_inverse(unittest.TestCase):
dx = numpy.random.rand(5, 4).astype("float32")
self.assert_func_pair_optimized(T.deg2rad, T.rad2deg, dx)
dx = numpy.random.rand(5, 4).astype("float32")*180
dx = numpy.random.rand(5, 4).astype("float32") * 180
self.assert_func_pair_optimized(T.rad2deg, T.deg2rad, dx)
# Test the other functional inverses
@@ -4653,13 +4598,13 @@ class T_func_inverse(unittest.TestCase):
self.assert_func_pair_optimized(T.arctanh, T.tanh, dx)
self.assert_func_pair_optimized(T.inv, T.inv, dx)
self.assert_func_pair_optimized(T.neg, T.neg, dx)
cx = dx + complex(0, 1)*(dx + 0.01)
cx = dx + complex(0, 1) * (dx + 0.01)
self.assert_func_pair_optimized(T.conj, T.conj, cx, is_complex=True)
# Test that non-inverse functions are run normally
self.assert_func_pair_optimized(T.conj, T.neg, cx,
should_copy=False, is_complex=True)
dx = numpy.random.rand(5, 4).astype("float32")+0.01
dx = numpy.random.rand(5, 4).astype("float32") + 0.01
self.assert_func_pair_optimized(T.rad2deg, T.rad2deg, dx,
should_copy=False)
self.assert_func_pair_optimized(T.rad2deg, T.cosh, dx,
......@@ -4705,8 +4650,8 @@ def test_constant_get_stabilized():
f2 = theano.function([x2], y2, mode=mode)
try:
assert len(f2.maker.fgraph.toposort()) == 1
assert f2.maker.fgraph.toposort()[0].op == \
theano.tensor.nnet.sigm.softplus
assert (f2.maker.fgraph.toposort()[0].op ==
theano.tensor.nnet.sigm.softplus)
assert f2(800) == 800
x = T.as_tensor_variable(800)
......@@ -4739,17 +4684,41 @@ class T_local_switch_sink(unittest.TestCase):
self.xs = 1.
# expected results
self.resm = [numpy.asarray([[1, 0, 1, 0], [0, 0, 0, 0], [1, 1, 1, 1]])]*3 + [numpy.asarray([[1, 0, 1, 0], [1, 0, 1, 0], [1, 0, 1, 0]])] + \
2*[numpy.asarray([[1, 0, 1, 0]])] + [[numpy.ones((3, 4)), numpy.zeros((3, 4)), numpy.ones((3, 4)), numpy.zeros((3, 4))]] + \
[[numpy.ones((4,)), numpy.zeros((4,)), numpy.ones((4,)), numpy.zeros((4,))]] + \
[[numpy.asarray(1.0), numpy.asarray(
0.0), numpy.asarray(1.0), numpy.asarray(0.0)]]
self.resm = (
[numpy.asarray([[1, 0, 1, 0], [0, 0, 0, 0], [1, 1, 1, 1]])] * 3 +
[numpy.asarray([[1, 0, 1, 0], [1, 0, 1, 0], [1, 0, 1, 0]])] +
2 * [numpy.asarray([[1, 0, 1, 0]])] +
[[numpy.ones((3, 4)), numpy.zeros((3, 4)), numpy.ones((3, 4)), numpy.zeros((3, 4))]] +
[[numpy.ones((4,)), numpy.zeros((4,)), numpy.ones((4,)), numpy.zeros((4,))]] +
[[numpy.asarray(1.0), numpy.asarray(0.0), numpy.asarray(1.0), numpy.asarray(0.0)]])
self.mode = theano.compile.mode.get_default_mode().including(
'canonicalize', 'fast_run').excluding('gpu', 'fusion')
self.mode = copy.copy(self.mode)
self.mode.check_isfinite = False
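# check_isfinite=False keeps DebugMode from rejecting the intermediate
# NaNs these graphs intentionally produce: the tests below call the
# compiled functions with c = -1, so T.log(c) is NaN.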
def function_remove_nan(self, *args, **kwargs):
"""Wrapper around theano.function for this test.
It disables checking
for NaN removed by optimizations in DebugMode (it has false
positives in that case).
"""
f = theano.function(*args, **kwargs)
def wrapped_f(*args, **kwargs):
# This is a bit ugly since it changes the global value of
# TensorType.values_eq_approx.
old_values_eq_approx = staticmethod(TensorType.values_eq_approx)
TensorType.values_eq_approx = staticmethod(values_eq_approx_remove_nan)
try:
out = f(*args, **kwargs)
finally:
TensorType.values_eq_approx = old_values_eq_approx
return out
return wrapped_f
def test_local_mul_switch_sink(self):
c = T.dscalar()
idx = 0
......@@ -4761,7 +4730,7 @@ class T_local_switch_sink(unittest.TestCase):
y = T.mul(T.switch(condition[0] > 0, 1. * x[0], 0. * x[0]),
T.switch(condition[0] > 0,
1. * x[0], T.log(c) * x[0]))
f = theano.function([condition[0], x[0], c],
f = self.function_remove_nan([condition[0], x[0], c],
[y], mode=self.mode)
if type(condition[1]) is list:
for i in xrange(len(condition[1])):
......@@ -4770,14 +4739,14 @@ class T_local_switch_sink(unittest.TestCase):
self.resm[idx][i])).sum() == self.resm[idx][i].size
else:
res = f(condition[1], x[1], -1)
assert (res == numpy.asarray(self.
resm[idx])).sum() == self.resm[idx].size
assert ((res == numpy.asarray(self.resm[idx])).sum() ==
self.resm[idx].size)
idx += 1
# This case caused a missed optimization in the past.
x = T.dscalar('x')
y = T.switch(x < 7, x, T.sqrt(x - 7))
f = theano.function([x], T.grad(y, x), self.mode)
f = self.function_remove_nan([x], T.grad(y, x), self.mode)
assert f(5) == 1, f(5)
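# Why f(5) == 1: the gradient of switch(x < 7, x, sqrt(x - 7)) contains
# the term switch(x < 7, 0, 1) * 0.5 / sqrt(x - 7). At x = 5 the sqrt
# branch is not selected, but without the mul-switch-sink rewrite its
# factor 0.5 / sqrt(-2) is still computed, giving 0 * NaN = NaN. The
# rewrite sinks the multiplication inside the switch, so the dead branch
# is never evaluated and the gradient is exactly 1.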
@attr('slow')
......@@ -4786,19 +4755,20 @@ class T_local_switch_sink(unittest.TestCase):
idx = 0
for condition in [(T.dmatrix('cond'), self.condm), (T.dvector('cond'), self.condv), (T.dscalar('cond'), self.conds)]:
for x in [(T.dmatrix('x'), self.xm), (T.dvector('x'), self.xv), (T.dscalar('x'), self.xs)]:
y = T.true_div(T.switch(condition[0] > 0, 1. *
x[0], 0.*x[0]), T.switch(condition[0] > 0, 1.*x[0], T.log(c)*x[0]))
f = theano.function([condition[0], x[0], c]
, [y], mode=self.mode)
y = T.true_div(
T.switch(condition[0] > 0, 1. * x[0], 0. * x[0]),
T.switch(condition[0] > 0, 1. * x[0], T.log(c) * x[0]))
f = self.function_remove_nan([condition[0], x[0], c],
[y], mode=self.mode)
if type(condition[1]) is list:
for i in xrange(len(condition[1])):
res = f(condition[1][i], x[1], -1)
assert (res == numpy.
asarray(self.resm[idx][i])).sum() == self.resm[idx][i].size
assert ((res == numpy.asarray(self.resm[idx][i])).sum() ==
self.resm[idx][i].size)
else:
res = f(condition[1], x[1], -1)
assert (res == numpy.asarray(self.
resm[idx])).sum() == self.resm[idx].size
assert ((res == numpy.asarray(self.resm[idx])).sum() ==
self.resm[idx].size)
idx += 1
......@@ -4839,18 +4809,18 @@ class T_local_erf(unittest.TestCase):
x = T.vector()
f = theano.function([x], 1 - T.erf(x), mode=self.mode)
assert [n.op for n in f.maker.fgraph.toposort()] == [T.erfc]\
, f.maker.fgraph.toposort()
assert [n.op for n in f.maker.fgraph.toposort()] == [T.erfc],\
f.maker.fgraph.toposort()
print(f(val))
f = theano.function([x], 1 + (-T.erf(x)), mode=self.mode)
assert [n.op for n in f.maker.fgraph.toposort()] == [T.erfc]\
, f.maker.fgraph.toposort()
assert [n.op for n in f.maker.fgraph.toposort()] == [T.erfc],\
f.maker.fgraph.toposort()
print(f(val))
f = theano.function([x], (-T.erf(x)) + 1, mode=self.mode)
assert [n.op for n in f.maker.fgraph.toposort()] == [T.erfc]\
, f.maker.fgraph.toposort()
assert [n.op for n in f.maker.fgraph.toposort()] == [T.erfc],\
f.maker.fgraph.toposort()
print(f(val))
f = theano.function([x], 2 - T.erf(x), mode=self.mode)
......@@ -4907,13 +4877,13 @@ class T_local_erfc(unittest.TestCase):
x = T.vector('x')
f = theano.function([x], 1 - T.erfc(x), mode=self.mode)
assert [n.op for n in f.maker.fgraph.toposort()] == [T.erf]\
, f.maker.fgraph.toposort()
assert [n.op for n in f.maker.fgraph.toposort()] == [T.erf],\
f.maker.fgraph.toposort()
print(f(val))
f = theano.function([x], (-T.erfc(x)) + 1, mode=self.mode)
assert [n.op for n in f.maker.fgraph.toposort()] == [T.erf]\
, f.maker.fgraph.toposort()
assert [n.op for n in f.maker.fgraph.toposort()] == [T.erf],\
f.maker.fgraph.toposort()
print(f(val))
f = theano.function([x], 2 - T.erfc(x), mode=self.mode)
......@@ -4921,8 +4891,8 @@ class T_local_erfc(unittest.TestCase):
assert len(topo) == 2, f.maker.fgraph.toposort()
assert topo[0].op == T.erfc, f.maker.fgraph.toposort()
assert isinstance(topo[1].op, T.Elemwise), f.maker.fgraph.toposort()
assert isinstance(topo[1].op.scalar_op, scal.Sub)\
, f.maker.fgraph.toposort()
assert isinstance(topo[1].op.scalar_op, scal.Sub),\
f.maker.fgraph.toposort()
print(f(val))
def test_local_erf_neg_minus_one(self):
......@@ -4932,18 +4902,18 @@ class T_local_erfc(unittest.TestCase):
x = T.vector('x')
f = theano.function([x], -1 + T.erfc(-x), mode=self.mode)
assert [n.op for n in f.maker.fgraph.toposort()] == [T.erf]\
, f.maker.fgraph.toposort()
assert [n.op for n in f.maker.fgraph.toposort()] == [T.erf],\
f.maker.fgraph.toposort()
print(f(val))
f = theano.function([x], T.erfc(-x) - 1, mode=self.mode)
assert [n.op for n in f.maker.fgraph.toposort()] == [T.erf]\
, f.maker.fgraph.toposort()
assert [n.op for n in f.maker.fgraph.toposort()] == [T.erf],\
f.maker.fgraph.toposort()
print(f(val))
f = theano.function([x], T.erfc(-x) + (-1), mode=self.mode)
assert [n.op for n in f.maker.fgraph.toposort()] == [T.erf]\
, f.maker.fgraph.toposort()
assert [n.op for n in f.maker.fgraph.toposort()] == [T.erf],\
f.maker.fgraph.toposort()
print(f(val))
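# The three rewrites above rely on the identity erfc(-x) = 1 + erf(x),
# so -1 + erfc(-x), erfc(-x) - 1 and erfc(-x) + (-1) all reduce to a
# single Erf node.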
def test_local_log_erfc(self):
......@@ -4989,7 +4959,9 @@ class T_local_erfc(unittest.TestCase):
val = [-100, -30, -27, -26.4, -26.2, -26, -11, -10, -9, -3, -2, -1, 0,
1, 2, 3, 9, 10, 11, 27, 26.4, 26.2, 26, 28, 30, 100]
if theano.config.mode in ["DebugMode", "DEBUG_MODE", "FAST_COMPILE"]:
# python mode don't like the inv(0) in computation, but the switch don't select this value. So it is computed for no good reason.
# Python mode doesn't like the inv(0) in the computation,
# but the switch doesn't select this value,
# so it is computed for no good reason.
val.remove(0)
if theano.config.mode in ["DebugMode", "DEBUG_MODE"] and theano.config.floatX == 'float32':
# In float32 there is a range of values close to 10 that we stabilize, as the unstabilized version gives a bigger error than the stabilized one.
......@@ -5011,8 +4983,10 @@ class T_local_erfc(unittest.TestCase):
assert f.maker.fgraph.outputs[0].dtype == theano.config.floatX
# test with a different mul constant
f = theano.function([x], T.mul(T.exp(T.neg(T.sqr(x))), -
10.12837917) / T.erfc(x), mode=mode)
f = theano.function(
[x],
T.mul(T.exp(T.neg(T.sqr(x))), -10.12837917) / T.erfc(x),
mode=mode)
assert len(f.maker.fgraph.apply_nodes) == 23, len(f.maker.fgraph.apply_nodes)
assert f.maker.fgraph.outputs[0].dtype == theano.config.floatX
assert all(numpy.isfinite(f(val)))
......@@ -5037,14 +5011,12 @@ class T_local_erfc(unittest.TestCase):
assert all(numpy.isfinite(f(val)))
# test that it work correctly if x is x*2 in the graph.
f = theano.function([x], T.grad(T.log(T.erfc(2 * x)).sum(),
x), mode=mode)
f = theano.function([x], T.grad(T.log(T.erfc(2 * x)).sum(), x), mode=mode)
assert len(f.maker.fgraph.apply_nodes) == 23, len(f.maker.fgraph.apply_nodes)
assert numpy.isfinite(f(val)).all()
assert f.maker.fgraph.outputs[0].dtype == theano.config.floatX
f = theano.function([x], T.grad(T.log(T.erfc(x)).sum(), x),
mode=mode_fusion)
f = theano.function([x], T.grad(T.log(T.erfc(x)).sum(), x), mode=mode_fusion)
assert len(f.maker.fgraph.apply_nodes) == 1, len(f.maker.fgraph.apply_nodes)
assert f.maker.fgraph.outputs[0].dtype == theano.config.floatX
......@@ -5066,8 +5038,8 @@ class T_local_erfc(unittest.TestCase):
val = numpy.random.rand(1e6)
x = T.vector()
mode = theano.compile.mode.get_mode("FAST_RUN")
f1 = theano.function([x], T.log(T.erfc(x)), mode=mode.
excluding("local_log_erfc"))
f1 = theano.function([x], T.log(T.erfc(x)),
mode=mode.excluding("local_log_erfc"))
f2 = theano.function([x], T.log(T.erfc(x)), mode=mode)
print(f1.maker.fgraph.toposort())
print(f2.maker.fgraph.toposort())
......@@ -5092,8 +5064,8 @@ class test_local_useless_switch(unittest.TestCase):
z = theano.tensor.switch(0, x, y)
f = theano.function([x, y], z, mode=self.mode)
assert len([node.op for node in f.maker.fgraph.toposort() if
(isinstance(node.op, theano.tensor.Elemwise)
and isinstance(node.op.scalar_op,
(isinstance(node.op, theano.tensor.Elemwise) and
isinstance(node.op.scalar_op,
theano.scalar.basic.Switch))]) == 0
vx = numpy.array([[1, 2, 3], [4, 5, 6]], dtype=dtype1)
vy = numpy.array([[7, 8, 9], [10, 11, 12]], dtype=dtype2)
......@@ -5108,8 +5080,8 @@ class test_local_useless_switch(unittest.TestCase):
z = theano.tensor.switch(1, x, y)
f = theano.function([x, y], z, mode=self.mode)
assert len([node.op for node in f.maker.fgraph.toposort() if
(isinstance(node.op, theano.tensor.Elemwise)
and isinstance(node.op.scalar_op,
(isinstance(node.op, theano.tensor.Elemwise) and
isinstance(node.op.scalar_op,
theano.scalar.basic.Switch))]) == 0
vx = numpy.array([[1, 2, 3], [4, 5, 6]], dtype=dtype1)
vy = numpy.array([[7, 8, 9], [10, 11, 12]], dtype=dtype2)
......@@ -5125,7 +5097,7 @@ class test_local_useless_switch(unittest.TestCase):
z2 = theano.tensor.switch(varc, x, x)
f1 = theano.function([x], z1, mode=self.mode)
f0 = theano.function([x], z0, mode=self.mode)
f2 = theano.function([x,varc], z2, mode=self.mode)
f2 = theano.function([x, varc], z2, mode=self.mode)
topo = f1.maker.fgraph.toposort()
assert len(topo) == 1
......@@ -5143,7 +5115,7 @@ class test_local_useless_switch(unittest.TestCase):
vc = numpy.array([[1, 2, 3], [4, 5, 6]], dtype=dtype1)
assert numpy.all(f1(vx) == vx)
assert numpy.all(f0(vx) == vx)
assert numpy.all(f2(vx,vc) == vx)
assert numpy.all(f2(vx, vc) == vx)
def test_shape_le_0(self):
......@@ -5157,11 +5129,10 @@ class test_local_useless_switch(unittest.TestCase):
f1 = theano.function([x], z1, mode=self.mode)
assert isinstance(f1.maker.fgraph.toposort()[0].op, Shape_i)
vx = numpy.random.randn(0,5).astype(dtype1)
vx = numpy.random.randn(0, 5).astype(dtype1)
assert f0(vx) == 0
assert f1(vx) == 5
def test_broadcast1(self):
# test switch(cst, matrix, row)
x = theano.tensor.matrix('x', dtype='int32')
......@@ -5484,9 +5455,9 @@ class T_local_sum_prod(unittest.TestCase):
dtype='float64')
mode = self.mode.including('specialize').excluding('fusion')
for t_like, n_like, nb_nodes in [(tensor.zeros_like, numpy.zeros_like, (1, 3, 3, 2)),
for t_like, n_like, nb_nodes in [
(tensor.zeros_like, numpy.zeros_like, (1, 3, 3, 2)),
(tensor.ones_like, numpy.ones_like, (5, 5, 5, 6))]:
# test sum
f = theano.function([a], t_like(a).sum(None), mode=mode)
utt.assert_allclose(f(input), n_like(input).sum())
......@@ -5514,23 +5485,23 @@ class T_local_sum_prod(unittest.TestCase):
# test prod
f = theano.function([a], t_like(a).prod(None), mode=mode)
utt.assert_allclose(f(input), n_like(input).prod())
#assert len(f.maker.fgraph.apply_nodes) == nb_nodes[0]
# assert len(f.maker.fgraph.apply_nodes) == nb_nodes[0]
f = theano.function([a], t_like(a).prod([0, 1, 2]), mode=mode)
utt.assert_allclose(f(input), n_like(input).prod())
#assert len(f.maker.fgraph.apply_nodes) == nb_nodes[0]
# assert len(f.maker.fgraph.apply_nodes) == nb_nodes[0]
for d in range(3):
f = theano.function([a], t_like(a).prod(d), mode=mode)
utt.assert_allclose(f(input), n_like(input).prod(d))
#assert len(f.maker.fgraph.apply_nodes) == nb_nodes[1]
# assert len(f.maker.fgraph.apply_nodes) == nb_nodes[1]
topo = f.maker.fgraph.toposort()
assert topo[-1].op == T.alloc
assert not any([isinstance(node.op, T.elemwise.Prod) for node in topo])
for i in range(3):
f = theano.function([a], t_like(a).prod(i), mode=mode)
utt.assert_allclose(f(input), n_like(input).prod(i))
#assert len(f.maker.fgraph.apply_nodes) == nb_nodes[2]
# assert len(f.maker.fgraph.apply_nodes) == nb_nodes[2]
topo = f.maker.fgraph.toposort()
assert topo[-1].op == T.alloc
assert not any([isinstance(node.op, T.elemwise.Prod) for node in topo])
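# The assertions above capture the point of the rewrite: sum/prod over
# zeros_like/ones_like graphs is replaced by an Alloc that broadcasts
# the constant result, so no Sum or Prod node survives compilation.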
......@@ -5563,7 +5534,7 @@ class T_local_sum_prod(unittest.TestCase):
config.on_opt_error = 'raise'
try:
# This compilation would fail prior to fix.
f = theano.function([x], y)
theano.function([x], y)
finally:
config.on_opt_error = backup
......@@ -5577,7 +5548,7 @@ class T_local_sum_prod(unittest.TestCase):
config.on_opt_error = 'raise'
try:
# This compilation would fail prior to fix.
f = theano.function([x], y)
theano.function([x], y)
finally:
config.on_opt_error = backup
......@@ -5931,8 +5902,7 @@ class TestMakeVector(utt.InferShapeTester):
mv = opt.MakeVector(dtype=dtype)(*inputs)
assert mv.dtype == dtype
f = theano.function([b, i, d], mv, on_unused_input='ignore')
f_val = f(val[b], val[i], val[d])
# print 'f_val =', f_val
f(val[b], val[i], val[d])
s = mv.sum()
gb = T.grad(s, b, disconnected_inputs='ignore')
......@@ -6075,7 +6045,6 @@ def test_local_join_empty():
for n in e if isinstance(n.op, Join)])
assert f.maker.fgraph.outputs[0].dtype == config.floatX
# test for matrix join(1,a)
empty_mat = numpy.asarray([[]], dtype=config.floatX)
m = tensor.matrix('m')
......@@ -6220,13 +6189,13 @@ def test_local_useless_split():
f_opt = theano.function([x, splits], opt, mode=mode)
f_nonopt = theano.function([x, splits], nonopt, mode=mode)
f_opt(numpy.random.rand(4,4).astype(config.floatX), [4])
f_nonopt(numpy.random.rand(4,4).astype(config.floatX), [1,2,1])
f_opt(numpy.random.rand(4, 4).astype(config.floatX), [4])
f_nonopt(numpy.random.rand(4, 4).astype(config.floatX), [1, 2, 1])
graph_opt = f_opt.maker.fgraph.toposort()
graph_nonopt = f_nonopt.maker.fgraph.toposort()
assert isinstance(graph_opt[-1].op, DeepCopyOp)
assert len(graph_nonopt)==1
assert len(graph_nonopt) == 1
assert isinstance(graph_nonopt[0].op, tensor.Split)
assert check_stack_trace(f_opt, ops_to_check=[Assert])
......@@ -6244,7 +6213,7 @@ def test_local_flatten_lift():
x_np = numpy.random.rand(5, 4, 3, 2).astype(config.floatX)
out_np = f(x_np)
topo = f.maker.fgraph.toposort()
shape_out_np = tuple(x_np.shape[:i-1])+(numpy.prod(x_np.shape[i-1:]),)
shape_out_np = tuple(x_np.shape[:i - 1]) + (numpy.prod(x_np.shape[i - 1:]),)
assert shape_out_np == out_np.shape
reshape_nodes = [n for n in topo if isinstance(n.op, tensor.Reshape)]
......@@ -6275,7 +6244,7 @@ class Test_local_useless_reshape(unittest.TestCase):
mode = theano.compile.get_default_mode().including(
'local_useless_reshape')
i = T.iscalar('i')
m = theano.tensor.mgrid[0:i,]
m = theano.tensor.mgrid[0:i, ]
f = theano.function([i], m, mode=mode)
topo = f.maker.fgraph.toposort()
assert not any(isinstance(n.op, tensor.basic.Reshape) for n in topo)
......@@ -6528,7 +6497,7 @@ class TestIntDivByOne(unittest.TestCase):
"""Simple test case for removing dividing by 1"""
y = T.tensor4('y')
z = y // 1
f = theano.function([y], z, mode = self.mode)
f = theano.function([y], z, mode=self.mode)
graph = f.maker.fgraph.toposort()
divs = [node for node in graph
if isinstance(node.op, T.elemwise.Elemwise) and
......@@ -6538,7 +6507,7 @@ class TestIntDivByOne(unittest.TestCase):
def test3(self):
"""Simple test case for removing dividing by a tensor of ones"""
y = T.tensor4('y')
z = y // numpy.ones((2,2,2,2))
z = y // numpy.ones((2, 2, 2, 2))
f = theano.function([y], z, mode=self.mode)
graph = f.maker.fgraph.toposort()
divs = [node for node in graph
......@@ -6549,7 +6518,6 @@ class TestIntDivByOne(unittest.TestCase):
def test_local_zero_div():
"""Tests 0/x -> 0"""
mode = theano.compile.mode.get_default_mode().including("local_zero_div")
for t in (T.scalar, T.ivector, T.ftensor4):
x = t('x')
for op in (T.int_div, T.true_div):
......
......@@ -321,83 +321,8 @@ class TensorType(Type):
@staticmethod
def values_eq_approx(a, b, allow_remove_inf=False, allow_remove_nan=False,
rtol=None, atol=None):
"""
Parameters
----------
allow_remove_inf
If True, when there is an inf in a, we allow any value in b in
that position. Even -inf
allow_remove_nan
If True, when there is a nan in a, we allow any value in b in
that position. Even +-inf
rtol
Relative tolerance, passed to _allclose.
atol
Absolute tolerance, passed to _allclose.
"""
if isinstance(a, numpy.ndarray) and isinstance(b, numpy.ndarray):
if a.shape != b.shape:
return False
if a.dtype != b.dtype:
return False
if str(a.dtype) not in theano.tensor.continuous_dtypes:
return numpy.all(a == b)
else:
cmp = theano.tensor.basic._allclose(a, b, rtol=rtol, atol=atol)
if cmp:
# Numpy claims they are close, this is good enough for us.
return True
# Numpy is unhappy, but it does not necessarily mean that a and
# b are different. Indeed, Numpy does not like missing values
# and will return False whenever some are found in a or b.
# The proper way would be to use the MaskedArray stuff available
# in Numpy. However, it looks like it has been added to Numpy's
# core recently, so it may not be available to everyone. Thus,
# for now we use a home-made recipe, that should probably be
# revisited in the future.
a_missing = numpy.isnan(a)
a_inf = numpy.isinf(a)
if not (a_missing.any() or (allow_remove_inf and a_inf.any())):
# There are no missing values in a, thus this is not the
# reason why numpy.allclose(a, b) returned False.
_logger.info(
'numpy allclose failed for abs_err %f and rel_err %f',
numpy.max(abs(a - b)),
numpy.max(abs(a - b) / (abs(a) + abs(b))))
return False
# The following line is what numpy.allclose bases its decision
# upon, according to its documentation.
rtol = 1.0000000000000001e-05
atol = 1e-8
cmp_elemwise = (numpy.absolute(a - b) <=
(atol + rtol * numpy.absolute(b)))
# Find places where both a and b have missing values.
both_missing = a_missing * numpy.isnan(b)
# Find places where both a and b have inf of the same sign.
both_inf = a_inf * numpy.isinf(b)
# cmp_elemwise is weird when we have inf and -inf.
# set it to False
cmp_elemwise = numpy.where(
both_inf & cmp_elemwise,
a == b,
cmp_elemwise)
# check the sign of the inf
both_inf = numpy.where(both_inf, (a == b), both_inf)
if allow_remove_inf:
both_inf += a_inf
if allow_remove_nan:
both_missing += a_missing
# Combine all information.
return (cmp_elemwise + both_missing + both_inf).all()
return False
return values_eq_approx(a, b, allow_remove_inf, allow_remove_nan,
rtol, atol)
def __hash__(self):
"""Hash equal for same kinds of TensorType"""
......@@ -681,16 +606,97 @@ class TensorType(Type):
theano.compile.ops.expandable_types += (TensorType,)
def values_eq_approx(a, b, allow_remove_inf=False, allow_remove_nan=False,
rtol=None, atol=None):
"""
Parameters
----------
allow_remove_inf
If True, when there is an inf in a, we allow any value in b in
that position. Even -inf
allow_remove_nan
If True, when there is a nan in a, we allow any value in b in
that position. Even +-inf
rtol
Relative tolerance, passed to _allclose.
atol
Absolute tolerance, passed to _allclose.
"""
if isinstance(a, numpy.ndarray) and isinstance(b, numpy.ndarray):
if a.shape != b.shape:
return False
if a.dtype != b.dtype:
return False
if str(a.dtype) not in theano.tensor.continuous_dtypes:
return numpy.all(a == b)
else:
cmp = theano.tensor.basic._allclose(a, b, rtol=rtol, atol=atol)
if cmp:
# Numpy claims they are close, this is good enough for us.
return True
# Numpy is unhappy, but it does not necessarily mean that a and
# b are different. Indeed, Numpy does not like missing values
# and will return False whenever some are found in a or b.
# The proper way would be to use the MaskedArray stuff available
# in Numpy. However, it looks like it has been added to Numpy's
# core recently, so it may not be available to everyone. Thus,
# for now we use a home-made recipe, that should probably be
# revisited in the future.
a_missing = numpy.isnan(a)
a_inf = numpy.isinf(a)
if not (a_missing.any() or (allow_remove_inf and a_inf.any())):
# There are no missing values in a, thus this is not the
# reason why numpy.allclose(a, b) returned False.
_logger.info(
'numpy allclose failed for abs_err %f and rel_err %f',
numpy.max(abs(a - b)),
numpy.max(abs(a - b) / (abs(a) + abs(b))))
return False
# The following line is what numpy.allclose bases its decision
# upon, according to its documentation.
rtol = 1.0000000000000001e-05
atol = 1e-8
cmp_elemwise = (numpy.absolute(a - b) <=
(atol + rtol * numpy.absolute(b)))
# Find places where both a and b have missing values.
both_missing = a_missing * numpy.isnan(b)
# Find places where both a and b have inf of the same sign.
both_inf = a_inf * numpy.isinf(b)
# cmp_elemwise is weird when we have inf and -inf.
# set it to False
cmp_elemwise = numpy.where(
both_inf & cmp_elemwise,
a == b,
cmp_elemwise)
# check the sign of the inf
both_inf = numpy.where(both_inf, (a == b), both_inf)
if allow_remove_inf:
both_inf += a_inf
if allow_remove_nan:
both_missing += a_missing
# Combine all information.
return (cmp_elemwise + both_missing + both_inf).all()
return False
def values_eq_approx_remove_inf(a, b):
return TensorType.values_eq_approx(a, b, True)
return values_eq_approx(a, b, True)
def values_eq_approx_remove_nan(a, b):
return TensorType.values_eq_approx(a, b, False, True)
return values_eq_approx(a, b, False, True)
def values_eq_approx_remove_inf_nan(a, b):
return TensorType.values_eq_approx(a, b, True, True)
return values_eq_approx(a, b, True, True)
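# Illustration only, not part of this commit: a minimal, hypothetical
# sanity check of the helpers above. With allow_remove_nan=True, a NaN
# in `a` matches any value of `b` at the same position; the plain
# approximate comparison does not accept it.
def _demo_values_eq_approx_remove_nan():
    a = numpy.asarray([1., numpy.nan])
    b = numpy.asarray([1., 7.])
    # NaN vs. 7. fails the strict approximate comparison...
    assert not values_eq_approx(a, b)
    # ...but passes once NaN positions in `a` are masked out.
    assert values_eq_approx_remove_nan(a, b)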
def values_eq_approx_always_true(a, b):
......
......@@ -53,7 +53,6 @@ whitelist_flake8 = [
"tensor/tests/test_misc.py",
"tensor/tests/mlp_test.py",
"tensor/tests/test_opt_uncanonicalize.py",
"tensor/tests/test_opt.py",
"tensor/tests/test_basic.py",
"tensor/tests/test_blas.py",
"tensor/tests/test_merge.py",
......