提交 b1402c8b authored 作者: abergeron's avatar abergeron

Merge pull request #1802 from nouiz/f32

Fix buildbot tests error in float32
......@@ -15,7 +15,7 @@ Scan
- ``sum()`` could be computed by scanning the *z + x(i)* function over a list, given an initial state of *z=0*.
- Often a *for* loop can be expressed as a ``scan()`` operation, and ``scan`` is the closest that Theano comes to looping.
- Advantages of using ``scan`` over *for* loops:
- Allows the number of iterations to be part of the symbolic graph.
- Minimizes GPU transfers (if GPU is involved).
- Computes gradients through sequential steps.
......@@ -30,34 +30,34 @@ The full documentation can be found in the library: :ref:`Scan <lib_scan>`.
import theano
import theano.tensor as T
import numpy as np
# defining the tensor variables
X = T.matrix("X")
W = T.matrix("W")
b_sym = T.vector("b_sym")
results, updates = theano.scan(lambda v:T.tanh(T.dot(v,W)+b_sym), sequences=X)
compute_elementwise = theano.function(inputs = [X, W, b_sym], outputs=[results])
results, updates = theano.scan(lambda v: T.tanh(T.dot(v, W) + b_sym), sequences=X)
compute_elementwise = theano.function(inputs=[X, W, b_sym], outputs=[results])
# test values
x = np.eye(2)
w = np.ones((2,2))
b = np.ones((2))
x = np.eye(2, dtype=theano.config.floatX)
w = np.ones((2, 2), dtype=theano.config.floatX)
b = np.ones((2), dtype=theano.config.floatX)
b[1] = 2
print compute_elementwise(x, w, b)[0]
# comparison with numpy
print np.tanh(x.dot(w) + b)
**Scan Example: Computing the sequence x(t) = tanh(x(t-1).dot(W) + y(t).dot(U) + p(T-t).dot(V))**
**Scan Example: Computing the sequence x(t) = tanh(x(t - 1).dot(W) + y(t).dot(U) + p(T - t).dot(V))**
.. code-block:: python
import theano
import theano.tensor as T
import numpy as np
# define tensor variables
X = T.vector("X")
W = T.matrix("W")
......@@ -66,30 +66,30 @@ The full documentation can be found in the library: :ref:`Scan <lib_scan>`.
Y = T.matrix("Y")
V = T.matrix("V")
P = T.matrix("P")
results, updates = theano.scan(lambda
y,p,x_tm1:T.tanh(T.dot(x_tm1,W)+T.dot(y,U)+T.dot(p,V)),
sequences=[Y,P[::-1]], outputs_info=[X])
compute_seq = theano.function(inputs = [X, W, Y, U, P, V], outputs=[results])
results, updates = theano.scan(lambda y, p, x_tm1: T.tanh(T.dot(x_tm1, W) + T.dot(y, U) + T.dot(p, V)),
sequences=[Y, P[::-1]], outputs_info=[X])
compute_seq = theano.function(inputs=[X, W, Y, U, P, V], outputs=[results])
# test values
x = np.zeros((2))
x = np.zeros((2), dtype=theano.config.floatX)
x[1] = 1
w = np.ones((2,2))
y = np.ones((5,2))
y[0,:] = -3
u = np.ones((2,2))
p = np.ones((5,2))
p[0,:] = 3
v = np.ones((2,2))
print compute_seq(x,w,y,u,p,v)[0]
w = np.ones((2, 2), dtype=theano.config.floatX)
y = np.ones((5, 2), dtype=theano.config.floatX)
y[0, :] = -3
u = np.ones((2, 2), dtype=theano.config.floatX)
p = np.ones((5, 2), dtype=theano.config.floatX)
p[0, :] = 3
v = np.ones((2, 2), dtype=theano.config.floatX)
print compute_seq(x, w, y, u, p, v)[0]
# comparison with numpy
x_res = np.zeros((5,2))
x_res = np.zeros((5, 2), dtype=theano.config.floatX)
x_res[0] = np.tanh(x.dot(w) + y[0].dot(u) + p[4].dot(v))
for i in range(1,5):
x_res[i] = np.tanh(x_res[i-1].dot(w) + y[i].dot(u) + p[4-i].dot(v))
for i in range(1, 5):
x_res[i] = np.tanh(x_res[i - 1].dot(w) + y[i].dot(u) + p[4-i].dot(v))
print x_res
**Scan Example: Computing norms of lines of X**
......@@ -97,18 +97,18 @@ The full documentation can be found in the library: :ref:`Scan <lib_scan>`.
import theano
import theano.tensor as T
import numpy as np
# define tensor variable
X = T.matrix("X")
results, updates = theano.scan(lambda x_i:T.sqrt((x_i**2).sum()), sequences=[X])
compute_norm_lines = theano.function(inputs = [X], outputs=[results])
results, updates = theano.scan(lambda x_i: T.sqrt((x_i ** 2).sum()), sequences=[X])
compute_norm_lines = theano.function(inputs=[X], outputs=[results])
# test value
x = np.diag(np.arange(1,6),1)
x = np.diag(np.arange(1, 6, dtype=theano.config.floatX), 1)
print compute_norm_lines(x)[0]
# comparison with numpy
print np.sqrt((x**2).sum(1))
print np.sqrt((x ** 2).sum(1))
**Scan Example: Computing norms of columns of X**
......@@ -116,18 +116,18 @@ The full documentation can be found in the library: :ref:`Scan <lib_scan>`.
import theano
import theano.tensor as T
import numpy as np
# define tensor variable
X = T.matrix("X")
results, updates = theano.scan(lambda x_i:T.sqrt((x_i**2).sum()), sequences=[X.T])
compute_norm_cols = theano.function(inputs = [X], outputs=[results])
results, updates = theano.scan(lambda x_i: T.sqrt((x_i ** 2).sum()), sequences=[X.T])
compute_norm_cols = theano.function(inputs=[X], outputs=[results])
# test value
x = np.diag(np.arange(1,6),1)
x = np.diag(np.arange(1, 6, dtype=theano.config.floatX), 1)
print compute_norm_cols(x)[0]
# comparison with numpy
print np.sqrt((x**2).sum(0))
print np.sqrt((x ** 2).sum(0))
**Scan Example: Computing trace of X**
......@@ -136,30 +136,30 @@ The full documentation can be found in the library: :ref:`Scan <lib_scan>`.
import theano.tensor as T
import numpy as np
floatX = "float32"
# define tensor variable
X = T.matrix("X")
results, updates = theano.scan(lambda i, j, t_f:T.cast(X[i,j]+t_f, floatX), \
sequences=[T.arange(X.shape[0]), T.arange(X.shape[1])], \
results, updates = theano.scan(lambda i, j, t_f: T.cast(X[i, j] + t_f, floatX),
sequences=[T.arange(X.shape[0]), T.arange(X.shape[1])],
outputs_info=np.asarray(0., dtype=floatX))
result = results[-1]
compute_trace = theano.function(inputs = [X], outputs=[result])
compute_trace = theano.function(inputs=[X], outputs=[result])
# test value
x = np.eye(5)
x[0] = np.arange(5)
compute_trace(x)[0]
x = np.eye(5, dtype=theano.config.floatX)
x[0] = np.arange(5, dtype=theano.config.floatX)
print compute_trace(x)[0]
# comparison with numpy
print np.diagonal(x).sum()
**Scan Example: Computing the sequence x(t) = x(t-2).dot(U) + x(t-1).dot(V) + tanh(x(t-1).dot(W) + b)**
**Scan Example: Computing the sequence x(t) = x(t - 2).dot(U) + x(t - 1).dot(V) + tanh(x(t - 1).dot(W) + b)**
.. code-block:: python
import theano
import theano.tensor as T
import numpy as np
# define tensor variables
X = T.matrix("X")
W = T.matrix("W")
......@@ -168,31 +168,30 @@ The full documentation can be found in the library: :ref:`Scan <lib_scan>`.
V = T.matrix("V")
n_sym = T.iscalar("n_sym")
results, updates = theano.scan(lambda x_tm2,x_tm1:T.dot(x_tm2,U) + T.dot(x_tm1,V) \
+ T.tanh(T.dot(x_tm1,W) + b_sym), \
n_steps=n_sym, outputs_info=[dict(initial = X, taps = [-2,-1])])
compute_seq2 = theano.function(inputs = [X, U, V, W, b_sym, n_sym], outputs=[results])
results, updates = theano.scan(lambda x_tm2, x_tm1: T.dot(x_tm2, U) + T.dot(x_tm1, V) + T.tanh(T.dot(x_tm1, W) + b_sym),
n_steps=n_sym, outputs_info=[dict(initial=X, taps=[-2, -1])])
compute_seq2 = theano.function(inputs=[X, U, V, W, b_sym, n_sym], outputs=[results])
# test values
x = np.zeros((2,2)) # the initial value must be able to return x[-2]
x[1,1] = 1
w = 0.5*np.ones((2,2))
u = 0.5*(np.ones((2,2))-np.eye(2))
v = 0.5*np.ones((2,2))
x = np.zeros((2, 2), dtype=theano.config.floatX) # the initial value must be able to return x[-2]
x[1, 1] = 1
w = 0.5 * np.ones((2, 2), dtype=theano.config.floatX)
u = 0.5 * (np.ones((2, 2), dtype=theano.config.floatX) - np.eye(2, dtype=theano.config.floatX))
v = 0.5 * np.ones((2, 2), dtype=theano.config.floatX)
n = 10
b = np.ones((2))
print compute_seq2(x,u,v,w,b,n)
b = np.ones((2), dtype=theano.config.floatX)
print compute_seq2(x, u, v, w, b, n)
# comparison with numpy
x_res = numpy.zeros((10,2))
x_res[0] = x[0].dot(u) + x[1].dot(v) + numpy.tanh(x[1].dot(w) + b)
x_res[1] = x[1].dot(u) + x_res[0].dot(v) + numpy.tanh(x_res[0].dot(w) + b)
x_res[2] = x_res[0].dot(u) + x_res[1].dot(v) \
+ numpy.tanh(x_res[1].dot(w) + b)
for i in range(2,10):
x_res[i] = (x_res[i-2].dot(u) + x_res[i-1].dot(v) \
+ numpy.tanh(x_res[i-1].dot(w) + b))
x_res = np.zeros((10, 2))
x_res[0] = x[0].dot(u) + x[1].dot(v) + np.tanh(x[1].dot(w) + b)
x_res[1] = x[1].dot(u) + x_res[0].dot(v) + np.tanh(x_res[0].dot(w) + b)
x_res[2] = x_res[0].dot(u) + x_res[1].dot(v) + np.tanh(x_res[1].dot(w) + b)
for i in range(2, 10):
x_res[i] = (x_res[i - 2].dot(u) + x_res[i - 1].dot(v) +
np.tanh(x_res[i - 1].dot(w) + b))
print x_res
**Scan Example: Computing the Jacobian of y = tanh(v.dot(A)) wrt x**
......@@ -200,24 +199,24 @@ The full documentation can be found in the library: :ref:`Scan <lib_scan>`.
import theano
import theano.tensor as T
import numpy as np
# define tensor variables
v = T.vector()
A = T.matrix()
y = T.tanh(T.dot(v,A))
results, updates = theano.scan(lambda i:T.grad(y[i], v), sequences = [T.arange(y.shape[0])])
compute_jac_t = theano.function([A,v], [results], allow_input_downcast = True) # shape (d_out, d_in)
y = T.tanh(T.dot(v, A))
results, updates = theano.scan(lambda i: T.grad(y[i], v), sequences=[T.arange(y.shape[0])])
compute_jac_t = theano.function([A, v], [results], allow_input_downcast=True) # shape (d_out, d_in)
# test values
x = np.eye(5)[0]
w = np.eye(5,3)
w[2] = np.ones((3))
print compute_jac_t(w,x)[0]
x = np.eye(5, dtype=theano.config.floatX)[0]
w = np.eye(5, 3, dtype=theano.config.floatX)
w[2] = np.ones((3), dtype=theano.config.floatX)
print compute_jac_t(w, x)[0]
# compare with numpy
print ((1 - np.tanh(x.dot(w))**2)*w).T
print ((1 - np.tanh(x.dot(w)) ** 2) * w).T
Note that we need to iterate over the indices of ``y`` and not over the elements of ``y``. The reason is that scan creates a placeholder variable for its internal function, and this placeholder variable does not have the same dependencies as the variables that will replace it.
Note that we need to iterate over the indices of ``y`` and not over the elements of ``y``. The reason is that scan creates a placeholder variable for its internal function, and this placeholder variable does not have the same dependencies as the variables that will replace it.
**Scan Example: Accumulate number of loop during a scan**
......@@ -225,46 +224,46 @@ Note that we need to iterate over the indices of ``y`` and not over the elements
import theano
import theano.tensor as T
import numpy as np
# define shared variables
k = theano.shared(0)
n_sym = T.iscalar("n_sym")
results, updates = theano.scan(lambda:{k:(k+1)}, n_steps=n_sym)
accumulator = theano.function([n_sym], [], updates=updates, allow_input_downcast = True)
results, updates = theano.scan(lambda:{k:(k + 1)}, n_steps=n_sym)
accumulator = theano.function([n_sym], [], updates=updates, allow_input_downcast=True)
k.get_value()
accumulator(5)
k.get_value()
**Scan Example: Computing tanh(v.dot(W) + b)*d where b is binomial**
**Scan Example: Computing tanh(v.dot(W) + b) * d where b is binomial**
.. code-block:: python
import theano
import theano.tensor as T
import numpy as np
# define tensor variables
X = T.matrix("X")
W = T.matrix("W")
b_sym = T.vector("b_sym")
# define shared random stream
trng = T.shared_randomstreams.RandomStreams(1234)
d=trng.binomial(size=W[1].shape)
results, updates = theano.scan(lambda v:T.tanh(T.dot(v,W)+b_sym)*d, sequences=X)
compute_with_bnoise = theano.function(inputs = [X, W, b_sym], outputs=[results], \
updates=updates, allow_input_downcast = True)
x = np.eye(10,2)
w = np.ones((2,2))
b = np.ones((2))
results, updates = theano.scan(lambda v: T.tanh(T.dot(v, W) + b_sym) * d, sequences=X)
compute_with_bnoise = theano.function(inputs=[X, W, b_sym], outputs=[results],
updates=updates, allow_input_downcast=True)
x = np.eye(10, 2, dtype=theano.config.floatX)
w = np.ones((2, 2), dtype=theano.config.floatX)
b = np.ones((2), dtype=theano.config.floatX)
print compute_with_bnoise(x, w, b)
Note that if you want to use a random variable ``d`` that will not be updated through scan loops, you should pass this variable as a ``non_sequences`` argument.
**Scan Example: Computing pow(A,k)**
**Scan Example: Computing pow(A, k)**
.. code-block:: python
......@@ -286,11 +285,11 @@ Note that if you want to use a random variable ``d`` that will not be updated th
# Scan has provided us with A ** 1 through A ** k. Keep only the last
# value. Scan notices this and does not waste memory saving them.
final_result = result[-1]
power = theano.function(inputs=[A, k], outputs=final_result,
updates=updates)
print power(range(10),2)
print power(range(10), 2)
#[ 0. 1. 4. 9. 16. 25. 36. 49. 64. 81.]
......
......@@ -1146,7 +1146,8 @@ class test_fusion(unittest.TestCase):
#we need the optimisation enabled and the canonicalize.
#the canonicalize is needed to merge multiplication/addition by constant.
mode._optimizer = mode._optimizer.including(
'local_elemwise_fusion', 'canonicalize')
'local_elemwise_fusion', 'composite_elemwise_fusion',
'canonicalize')
self.do(mode, shared, shp)
@attr('slow')
......@@ -1156,7 +1157,8 @@ class test_fusion(unittest.TestCase):
#we need the optimisation enabled and the canonicalize.
#the canonicalize is needed to merge multiplication/addition by constant.
mode._optimizer = mode._optimizer.including(
'local_elemwise_fusion', 'canonicalize')
'local_elemwise_fusion', 'composite_elemwise_fusion',
'canonicalize')
self.do(mode, shared, shp)
def test_gpu_fusion(self):
......@@ -1164,10 +1166,12 @@ class test_fusion(unittest.TestCase):
#we need the optimisation enabled, debug do this.
if theano.config.mode == "FAST_COMPILE":
mode = theano.compile.mode.get_mode("FAST_RUN").including(
'local_elemwise_fusion', 'canonicalize', 'gpu')
'local_elemwise_fusion', 'composite_elemwise_fusion',
'canonicalize', 'gpu')
else:
mode = theano.compile.mode.get_default_mode().including(
'local_elemwise_fusion', 'canonicalize', 'gpu')
'local_elemwise_fusion', 'composite_elemwise_fusion',
'canonicalize', 'gpu')
import theano.sandbox.cuda as cuda
if not cuda.cuda_available:
raise SkipTest("cuda not available")
......@@ -1179,10 +1183,12 @@ class test_fusion(unittest.TestCase):
#we need the optimisation enabled, debug do this.
if theano.config.mode == "FAST_COMPILE":
mode = theano.compile.mode.get_mode("FAST_RUN").including(
'local_elemwise_fusion', 'canonicalize', 'gpu')
'local_elemwise_fusion', 'composite_elemwise_fusion',
'canonicalize', 'gpu')
else:
mode = theano.compile.mode.get_default_mode().including(
'local_elemwise_fusion', 'canonicalize', 'gpu')
'local_elemwise_fusion', 'composite_elemwise_fusion',
'canonicalize', 'gpu')
import theano.sandbox.cuda as cuda
if not cuda.cuda_available:
raise SkipTest("cuda not available")
......@@ -1278,7 +1284,8 @@ class test_fusion(unittest.TestCase):
#we need the optimisation enabled and the canonicalize.
#the canonicalize is needed to merge multiplication/addition by constant.
mode._optimizer = mode._optimizer.including(
'local_elemwise_fusion', 'canonicalize', 'inplace')
'local_elemwise_fusion', 'composite_elemwise_fusion',
'canonicalize', 'inplace')
x, y, z = dmatrices('xyz')
f = theano.function([x, y, z], tensor.dot(x, y) + x + y + z, mode=mode)
......
......@@ -1137,213 +1137,209 @@ class T_graphstructures(unittest.TestCase):
assert e.owner.inputs[1].owner.inputs[0] is y
assert e.owner.inputs[1].owner.inputs[1] is z
class T_scan(unittest.TestCase):
## All tests here belong to
## http://deeplearning.net/software/theano/tutorial/loop.html
## Theano/doc/tutorial/loop.txt
## Any change you make here should also be made in the tutorial!
def test_elemwise(self):
# defining the tensor variables
X = T.matrix("X")
W = T.matrix("W")
b_sym = T.vector("b_sym")
results, updates = theano.scan(lambda v:T.tanh(T.dot(v,W)+b_sym), \
sequences=X)
compute_elementwise = theano.function(inputs = [X, W, b_sym], \
outputs=[results])
# test values
x = numpy.eye(2)
w = numpy.ones((2,2))
b = numpy.ones((2))
b[1] = 2
print "Scan results:", compute_elementwise(x, w, b)[0]
# comparison with numpy
print "Numpy results:", numpy.tanh(x.dot(w) + b)
# defining the tensor variables
X = T.matrix("X")
W = T.matrix("W")
b_sym = T.vector("b_sym")
results, updates = theano.scan(lambda v: T.tanh(T.dot(v, W) + b_sym),
sequences=X)
compute_elementwise = theano.function(inputs=[X, W, b_sym],
outputs=[results])
# test values
x = numpy.eye(2, dtype=theano.config.floatX)
w = numpy.ones((2, 2), dtype=theano.config.floatX)
b = numpy.ones((2), dtype=theano.config.floatX)
b[1] = 2
print "Scan results:", compute_elementwise(x, w, b)[0]
# comparison with numpy
print "Numpy results:", numpy.tanh(x.dot(w) + b)
def test_sequence(self):
# define tensor variables
X = T.vector("X")
W = T.matrix("W")
b_sym = T.vector("b_sym")
U = T.matrix("U")
Y = T.matrix("Y")
V = T.matrix("V")
P = T.matrix("P")
results, updates = theano.scan(lambda \
y,p,x_tm1:T.tanh(T.dot(x_tm1,W) + \
T.dot(y,U)+T.dot(p,V)), \
sequences=[Y,P[::-1]], outputs_info=[X])
compute_seq = theano.function(inputs = [X, W, Y, U, P, V], \
outputs=[results])
# test values
x = numpy.zeros((2))
x[1] = 1
w = numpy.ones((2,2))
y = numpy.ones((5,2))
y[0,:] = -3
u = numpy.ones((2,2))
p = numpy.ones((5,2))
p[0,:] = 3
v = numpy.ones((2,2))
print "Scan results", compute_seq(x,w,y,u,p,v)[0]
# comparison with numpy
x_res = numpy.zeros((5,2))
x_res[0] = numpy.tanh(x.dot(w) + y[0].dot(u) + p[4].dot(v))
for i in range(1,5):
x_res[i] = numpy.tanh(x_res[i-1].dot(w) \
+ y[i].dot(u) + p[4-i].dot(v))
print "Numpy results:", x_res
# define tensor variables
X = T.vector("X")
W = T.matrix("W")
b_sym = T.vector("b_sym")
U = T.matrix("U")
Y = T.matrix("Y")
V = T.matrix("V")
P = T.matrix("P")
results, updates = theano.scan(
lambda y, p, x_tm1: T.tanh(T.dot(x_tm1, W) +
T.dot(y, U) + T.dot(p, V)),
sequences=[Y, P[::-1]], outputs_info=[X])
compute_seq = theano.function(inputs=[X, W, Y, U, P, V],
outputs=[results])
# test values
x = numpy.zeros((2), dtype=theano.config.floatX)
x[1] = 1
w = numpy.ones((2, 2), dtype=theano.config.floatX)
y = numpy.ones((5, 2), dtype=theano.config.floatX)
y[0, :] = -3
u = numpy.ones((2, 2), dtype=theano.config.floatX)
p = numpy.ones((5, 2), dtype=theano.config.floatX)
p[0, :] = 3
v = numpy.ones((2, 2), dtype=theano.config.floatX)
print "Scan results", compute_seq(x, w, y, u, p, v)[0]
# comparison with numpy
x_res = numpy.zeros((5, 2), dtype=theano.config.floatX)
x_res[0] = numpy.tanh(x.dot(w) + y[0].dot(u) + p[4].dot(v))
for i in range(1, 5):
x_res[i] = numpy.tanh(x_res[i-1].dot(w) +
y[i].dot(u) + p[4-i].dot(v))
print "Numpy results:", x_res
def test_norm(self):
# define tensor variable
X = T.matrix("X")
results, updates = theano.scan(lambda x_i:T.sqrt((x_i**2).sum()), \
sequences=[X])
compute_norm_lines = theano.function(inputs = [X], outputs=[results])
results, updates = theano.scan(lambda x_i:T.sqrt((x_i**2).sum()), \
sequences=[X.T])
compute_norm_cols = theano.function(inputs = [X], outputs=[results])
# test value
x = numpy.diag(numpy.arange(1,6),1)
print "Scan results:", compute_norm_lines(x)[0], \
# define tensor variable
X = T.matrix("X")
results, updates = theano.scan(lambda x_i: T.sqrt((x_i**2).sum()),
sequences=[X])
compute_norm_lines = theano.function(inputs=[X], outputs=[results])
results, updates = theano.scan(lambda x_i: T.sqrt((x_i**2).sum()),
sequences=[X.T])
compute_norm_cols = theano.function(inputs=[X], outputs=[results])
# test value
x = numpy.diag(numpy.arange(1, 6, dtype=theano.config.floatX), 1)
print "Scan results:", compute_norm_lines(x)[0], \
compute_norm_cols(x)[0]
# comparison with numpy
print "Numpy results:", numpy.sqrt((x**2).sum(1)), \
# comparison with numpy
print "Numpy results:", numpy.sqrt((x**2).sum(1)), \
numpy.sqrt((x**2).sum(0))
def test_trace(self):
# define tensor variable
X = T.matrix("X")
results, updates = theano.scan(lambda i, j, t_f:T.cast(X[i,j] + \
t_f, theano.config.floatX), \
sequences=[T.arange(X.shape[0]), \
T.arange(X.shape[1])], \
outputs_info=numpy.asarray(0., \
dtype=theano.config.floatX))
result = results[-1]
compute_trace = theano.function(inputs = [X], outputs=[result])
# test value
x = numpy.eye(5)
x[0] = numpy.arange(5)
print "Scan results:", compute_trace(x)[0]
# comparison with numpy
print "Numpy results:", numpy.diagonal(x).sum()
# define tensor variable
X = T.matrix("X")
results, updates = theano.scan(lambda i, j, t_f: T.cast(X[i,j] +
t_f, theano.config.floatX),
sequences=[T.arange(X.shape[0]),
T.arange(X.shape[1])],
outputs_info=numpy.asarray(
0., dtype=theano.config.floatX))
result = results[-1]
compute_trace = theano.function(inputs=[X], outputs=[result])
# test value
x = numpy.eye(5, dtype=theano.config.floatX)
x[0] = numpy.arange(5, dtype=theano.config.floatX)
print "Scan results:", compute_trace(x)[0]
# comparison with numpy
print "Numpy results:", numpy.diagonal(x).sum()
def test_taps(self):
# define tensor variables
X = T.matrix("X")
W = T.matrix("W")
b_sym = T.vector("b_sym")
U = T.matrix("U")
V = T.matrix("V")
n_sym = T.iscalar("n_sym")
results, updates = theano.scan(lambda x_tm2,x_tm1:T.dot(x_tm2,U) \
+ T.dot(x_tm1,V) + T.tanh(T.dot(x_tm1,W) + b_sym), \
n_steps=n_sym, \
outputs_info=[dict(initial = X, taps = [-2,-1])])
compute_seq2 = theano.function(inputs = [X, U, V, W, b_sym, \
n_sym], outputs=[results])
# test values
x = numpy.zeros((2,2))
# the initial value must be able to return x[-2]
x[1,1] = 1
w = 0.5*numpy.ones((2,2))
u = 0.5*(numpy.ones((2,2))-numpy.eye(2))
v = 0.5*numpy.ones((2,2))
n = 10
b = numpy.ones((2))
print "Scan results:", compute_seq2(x,u,v,w,b,n)
# comparison with numpy
x_res = numpy.zeros((10,2))
x_res[0] = x[0].dot(u) + x[1].dot(v) + numpy.tanh(x[1].dot(w) + b)
x_res[1] = x[1].dot(u) + x_res[0].dot(v) \
# define tensor variables
X = T.matrix("X")
W = T.matrix("W")
b_sym = T.vector("b_sym")
U = T.matrix("U")
V = T.matrix("V")
n_sym = T.iscalar("n_sym")
results, updates = theano.scan(
lambda x_tm2,x_tm1: T.dot(x_tm2,U) + T.dot(x_tm1,V) + T.tanh(T.dot(x_tm1,W) + b_sym),
n_steps=n_sym,
outputs_info=[dict(initial=X, taps=[-2, -1])])
compute_seq2 = theano.function(inputs=[X, U, V, W, b_sym, n_sym],
outputs=[results])
# test values
x = numpy.zeros((2, 2), dtype=theano.config.floatX)
# the initial value must be able to return x[-2]
x[1, 1] = 1
w = 0.5 * numpy.ones((2, 2), dtype=theano.config.floatX)
u = 0.5 * (numpy.ones((2, 2), dtype=theano.config.floatX) -
numpy.eye(2, dtype=theano.config.floatX))
v = 0.5 * numpy.ones((2, 2), dtype=theano.config.floatX)
n = 10
b = numpy.ones((2), dtype=theano.config.floatX)
print "Scan results:", compute_seq2(x, u, v, w, b, n)
# comparison with numpy
x_res = numpy.zeros((10, 2), dtype=theano.config.floatX)
x_res[0] = x[0].dot(u) + x[1].dot(v) + numpy.tanh(x[1].dot(w) + b)
x_res[1] = x[1].dot(u) + x_res[0].dot(v) \
+ numpy.tanh(x_res[0].dot(w) + b)
x_res[2] = x_res[0].dot(u) + x_res[1].dot(v) \
+ numpy.tanh(x_res[1].dot(w) + b)
for i in range(2,10):
x_res[i] = (x_res[i-2].dot(u) + x_res[i-1].dot(v) \
+ numpy.tanh(x_res[i-1].dot(w) + b))
print "Numpy results:", x_res
x_res[2] = x_res[0].dot(u) + x_res[1].dot(v) \
+ numpy.tanh(x_res[1].dot(w) + b)
for i in range(2, 10):
x_res[i] = (x_res[i-2].dot(u) + x_res[i-1].dot(v) +
numpy.tanh(x_res[i-1].dot(w) + b))
print "Numpy results:", x_res
def test_jacobian(self):
# define tensor variables
v = T.vector()
A = T.matrix()
y = T.tanh(T.dot(v,A))
results, updates = theano.scan(lambda i:T.grad(y[i], v), \
sequences = [T.arange(y.shape[0])])
compute_jac_t = theano.function([A,v], [results], \
allow_input_downcast = True) # shape (d_out, d_in)
# test values
x = numpy.eye(5)[0]
w = numpy.eye(5,3)
w[2] = numpy.ones((3))
print "Scan results:", compute_jac_t(w,x)[0]
# compare with numpy
print "Numpy results:", ((1 - numpy.tanh(x.dot(w))**2)*w).T
# define tensor variables
v = T.vector()
A = T.matrix()
y = T.tanh(T.dot(v, A))
results, updates = theano.scan(lambda i: T.grad(y[i], v),
sequences=[T.arange(y.shape[0])])
compute_jac_t = theano.function([A, v], [results],
allow_input_downcast=True) # shape (d_out, d_in)
# test values
x = numpy.eye(5)[0]
w = numpy.eye(5, 3)
w[2] = numpy.ones((3))
print "Scan results:", compute_jac_t(w, x)[0]
# compare with numpy
print "Numpy results:", ((1 - numpy.tanh(x.dot(w))**2)*w).T
def test_accumulator(self):
# define shared variables
k = theano.shared(0)
n_sym = T.iscalar("n_sym")
results, updates = theano.scan(lambda:{k:(k+1)}, n_steps=n_sym)
accumulator = theano.function([n_sym], [], updates=updates, \
allow_input_downcast = True)
print "Before 5 steps:", k.get_value()
accumulator(5)
print "After 5 steps:", k.get_value()
def test_random(self):
# define tensor variables
X = T.matrix("X")
W = T.matrix("W")
b_sym = T.vector("b_sym")
# define shared random stream
trng = T.shared_randomstreams.RandomStreams(1234)
d=trng.binomial(size=W[1].shape)
results, updates = theano.scan(lambda v:T.tanh(T.dot(v,W) \
+ b_sym)*d, sequences=X)
compute_with_bnoise = theano.function(inputs = [X, W, b_sym], \
outputs=[results], \
updates=updates, \
allow_input_downcast = True)
x = numpy.eye(10,2)
w = numpy.ones((2,2))
b = numpy.ones((2))
print compute_with_bnoise(x, w, b)
# define shared variables
k = theano.shared(0)
n_sym = T.iscalar("n_sym")
results, updates = theano.scan(lambda: {k: (k + 1)}, n_steps=n_sym)
accumulator = theano.function([n_sym], [], updates=updates,
allow_input_downcast=True)
print "Before 5 steps:", k.get_value()
accumulator(5)
print "After 5 steps:", k.get_value()
def test_random(self):
# define tensor variables
X = T.matrix("X")
W = T.matrix("W")
b_sym = T.vector("b_sym")
# define shared random stream
trng = T.shared_randomstreams.RandomStreams(1234)
d = trng.binomial(size=W[1].shape)
results, updates = theano.scan(lambda v: T.tanh(T.dot(v, W) + b_sym) * d,
sequences=X)
compute_with_bnoise = theano.function(inputs=[X, W, b_sym],
outputs=[results],
updates=updates,
allow_input_downcast = True)
x = numpy.eye(10,2)
w = numpy.ones((2,2))
b = numpy.ones((2))
print compute_with_bnoise(x, w, b)
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论