Commit 78d2b9a7 authored by Razvan Pascanu

Updated the interface and documentation of scan following James' suggestions; I also implemented a map function using scan

Parent 90cf38f3
@@ -32,8 +32,7 @@ The equivalent Theano code would be

# Symbolic description of the result
result, updates = theano.scan(fn = lambda x_tm1, A: x_tm1*A,\
                              info_outputs = T.ones_like(A),\
                              non_sequences = A, \
                              n_steps = k)
@@ -46,13 +45,12 @@ construct a function (using a lambda expression) that given `x_tm1` and

is the value of our output at time step ``t-1``. Therefore
``x_t`` (value of output at time `t`) is `A` times value of output
at `t-1`.
Next we initialize the output as a tensor with the same shape as A,
filled with ones. We give A to scan as a non-sequence parameter and
specify the number of steps k to iterate over our lambda expression.
Scan will return a tuple, containing our result (``result``) and a
dictionary of updates (empty in this case). Note that the result
is not a matrix, but a 3D tensor containing the value of ``A**k`` for
each step. We want the last value (after k steps) so we compile
a function to return just that.
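For intuition, the computation this scan performs can be sketched in plain NumPy (the helper name is hypothetical; note that `x_tm1*A` is an elementwise product, which is why the final slice is the elementwise power of A):

```python
import numpy as np

def scan_power(A, k):
    # Sketch of the scan above: start from ones_like(A), multiply
    # elementwise by A at each step, and keep every intermediate value,
    # since scan returns the whole history of the output.
    x = np.ones_like(A)
    results = []
    for _ in range(k):
        x = x * A
        results.append(x)
    return np.stack(results)  # 3D tensor: one slice per step

A = np.array([[2., 3.], [4., 5.]])
out = scan_power(A, 3)
last = out[-1]  # elementwise A**3, what the compiled function returns
```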
@@ -69,8 +67,8 @@ that our RNN is defined as follows :

y(n) = W^{out} x(n-3)

Note that this network is far from a classical recurrent neural
network and might be useless in practice. The reason we defined it as
such is to better illustrate the features of scan.
In this case we have a sequence over which we need to iterate ``u``,

@@ -89,12 +87,15 @@ construct a function that computes one iteration step :

return [x_t, y_t]
As a naming convention for the variables, we use ``a_tmb`` to mean ``a`` at
``t-b`` and ``a_tpb`` to mean ``a`` at ``t+b``.
Note the order in which the parameters are given, and in which the
result is returned. Try to respect chronological order among the taps
(time slices of sequences or outputs) used. For scan it is crucial only
that the variables representing the different time taps appear in the
same order as the one in which these taps are given. Not only the taps
but also the variables must respect this order, since this is how scan
figures out which variable stands for which tap. Given that we have all
the Theano variables needed we construct our RNN as follows :
.. code-block:: python

@@ -106,12 +107,10 @@ the Theano variables needed we construct our RNN as follows :

# y[-1]
([x_vals, y_vals], updates) = theano.scan(fn = oneStep, \
        sequences = dict(input = u, taps = [-4, 0]), \
        info_outputs = [dict(initial = x0, taps = [-3,-1]), y0], \
        non_sequences = [W, W_in_1, W_in_2, W_feedback, W_out])
# for second input y, scan adds -1 in output_taps by default
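To make the tap mechanics concrete, here is a small plain-Python sketch (helper name and toy step function are hypothetical) of how scan walks a sequence with taps [-4, 0] and an output with taps [-3, -1]: at step t the inner function receives u[t-4], u[t], x[t-3], x[t-1], in exactly that order.

```python
import numpy as np

def scan_with_taps(step, u, x_init):
    # x_init supplies the three initial states required by tap -3
    x = list(x_init)
    out = []
    # start at t = 4 so that the sequence tap u[t-4] is defined
    for t in range(4, len(u)):
        x_t = step(u[t - 4], u[t], x[-3], x[-1])
        x.append(x_t)
        out.append(x_t)
    return np.array(out)

# toy step function: just sum the four slices it is handed
u = np.array([1., 2., 3., 4., 5., 6.])
res = scan_with_taps(lambda a, b, c, d: a + b + c + d, u, [0.1, 0.2, 0.3])
```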
@@ -153,7 +152,7 @@ the following:

sample = theano.tensor.vector()
values, updates = theano.scan( OneStep, info_outputs = sample, n_steps = 10 )
gibbs10 = theano.function([sample], values[-1], updates = updates)
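Conceptually, gibbs10 chains ten applications of OneStep and keeps only the final value. A deterministic stand-in makes the pattern clear (the real OneStep is a stochastic Gibbs update; `one_step` below is only a placeholder):

```python
import numpy as np

def one_step(v):
    # hypothetical deterministic placeholder for the Gibbs update
    return 0.5 * (v + v ** 2)

def run_chain(sample, n_steps=10):
    v = sample
    for _ in range(n_steps):
        v = one_step(v)
    return v  # scan returns every step; values[-1] selects this last one

out = run_chain(np.ones(3))
```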
@@ -177,7 +176,7 @@ afterwards. Look at this example :

.. code-block:: python

a = theano.shared(1)
values, updates = theano.scan( lambda : {a:a+1}, n_steps = 10 )

In this case the lambda expression does not require any input parameters
and returns an update dictionary which tells how ``a`` should be updated
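These semantics can be sketched without Theano (the class and helper names are hypothetical): the returned dictionary maps a shared variable to its next value, and each scan step applies that update once.

```python
class Shared:
    # minimal stand-in for a Theano shared variable
    def __init__(self, value):
        self.value = value

def scan_updates(update_fn, n_steps):
    # apply the returned update dictionary once per step
    for _ in range(n_steps):
        new_vals = update_fn()          # e.g. {a: a + 1}
        for var, val in new_vals.items():
            var.value = val

a = Shared(1)
scan_updates(lambda: {a: a.value + 1}, n_steps=10)
# after ten steps, a has been incremented ten times
```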
...
(diff collapsed)
@@ -103,7 +103,7 @@ class T_Scan(unittest.TestCase):

# generator network, only one output, type scalar; no sequence or
# non-sequence arguments
def test_generator_one_output_scalar(self):

def f_pow2(x_tm1):

@@ -117,7 +117,7 @@ class T_Scan(unittest.TestCase):

assert(compareArrays(f1(1,3), [2,4,8]))

# simple rnn, one input, one state, weights for each; input/state are
# vectors, weights are scalars
def test_one_sequence_one_output_weights(self):

def f_rnn(u_t, x_tm1, W_in, W):
@@ -138,9 +138,9 @@ class T_Scan(unittest.TestCase):

# simple rnn, one input, one state, weights for each; input/state are
# vectors, weights are scalars; using shared variables
def test_one_sequence_one_output_weights_shared(self):

u = theano.tensor.dvector()
x0 = theano.tensor.dscalar()
W_in = theano.shared(.1, name = 'w_in')
W = theano.shared(1., name = 'w')
@@ -158,7 +158,7 @@ class T_Scan(unittest.TestCase):

# some rnn with multiple outputs and multiple inputs; other dimensions
# instead of scalars/vectors
def test_multiple_inputs_multiple_outputs(self):

W_in2 = theano.shared(numpy.array([1.,2.]), name='win2')
W = theano.shared(numpy.array([[2.,1.],[1.,1.]]), name='w')
@@ -191,7 +191,7 @@ class T_Scan(unittest.TestCase):

# simple rnn, one input, one state, weights for each; input/state are
# vectors, weights are scalars; using shared variables and past
# taps (sequences and outputs)
def test_using_taps_input_output(self):

u = theano.tensor.dvector()
x0 = theano.tensor.dvector()

@@ -201,8 +201,8 @@ class T_Scan(unittest.TestCase):

def f_rnn_shared(u_tm2, x_tm1, x_tm2):
    return u_tm2*W_in + x_tm1*W + x_tm2

Y, updates = theano.scan(f_rnn_shared, dict(input = u, taps = -2),
                         dict(initial = x0, taps = [-1,-2]), [])
f7 = theano.function([u,x0], Y, updates = updates)
v_u = numpy.asarray([1.,2.,3.,4.])
@@ -213,7 +213,7 @@ class T_Scan(unittest.TestCase):

# simple rnn, one input, one state, weights for each; input/state are
# vectors, weights are scalars; using shared variables and past
# taps (sequences and outputs) and future taps for sequences
def test_past_future_taps_shared(self):

u = theano.tensor.dvector()
x0 = theano.tensor.dvector()

@@ -223,8 +223,8 @@ class T_Scan(unittest.TestCase):

def f_rnn_shared(u_tm2, u_tp2, x_tm1, x_tm2):
    return (u_tm2 + u_tp2)*W_in + x_tm1*W + x_tm2

Y, updts = theano.scan(f_rnn_shared, dict(input = u, taps = [-2,2]),
                       dict(initial = x0, taps = [-1,-2]), [])
f8 = theano.function([u,x0], Y, updates = updts)
v_u = numpy.array([1.,2.,3.,4.,5.,6.])
@@ -234,7 +234,7 @@ class T_Scan(unittest.TestCase):

assert (compareArrays( out, f8(v_u, v_x0) ) )

# simple rnn; compute inplace
def test_inplace(self):

u = theano.tensor.dvector()
mu = theano.Param( u, mutable = True)

@@ -244,8 +244,7 @@ class T_Scan(unittest.TestCase):

def f_rnn_shared(u_t, x_tm1):
    return u_t*W_in + x_tm1*W

Y, updts = theano.scan(f_rnn_shared, u, dict(initial = x0, inplace = u), [])
f9 = theano.function([mu,x0], Y, updates = updts)
v_u = numpy.array([1.,2.,3.])
v_x0 = numpy.array(1.)
@@ -257,7 +256,7 @@ class T_Scan(unittest.TestCase):

assert (compareArrays(v_u, out))

# Shared variable with updates
def test_shared_arguments_with_updates(self):

W1_vals = numpy.random.rand(20,30)
W2_vals = numpy.random.rand(30,20)
u1_vals = numpy.random.rand(3,20)

@@ -266,11 +265,11 @@ class T_Scan(unittest.TestCase):

y1_vals = numpy.random.rand(20)
y2_vals = numpy.random.rand(30)

W1 = theano.shared(W1_vals, 'W1')
W2 = theano.shared(W2_vals, 'W2')
u1 = theano.shared(u1_vals, 'u1')
y1 = theano.shared(y1_vals, 'y1')

def f(u1_t, u2_t, y0_tm3, y0_tm2, y0_tm1, y1_tm1):
    y0_t = theano.dot(theano.dot(u1_t,W1),W2) + 0.1*y0_tm1 + \
@@ -279,18 +278,17 @@ class T_Scan(unittest.TestCase):

    y2_t = theano.dot(u1_t, W1)
    nwW1 = W1 + .1
    nwW2 = W2 + .05
    return ([y0_t, y1_t, y2_t], [(W1, nwW1), (W2, nwW2)])

u2 = theano.tensor.matrix('u2')
y0 = theano.tensor.matrix('y0')

Y, upds = theano.scan(f, [u1,u2], [dict(initial = y0, taps = [-3,-2,-1]), y1, None])
f = theano.function([u2,y0], Y, updates = upds)
vls = f(u2_vals, y0_vals)

# do things in numpy
v_y0 = numpy.zeros((6,20))
@@ -308,7 +306,7 @@ class T_Scan(unittest.TestCase):

vW1 = vW1 + .1
vW2 = vW2 + .05

def test_gibbs_chain(self):

W_vals = numpy.random.rand(20,30) - .5
vis_val = numpy.random.binomial(1, 0.5, size=(3,20))

@@ -331,8 +329,7 @@ class T_Scan(unittest.TestCase):

return trng.binomial(vsample.shape, 1, vsample)

v_vals, updts = theano.scan(f, [], [vis], [], n_steps = 10)
my_f = theano.function([vis], v_vals[-1], updates = updts)
@@ -356,19 +353,16 @@ class T_Scan(unittest.TestCase):

assert (compareArrays(t_res, n_res))

def test_only_shared_no_input_no_output(self):

s = theano.shared(1)
def f_pow2():
    return {s: 2*s}

n_steps = theano.tensor.dscalar()
Y, updts = theano.scan(f_pow2, [], [], [], n_steps = n_steps)
f1 = theano.function([n_steps], Y, updates = updts)
f1(3)
assert compareArrays(s.value, 8)

'''
# test gradient simple network
@@ -386,14 +380,12 @@ class T_Scan(unittest.TestCase):

'''

def test_map_functionality(self):

def f_rnn(u_t):
    return u_t + 3

u = theano.tensor.dvector()
Y, updts = theano.scan(f_rnn, u, [None])
f2 = theano.function([u], Y, updates = updts)
v_u = numpy.array([1.,2.,3.,4.])
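The map behaviour this test exercises can be sketched in plain NumPy (helper name hypothetical): with no output taps (the `None` entry), scan simply applies the function to every element of the sequence.

```python
import numpy as np

def scan_map(fn, u):
    # no taps means no recurrence: just apply fn elementwise, like map
    return np.array([fn(u_t) for u_t in u])

out = scan_map(lambda u_t: u_t + 3, np.array([1., 2., 3., 4.]))
```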
...