提交 78d2b9a7 authored 作者: Razvan Pascanu's avatar Razvan Pascanu

Updated the interface and documentation of scan following James suggestions; I…

Updated the interface and documentation of scan following James suggestions; I also implemented a map function using scan
上级 90cf38f3
......@@ -32,8 +32,7 @@ The equivalent Theano code would be
# Symbolic description of the result
result,updates = theano.scan(fn = lambda x_tm1,A: x_tm1*A,\
sequences = [], \
initial_states = T.ones_like(A),\
info_outputs = T.ones_like(A),\
non_sequences = A, \
n_steps = k)
......@@ -46,13 +45,12 @@ construct a function (using a lambda expression) that given `x_tm1` and
is the value of our output at time step ``t-1``. Therefore
``x_t`` (value of output at time `t`) is `A` times value of output
at `t-1`.
Next we assign an empty list to ``sequences`` (since we do not need to
iterate over anything) and initialize the output as a tensor with same
shape as A filled with ones. We give A as a non sequence parameter and
tell scan to iterate for k steps.
Next we initialize the output as a tensor with same
shape as A filled with ones. We give A to scan as a non sequence parameter and
specify the number of steps k to iterate over our lambda expression.
Scan will return a tuple, containing our result (``result``) and a
dictionary of updates ( empty for this example). Note that the result
dictionary of updates ( empty in this case). Note that the result
is not a matrix, but a 3D tensor containing the value of ``A**k`` for
each step. We want the last value ( after k steps ) so we compile
a function to return just that.
......@@ -69,8 +67,8 @@ that our RNN is defined as follows :
y(n) = W^{out} x(n- 3)
Note that this network is far away from a classical recurrent neural
network and might be in practice useless. The reason we defined as such
Note that this network is far from a classical recurrent neural
network and might be useless. The reason we defined as such
is to better illustrate the features of scan.
In this case we have a sequence over which we need to iterate ``u``,
......@@ -89,12 +87,15 @@ construct a function that computes one iteration step :
return [x_t, y_t]
As naming convention for the variables we used ``a_tmb`` to mean ``a`` at
``t-b`` and ``a_tpb`` to be ``a`` at ``t+b``.
Note the order in which the parameters are given, and in which the
result is returned. Try to respect chronological order among
the taps ( time slices of sequences or outputs) used. In practice what
is crucial to happen for the computation to work is to give the slices
in the same order as provided in the ``sequence_taps``/``outputs_taps`` dictionaries and to have same
order of inputs here as when applying scan. Given that we have all
the taps ( time slices of sequences or outputs) used. For scan it is crucial only
for the variables representing the different time taps to be in the same order
as the one in which these taps are given. Also, not only taps should respect
an order, but also variables, since this is how scan figures out what should
be represented by what. Given that we have all
the Theano variables needed we construct our RNN as follows :
.. code-block:: python
......@@ -106,12 +107,10 @@ the Theano variables needed we construct our RNN as follows :
# y[-1]
([x_vals, y_vals],updates) = theano.scan(fn = oneStep, \
sequences = [u], \
initial_states = [x0,y0], \
non_sequences = [W,W_in_1,W_in_2,W_feedback, W_out], \
sequences_taps = {0:[-4,0] },\
outputs_taps = {0:[-3,-1] },)
([x_vals, y_vals],updates) = theano.scan(fn = oneStep, \
sequences = dict(input = u, taps= [-4,-0]), \
info_outputs = [dict(initial = x0, taps = [-3,-1]),y0], \
non_sequences = [W,W_in_1,W_in_2,W_feedback, W_out])
# for second input y, scan adds -1 in output_taps by default
......@@ -153,7 +152,7 @@ the following:
sample = theano.tensor.vector()
values, updates = theano.scan( OneStep, [],sample, [], n_steps = 10 )
values, updates = theano.scan( OneStep, info_outputs = sample, n_steps = 10 )
gibbs10 = theano.function([sample], values[-1], updates = updates)
......@@ -177,7 +176,7 @@ afterwards. Look at this example :
.. code-block:: python
a = theano.shared(1)
values,updates = theano.scan( lambda : {a:a+1}, [],[],[], n_steps = 10 )
values,updates = theano.scan( lambda : {a:a+1}, n_steps = 10 )
In this case the lambda expression does not require any input parameters
and returns an update dictionary which tells how ``a`` should be updated
......
......@@ -60,12 +60,18 @@ def hash_listsDictsTuples(x):
pass
return hash_value
def _map(fn, sequences, non_sequences=[]):
#TODO
#UGLY HACK: instead of figuring out how many outputs there are, we
# will assume there are less than 100 of them
return scan(fn, sequences=sequences,
outputs_taps=dict([(i,[]) for i in xrange(100)]))
###################################
## Implement specific function calls : map, reduce, generate
def map(fn, sequences, non_sequences = [], n_steps =0, truncate_gradient = -1, \
go_backwards = False, mode = 'FAST_RUN'):
return scan(fn, sequences= sequences, non_sequences = non_sequences,
truncate_gradient = truncate_gradient, go_backwards = go_backwards,
mode = mode)
# CONSIDER ALTERNATE CALLING CONVENTIONS:
# simple:
......@@ -91,11 +97,13 @@ def _map(fn, sequences, non_sequences=[]):
# If the larger (in absolute values) the sequence_taps, the shorter the output
# right? If the sequence_taps = {0: [-10, 10]}, and I pass an input with 22
# rows, then the scan will output something of length <=2 right?
#
#
# ANSWER:
# Yes, actually it will be exactly 2 ( if there are no other constraints)
def scan(fn, sequences=[], initial_states=[], non_sequences=[], inplace_map={}, \
sequences_taps={}, outputs_taps = {}, n_steps = 0, \
truncate_gradient = -1, go_backwards = False,
def scan(fn, sequences=[], info_outputs=[], non_sequences=[],
n_steps = 0, truncate_gradient = -1, go_backwards = False,
mode = None):
'''Function that constructs and applies a Scan op
......@@ -108,14 +116,14 @@ def scan(fn, sequences=[], initial_states=[], non_sequences=[], inplace_map={},
should have the following order:
* all time slices of the first sequence (as given in the
``sequences`` list) ordered chronologically
``sequences`` list) ordered in the same fashion as the time taps provided
* all time slices of the second sequence (as given in the
``sequences`` list) ordered chronologically
``sequences`` list) ordered in the same fashion as the time taps provided
* ...
* all time slices of the first output (as given in the
``initial_state`` list) ordered chronologically
``initial_state`` list) ordered in the same fashion as the time taps provided
* all time slices of the second output (as given in the
``initial_state`` list) ordered chronologically
``initial_state`` list) ordered in the same fashion as the time taps provided
* ...
* all other parameters over which scan doesn't iterate given
......@@ -128,21 +136,50 @@ def scan(fn, sequences=[], initial_states=[], non_sequences=[], inplace_map={},
them as a tuple : (outputs, updates) or (updates, outputs).
Outputs can be just a theano expression if you have only one outputs or
a list of theano expressions. Updates can be given either as a list of
a list of theano expressions. Updates can be given either as a list of tuples or
as a dictionary. If you have a list of outputs, the order of these
should match that of their ``initial_states``.
:param sequences:
list of Theano variables over which scan needs to iterate.
:param initial_states:
list of Theano variables containing the initial state used for the
output. Note that if the function applied recursively uses only the
previous value of the output or none, this initial state should have
list of Theano variables or dictionaries containing Theano variables over which
scan needs to iterate. The reason you might want to wrap a certain Theano
variable in a dictionary is to provide auxiliary information about how to iterate
over that variable. For example this is how you specify that you want to use
several time slices of this sequence at each iteration step. The dictionary
should have the following keys :
* ``input`` -- Theano variable representing the sequence
* ``taps`` -- temporal taps to use for this sequence. They are given as a list
of ints, where a value ``k`` means that at iteration step ``t`` scan needs to
also provide the slice ``t+k``. The order in which you provide these int values
here is the same order in which the slices will be provided to ``fn``.
If you do not wrap a variable in a dictionary, scan will do it for you, under
the assumption that you use only one slice, defined as a tap of offset 0. This
means that at step ``t`` scan will provide the slice at position ``t``.
:param info_outputs:
list of Theano variables or dictionaries containing Theano variables used
to initialize the outputs of scan. As before (for ``sequences``) the reason
you would wrap a Theano variable in a dictionary is to provide additional
information about how scan should deal with that specific output. The dictionary
should contain the following keys:
* ``initial`` -- Theano variable containing the initial state of the output
* ``taps`` -- temporal taps to use for this output. The taps are given as a
list of ints (only negative, since you cannot use future values of outputs),
with the same meaning as for ``sequences`` (see above).
* ``inplace`` -- theano variable pointing to one of the input sequences; this
flag tells scan that the output should be computed in the memory space occupied
by that input sequence. Note that scan will only do this if allowed by the
rest of your computational graph.
If the function applied recursively uses only the
previous value of the output, the initial state should have
same shape as one time step of the output; otherwise, the initial state
should have the same number of dimensions as output. This can easily be
understand through an example. For computing ``y[t]`` let assume that we
need ``y[t-1]``, ``y[t-2]`` and ``y(t-4)``. Through an abuse of
should have the same number of dimensions as output. This is easily
understood through an example. For computing ``y[t]`` let us assume that we
need ``y[t-1]``, ``y[t-2]`` and ``y[t-4]``. Through an abuse of
notation, when ``t = 0``, we would need values for ``y[-1]``, ``y[-2]``
and ``y[-4]``. These values are provided by the initial state of ``y``,
which should have the same number of dimensions as ``y``, where the first
......@@ -150,52 +187,28 @@ def scan(fn, sequences=[], initial_states=[], non_sequences=[], inplace_map={},
case is 4. If ``init_y`` is the variable containing the initial state
of ``y``, then ``init_y[0]`` corresponds to ``y[-4]``, ``init_y[1]``
corresponds to ``y[-3]``, ``init_y[2]`` corresponds to ``y[-2]``,
``init_y[3]`` corresponds to ``y[-1]``. By default, scan is set to use
the last time step for each output.
``init_y[3]`` corresponds to ``y[-1]``. The default behaviour of scan is
the following :
* if you do not wrap an output in a dictionary, scan will wrap it for you
assuming that you use only the last step of the output ( i.e. it makes your tap
value list equal to [-1]) and that it is not computed inplace
* if you wrap an output in a dictionary but you do not provide any taps, but
you provide an initial state it will assume that you are using only a tap value
of -1
* if you wrap an output in a dictionary but you do not provide any initial state,
it assumes that you are not using any form of taps
:param non_sequences:
Parameters over which scan should not iterate. These parameters are
given at each time step to the function applied recursively.
:param inplace_map:
Dictionary describing outputs computed *inplace*. ``inplace_map`` is a
dictionary where keys are output indexes, and values are sequence
indexes. Assigning a value ``j`` to a key ``i`` means that output
number ``j`` will be computed inplace (in the same memory buffer) as the
input number ``i``.
:param sequences_taps:
Dictionary describing what slices of the input sequences scan should
use. At each step of the iteration you can use different slices of your
input sequences(called here taps), and this dictionary lets you define
exactly that. The keys of the dictionary are sequence indexes, the
values are list of numbers. Having the following entry ``i :
[k_1,k_2,k_3]``, means that at step ``t``, for sequence ``x``, that has
the index ``i`` in the list of sequences, you would use the values
``x[t+k_1]``, ``x[t+k_2]`` and ``x[t+k_3]``. ``k_1``, ``k_2``, ``k_3``
values can be positive or negative, and the sequence for which you request these
taps should be large enough to accommodate them. If in the chronological
order, ``k`` is the first past value of sequence ``x``, then index 0 of
``x`` will correspond to step ``k`` (if ``k`` is -3, then, abusing
notation ``x[0]`` will be seen by scan as ``x[-3]``). If you do not want
to use any taps for a given sequence you need to set the corresponding
entry in the dictionary to the empty list. By default, for each sequence
that is not represented in the dictionary scan will assume that at
every step it needs to provide the current value of that sequence.
:param outputs_taps:
Dictionary describing what slices of the input sequences scan should
use. The ``outputs_taps`` are defined in an analogous way to
``sequences_taps``, just that the taps are for the outputs generated by
scan. As such they can only be negative, i.e. refer to past value of
outputs. By default scan will expect to use for any output the last time
step, if nothing else is specified.
:param n_steps:
Number of steps to iterate. Sometimes you want to either enforce a fixed
number of steps, or you might not even have any sequences you want to
iterate over, but rather just to repeat some computation for a fixed
number of steps. It can be a theano scalar or a number.
Number of steps to iterate. If this value is provided scan will run only for
this amount of steps (given that the input sequences are sufficiently long).
If there is no input sequence (for example in case of a generator network) scan
will iterate for this number of steps. It can be a theano scalar or a number.
:param truncate_gradient:
Number of steps to use in truncated BPTT. If you compute gradients
......@@ -221,10 +234,10 @@ def scan(fn, sequences=[], initial_states=[], non_sequences=[], inplace_map={},
else:
seqs = sequences
if not (type(initial_states) in (list,tuple)):
init_outs = [initial_states]
if not (type(info_outputs) in (list,tuple)):
info_outs = [info_outputs]
else:
init_outs = initial_states
info_outs = info_outputs
if not (type(non_sequences) in (list,tuple)):
non_seqs = [non_sequences]
......@@ -233,49 +246,85 @@ def scan(fn, sequences=[], initial_states=[], non_sequences=[], inplace_map={},
# compute number of sequences and number of seqs
# compute number of sequences and number of outputs
n_seqs = len(seqs)
n_init_outs = len(init_outs)
n_outs = len(info_outs)
# update sequences_taps[idx] to contain 0 if it is not defined
inplace_map = {}
sequences_taps = {}
outputs_taps = {}
# wrap sequences in a dictionary if they are not already
# in the same pass create a sequences_taps dictionary
for i in xrange(n_seqs):
if not sequences_taps.has_key(i):
sequences_taps.update({i:[0]})
# if input sequence is not actually used by the recursive function
elif sequences_taps[i] == []:
sequences_taps.__delitem__(i)
elif not (type(sequences_taps[i]) in (list,tuple)):
sequences_taps[i] = [sequences_taps[i]]
# update outputs_taps[idx] to contain -1 if it is not defined
for i in xrange(n_init_outs):
if not outputs_taps.has_key(i):
outputs_taps.update({i:[-1]})
elif outputs_taps[i] == []:
outputs_taps.__delitem__(i)
elif not(type(outputs_taps[i]) in (list,tuple)):
outputs_taps[i] = [outputs_taps[i]]
if not type(seqs[i]) == dict :
seqs[i] = dict(input=seqs[i], taps=[0])
# see if taps values are provided as a list
elif seqs[i].get('taps',None):
if not type(seqs[i]['taps']) in (tuple,list):
seqs[i]['taps'] = [seqs[i]['taps']]
else:
seqs[i][taps] = [0]
if seqs[i].get('taps',None):
sequences_taps[i] = seqs[i]['taps']
# wrap outputs info in a dictionary if they are not already
# in the same pass create a init_outs_taps dictionary and a inplace map
for i in xrange(n_outs):
if info_outs[i]:
if not type(info_outs[i]) == dict:
info_outs[i] = dict(initial=info_outs[i], taps = [-1])
# if there is no initial state but there are taps
elif (not info_outs[i].get('initial',None)) and(info_outs[i].get('taps',None)):
raise ValueError('If you are using slices of an output you need to '\
'provide a initial state for it', info_outs[i])
elif info_outs[i].get('initial',None) and (not info_outs[i].get('taps',None)):
info_outs[i]['taps'] = [-1]
else:
info_outs[i] = dict()
if info_outs[i].get('taps', None):
outputs_taps[i] = info_outs[i]['taps']
if info_outs[i].get('inplace', None):
# look for that variable to get the index
found = None
for k in xrange(n_seqs):
if seqs[k].get('input', None) == info_outs[i].get('inplace',None):
found = k
if found != None:
inplace_map[i] = k
else:
raise ValueError('Asked to compute in place of a non-input variable',\
info_outs[i].get('inplace', None))
# create theano inputs for the recursive function
args = []
_ins = 0
_outs = 0
for (i,seq) in enumerate(seqs):
if sequences_taps.has_key(i):
for k in xrange(len(sequences_taps[i])):
args += [seq[0].type() ]
_ins += 1
for (i,init_out) in enumerate(init_outs):
if outputs_taps.has_key(i):
for k in xrange(len(outputs_taps[i])):
if outputs_taps[i] == [-1]:
args += [init_out.type() ]
_outs += 1
else:
args += [init_out[0].type() ]
_outs += 1
# go through sequences picking up time slices as needed
for seq in seqs:
if seq.get('taps', None):
slices = [ seq['input'][0].type() for k in seq['taps'] ]
args += slices
_ins += len(seq['taps'])
# go through outputs picking up time slices as needed
for init_out in info_outs:
if init_out.get('taps', None) == [-1]:
args += [init_out['initial'].type()]
_outs += 1
elif init_out.get('taps',None):
if numpy.any(numpy.array(init_out.get('taps',[])) > 0):
raise ValueError('Can not use future taps of outputs', init_out)
slices = [ init_out['initial'][0].type() for k in init_out['taps'] ]
args += slices
_outs += len(init_out['taps'])
# remove shared variables from the non sequences list
noshared = []
for non_seq in non_seqs:
if not isinstance(non_seq, theano.compile.SharedVariable):
......@@ -331,26 +380,39 @@ def scan(fn, sequences=[], initial_states=[], non_sequences=[], inplace_map={},
ls_outputs = [ sout.variable for sout in dummy_f.maker.outputs]
update_map = {}
n_actual_outs = len(dummy_f.maker.outputs)
shared_outs = []
shared_non_seqs = []
givens = {}
# if the number of outputs to the function does not match the number of
# assumed outputs
if len(ls_outputs) != n_outs:
if info_outs == []:
# We know how to deal with this case, assume that none of the outputs
# are required to have any sort of time taps
# we just need to update the number of actual outputs
n_outs = len(ls_outputs)
else:
raise ValueError('There has been a terrible mistake in our input arguments'
' and scan is totally lost. Make sure that you indicate for every '
' output what taps you want to use, or None, if you do not want to '
' use any !')
ls_inputs=[inp.variable for inp in \
dummy_f.maker.expanded_inputs[:_ins+_outs]]
fromIdx = _ins + _outs
stored_steps_output = [ 0 for i in xrange(n_actual_outs)]
stored_steps_output = [ 0 for i in xrange(n_outs)]
# add shared variable that act as outputs
#
n_outs = n_actual_outs
n_outs_extended = n_outs
for inp in dummy_f.maker.expanded_inputs[fromIdx:] :
if isinstance(inp.variable, theano.compile.SharedVariable) and inp.update:
ls_inputs.append(inp.variable.type())
ls_outputs += [inp.update]
update_map[ inp.variable ] = n_outs
outputs_taps[ n_outs ] = [-1]
n_outs += 1
update_map[ inp.variable ] = n_outs_extended
outputs_taps[ n_outs_extended ] = [-1]
n_outs_extended += 1
stored_steps_output += [1]
shared_outs += [inp.variable]
givens[inp.variable] = ls_inputs[-1]
......@@ -365,15 +427,17 @@ def scan(fn, sequences=[], initial_states=[], non_sequences=[], inplace_map={},
ls_inputs.append(inp.variable)
# Create the Scan op object
local_op = Scan( (ls_inputs,ls_outputs, givens ), n_seqs, n_outs, inplace_map,
sequences_taps, outputs_taps, truncate_gradient,
local_op = Scan( (ls_inputs,ls_outputs, givens ), n_seqs, n_outs_extended,
inplace_map, sequences_taps, outputs_taps, truncate_gradient,
go_backwards, stored_steps_output, mode)
# Call the object on the input sequences, initial values for outs,
# and non sequences
unwrapped_seqs = [ seq.get('input',theano.tensor.as_tensor(0)) for seq in seqs ]
unwrapped_outs = [ out.get('initial',theano.tensor.as_tensor(0)) for out in info_outs ]
values = local_op( *( [theano.tensor.as_tensor(n_steps)] \
+ seqs \
+ init_outs \
+ unwrapped_seqs \
+ unwrapped_outs \
+ shared_outs \
+ noshared
+ shared_non_seqs))
......@@ -383,11 +447,11 @@ def scan(fn, sequences=[], initial_states=[], non_sequences=[], inplace_map={},
for k in update_map.keys():
update_map[k] = values [ update_map[k] ]
if n_actual_outs != n_outs :
if n_actual_outs == 1:
if n_outs != n_outs_extended :
if n_outs == 1:
values = values[0]
else:
values = values[:n_actual_outs]
values = values[:n_outs]
return (values, update_map)
......@@ -618,7 +682,7 @@ class Scan(theano.Op):
def scan(self,fn, args, n_seqs, n_outs, seqs_taps, outs_taps, n_steps,
def scan(self, fn, args, n_seqs, n_outs, seqs_taps, outs_taps, n_steps,
go_backwards, inplace_map):
y = []
......@@ -704,9 +768,21 @@ class Scan(theano.Op):
#update outputs
for j in xrange(n_outs):
if self.stored_steps_output[j] <1:
y[j][i] = something[j]
# if you have provided no size for the missing output you might find yourself
# here with an incorrect array .. if that happens reallocate memory for the needed
# array
try :
y[j][i] = something[j]
except :
y[j] = numpy.empty( (n_steps,)+something[j].shape , dtype =
something[j].dtype)
y[j][i] = something[j]
elif self.stored_steps_output[j] == 1:
y[j] = something[j]
try:
y[j] = something[j]
except:
y[j] = numpy.empty( something[j].shape, dtype = something[j].dtype)
y[j] = something[j]
else:
raise NotImplementedError('This will be implemented in the near future')
return y
......
......@@ -103,7 +103,7 @@ class T_Scan(unittest.TestCase):
# generator network, only one output , type scalar ; no sequence or
# non sequence arguments
def test_1(self):
def test_generator_one_output_scalar(self):
def f_pow2(x_tm1):
......@@ -117,7 +117,7 @@ class T_Scan(unittest.TestCase):
assert(compareArrays(f1(1,3), [2,4,8]))
# simple rnn, one input, one state, weights for each; input/state are
# vectors, weights are scalars
def test_2(self):
def test_one_sequence_one_output_weights(self):
def f_rnn(u_t,x_tm1,W_in, W):
......@@ -138,9 +138,9 @@ class T_Scan(unittest.TestCase):
# simple rnn, one input, one state, weights for each; input/state are
# vectors, weights are scalars; using shared variables
def test_3(self):
def test_one_sequence_one_output_weights_shared(self):
u = theano.tensor.dvector()
u = theano.tensor.dvector()
x0 = theano.tensor.dscalar()
W_in = theano.shared(.1, name = 'w_in')
W = theano.shared(1., name ='w')
......@@ -158,7 +158,7 @@ class T_Scan(unittest.TestCase):
# some rnn with multiple outputs and multiple inputs; other dimension
# instead of scalars/vectors
def test_4(self):
def test_multiple_inputs_multiple_outputs(self):
W_in2 = theano.shared(numpy.array([1.,2.]), name='win2')
W = theano.shared(numpy.array([[2.,1.],[1.,1.]]), name='w')
......@@ -191,7 +191,7 @@ class T_Scan(unittest.TestCase):
# simple rnn, one input, one state, weights for each; input/state are
# vectors, weights are scalars; using shared variables and past
# taps (sequences and outputs)
def test_5(self):
def test_using_taps_input_output(self):
u = theano.tensor.dvector()
x0 = theano.tensor.dvector()
......@@ -201,8 +201,8 @@ class T_Scan(unittest.TestCase):
def f_rnn_shared(u_tm2, x_tm1, x_tm2):
return u_tm2*W_in+x_tm1*W+x_tm2
Y, updates = theano.scan(f_rnn_shared, u,x0, [], \
sequences_taps = {0:[-2]}, outputs_taps = {0:[-1,-2]})
Y, updates = theano.scan(f_rnn_shared, dict(input=u, taps=-2),
dict(initial = x0, taps = [-1,-2]), [])
f7 = theano.function([u,x0], Y, updates = updates)
v_u = numpy.asarray([1.,2.,3.,4.])
......@@ -213,7 +213,7 @@ class T_Scan(unittest.TestCase):
# simple rnn, one input, one state, weights for each; input/state are
# vectors, weights are scalars; using shared variables and past
# taps (sequences and outputs) and future taps for sequences
def test_6(self):
def test_past_future_taps_shared(self):
u = theano.tensor.dvector()
x0 = theano.tensor.dvector()
......@@ -223,8 +223,8 @@ class T_Scan(unittest.TestCase):
def f_rnn_shared(u_tm2,u_tp2, x_tm1, x_tm2):
return (u_tm2+u_tp2)*W_in+x_tm1*W+x_tm2
Y,updts = theano.scan(f_rnn_shared, u,x0, [], \
sequences_taps = {0:[-2,2]}, outputs_taps = {0:[-1,-2]})
Y,updts = theano.scan(f_rnn_shared, dict( input = u, taps=[-2,2]),\
dict(initial = x0, taps = [-1,-2]), [])
f8 = theano.function([u,x0], Y, updates = updts)
v_u = numpy.array([1.,2.,3.,4.,5.,6.])
......@@ -234,7 +234,7 @@ class T_Scan(unittest.TestCase):
assert (compareArrays( out, f8(v_u, v_x0) ) )
# simple rnn ; compute inplace
def test_7(self):
def test_inplace(self):
u = theano.tensor.dvector()
mu = theano.Param( u, mutable = True)
......@@ -244,8 +244,7 @@ class T_Scan(unittest.TestCase):
def f_rnn_shared(u_t, x_tm1):
return u_t*W_in + x_tm1*W
Y, updts = theano.scan(f_rnn_shared, u, x0,[], \
inplace_map={0:0} )
Y, updts = theano.scan(f_rnn_shared, u, dict( initial = x0, inplace = u),[] )
f9 = theano.function([mu,x0], Y , updates = updts)
v_u = numpy.array([1.,2.,3.])
v_x0 = numpy.array(1.)
......@@ -257,7 +256,7 @@ class T_Scan(unittest.TestCase):
assert (compareArrays(v_u, out))
# Shared variable with updates
def test_8(self):
def test_shared_arguments_with_updates(self):
W1_vals = numpy.random.rand(20,30)
W2_vals = numpy.random.rand(30,20)
u1_vals = numpy.random.rand(3,20)
......@@ -266,11 +265,11 @@ class T_Scan(unittest.TestCase):
y1_vals = numpy.random.rand(20)
y2_vals = numpy.random.rand(30)
W1 = theano.shared(W1_vals)
W2 = theano.shared(W2_vals)
W1 = theano.shared(W1_vals,'W1')
W2 = theano.shared(W2_vals,'W2')
u1 = theano.shared(u1_vals)
y1 = theano.shared(y1_vals)
u1 = theano.shared(u1_vals,'u1')
y1 = theano.shared(y1_vals,'y1')
def f(u1_t, u2_t, y0_tm3, y0_tm2, y0_tm1, y1_tm1):
y0_t = theano.dot(theano.dot(u1_t,W1),W2) + 0.1*y0_tm1 + \
......@@ -279,18 +278,17 @@ class T_Scan(unittest.TestCase):
y2_t = theano.dot(u1_t, W1)
nwW1 = W1 + .1
nwW2 = W2 + .05
return ([y0_t, y1_t, y2_t], [(W1,nwW1), (W2, nwW2)])
return ([y0_t, y1_t, y2_t], [( W1,nwW1), (W2, nwW2)])
u2 = theano.tensor.matrix()
y0 = theano.tensor.matrix()
y2 = theano.tensor.vector()
u2 = theano.tensor.matrix('u2')
y0 = theano.tensor.matrix('y0')
Y,upds = theano.scan(f, [u1,u2], [y0,y1,y2],[], outputs_taps = {0:[-3,-2,-1], 2:[]})
Y,upds = theano.scan(f, [u1,u2], [ dict(initial = y0, taps = [-3,-2,-1]),y1, None])
f = theano.function([u2,y0,y2], Y, updates = upds)
f = theano.function([u2,y0], Y, updates = upds)
vls = f(u2_vals, y0_vals, y2_vals)
vls = f(u2_vals, y0_vals)
# do things in numpy
v_y0 = numpy.zeros((6,20))
......@@ -308,7 +306,7 @@ class T_Scan(unittest.TestCase):
vW1 = vW1 + .1
vW2 = vW2 + .05
def test_9(self):
def test_gibbs_chain(self):
W_vals = numpy.random.rand(20,30) -.5
vis_val = numpy.random.binomial(1,0.5, size=(3,20))
......@@ -331,8 +329,7 @@ class T_Scan(unittest.TestCase):
return trng.binomial(vsample.shape,1,vsample)
v_vals, updts = theano.scan(f, [], [vis],[], n_steps = 10,
sequences_taps = {}, outputs_taps = {})
v_vals, updts = theano.scan(f, [], [vis],[], n_steps = 10)
my_f = theano.function([vis], v_vals[-1], updates = updts)
......@@ -356,19 +353,16 @@ class T_Scan(unittest.TestCase):
assert (compareArrays(t_res, n_res))
def test_10(self):
s = theano.shared(1)
def test_only_shared_no_input_no_output(self):
def f_pow2():
return {s: 2*s}
n_steps = theano.tensor.dscalar()
Y, updts = theano.scan(f_pow2, [],[], [],n_steps = n_steps)
f1 = theano.function([n_steps], Y, updates = updts)
f1(3)
assert compareArrays(s.value, 8)
s = theano.shared(1)
def f_pow2():
return {s: 2*s}
n_steps = theano.tensor.dscalar()
Y, updts = theano.scan(f_pow2, [],[], [],n_steps = n_steps)
f1 = theano.function([n_steps], Y, updates = updts)
f1(3)
assert compareArrays(s.value, 8)
'''
# test gradient simple network
......@@ -386,14 +380,12 @@ class T_Scan(unittest.TestCase):
'''
def test_map_functionality(self):
raise SkipTest('Map functionality not implemented yet')
def f_rnn(u_t):
return u_t + 3
u = theano.tensor.dvector()
Y, updts = theano.scan(f_rnn, sequences=u, outputs_taps={0:[]})
Y, updts = theano.scan(f_rnn, u, [None])
f2 = theano.function([u], Y, updates = updts)
v_u = numpy.array([1.,2.,3.,4.])
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论