提交 432ef124 authored 作者: Razvan Pascanu's avatar Razvan Pascanu

Fixed a bug in pfunc ( bug = when you replaced a shared variable with givens,…

Fixed a bug in pfunc ( bug = when you replaced a shared variable with givens, the shared variable did not get replaced, and the update rule was executed ) that resulted in fixing the failing tests of scan. I also did a bit of cleaning in scan tests and code, fixed an unobserved bug in inplace computation of scan, and made sure scan knows (once the optimization is written) to only store the last k steps of an output
上级 c87a33e6
......@@ -60,7 +60,7 @@ FancyModule = Module
from printing import \
pprint, pp
from scan import scan
from scan import scan,map
import tensor
import scalar
......
......@@ -5,6 +5,8 @@ from theano.gof import Container, Variable, generic, graph, Constant, Value
from theano.compile import orig_function, In, Out
from theano.compile.sharedvalue import SharedVariable, shared
import numpy # for backport to 2.4, to get any().
import theano
class Param(object):
def __init__(self, variable, default=None, name=None, mutable=False, strict=False,
......@@ -118,7 +120,7 @@ def pfunc(params, outputs=None, mode=None, updates=[], givens=[],
if v.owner:
clone_a(v.owner)
elif isinstance(v, SharedVariable):
if v not in shared_inputs:
if v not in shared_inputs and v not in clone_d:
shared_inputs.append(v)
if hasattr(v, 'default_update'):
......@@ -127,14 +129,13 @@ def pfunc(params, outputs=None, mode=None, updates=[], givens=[],
(isinstance(no_default_updates, list) and\
v not in no_default_updates):
# Do not use default_update if a "real" update was provided
if v not in update_d:
if v not in update_d and v not in clone_d:
v_update = v.filter_update(v.default_update)
if v_update.type != v.type:
raise TypeError('an update must have the same type as the original shared variable',
(v, v.type, v_update, v_update.type))
update_d[v] = v_update
update_expr.append((v, v_update))
return clone_d.setdefault(v, v)
def clone_a(a):
......@@ -155,6 +156,7 @@ def pfunc(params, outputs=None, mode=None, updates=[], givens=[],
except:
pass
for v_orig, v_repl in givens:
if not isinstance(v_orig, Variable):
raise TypeError('given keys must be Variable', v_orig)
if not isinstance(v_repl, Variable):
......@@ -195,6 +197,7 @@ def pfunc(params, outputs=None, mode=None, updates=[], givens=[],
update_d[store_into] = update_val
update_expr.append((store_into, update_val))
# Elements of "outputs" are here cloned to "cloned_outputs"
if isinstance(outputs, list):
cloned_outputs = []
......@@ -228,6 +231,7 @@ def pfunc(params, outputs=None, mode=None, updates=[], givens=[],
# If the variable to be updated is a shared variable not already
# in shared_inputs, add it.
# Note: we extend update_expr while iterating over it.
i = 0
while i<len(update_expr):
v, v_update = update_expr[i]
......
......@@ -442,6 +442,25 @@ class Test_pfunc(unittest.TestCase):
# a is needed as input if y.default_update is used
self.failUnlessRaises(TypeError, pfunc, [], x)
def test_givens_replaces_shared_variable(self):
a = shared(1.,'a')
a.default_update = a+3.
b = tensor.scalar('b')
c = a + 10
f = pfunc([b],c, givens = {a:b})
assert len(f.maker.env.inputs) == 1
assert len(f.maker.env.outputs) == 1
def test_givens_replaces_shared_variable2(self):
a = shared(1.,'a')
a.default_update = a+3
c = a+ 10
f = pfunc([],c, givens = { a: a+10} )
assert f() == 21
assert f() == 34
if __name__ == '__main__':
theano.config.mode = 'FAST_COMPILE'
Test_pfunc().test_default_scalar_container()
......
......@@ -30,6 +30,7 @@ from theano.tensor import opt, TensorType
from theano import gof, Apply
from theano.compile import optdb
import theano.tensor.shared_randomstreams as shared_random
import copy
import numpy
......@@ -54,22 +55,23 @@ def hash_listsDictsTuples(x):
for v in x:
hash_value ^= hash_listsDictsTuples(v)
else:
try:
hash_value ^= hash(x)
except:
pass
try:
hash_value ^= hash(x)
except:
pass
return hash_value
## TODO
###################################
## Implement specific function calls : map, reduce, generate
def map(fn, sequences, non_sequences = [], n_steps =0, truncate_gradient = -1, \
go_backwards = False, mode = 'FAST_RUN'):
return scan(fn, sequences= sequences, non_sequences = non_sequences,
truncate_gradient = truncate_gradient, go_backwards = go_backwards,
mode = mode)
def map(fn, sequences, non_sequences = [], n_steps =0,
truncate_gradient = -1, go_backwards = False,
mode = 'FAST_RUN'):
return scan(fn, sequences= sequences, outputs_info = [],non_sequences= non_sequences,
truncate_gradient= truncate_gradient,
go_backwards= go_backwards, mode = mode)
......@@ -102,7 +104,7 @@ def map(fn, sequences, non_sequences = [], n_steps =0, truncate_gradient = -1, \
# Yes, actually it will be exactly 2 ( if there are no other constraints)
def scan(fn, sequences=[], info_outputs=[], non_sequences=[],
def scan(fn, sequences=[], outputs_info=[], non_sequences=[],
n_steps = 0, truncate_gradient = -1, go_backwards = False,
mode = None):
'''Function that constructs and applies a Scan op
......@@ -158,7 +160,7 @@ def scan(fn, sequences=[], info_outputs=[], non_sequences=[],
the assumption that you use only one slice, defined as a tap of offset 0. This
means that at step ``t`` scan will provide the slice at position ``t``.
:param info_outputs:
:param outputs_info:
list of Theano variables or dictionaries containing Theano variables used
to initialize the outputs of scan. As before (for ``sequences``) the reason
you would wrap a Theano variable in a dictionary is to provide additional
......@@ -234,10 +236,11 @@ def scan(fn, sequences=[], info_outputs=[], non_sequences=[],
else:
seqs = sequences
if not (type(info_outputs) in (list,tuple)):
info_outs = [info_outputs]
print outputs_info
if not (type(outputs_info) in (list,tuple)):
outs_info = [outputs_info]
else:
info_outs = info_outputs
outs_info = outputs_info
if not (type(non_sequences) in (list,tuple)):
non_seqs = [non_sequences]
......@@ -245,14 +248,13 @@ def scan(fn, sequences=[], info_outputs=[], non_sequences=[],
non_seqs = non_sequences
# compute number of sequences and number of outputs
n_seqs = len(seqs)
n_outs = len(info_outs)
n_seqs = len(seqs)
n_outs = len(outs_info)
inplace_map = {}
inplace_map = {}
sequences_taps = {}
outputs_taps = {}
outputs_taps = {}
# wrap sequences in a dictionary if they are not already
# in the same pass create a sequences_taps dictionary
for i in xrange(n_seqs):
......@@ -261,83 +263,97 @@ def scan(fn, sequences=[], info_outputs=[], non_sequences=[],
# see if taps values are provided as a list
elif seqs[i].get('taps',None):
if not type(seqs[i]['taps']) in (tuple,list):
seqs[i]['taps'] = [seqs[i]['taps']]
seqs[i]['taps'] = [seqs[i]['taps']]
else:
seqs[i][taps] = [0]
if seqs[i].get('taps',None):
sequences_taps[i] = seqs[i]['taps']
# wrap outputs info in a dictionary if they are not already
# in the same pass create a init_outs_taps dictionary and a inplace map
print n_outs
print outs_info
for i in xrange(n_outs):
if info_outs[i]:
if not type(info_outs[i]) == dict:
info_outs[i] = dict(initial=info_outs[i], taps = [-1])
if outs_info[i]:
if not type(outs_info[i]) == dict:
outs_info[i] = dict(initial=outs_info[i], taps = [-1])
# if there is no initial state but there are taps
elif (not info_outs[i].get('initial',None)) and(info_outs[i].get('taps',None)):
elif (not outs_info[i].get('initial',None)) and(outs_info[i].get('taps',None)):
raise ValueError('If you are using slices of an output you need to '\
'provide a initial state for it', info_outs[i])
elif info_outs[i].get('initial',None) and (not info_outs[i].get('taps',None)):
info_outs[i]['taps'] = [-1]
'provide a initial state for it', outs_info[i])
elif outs_info[i].get('initial',None) and (not outs_info[i].get('taps',None)):
outs_info[i]['taps'] = [-1]
else:
info_outs[i] = dict()
outs_info[i] = dict()
if info_outs[i].get('taps', None):
outputs_taps[i] = info_outs[i]['taps']
if info_outs[i].get('inplace', None):
if outs_info[i].get('taps', None):
outputs_taps[i] = outs_info[i]['taps']
if outs_info[i].get('inplace', None):
# look for that variable to get the index
found = None
for k in xrange(n_seqs):
if seqs[k].get('input', None) == info_outs[i].get('inplace',None):
if seqs[k].get('input', None) == outs_info[i].get('inplace',None):
found = k
if found != None:
inplace_map[i] = k
if found != None:
# NOTE : inplace_map is identical to destroy_map, i.e. it tells what output
# is computed inplace of what input !!
inplace_map[i] = found
else:
raise ValueError('Asked to compute in place of a non-input variable',\
info_outs[i].get('inplace', None))
outs_info[i].get('inplace', None))
# create theano inputs for the recursive function
# note : this is a first batch of possible inputs that will
# be compiled in a dummy function; we used this dummy
# function to detect shared variables and their updates
# and to construct a new list of possible inputs
args = []
_ins = 0
_outs = 0
dummy_notshared_ins = 0
dummy_notshared_init_outs = 0
slice_to_seqs = []
# go through sequences picking up time slices as needed
for seq in seqs:
for i,seq in enumerate(seqs):
if seq.get('taps', None):
slices = [ seq['input'][0].type() for k in seq['taps'] ]
slice_to_seqs += [ i for k in seq['taps']]
args += slices
_ins += len(seq['taps'])
dummy_notshared_ins += len(seq['taps'])
# go through outputs picking up time slices as needed
for init_out in info_outs:
for i,init_out in enumerate(outs_info):
if init_out.get('taps', None) == [-1]:
args += [init_out['initial'].type()]
_outs += 1
val = slice_to_seqs[-1] if slice_to_seqs else -1
slice_to_seqs += [ val+1 ]
dummy_notshared_init_outs += 1
elif init_out.get('taps',None):
if numpy.any(numpy.array(init_out.get('taps',[])) > 0):
raise ValueError('Can not use future taps of outputs', init_out)
slices = [ init_out['initial'][0].type() for k in init_out['taps'] ]
slices = [ init_out['initial'][0].type() for k in init_out['taps'] ]
val = slice_to_seqs[-1] if slice_to_seqs else -1
slice_to_seqs += [ val+1 for k in init_out['taps'] ]
args += slices
_outs += len(init_out['taps'])
dummy_notshared_init_outs += len(init_out['taps'])
# remove shared variables from the non sequences list
noshared = []
notshared_other_args = []
for non_seq in non_seqs:
if not isinstance(non_seq, theano.compile.SharedVariable):
noshared += [non_seq]
notshared_other_args += [non_seq]
dummy_args = args + noshared
# add only the not shared variables to the arguments of the dummy
# function [ a function should not get shared variables as input ]
dummy_args = args + notshared_other_args
# arguments for the lambda expression that gives us the output
# of the inner function
args += non_seqs
outputs_updates = fn(*args)
outputs = []
updates = {}
# we try now to separate the outputs from the updates
# we will try now to separate the outputs from the updates
if not type(outputs_updates) in (list,tuple):
if type(outputs_updates) == dict :
# we have just an update dictionary
......@@ -347,25 +363,26 @@ def scan(fn, sequences=[], info_outputs=[], non_sequences=[],
else:
elem0 = outputs_updates[0]
elem1 = outputs_updates[1]
if ( type(elem0) == dict ) or \
( type(elem0) in (list,tuple) and type(elem0[0]) in (list,tuple)):
# elem0 is the updates dictionary / list
updates = elem0
outputs = elem1
if not type(outputs) in (list,tuple):
outputs = [outputs]
t_el0 = type(elem0)
t_el1 = type(elem1)
if t_el0 == dict or ( t_el0 in (list,tuple) and type(elem0[0]) in (list,tuple)):
# elem0 is the updates dictionary / list
updates = elem0
outputs = elem1
if not type(outputs) in (list,tuple):
outputs = [outputs]
elif ( type(elem1) == dict) or \
( type(elem1) in (list,tuple) and type(elem1[0]) in (list,tuple)):
# elem1 is the updates dictionary / list
updates = elem1
outputs = elem0
if not type(outputs) in (list,tuple):
outputs = [outputs]
# elem1 is the updates dictionary / list
updates = elem1
outputs = elem0
if not type(outputs) in (list,tuple):
outputs = [outputs]
else :
if type(outputs_updates) in (list,tuple) and \
( type(outputs_updates[0]) in (list,tuple)):
outputs = []
updates = outputs_updates
(type(outputs_updates[0]) in (list,tuple)):
outputs = []
updates = outputs_updates
else:
outputs = outputs_updates
updates = {}
......@@ -373,93 +390,103 @@ def scan(fn, sequences=[], info_outputs=[], non_sequences=[],
# Wo compile a dummy function just to see what shared variable
# we have and what are their update rules
dummy_f = theano.function(dummy_args, outputs, updates = updates, mode = \
theano.compile.mode.Mode(linker = 'py', optimizer = None) )
ls_outputs = [ sout.variable for sout in dummy_f.maker.outputs]
update_map = {}
shared_outs = []
shared_non_seqs = []
givens = {}
inner_fn_out_states = [ out.variable for out in dummy_f.maker.outputs]
update_map = {}
shared_outs = []
shared_non_seqs = []
givens = {}
# if the number of outputs to the function does not match the number of
# assumed outputs
if len(ls_outputs) != n_outs:
if info_outs == []:
# find the number of update rules from shared variables
n_update_rules = 0
for v in dummy_f.maker.expanded_inputs :
if isinstance(v.variable, theano.compile.SharedVariable) and v.update:
n_update_rules += 1
if len(inner_fn_out_states) != n_outs:
if outs_info == []:
# We know how to deal with this case, assume that none of the outputs
# are required to have any sort of time taps
# we just need to update the number of actual outputs
n_outs = len(ls_outputs)
print len(inner_fn_out_states), n_outs, n_update_rules
print inner_fn_out_states
n_outs = len(inner_fn_out_states)
# other updates :
for i in xrange(n_outs):
info_outs += [ dict() ]
outs_info += [ dict() ]
else:
print outs_info
print inner_fn_out_states
print n_outs
raise ValueError('There has been a terrible mistake in our input arguments'
' and scan is totally lost. Make sure that you indicate for every '
' output what taps you want to use, or None, if you do not want to '
' use any !')
inner_fn_inputs=[input.variable for input in \
dummy_f.maker.expanded_inputs[:dummy_notshared_ins+dummy_notshared_init_outs]]
fromIdx = dummy_notshared_ins + dummy_notshared_init_outs
ls_inputs=[inp.variable for inp in \
dummy_f.maker.expanded_inputs[:_ins+_outs]]
fromIdx = _ins + _outs
stored_steps_output = [ 0 for i in xrange(n_outs)]
store_steps = [ 0 for i in xrange(n_outs)]
# add shared variable that act as outputs
#
n_outs_extended = n_outs
for inp in dummy_f.maker.expanded_inputs[fromIdx:] :
if isinstance(inp.variable, theano.compile.SharedVariable) and inp.update:
ls_inputs.append(inp.variable.type())
ls_outputs += [inp.update]
update_map[ inp.variable ] = n_outs_extended
outputs_taps[ n_outs_extended ] = [-1]
n_outs_extended += 1
stored_steps_output += [1]
shared_outs += [inp.variable]
givens[inp.variable] = ls_inputs[-1]
n_extended_outs = n_outs
for input in dummy_f.maker.expanded_inputs[fromIdx:] :
if isinstance(input.variable, theano.compile.SharedVariable) and input.update:
new_var = input.variable.type()
inner_fn_inputs.append(new_var)
val = slice_to_seqs[-1] if slice_to_seqs else -1
slice_to_seqs += [ val+1 ]
inner_fn_out_states += [input.update]
update_map[ input.variable ] = n_extended_outs
outputs_taps[ n_extended_outs ] = [-1]
n_extended_outs += 1
store_steps += [1]
shared_outs += [input.variable]
givens[input.variable] = inner_fn_inputs[-1]
# add the rest:
for inp in dummy_f.maker.expanded_inputs[fromIdx:] :
if isinstance(inp.variable, theano.compile.SharedVariable) and not inp.update:
shared_non_seqs += [inp.variable]
ls_inputs += [inp.variable.type() ]
givens[inp.variable] = ls_inputs[-1]
elif not isinstance(inp.variable, theano.compile.SharedVariable):
ls_inputs.append(inp.variable)
for input in dummy_f.maker.expanded_inputs[fromIdx:] :
if isinstance(input.variable, theano.compile.SharedVariable) and not input.update:
shared_non_seqs += [input.variable]
inner_fn_inputs += [input.variable.type() ]
val = slice_to_seqs[-1] if slice_to_seqs else -1
slice_to_seqs += [val +1]
givens[input.variable] = inner_fn_inputs[-1]
elif not isinstance(input.variable, theano.compile.SharedVariable):
inner_fn_inputs.append(input.variable)
# Create the Scan op object
local_op = Scan( (ls_inputs,ls_outputs, givens ), n_seqs, n_outs_extended,
inplace_map, sequences_taps, outputs_taps, truncate_gradient,
go_backwards, stored_steps_output, mode)
local_op = Scan( (inner_fn_inputs,inner_fn_out_states, givens, slice_to_seqs ), n_seqs,
n_extended_outs, inplace_map, sequences_taps, outputs_taps, truncate_gradient,
go_backwards, store_steps, mode)
# Call the object on the input sequences, initial values for outs,
# and non sequences
for seq in seqs :
if not seq.get('input', None):
raiseValue('All input sequences should provide')
unwrapped_seqs = [ seq.get('input',theano.tensor.as_tensor(0)) for seq in seqs ]
unwrapped_outs = [ out.get('initial',theano.tensor.as_tensor(0)) for out in info_outs ]
values = local_op( *( [theano.tensor.as_tensor(n_steps)] \
+ unwrapped_seqs \
+ unwrapped_outs \
+ shared_outs \
+ noshared
unwrapped_seqs = [ seq.get('input',theano.tensor.as_tensor(0.)) for seq in seqs ]
unwrapped_outs = [ out.get('initial',theano.tensor.as_tensor(0.)) for out in outs_info ]
values = local_op( *( [theano.tensor.as_tensor(n_steps)]
+ unwrapped_seqs
+ unwrapped_outs
+ shared_outs
+ notshared_other_args
+ shared_non_seqs))
if not type(values) in (tuple, list):
values = [values]
for k in update_map.keys():
update_map[k] = values [ update_map[k] ]
if n_outs != n_outs_extended :
if n_outs == 1:
values = values[0]
else:
values = values[:n_outs]
for val in update_map.keys():
update_map[val] = values [ update_map[val] ]
if n_outs == 1:
values = values[0]
else:
values = values[:n_outs]
return (values, update_map)
......@@ -471,17 +498,17 @@ class Scan(theano.Op):
# OLD DOCUMENTATION CAN BE FOUND NEAR REVISION 2581
#
def __init__(self,(inputs, outputs, givens),n_seqs, n_outs,
def __init__(self,(inputs, outputs, givens, slice_to_seqs),n_seqs, n_outs,
inplace_map={}, seqs_taps={}, outs_taps={},
truncate_gradient = -1,
go_backwards = False, stored_steps_output = {},
go_backwards = False, store_steps = {},
mode = 'FAST_RUN', inplace=False):
'''
:param (inputs,outputs, givens): inputs and outputs Theano variables
that describe the function that is
applied recursively; givens
list is used to replace shared
variables with not shared ones
:param (inputs,outputs, givens,slice_to_seqs):
inputs and outputs Theano variables that describe the function that is
applied recursively; givens list is used to replace shared
variables with not shared ones; slice_to_seqs is a convinience list that
tells which of the inputs is slice to which of the sequences
:param n_seqs: number of sequences over which scan will have to
iterate
:param n_outs: number of outputs of the scan op
......@@ -491,32 +518,13 @@ class Scan(theano.Op):
:param truncate_gradient: number of steps after which scan should
truncate -1 implies no truncation
:param go_bacwards: see scan funcion above
:param stored_steps_output: a list of booleans of same size as the
number of outputs; the value at position
``i`` in the list corresponds to the
``i-th`` output, and it tells how many
steps (from the end towards the begining)
of the outputs you really need and should
return; given this information, scan can
know (if possible) to allocate only
the amount of memory needed to compute
that many entries
:param store_steps:
a list of booleans of same size as the number of outputs; the value at position
``i`` in the list corresponds to the ``i-th`` output, and it tells how many
steps (from the end towards the begining) of the outputs you really need and should
return; given this information, scan can know (if possible) to allocate only
the amount of memory needed to compute that many entries
'''
# check inplace map
for _out,_in in inplace_map.iteritems():
if _out > n_outs:
raise ValueError(('Inplace map reffers to an unexisting'\
'output %d')% _out)
if _in > n_seqs:
raise ValueError(('Inplace map reffers to an unexisting'\
'input sequence %d')%_in)
if (_in >= 0) and (min(seqs_taps[_in]) < 0):
raise ValueError(('Input sequence %d uses past values that '\
'will be overwritten by inplace operation')%_in)
#check sequences past taps
for k,v in seqs_taps.iteritems():
if k > n_seqs:
......@@ -535,7 +543,7 @@ class Scan(theano.Op):
# build a list of output types for any Apply node using this op.
self.apply_output_types = []
for i, o in enumerate(outputs):
if 1 == stored_steps_output[i]:
if 1 == store_steps[i]:
self.apply_output_types.append(o.type)
else:
expanded_otype = TensorType(
......@@ -548,6 +556,15 @@ class Scan(theano.Op):
if inplace:
for i in inplace_map.keys():
self.destroy_map.update({i: [inplace_map[i]+1] } )
# make all inplace inputs mutable for the inner function for extra efficency
for idx in xrange(len(inputs)):
# get seq number
n_seq = slice_to_seqs[idx]
if n_seq in inplace_map.keys():
if type(inputs[n_seq]) is theano.Param:
inputs[n_seq].mutable = True
else:
inputs[n_seq] = theano.Param( inputs[n_seq], mutable = True)
self.seqs_taps = seqs_taps
self.outs_taps = outs_taps
......@@ -555,15 +572,19 @@ class Scan(theano.Op):
self.n_outs = n_outs
self.n_args = n_seqs+n_outs+1
self.inplace_map = inplace_map
self.stored_steps_output = stored_steps_output
self.store_steps = store_steps
self.inplace = inplace
self.inputs = inputs
self.givens = givens
self.outputs = outputs
self.truncate_gradient = truncate_gradient
self.go_backwards = go_backwards
self.slice_to_seqs = slice_to_seqs
self.fn = theano.function(inputs,outputs, mode = mode, givens = givens)
assert not numpy.any( [isinstance(x.variable,theano.compile.SharedVariable) for x in \
self.fn.maker.inputs])
def make_node(self,*inputs):
......@@ -572,44 +593,42 @@ class Scan(theano.Op):
def __eq__(self,other):
# the self.apply_output_types are a function of all these things
# no need to compare it as well
rval = type(self) == type(other)
if rval:
rval = (self.inputs == other.inputs) and \
(self.outputs == other.outputs) and \
(self.givens == other.givens) and \
(self.stored_steps_output == other.stored_steps_output) and \
(self.seqs_taps == other.seqs_taps) and \
(self.outs_taps == other.outs_taps) and \
(self.inplace_map == other.inplace_map) and \
(self.n_seqs == other.n_seqs) and\
(self.inplace == other.inplace) and\
(self.go_backwards == other.go_backwards) and\
(self.truncate_gradient == other.truncate_gradient) and\
(self.n_outs == other.n_outs) and\
(self.n_args == other.n_args)
return rval
# the self.apply_output_types are a function of all these things
# no need to compare it as well
rval = type(self) == type(other)
if rval:
rval = (self.inputs == other.inputs) and \
(self.outputs == other.outputs) and \
(self.givens == other.givens) and \
(self.store_steps == other.store_steps) and \
(self.seqs_taps == other.seqs_taps) and \
(self.outs_taps == other.outs_taps) and \
(self.inplace_map == other.inplace_map) and \
(self.n_seqs == other.n_seqs) and\
(self.inplace == other.inplace) and\
(self.go_backwards == other.go_backwards) and\
(self.truncate_gradient == other.truncate_gradient) and\
(self.n_outs == other.n_outs) and\
(self.n_args == other.n_args)
return rval
def __hash__(self):
# the self.apply_output_types are a function of all these things
# no need to compare it as well
return hash(type(self)) ^ \
hash(self.n_seqs) ^ \
hash(self.n_outs) ^ \
hash(self.inplace) ^\
hash(self.go_backwards) ^\
hash(self.truncate_gradient) ^\
hash(self.n_args) ^ \
hash_listsDictsTuples(self.outputs) ^ \
hash_listsDictsTuples(self.inputs) ^ \
hash_listsDictsTuples(self.givens) ^ \
hash_listsDictsTuples(self.seqs_taps) ^\
hash_listsDictsTuples(self.outs_taps) ^\
hash_listsDictsTuples(self.stored_steps_output)
# the self.apply_output_types are a function of all these things
# no need to compare it as well
return hash(type(self)) ^ \
hash(self.n_seqs) ^ \
hash(self.n_outs) ^ \
hash(self.inplace) ^\
hash(self.go_backwards) ^\
hash(self.truncate_gradient) ^\
hash(self.n_args) ^ \
hash_listsDictsTuples(self.outputs) ^ \
hash_listsDictsTuples(self.inputs) ^ \
hash_listsDictsTuples(self.givens) ^ \
hash_listsDictsTuples(self.seqs_taps) ^\
hash_listsDictsTuples(self.outs_taps) ^\
hash_listsDictsTuples(self.store_steps)
def perform(self,node,args, outs):
......@@ -643,20 +662,20 @@ class Scan(theano.Op):
n_steps = args[0]
for i in xrange(self.n_seqs):
if self.seqs_taps.has_key(i):
# compute actual length of the sequence ( we need to see what
# past taps this sequence has, and leave room for them
seq_len = args[i+1].shape[0] + min(self.seqs_taps[i])
if max( self.seqs_taps[i]) > 0:
# using future values, so need to end the sequence earlier
seq_len -= max(self.seqs_taps[i])
if n_steps == 0 :
# length of the sequences, leaving room for the largest
n_steps = seq_len
if seq_len != n_steps :
warning(('Input sequence %d has a shorter length then the '
'expected number of steps %d')%(i,n_steps))
n_steps = min(seq_len,n_steps)
if self.seqs_taps.has_key(i):
# compute actual length of the sequence ( we need to see what
# past taps this sequence has, and leave room for them
seq_len = args[i+1].shape[0] + min(self.seqs_taps[i])
if max( self.seqs_taps[i]) > 0:
# using future values, so need to end the sequence earlier
seq_len -= max(self.seqs_taps[i])
if n_steps == 0 :
# length of the sequences, leaving room for the largest
n_steps = seq_len
if seq_len != n_steps :
warning(('Input sequence %d has a shorter length then the '
'expected number of steps %d')%(i,n_steps))
n_steps = min(seq_len,n_steps)
......@@ -667,144 +686,202 @@ class Scan(theano.Op):
# check lengths of init_outs
for i in xrange(self.n_seqs+1, \
self.n_seqs+self.n_outs+1):
if self.outs_taps.has_key(i-self.n_seqs-1):
if self.outs_taps[i-self.n_seqs-1] != [-1]:
req_size = abs(min(self.outs_taps[i-self.n_seqs-1]))-1
if args[i].shape[0] < req_size:
warning(('Initial state for output %d has fewer values then '
'required by the maximal past value %d. Scan will use 0s'
' for missing values')%(i-self.n_iterable-1,req_size))
for i in xrange(self.n_seqs+1, self.n_seqs+self.n_outs+1):
if self.outs_taps.has_key(i-self.n_seqs-1):
if self.outs_taps[i-self.n_seqs-1] != [-1]:
req_size = abs(min(self.outs_taps[i-self.n_seqs-1]))-1
if args[i].shape[0] < req_size:
warning(('Initial state for output %d has fewer values then '
'required by the maximal past value %d. Scan will use 0s'
' for missing values')%(i-self.n_iterable-1,req_size))
self.n_steps = n_steps
y = self.scan(self.fn, args[1:],self.n_seqs, self.n_outs,
self.seqs_taps, self.outs_taps, n_steps, self.go_backwards,
inplace_map)
# write to storage, converting if needed
'''
# write to storage, converting if needed ( why do we have the wrong dtype !???)
# -- solved --
for i in xrange(self.n_outs):
if hasattr(node.outputs[i], 'dtype'):
outs[i][0] = theano._asarray(y[i], dtype=node.outputs[i].dtype)
else:
outs[i][0] = y[i]
'''
for i in xrange(self.n_outs):
if self.store_steps[i] > 1 :
# we need to reorder the steps .. to have them in the correct order
# we use numpy advanced indexing for this
# index order :
index_order = range(self.idx_store_steps[i],self.store_steps[i]) + \
range(self.idx_store_steps[i])
outs[i][0] = y[i][index_order]
else:
outs[i][0] = y[i]
def scan(self, fn, args, n_seqs, n_outs, seqs_taps, outs_taps, n_steps, go_backwards, inplace_map):
''' Actual loop of the scap op perform function '''
# Note that we removed the n_steps from the args for this function, so the
# order of arguments is slightly different compared to perform
y = []
# When you have taps, you need to leave borders in your sequences, initial outputs
# for those taps; here we compute what are those borders for sequences
seqs_mins = {}
for j in xrange(n_seqs):
if seqs_taps.has_key(j):
seqs_mins.update({j: min(seqs_taps[j])})
def scan(self, fn, args, n_seqs, n_outs, seqs_taps, outs_taps, n_steps,
go_backwards, inplace_map):
# create storage space for the outputs ( using corresponding inputs if we are
# dealing with inplace operations
# `idx_store_steps` is a dictionary telling us the current position in y of an
# output where we want to store only the last k steps
y = []
for i in xrange(n_outs):
if inplace_map.has_key(i) and (inplace_map[i] >= 0):
y += [args[inplace_map[i]]]
else:
if self.stored_steps_output[i] == 1 :
y+= [ None ]
else:
arg_shape = args[i+n_seqs].shape[1:]
if (not self.outs_taps.has_key(i)) or \
self.outs_taps[i] == [-1]:
arg_shape = args[i+n_seqs].shape
if self.stored_steps_output[i] < 1 :
y_shape = (n_steps,)+arg_shape
else:
y_shape = (self.stored_steps_output[i],)+arg_shape
y += [numpy.empty(y_shape, dtype=args[i+n_seqs].dtype)]
seqs_mins = {}
for j in xrange(n_seqs):
if seqs_taps.has_key(j):
seqs_mins.update({j: min(seqs_taps[j])})
outs_mins = {}
initOuts_size = {}
for j in xrange(n_outs):
if outs_taps.has_key(j):
outs_mins.update({j: min(outs_taps[j])})
if self.outs_taps[j] != [-1]:
initOuts_size.update({j: args[n_seqs+j].shape[0]})
else:
initOuts_size.update({j: 0})
for i in xrange(n_steps):
fn_args = []
# sequences over which scan iterates
# check to see if we are scaning them backwards or no
_i = i
if go_backwards:
_i = n_steps-1-i
for j in xrange(n_seqs):
if seqs_taps.has_key(j):
ls_taps = seqs_taps[j]
min_tap = seqs_mins[j]
for tap_value in ls_taps:
k = _i - min_tap + tap_value
fn_args += [args[j][k]]
self.idx_store_steps = {}
for i in xrange(n_outs):
# past values of outputs
if inplace_map.has_key(i) and seqs_taps.has_key(inplace_map[i]) and\
seqs_taps[inplace_map[i]] >=0:
y += [args[inplace_map[i]][:n_steps]]
else:
# check if you are using past value .. through in a warning and do not
# work inplace
if inplace_map.has_key(i) and seqs_taps.has_key(inplace_map[i]) and seqs_taps[inplace_map[i]] < 0:
warning('Can not work inplace because of past values')
if self.store_steps[i] == 1 :
y+= [ None ]
else:
arg_shape = args[i+n_seqs].shape[1:]
if (not self.outs_taps.has_key(i)) or self.outs_taps[i] == [-1]:
arg_shape = args[i+n_seqs].shape
if self.store_steps[i] < 1 :
y_shape = (n_steps,)+arg_shape
else:
# we need to store only a fixed number of steps of our output
self.idx_store_steps[i] = 0
y_shape = (self.store_steps[i],)+arg_shape
y += [numpy.empty(y_shape, dtype=args[i+n_seqs].dtype)]
# and here we compute the borders for initial states of outputs
outs_mins = {}
initOuts_size = {}
for j in xrange(n_outs):
if outs_taps.has_key(j):
ls_taps = outs_taps[j]
min_tap = outs_mins[j]
sz = initOuts_size[j]
for tap_value in ls_taps:
if i + tap_value < 0:
if sz < 1:
fn_args += [args[j+n_seqs] ]
if outs_taps.has_key(j):
outs_mins.update({j: min(outs_taps[j])})
if self.outs_taps[j] != [-1]:
initOuts_size.update({j: args[n_seqs+j].shape[0]})
else:
k = i + sz + tap_value
if k < 0:
# past value not provided.. issue a warning and use 0s
fn_args += [numpy.zeros(args[j+n_seqs][0].shape)]
warning(('Past value %d for output %d not given in '
'inital out') % (j,tap_value))
else:
fn_args += [args[j+n_seqs][k]]
else:
if self.stored_steps_output[j] < 1:
fn_args += [y[j][i + tap_value]]
elif self.stored_steps_output[j] == 1:
fn_args += [y[j] ]
initOuts_size.update({j: 0})
############## THE MAIN LOOP ############################
for i in xrange(n_steps):
fn_args = []
# sequences over which scan iterates
# check to see if we are scaning them backwards or no
# and get a new index ``_i`` accordingly
_i = i
if go_backwards:
_i = n_steps-1-i
# collect data from sequences
for j in xrange(n_seqs):
# get borders
if seqs_taps.has_key(j):
ls_taps = seqs_taps[j]
min_tap = seqs_mins[j]
for tap_value in ls_taps:
# use the borders to figure out what value you actually need
k = _i - min_tap + tap_value
fn_args += [args[j][k]]
# past values of outputs
for j in xrange(n_outs):
if outs_taps.has_key(j):
ls_taps = outs_taps[j]
min_tap = outs_mins[j]
sz = initOuts_size[j]
for tap_value in ls_taps:
if i + tap_value < 0:
if sz < 1:
# this is a special case, when our initial state has no
# temporal dimension
fn_args += [args[j+n_seqs] ]
else:
k = i + sz + tap_value
if k < 0:
# past value not provided.. issue a warning and use 0s of the
# correct dtype
fn_args += [numpy.zeros(args[j+n_seqs][0].shape, dtype =
args[j+n_sqs][0].dtype)]
warning(('Past value %d for output %d not given in '
'inital out') % (j,tap_value))
else:
fn_args += [args[j+n_seqs][k]]
else:
if self.store_steps[j] < 1:
# no limit on how many steps to store from our output
fn_args += [y[j][i + tap_value]]
elif self.store_steps[j] == 1:
# just the last one
fn_args += [y[j] ]
else:
# storing only the last k
# get what idx we want
req_idx = (self.idx_store_steps[j] + tap_value + self.store_steps[j])
# we need this modula self.store_steps[j]
req_idx = req_idx % self.store_steps[j]
fn_args += [y[j][req_idx] ]
# get the non-iterable sequences
fn_args += list(args[(n_seqs+n_outs):])
# compute output
something = fn(*fn_args)
#update outputs
for j in xrange(n_outs):
if self.store_steps[j] <1:
# if you have provided no size for the missing output you might find yourself
# here with a incorect array .. if that happens realocate memory for the
# needed array
try :
if hasattr(something[j],'dtype') and (y[j].dtype != something[j].dtype) :
raise ValueError('wrong dtype')
y[j][i] = something[j]
except :
y[j]= numpy.empty((n_steps,)+something[j].shape, dtype= something[j].dtype)
y[j][i] = something[j]
elif self.store_steps[j] == 1:
try:
if hasattr(something[j],'dtype') and y[j].dtype != something[j].dtpye:
raise ValueError('wrong dtype')
y[j] = something[j]
except:
y[j] = numpy.empty( something[j].shape, dtype = something[j].dtype)
y[j] = something[j]
else:
raise NotImplementedError('This will be implemented in the near future')
# get the non-iterable sequences
fn_args += list(args[(n_seqs+n_outs):])
# compute output
something = fn(*fn_args)
#update outputs
for j in xrange(n_outs):
if self.stored_steps_output[j] <1:
# if you have provided no size for the missing output you might find yourself
# here with a incorect array .. if that happens realocate memory for the needed
# array
try :
y[j][i] = something[j]
except :
y[j] = numpy.empty( (n_steps,)+something[j].shape , dtype =
something[j].dtype)
y[j][i] = something[j]
elif self.stored_steps_output[j] == 1:
try:
y[j] = something[j]
except:
y[j] = numpy.empty( something[j].shape, dtype = something[j].dtype)
y[j] = something[j]
else:
raise NotImplementedError('This will be implemented in the near future')
return y
try:
if hasattr(something[j],'dtype') and y[j].dtype != something[j].dtype:
raise ValueError('worng dtype')
y[j][self.idx_store_steps[j]] = something[j]
self.idx_store_steps[j] = (self.idx_store_steps[j] + 1) % self.store_steps[j]
except:
y[j] = numpy.empty( (self.store_steps[j],)+something[j].shape, \
dtype = something[j].dtype)
y[j][idx_sotre_steps[j]] = something[j]
self.idx_store_steps[j] = (self.idx_store_steps[j] + 1) % self.store_steps[j]
return y
def grad(self, args, g_outs):
    """Symbolic gradient of the Scan op.

    Not implemented yet: differentiating through the scan loop
    (backprop-through-time) is left for future work, so any attempt to
    build the gradient of a scan raises immediately.
    """
    raise NotImplementedError('This will be implemented in the near future');
'''
if True:
#((self.updates.keys() != []) or (self.inplace_map.keys() != [])\
# or numpy.any(self.stored_steps_output)):
# or numpy.any(self.store_steps)):
# warning('Can not compute gradients if inplace or updates ' \
# 'are used or if you do not keep past value of outputs.'\
# 'Use force_gradient if you know for sure '\
......@@ -872,18 +949,16 @@ class Scan(theano.Op):
@gof.local_optimizer([None])
def scan_make_inplace(node):
    """Graph rewrite: turn a non-inplace Scan that carries an inplace_map
    into an equivalent Scan constructed with inplace=True.

    Returns the outputs of the rewritten node, or False when the rewrite
    does not apply (node is not a Scan, is already inplace, or has an
    empty inplace_map).
    """
    op = node.op
    # A stale duplicate of this branch (using the removed 3-tuple
    # constructor and the old op.stored_steps_output attribute) used to
    # shadow this one and would have raised AttributeError at rewrite
    # time; only the current-constructor form is kept.
    if isinstance(op, Scan) and (not op.inplace) and (op.inplace_map.keys() != []):
        return Scan((op.inputs, op.outputs, op.givens, op.slice_to_seqs), op.n_seqs,
                    op.n_outs, op.inplace_map, op.seqs_taps, op.outs_taps,
                    op.truncate_gradient, op.go_backwards, op.store_steps,
                    inplace=True).make_node(*node.inputs).outputs
    return False

# Register late (priority 75) under the 'fast_run'/'inplace' tags so the
# inplace substitution happens after the main optimizations.
optdb.register('scanOp_make_inplace', opt.in2out(scan_make_inplace,
               ignore_newtrees=True), 75, 'fast_run', 'inplace')
......
......@@ -9,9 +9,9 @@ import numpy.random
from theano.tests import unittest_tools as utt
def verify_grad(op, pt, n_tests=2, rng=None, eps = None, tol = None,
mode = None, cast_to_output_type = False):
pt = [numpy.array(p) for p in pt]
mode = None, cast_to_output_type = False):
pt = [numpy.array(p) for p in pt]
_type_tol = dict( float32=1e-2, float64=1e-4)
if tol is None:
......@@ -20,7 +20,7 @@ def verify_grad(op, pt, n_tests=2, rng=None, eps = None, tol = None,
if rng is None:
rng = numpy.random
utt.seed_rng()
def function(inputs, outputs):
if mode is None:
f = theano.function(inputs, outputs, accept_inplace=True)
......@@ -30,8 +30,8 @@ def verify_grad(op, pt, n_tests=2, rng=None, eps = None, tol = None,
for test_num in xrange(n_tests):
tensor_pt=[theano.tensor.value(p.copy(),name='input %i'%i)
for i,p in enumerate(pt)]
# op outputs
for i,p in enumerate(pt)]
# op outputs
o_outputs = op(*tensor_pt)
if not (type(o_outputs) in (list,tuple)):
o_outputs = [ o_outputs ]
......@@ -44,15 +44,15 @@ def verify_grad(op, pt, n_tests=2, rng=None, eps = None, tol = None,
random_projection = rng.rand(*o_fn_outs[0].shape)
if cast_to_output_type:
random_projection = numpy.array(random_projection,
dtype = o_fn_outs[0].dtype)
t_r = theano.tensor.as_tensor_variable(random_projection)
dtype = o_fn_outs[0].dtype)
t_r = theano.tensor.as_tensor_variable(random_projection)
cost = theano.tensor.sum( t_r * o_outputs[0])
for i, o in enumerate(o_fn_outs[1:] ):
random_projection = rng.rand(*o.shape)
if cast_to_output_type:
random_projection = numpy.array(random_projection,
dtype=o_outputs[i].dtype)
t_r = theano.tensor.as_tensor_variable(random_projection)
dtype=o_outputs[i].dtype)
t_r = theano.tensor.as_tensor_variable(random_projection)
cost += theano.tensor.sum( t_r * o_outputs[i])
cost_fn = function(tensor_pt, cost)
num_grad = theano.tensor.numeric_grad(cost_fn,[p.copy() for p in pt],eps)
......@@ -60,7 +60,7 @@ def verify_grad(op, pt, n_tests=2, rng=None, eps = None, tol = None,
if cast_to_output_type:
g_cost = cast(g_cost, o_output.dtype)
symbolic_grad = theano.tensor.grad(cost, tensor_pt, g_cost)
grad_fn = function(tensor_pt,symbolic_grad)
analytic_grad = grad_fn(*[p.copy() for p in pt])
......@@ -70,7 +70,7 @@ def verify_grad(op, pt, n_tests=2, rng=None, eps = None, tol = None,
max_err, max_err_pos = num_grad.max_err(analytic_grad)
if max_err > tol:
raise Exception(theano.tensor.verify_grad.E_grad,
(max_err, tol, max_err_pos))
(max_err, tol, max_err_pos))
#TODO: Test this function, and if it works,
......@@ -87,17 +87,8 @@ def scan_project_sum(*args, **kwargs):
return sum([(s * rng.uniform(size=s.shape)).sum() for s in scan_outputs])
def compareArrays(a, b):
    """Return True iff *a* and *b* agree elementwise to within 1e-5.

    Plain lists/tuples are promoted to numpy arrays first, so the tests
    can compare theano results directly against Python literals.
    """
    first, second = [numpy.array(v) if type(v) in (list, tuple) else v
                     for v in (a, b)]
    return numpy.all(abs(first - second) < 1e-5)
class T_Scan(unittest.TestCase):
def setUp(self):
utt.seed_rng()
......@@ -106,86 +97,129 @@ class T_Scan(unittest.TestCase):
def test_generator_one_output_scalar(self):
def f_pow2(x_tm1):
return 2*x_tm1
s = theano.tensor.dscalar()
state = theano.tensor.dscalar()
n_steps = theano.tensor.dscalar()
Y, updts = theano.scan(f_pow2, [],s, [],n_steps = n_steps)
f1 = theano.function([s,n_steps], Y, updates = updts)
assert compareArrays(f1(1,3), [2,4,8])
output, updates = theano.scan(f_pow2, [],state, [],n_steps = n_steps, truncate_gradient
= -1, go_backwards = False)
my_f = theano.function([state,n_steps], output, updates = updates)
rng = numpy.random.RandomState(utt.fetch_seed())
state = rng.uniform()
steps = 5
numpy_values = numpy.array([ state*(2**(k+1)) for k in xrange(steps) ])
theano_values = my_f(state,steps)
cmp = numpy_values == theano_values
assert numpy.all(cmp)
# simple rnn, one input, one state, weights for each; input/state are
# vectors, weights are scalars
# simple rnn, one input, one state, weights for each; input/state
# are vectors, weights are scalars
def test_one_sequence_one_output_weights(self):
def f_rnn(u_t,x_tm1,W_in, W):
return u_t*W_in+x_tm1*W
u = theano.tensor.dvector()
x0 = theano.tensor.dscalar()
W_in = theano.tensor.dscalar()
W = theano.tensor.dscalar()
Y, updts = theano.scan(f_rnn, u,x0,[W_in,W])
f2 = theano.function([u,x0,W_in,W], Y, updates = updts)
v_u = numpy.array([1.,2.,3.,4.])
v_x0 = numpy.array(1)
v_out = numpy.array([1.1,1.3,1.6,2.])
assert compareArrays( f2(v_u,v_x0,.1,1), v_out )
output, updates = theano.scan(f_rnn, u,x0,[W_in,W], n_steps = 0, truncate_gradient =
-1, go_backwards = False)
f2 = theano.function([u,x0,W_in,W], output, updates = updates)
# get random initial values
rng = numpy.random.RandomState(utt.fetch_seed())
v_u = rng.uniform( size = (4,), low = -5., high = 5.)
v_x0 = rng.uniform()
W = rng.uniform()
W_in = rng.uniform()
# compute the output in numpy
v_out = numpy.zeros((4,))
v_out[0] = v_u[0]*W_in + v_x0 * W
for step in xrange(1,4):
v_out[step] = v_u[step]*W_in + v_out[step-1] * W
theano_values = f2(v_u,v_x0, W_in, W)
assert numpy.all(abs(theano_values - v_out) < 1e-5)
# simple rnn, one input, one state, weights for each; input/state are
# vectors, weights are scalars; using shared variables
# simple rnn, one input, one state, weights for each; input/state
# are vectors, weights are scalars; using shared variables
def test_one_sequence_one_output_weights_shared(self):
rng = numpy.random.RandomState(utt.fetch_seed())
u = theano.tensor.dvector()
x0 = theano.tensor.dscalar()
W_in = theano.shared(.1, name = 'w_in')
W = theano.shared(1., name ='w')
def f_rnn_shared(u_t,x_tm1, l_W_in, l_W):
return u_t*l_W_in+x_tm1*l_W
Y, updts = theano.scan(f_rnn_shared, u,x0,[W_in, W] )
f3 = theano.function([u,x0], Y, updates = updts)
v_u = numpy.array([1.,2.,3.,4.])
v_x0 = numpy.array(1.)
v_out = numpy.array([1.1,1.3,1.6,2.])
assert compareArrays(f3(v_u,v_x0),v_out)
W_in = theano.shared(rng.uniform(), name = 'w_in')
W = theano.shared(rng.uniform(), name ='w')
def f_rnn_shared(u_t,x_tm1, tmp_W_in, tmp_W):
return u_t*tmp_W_in+x_tm1*tmp_W
output, updates = theano.scan(f_rnn_shared, u,x0,[W_in, W], n_steps =0,
truncate_gradient= -1, go_backwards = False)
f3 = theano.function([u,x0], output, updates = updates)
# get random initial values
v_u = rng.uniform( size = (4,), low = -5., high = 5.)
v_x0 = rng.uniform()
# compute the output i numpy
v_out = numpy.zeros((4,))
v_out[0] = v_u[0]*W_in.value + v_x0*W.value
for step in xrange(1,4):
v_out[step] = v_u[step]*W_in.value + v_out[step-1]*W.value
theano_values = f3(v_u, v_x0)
assert numpy.all(abs(theano_values - v_out) < 1e-5)
# some rnn with multiple outputs and multiple inputs; other dimension
# instead of scalars/vectors
# some rnn with multiple outputs and multiple inputs; other
# dimension instead of scalars/vectors
def test_multiple_inputs_multiple_outputs(self):
W_in2 = theano.shared(numpy.array([1.,2.]), name='win2')
W = theano.shared(numpy.array([[2.,1.],[1.,1.]]), name='w')
W_out = theano.shared(numpy.array([.5,1.]), name = 'wout')
rng = numpy.random.RandomState(utt.fetch_seed())
vW_in2 = rng.uniform(size = (2,), low = -5.,high = 5.)
vW = rng.uniform(size = (2,2), low = -5.,high = 5.)
vWout = rng.uniform(size = (2,), low = -5.,high = 5.)
vW_in1 = rng.uniform(size = (2,2), low = -5.,high = 5.)
v_u1 = rng.uniform(size = (3,2), low = -5., high = 5.)
v_u2 = rng.uniform(size = (3,), low = -5.,high = 5.)
v_x0 = rng.uniform(size = (2,), low = -5.,high = 5.)
v_y0 = rng.uniform()
W_in2 = theano.shared(vW_in2, name='win2')
W = theano.shared(vW, name='w')
W_out = theano.shared(vWout, name = 'wout')
W_in1 = theano.tensor.dmatrix('win')
u1 = theano.tensor.dmatrix('u1')
u2 = theano.tensor.dvector('u2')
x0 = theano.tensor.dvector('x0')
y0 = theano.tensor.dscalar('y0')
def f_rnn_cmpl(u1_t, u2_t, x_tm1, y_tm1, W_in1):
return [theano.dot(u1_t,W_in1) + u2_t* W_in2 + \
theano.dot(x_tm1, W), theano.dot(x_tm1, W_out)]
Y, updts = theano.scan(f_rnn_cmpl,[u1,u2],[x0,y0],W_in1)
outputs, updates = theano.scan(f_rnn_cmpl,[u1,u2],[x0,y0],W_in1, n_steps = 0,
truncate_gradient = -1, go_backwards = False)
f4 = theano.function([u1,u2,x0,y0,W_in1], Y, updates = updts)
v_u1 = numpy.array([[1.,2.],[1.,2.],[1.,2.]])
v_u2 = numpy.array([1.,2.,3.])
v_x0 = numpy.array([0.,0.])
v_y0 = numpy.array(1)
v_Win1 = numpy.array([[1.,1.],[1.,1.]])
v_x = numpy.array([[4.,5.],[18.,16.],[58.,43.]])
v_y = numpy.array([0.,7.,25.])
(x,y) = f4( v_u1, v_u2, v_x0, v_y0, v_Win1)
assert compareArrays(x,v_x)
assert compareArrays(y,v_y)
f4 = theano.function([u1,u2,x0,y0,W_in1], outputs, updates = updates)
# compute the values in numpy
v_x = numpy.zeros((3,2))
v_y = numpy.zeros((3,))
v_x[0] = numpy.dot(v_u1[0],vW_in1) + v_u2[0]*vW_in2 + numpy.dot(v_x0,vW)
v_y[0] = numpy.dot(v_x0,vWout)
for i in xrange(1,3):
v_x[i] = numpy.dot(v_u1[i],vW_in1) + v_u2[i]*vW_in2 + numpy.dot(v_x[i-1],vW)
v_y[i] = numpy.dot(v_x[i-1], vWout)
(theano_x,theano_y) = f4( v_u1, v_u2, v_x0, v_y0, vW_in1)
assert numpy.all(abs(theano_x - v_x) < 1e-5)
assert numpy.all(abs(theano_y - v_y) < 1e-5)
......@@ -193,22 +227,39 @@ class T_Scan(unittest.TestCase):
# vectors, weights are scalars; using shared variables and past
# taps (sequences and outputs)
def test_using_taps_input_output(self):
rng = numpy.random.RandomState(utt.fetch_seed())
vW = rng.uniform()
vW_in = rng.uniform()
vu = rng.uniform(size=(4,), low = -5., high = 5.)
vx0 = rng.uniform(size=(2,), low = -5., high = 5.)
u = theano.tensor.dvector()
x0 = theano.tensor.dvector()
W_in = theano.shared(.1, name = 'w_in')
W = theano.shared(1., name ='w')
W_in = theano.shared(vW_in, name = 'w_in')
W = theano.shared(vW, name ='w')
def f_rnn_shared(u_tm2, x_tm1, x_tm2):
return u_tm2*W_in+x_tm1*W+x_tm2
Y, updates = theano.scan(f_rnn_shared, dict(input=u, taps=-2),
dict(initial = x0, taps = [-1,-2]), [])
f7 = theano.function([u,x0], Y, updates = updates)
v_u = numpy.asarray([1.,2.,3.,4.])
v_x0 = numpy.asarray([1.,2.])
out = numpy.asarray([3.1,5.3])
assert compareArrays( out, f7(v_u, v_x0))
outputs, updates = theano.scan(f_rnn_shared, dict(input=u, taps=-2),
dict(initial = x0, taps = [-1,-2]), [], n_steps = 0, truncate_gradient = -1,
go_backwards = False)
f7 = theano.function([u,x0], outputs, updates = updates)
theano_out = f7(vu,vx0)
# compute output in numpy
# a bit of explaining:
# due to the definition of sequences taps in scan, v_0[0] is actually v_0[-2],
# and v_0[1] is v_0[-1]. The values v_0[2] and v_0[3] do not get uesd ( because you
# do not use v_0[t] in scan) which might seem strange, but then again why not use
# v_0[t] instead of v_0[t-2] in a real application ??
# also vx0[0] corresponds to vx0[-2], vx0[1] to vx0[-1]
numpy_out = numpy.zeros((2,))
numpy_out[0] = vu[0]*vW_in + vx0[1]*vW + vx0[0]
numpy_out[1] = vu[1]*vW_in + numpy_out[0]*vW + vx0[1]
assert numpy.all(abs(numpy_out - theano_out) < 1e-5)
......@@ -216,192 +267,352 @@ class T_Scan(unittest.TestCase):
# vectors, weights are scalars; using shared variables and past
# taps (sequences and outputs) and future taps for sequences
def test_past_future_taps_shared(self):
rng = numpy.random.RandomState(utt.fetch_seed())
vW = rng.uniform()
vW_in = rng.uniform()
vu = rng.uniform(size=(6,), low = -5., high = 5.)
vx0 = rng.uniform(size=(2,), low = -5., high = 5.)
u = theano.tensor.dvector()
x0 = theano.tensor.dvector()
W_in = theano.shared(.1, name = 'w_in')
W = theano.shared(1., name ='w')
W_in = theano.shared(vW_in, name = 'w_in')
W = theano.shared(vW, name ='w')
def f_rnn_shared(u_tm2,u_tp2, x_tm1, x_tm2):
return (u_tm2+u_tp2)*W_in+x_tm1*W+x_tm2
Y,updts = theano.scan(f_rnn_shared, dict( input = u, taps=[-2,2]),\
dict(initial = x0, taps = [-1,-2]), [])
f8 = theano.function([u,x0], Y, updates = updts)
v_u = numpy.array([1.,2.,3.,4.,5.,6.])
v_x0 = numpy.array([1.,2.])
out = numpy.array([3.6, 6.4])
assert compareArrays( out, f8(v_u, v_x0) )
# simple rnn ; compute inplace
def test_inplace(self):
u = theano.tensor.dvector()
mu = theano.Param( u, mutable = True)
x0 = theano.tensor.dscalar()
W_in = theano.shared(.1)
W = theano.shared(1.)
def f_rnn_shared(u_t, x_tm1):
return u_t*W_in + x_tm1*W
Y, updts = theano.scan(f_rnn_shared, u, \
dict( initial = x0, inplace =u),mode='FAST_RUN' )
f9 = theano.function([mu,x0], Y , updates = updts)
v_u = numpy.array([1.,2.,3.])
v_x0 = numpy.array(1.)
out = f9(v_u, v_x0)
v_out = numpy.array([1.1,1.3,1.6])
assert (compareArrays(out, v_out))
assert (compareArrays(v_u, out))
output,updates = theano.scan(f_rnn_shared, dict( input = u, taps=[-2,2]),\
dict(initial = x0, taps = [-1,-2]), [], n_steps =0, truncate_gradient =-1,
go_backwards = False)
f8 = theano.function([u,x0], output, updates = updates)
theano_out = f8(vu,vx0)
# compute output in numpy
numpy_out = numpy.zeros(2)
# think of vu[0] as vu[-2], vu[4] as vu[2]
# and vx0[0] as vx0[-2], vx0[1] as vx0[-1]
numpy_out[0] = (vu[0]+vu[4])*vW_in + vx0[1]*vW + vx0[0]
numpy_out[1] = (vu[1]+vu[5])*vW_in + numpy_out[0]*vW + vx0[1]
assert numpy.all(abs(numpy_out - theano_out) < 1e-5)
# simple rnn ; compute inplace version 1
def test_inplace1(self):
    """Simple rnn computed inplace (version 1).

    Two outputs are declared inplace of the mutable inputs u2 and u1.
    Checks the numerical result against a numpy reference and that the
    results were actually written into the mutable input buffers.
    """
    rng = numpy.random.RandomState(utt.fetch_seed())
    # Draw ALL random values from the seeded rng; the original drew
    # vW/vW_in from the global numpy.random, which ignored
    # utt.fetch_seed() and made the test non-reproducible.
    vW = rng.uniform()
    vW_in = rng.uniform()
    vu0 = rng.uniform(size=(3,), low = -5., high = 5.)
    vu1 = rng.uniform(size=(3,), low = -5., high = 5.)
    vu2 = rng.uniform(size=(3,), low = -5., high = 5.)
    vx0 = rng.uniform()
    vx1 = rng.uniform()
    u0 = theano.tensor.dvector('u0')
    u1 = theano.tensor.dvector('u1')
    u2 = theano.tensor.dvector('u2')
    mu0 = theano.Param( u0, mutable = False)
    mu1 = theano.Param( u1, mutable = True)
    mu2 = theano.Param( u2, mutable = True)
    x0 = theano.tensor.dscalar('x0')
    x1 = theano.tensor.dscalar('y0')
    W_in = theano.shared(vW_in,'Win')
    W = theano.shared(vW,'W')
    mode = theano.compile.mode.get_mode(None).including('inplace')
    def f_rnn_shared(u0_t,u1_t, u2_t, x0_tm1,x1_tm1):
        return [u0_t*W_in + x0_tm1*W + u1_t*u2_t, u0_t*W_in + x1_tm1*W+ u1_t+u2_t ]
    outputs, updates = theano.scan(f_rnn_shared, [u0,u1,u2],
        [dict( initial = x0, inplace =u2), dict(initial = x1, inplace = u1)],
        [], n_steps = 0, truncate_gradient = -1, go_backwards = False, mode=mode )
    f9 = theano.function([mu0,mu1,mu2,x0,x1], outputs , updates = updates, mode = mode)
    # compute the expected output in numpy
    numpy_x0 = numpy.zeros((3,))
    numpy_x1 = numpy.zeros((3,))
    numpy_x0[0] = vu0[0] * vW_in + vx0 * vW + vu1[0]*vu2[0]
    numpy_x1[0] = vu0[0] * vW_in + vx1 * vW + vu1[0]+vu2[0]
    for i in xrange(1,3):
        numpy_x0[i] = vu0[i]* vW_in + numpy_x0[i-1]*vW + vu1[i]*vu2[i]
        numpy_x1[i] = vu0[i]* vW_in + numpy_x1[i-1]*vW + vu1[i]+vu2[i]
    # theano computes inplace (clobbering vu1/vu2), so run it only after
    # the numpy reference has been computed
    (theano_x0, theano_x1) = f9(vu0,vu1,vu2,vx0,vx1)
    # numerical correctness
    assert numpy.all( abs(theano_x0 - numpy_x0) < 1e-5)
    assert numpy.all( abs(theano_x1 - numpy_x1) < 1e-5)
    # the results must live in the mutable input buffers (inplace worked)
    assert numpy.all( theano_x0 == vu2)
    assert numpy.all( theano_x1 == vu1)
# simple rnn ; compute inplace version 2
def test_inplace2(self):
    """Simple rnn computed inplace (version 2), with sequence taps.

    u2 is used with past/future taps, so the output declared inplace of
    u2 may NOT actually be computed inplace (its old values are still
    needed), while the output declared inplace of u1 may overwrite it.
    """
    rng = numpy.random.RandomState(utt.fetch_seed())
    # Draw ALL random values from the seeded rng; the original drew
    # vW/vW_in from the global numpy.random, which ignored
    # utt.fetch_seed() and made the test non-reproducible.
    vW = rng.uniform()
    vW_in = rng.uniform()
    vu0 = rng.uniform(size=(3,), low = -5., high = 5.)
    vu1 = rng.uniform(size=(4,), low = -5., high = 5.)
    vu2 = rng.uniform(size=(5,), low = -5., high = 5.)
    vx0 = rng.uniform()
    vx1 = rng.uniform()
    u0 = theano.tensor.dvector('u0')
    u1 = theano.tensor.dvector('u1')
    u2 = theano.tensor.dvector('u2')
    mu0 = theano.Param( u0, mutable = True)
    mu1 = theano.Param( u1, mutable = True)
    mu2 = theano.Param( u2, mutable = True)
    x0 = theano.tensor.dscalar('x0')
    x1 = theano.tensor.dscalar('y0')
    W_in = theano.shared(vW_in,'Win')
    W = theano.shared(vW,'W')
    mode = theano.compile.mode.get_mode(None).including('inplace')
    def f_rnn_shared(u0_t,u1_t,u1_tp1, u2_tm1,u2_t,u2_tp1, x0_tm1,x1_tm1):
        return [u0_t*W_in + x0_tm1*W + u1_t*u1_tp1, \
                u0_t*W_in + x1_tm1*W+ u2_tm1+u2_t+u2_tp1 ]
    # truncate_gradient was '01' (a Python 2 octal literal equal to 1) in
    # the original -- a typo for -1 (no truncation), which is what every
    # sibling test passes.
    outputs, updates = theano.scan(f_rnn_shared,
        [u0,dict(input = u1, taps = [0,1]),dict( input = u2, taps= [-1,0,+1])],
        [dict( initial = x0, inplace =u2), dict(initial = x1, inplace = u1)],
        [], n_steps = 0, truncate_gradient = -1, go_backwards = False, mode=mode )
    f9 = theano.function([mu0,mu1,mu2,x0,x1], outputs , updates = updates, mode = mode)
    # compute the expected output in numpy
    numpy_x0 = numpy.zeros((3,))
    numpy_x1 = numpy.zeros((3,))
    numpy_x0[0] = vu0[0] * vW_in + vx0 * vW + vu1[0]*vu1[1]
    numpy_x1[0] = vu0[0] * vW_in + vx1 * vW + vu2[0]+vu2[1]+vu2[2]
    for i in xrange(1,3):
        numpy_x0[i] = vu0[i]* vW_in + numpy_x0[i-1]*vW + vu1[i]*vu1[i+1]
        numpy_x1[i] = vu0[i]* vW_in + numpy_x1[i-1]*vW + vu2[i]+vu2[i+1]+vu2[i+2]
    # theano computes inplace, so run it only after the numpy reference
    (theano_x0, theano_x1) = f9(vu0,vu1,vu2,vx0,vx1)
    # numerical correctness
    assert numpy.all( abs(theano_x0 - numpy_x0) < 1e-5)
    assert numpy.all( abs(theano_x1 - numpy_x1) < 1e-5)
    # x0 must NOT alias u2 (past values of u2 are consumed, so inplace on
    # it is forbidden), while x1 may legitimately overwrite u1
    assert not numpy.all( theano_x0 == vu2[1:4])
    assert numpy.all( theano_x1 == vu1[0:3])
# Shared variable with updates
def test_shared_arguments_with_updates(self):
W1_vals = numpy.random.rand(20,30)
W2_vals = numpy.random.rand(30,20)
u1_vals = numpy.random.rand(3,20)
u2_vals = numpy.random.rand(3,30)
y0_vals = numpy.random.rand(3,20)
y1_vals = numpy.random.rand(20)
y2_vals = numpy.random.rand(30)
W1 = theano.shared(W1_vals,'W1')
W2 = theano.shared(W2_vals,'W2')
u1 = theano.shared(u1_vals,'u1')
y1 = theano.shared(y1_vals,'y1')
rng = numpy.random.RandomState(utt.fetch_seed())
vW1 = rng.rand(20,30)
vW2 = rng.rand(30,20)
vu1 = rng.rand(3,20)
vu2 = rng.rand(3,30)
vy0 = rng.rand(3,20)
vy1 = rng.rand(20)
vy2 = rng.rand(30)
W1 = theano.shared(vW1,'W1')
W2 = theano.shared(vW2,'W2')
u1 = theano.shared(vu1,'u1')
y1 = theano.shared(vy1,'y1')
def f(u1_t, u2_t, y0_tm3, y0_tm2, y0_tm1, y1_tm1):
y0_t = theano.dot(theano.dot(u1_t,W1),W2) + 0.1*y0_tm1 + \
0.33*y0_tm2 + 0.17*y0_tm3
0.33*y0_tm2 + 0.17*y0_tm3
y1_t = theano.dot(u2_t, W2) + y1_tm1
y2_t = theano.dot(u1_t, W1)
nwW1 = W1 + .1
nwW2 = W2 + .05
# return outputs followed by a list of updates
return ([y0_t, y1_t, y2_t], [( W1,nwW1), (W2, nwW2)])
u2 = theano.tensor.matrix('u2')
y0 = theano.tensor.matrix('y0')
Y,upds = theano.scan(f, [u1,u2], [ dict(initial = y0, taps = [-3,-2,-1]),y1, None])
f = theano.function([u2,y0], Y, updates = upds)
vls = f(u2_vals, y0_vals)
outputs,updates = theano.scan(f, [u1,u2], [ dict(initial = y0, taps = [-3,-2,-1]),y1,
None], [], n_steps = 0, go_backwards = False, truncate_gradient = -1)
f10 = theano.function([u2,y0], outputs, updates = updates)
theano_y0,theano_y1,theano_y2 = f10(vu2, vy0)
# do things in numpy
v_y0 = numpy.zeros((6,20))
v_y1 = numpy.zeros((4,20))
v_y2 = numpy.zeros((3,30))
v_y0[:3] = y0_vals
v_y1[0] = y1_vals
vW1 = W1_vals.copy()
vW2 = W2_vals.copy()
numpy_y0 = numpy.zeros((6,20))
numpy_y1 = numpy.zeros((4,20))
numpy_y2 = numpy.zeros((3,30))
numpy_y0[:3] = vy0
numpy_y1[0] = vy1
numpy_W1 = vW1.copy()
numpy_W2 = vW2.copy()
for idx in xrange(3):
v_y0[idx+3] = numpy.dot( numpy.dot(u1_vals[idx,:], vW1), vW2) + \
0.1*v_y0[idx+2] + 0.33*v_y0[idx+1] + 0.17*v_y0[idx]
v_y1[idx+1] = numpy.dot( u2_vals[idx,:], vW2) + v_y1[idx]
v_y2[idx] = numpy.dot( u1_vals[idx,:], vW1)
vW1 = vW1 + .1
vW2 = vW2 + .05
numpy_y0[idx+3] = numpy.dot( numpy.dot(vu1[idx,:], numpy_W1), numpy_W2) + \
0.1*numpy_y0[idx+2] + 0.33*numpy_y0[idx+1] + 0.17*numpy_y0[idx]
numpy_y1[idx+1] = numpy.dot( vu2[idx,:], numpy_W2) + numpy_y1[idx]
numpy_y2[idx] = numpy.dot( vu1[idx,:], numpy_W1)
numpy_W1 = numpy_W1 + .1
numpy_W2 = numpy_W2 + .05
assert numpy.all( abs(theano_y0 - numpy_y0[3:]) < 1e-5)
assert numpy.all( abs(theano_y1 - numpy_y1[1:]) < 1e-5)
assert numpy.all( abs(theano_y2 - numpy_y2 ) < 1e-5)
assert numpy.all( abs(W1.value - numpy_W1 ) < 1e-5)
assert numpy.all( abs(W2.value - numpy_W2 ) < 1e-5)
def test_simple_shared_random(self):
theano_rng = theano.tensor.shared_randomstreams.RandomStreams(utt.fetch_seed())
assert compareArrays(vls[0], v_y0[3:])
assert compareArrays(vls[1], v_y1[1:])
assert compareArrays(vls[2], v_y2)
assert compareArrays(vW1, W1.value)
assert compareArrays(vW2, W2.value)
values, updates = theano.scan(lambda : theano_rng.uniform((2,),-1,1), [],[],[],n_steps
= 5, truncate_gradient = -1, go_backwards = False)
my_f = theano.function([], values, updates = updates )
rng_seed = numpy.random.RandomState(utt.fetch_seed()).randint(2**30)
rng = numpy.random.RandomState(int(rng_seed)) #int() is for 32bit
numpy_v = numpy.zeros((10,2))
for i in xrange(10):
numpy_v[i] = rng.uniform(-1,1,size = (2,))
theano_v = my_f()
assert numpy.all( abs(theano_v - numpy_v [:5,:]) < 1e-5)
theano_v = my_f()
assert numpy.all(abs(theano_v - numpy_v[5:,:]) < 1e-5)
def test_gibbs_chain(self):
W_vals = numpy.random.rand(20,30) -.5
vis_val = numpy.random.binomial(1,0.5, size=(3,20))
bvis = numpy.random.rand(20) -.5
bhid = numpy.random.rand(30) -.5
tW = theano.shared(W_vals)
tbh = theano.shared(bhid)
tbv = theano.shared(bvis)
vis = theano.tensor.matrix()
trng = theano.tensor.shared_randomstreams.RandomStreams(123)
def f(vsample):
hmean = theano.tensor.nnet.sigmoid(theano.dot(vsample,tW)+ tbh)
hsample = trng.binomial(hmean.shape,1,hmean)
vmean = theano.tensor.nnet.sigmoid(theano.dot(hsample,tW.T)+ tbv)
return trng.binomial(vsample.shape,1,vsample)
v_vals, updts = theano.scan(f, [], [vis],[], n_steps = 10)
my_f = theano.function([vis], v_vals[-1], updates = updts)
rng = numpy.random.RandomState(utt.fetch_seed())
v_W = numpy.array(rng.rand(20,30) -.5, dtype = 'float32')
v_vsample = numpy.array(rng.binomial(1,0.5, size=(3,20), ), dtype = 'float32')
v_bvis = numpy.array(rng.rand(20) -.5, dtype='float32')
v_bhid = numpy.array(rng.rand(30) -.5, dtype='float32')
def numpy_implementation(vsample):
rng = numpy.random.RandomState(123)
b1 = numpy.random.RandomState(rng.randint(2**30))
b2 = numpy.random.RandomState(rng.randint(2**30))
W = theano.shared(v_W)
bhid = theano.shared(v_bhid)
bvis = theano.shared(v_bvis)
vsample = theano.tensor.matrix(dtype='float32')
trng = theano.tensor.shared_randomstreams.RandomStreams(utt.fetch_seed())
def f(vsample_tm1):
hmean_t = theano.tensor.nnet.sigmoid(theano.dot(vsample_tm1,W)+ bhid)
hsample_t = theano.tensor.cast(trng.binomial(hmean_t.shape,1,hmean_t),dtype='float32')
vmean_t = theano.tensor.nnet.sigmoid(theano.dot(hsample_t,W.T)+ bvis)
return theano.tensor.cast(trng.binomial(vmean_t.shape,1,vmean_t), dtype='float32')
theano_vsamples, updates = theano.scan(f, [], vsample,[], n_steps = 10,
truncate_gradient=-1, go_backwards = False)
my_f = theano.function([vsample], theano_vsamples[-1], updates = updates)
_rng = numpy.random.RandomState(utt.fetch_seed())
rng_seed = _rng.randint(2**30)
nrng1 = numpy.random.RandomState(int(rng_seed)) # int() is for 32bit
rng_seed = _rng.randint(2**30)
nrng2 = numpy.random.RandomState(int(rng_seed)) # int() is for 32bit
def numpy_implementation(vsample):
for idx in range(10):
hmean = 1./(1. + numpy.exp(-(numpy.dot(vsample,W_vals) + bhid)))
hsample = b1.binomial(1,hmean, size = hmean.shape)
vmean = 1./(1. + numpy.exp(-(numpy.dot(hsample,W_vals.T) + bvis)))
vsample = b2.binomial(1,vsample, size = vsample.shape)
hmean = 1./(1. + numpy.exp(-(numpy.dot(vsample,v_W) + v_bhid)))
hsample = numpy.array(nrng1.binomial(1,hmean, size = hmean.shape), dtype='float32')
vmean = 1./(1. + numpy.exp(-(numpy.dot(hsample,v_W.T) + v_bvis)))
vsample = numpy.array(nrng2.binomial(1,vmean, size = vmean.shape),dtype='float32')
return vsample
t_res = my_f(vis_val)
n_res = numpy_implementation(vis_val)
assert (compareArrays(t_res, n_res))
t_result = my_f(v_vsample)
n_result = numpy_implementation(v_vsample)
assert numpy.all( abs(t_result - n_result) < 1e-5)
def test_only_shared_no_input_no_output(self):
s = theano.shared(1)
def f_pow2():
return {s: 2*s}
rng = numpy.random.RandomState(utt.fetch_seed())
v_state = rng.uniform()
state = theano.shared(v_state)
def f_2():
return {state: 2*state}
n_steps = theano.tensor.dscalar()
Y, updts = theano.scan(f_pow2, [],[], [],n_steps = n_steps)
f1 = theano.function([n_steps], Y, updates = updts)
f1(3)
assert compareArrays(s.value, 8)
'''
# test gradient simple network
def test_10(self):
pass
TO TEST:
- test gradient (one output)
- test gradient (multiple outputs)
- test gradient (go_bacwards)
- test gradient (multiple outputs / some uncomputable )
- test gradient (truncate_gradient)
- test_gradient (taps past/future)
- optimization !?
'''
output, updates = theano.scan(f_2,[],[],[],n_steps = n_steps, truncate_gradient = -1,
go_backwards = False)
this_f = theano.function([n_steps], output, updates = updates)
n_steps = 3
this_f(n_steps)
numpy_state = v_state* (2**(n_steps))
assert state.value == numpy_state
def test_map_functionality(self):
def f_rnn(u_t):
return u_t + 3
u = theano.tensor.dvector()
Y, updts = theano.scan(f_rnn, u, [None])
f2 = theano.function([u], Y, updates = updts)
v_u = numpy.array([1.,2.,3.,4.])
assert compareArrays(f2(v_u), v_u+3)
outputs, updates = theano.scan(f_rnn, u,[],[], n_steps =0 , truncate_gradient = -1,
go_backwards = False)
f2 = theano.function([u], outputs, updates = updates)
rng = numpy.random.RandomState(utt.fetch_seed())
v_u = rng.uniform(size=(5,), low = -5., high = 5.)
numpy_result = v_u + 3
theano_result = f2(v_u)
assert numpy.all(theano_result == numpy_result)
def test_map(self):
from theano.scan import map as T_map
v = theano.tensor.vector()
abs_expr,abs_updates = T_map(lambda x: abs(x), [v])
abser = theano.function([v],abs_expr,updates = abs_updates)
abs_expr,abs_updates = theano.map(lambda x: abs(x), v,[],n_steps =0,
truncate_gradient = -1, go_backwards = False)
f = theano.function([v],abs_expr,updates = abs_updates)
rng = numpy.random.RandomState(utt.fetch_seed())
vals = rng.uniform(size=(10,), low = -5., high = 5.)
abs_vals = abs(vals)
theano_vals = f(vals)
assert numpy.all(abs_vals == theano_vals)
def test_backwards(self):
    """Scan with go_backwards=True: the rnn consumes the input sequence
    from its last element to its first, and the result is compared
    against a plain numpy loop doing the same."""
    def step(u_t, x_tm1, W_in, W):
        return u_t * W_in + x_tm1 * W

    u = theano.tensor.dvector()
    x0 = theano.tensor.dscalar()
    W_in = theano.tensor.dscalar()
    W = theano.tensor.dscalar()
    output, updates = theano.scan(step, u, x0, [W_in, W], n_steps = 0,
        truncate_gradient = -1, go_backwards = True)
    f2 = theano.function([u, x0, W_in, W], output, updates = updates)

    # random initial values drawn from the seeded generator
    rng = numpy.random.RandomState(utt.fetch_seed())
    u_vals = rng.uniform(size = (4,), low = -5., high = 5.)
    x0_val = rng.uniform()
    w_val = rng.uniform()
    w_in_val = rng.uniform()

    # numpy reference: walk the sequence back to front
    expected = numpy.zeros((4,))
    expected[0] = u_vals[3] * w_in_val + x0_val * w_val
    for t in xrange(1, 4):
        expected[t] = u_vals[3 - t] * w_in_val + expected[t - 1] * w_val
    theano_values = f2(u_vals, x0_val, w_in_val, w_val)
    assert numpy.all(abs(theano_values - expected) < 1e-5)
'''
TO TEST:
- test gradient (one output)
- test gradient (multiple outputs)
- test gradient (go_bacwards)
- test gradient (multiple outputs / some uncomputable )
- test gradient (truncate_gradient)
- test_gradient (taps past/future)
- optimization !?
'''
assert compareArrays( abser(numpy.array([1.,-1])), [1.,1.])
if __name__ == '__main__':
unittest.main()
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论