Commit 618a8e25 authored by Pascal Lamblin

merge

import sys
sys.path.insert(0, '..')
import theano
from theano import tensor as T
from theano.tensor import nnet
from theano.compile import module
from theano import printing, pprint
from theano import compile
import numpy as N
class LogisticRegressionN(module.FancyModule):
class InstanceType(module.FancyModuleInstance):
def initialize(self, n_in, n_out, seed = None):
#self.component is the LogisticRegressionTemplate instance that built this instance.
rng = N.random.RandomState(seed)
self.w = rng.randn(n_in, n_out)
self.b = rng.randn(n_out)
self.lr = 0.01
self.__hide__ = ['params']
def __eq__(self, other):
if not isinstance(other.component, LogisticRegressionN) and not isinstance(other.component, LogisticRegression2):
raise NotImplementedError
#we compare the members.
if (N.abs(self.w-other.w)<1e-8).all() and (N.abs(self.b-other.b)<1e-8).all() and self.lr == other.lr:
return True
return False
def __hash__(self):
raise NotImplementedError
def __init__(self, x = None, targ = None):
super(LogisticRegressionN, self).__init__() #boilerplate
self.x = x if x is not None else T.matrix()
self.targ = targ if targ is not None else T.lvector()
self.w = module.Member(T.matrix()) #automatically names
self.b = module.Member(T.vector()) #automatically names
self.lr = module.Member(T.dscalar()) #provides an external interface to change it
#and makes it an implicit input to any Method you build.
self.params = [self.w, self.b]
xent, y = nnet.crossentropy_softmax_1hot(
T.dot(self.x, self.w) + self.b, self.targ)
xent = T.sum(xent)
self.y = y
self.xent = xent
gparams = T.grad(xent, self.params)
self.update = module.Method([self.x, self.targ], xent,
updates = dict((p, p - self.lr * g) for p, g in zip(self.params, gparams)))
self.apply = module.Method([self.x], T.argmax(T.dot(self.x, self.w) + self.b, axis=1))
class LogisticRegression2(module.FancyModule):
class InstanceType(module.FancyModuleInstance):
def initialize(self, n_in, seed = 1827):
#self.component is the LogisticRegressionTemplate instance that built this instance.
rng = N.random.RandomState(seed)
self.w = rng.randn(n_in, 1)
self.b = rng.randn(1)
self.lr = 0.01
self.__hide__ = ['params']
def __eq__(self, other):
if not isinstance(other.component, LogisticRegressionN) and not isinstance(other.component, LogisticRegression2):
raise NotImplementedError
#we compare the members.
if (N.abs(self.w-other.w)<1e-8).all() and (N.abs(self.b-other.b)<1e-8).all() and self.lr == other.lr:
return True
return False
def __hash__(self):
raise NotImplementedError
def __init__(self, x = None, targ = None):
super(LogisticRegression2, self).__init__() #boilerplate
self.x = x if x is not None else T.matrix()
self.targ = targ if targ is not None else T.lcol()
self.w = module.Member(T.dmatrix()) #automatically names
self.b = module.Member(T.dvector()) #automatically names
self.lr = module.Member(T.dscalar()) #provides an external interface to change it
#and makes it an implicit input to any Method you build.
self.params = [self.w, self.b]
y = nnet.sigmoid(T.dot(self.x, self.w))
xent_elem = -self.targ * T.log(y) - (1.0 - self.targ) * T.log(1.0 - y)
xent = T.sum(xent_elem)
self.y = y
self.xent_elem = xent_elem
self.xent = xent
gparams = T.grad(xent, self.params)
self.update = module.Method([self.x, self.targ], xent,
updates = dict((p, p - self.lr * g) for p, g in zip(self.params, gparams)))
self.apply = module.Method([self.x], T.argmax(T.dot(self.x, self.w) + self.b, axis=1))
def main():
pprint.assign(nnet.crossentropy_softmax_1hot_with_bias_dx, printing.FunctionPrinter('xsoftmaxdx'))
pprint.assign(nnet.crossentropy_softmax_argmax_1hot_with_bias, printing.FunctionPrinter('nll', 'softmax', 'argmax'))
if 1:
lrc = LogisticRegressionN()
print '================'
print lrc.update.pretty()
print '================'
print lrc.update.pretty(mode = theano.Mode('py', 'fast_run'))
print '================'
# print lrc.update.pretty(mode = compile.FAST_RUN.excluding('inplace'))
# print '================'
# sys.exit(0)
lr = lrc.make(10, 2, mode=theano.Mode('c|py', 'fast_run'))
#lr = lrc.make(10, 2, mode=compile.FAST_RUN.excluding('fast_run'))
#lr = lrc.make(10, 2, mode=theano.Mode('py', 'merge')) #'FAST_RUN')
data_x = N.random.randn(5, 10)
data_y = (N.random.randn(5) > 0)
for i in xrange(10000):
lr.lr = 0.02
xe = lr.update(data_x, data_y)
if i % 100 == 0:
print i, xe
print
print 'TRAINED MODEL:'
print lr
if 0:
lrc = LogisticRegression2()
lr = lrc.make(10, mode=theano.Mode('c|py', 'merge')) #'FAST_RUN')
data_x = N.random.randn(5, 10)
data_y = (N.random.randn(5, 1) > 0)
for i in xrange(10000):
xe = lr.update(data_x, data_y)
if i % 100 == 0:
print i, xe
print
print 'TRAINED MODEL:'
print lr
if __name__ == '__main__':
main()
#!/usr/bin/env python
#
# UNIT TEST
#
import unittest
import numpy
from theano import gof
from theano.gradient import *
from theano import gradient
import theano
import sys
from theano import tensor as T
from theano.tensor import nnet
from theano.compile import module
from theano import printing, pprint
from theano import compile
import numpy as N
class test_logistic_regression_example(unittest.TestCase):
def test_example_main(self):
"""Test that the file execute without trouble"""
import os
sys.path.append(os.path.realpath(".."))
import logistic_regression
logistic_regression.main()
def test_example_moduleN(self):
"""Test that the LogisticRegressionN module execute the same with different mode"""
import os
sys.path.append(os.path.realpath(".."))
import logistic_regression
pprint.assign(nnet.crossentropy_softmax_1hot_with_bias_dx, printing.FunctionPrinter('xsoftmaxdx'))
pprint.assign(nnet.crossentropy_softmax_argmax_1hot_with_bias, printing.FunctionPrinter('nll', 'softmax', 'argmax'))
lrc = logistic_regression.LogisticRegressionN()
lr0 = lrc.make(10, 2, seed=1827)
lr1 = lrc.make(10, 2, mode=theano.Mode('c|py', 'fast_run'), seed=1827)
lr2 = lrc.make(10, 2, mode=theano.Mode('py', 'fast_run'), seed=1827)
lr3 = lrc.make(10, 2, mode=theano.Mode('py', 'merge'), seed=1827) #'FAST_RUN')
lr4 = lrc.make(10, 2, mode=compile.FAST_RUN.excluding('fast_run'), seed=1827)
#FAST_RUN, FAST_COMPILE,
data_x = N.random.randn(5, 10)
data_y = (N.random.randn(5) > 0)
def train(lr):
for i in xrange(1000):
lr.lr = 0.02
xe = lr.update(data_x, data_y)
train(lr0)
train(lr1)
train(lr2)
train(lr3)
train(lr4)
assert lr0==lr1
assert lr0==lr2
assert lr0==lr3
assert lr0==lr4
def test_example_module2(self):
"""Test that the LogisticRegression2 module execute the same with different mode"""
import os
sys.path.append(os.path.realpath(".."))
import logistic_regression
lrc = logistic_regression.LogisticRegression2() #TODO: test 2==N
lr0 = lrc.make(10, seed=1827)
lr1 = lrc.make(10, mode=theano.Mode('c|py', 'fast_run'), seed=1827)
lr2 = lrc.make(10, mode=theano.Mode('py', 'fast_run'), seed=1827)
lr3 = lrc.make(10, mode=theano.Mode('py', 'merge'), seed=1827) #'FAST_RUN'
lr4 = lrc.make(10, mode=compile.FAST_RUN.excluding('fast_run'), seed=1827)
#FAST_RUN, FAST_COMPILE,
data_x = N.random.randn(5, 10)
data_y = (N.random.randn(5) > 0)
data_y = data_y.reshape((data_y.shape[0],1))#needs to be a column
def train(lr):
for i in xrange(1000):
lr.lr = 0.02
xe = lr.update(data_x, data_y)
train(lr0)
train(lr1)
train(lr2)
train(lr3)
train(lr4)
assert lr0==lr1
assert lr0==lr2
assert lr0==lr3
assert lr0==lr4
# self.fail("NotImplementedError")
if __name__ == '__main__':
from theano.tests import main
main(__file__)
import unittest
import theano
import numpy as N
from theano import tensor as T
from theano.tensor import nnet as NN
from theano.compile import module as M
class Blah(M.ModuleInstance):
# self.component #refers to the Module
# def __init__(self, input = None, target = None, regularize = True):
# super(Blah, self)
def initialize(self,input_size = None, target_size = None, seed = 1827,
**init):
if input_size and target_size:
# initialize w and b in a special way using input_size and target_size
sz = (input_size, target_size)
rng = N.random.RandomState(seed)
self.w = rng.uniform(size = sz, low = -0.5, high = 0.5)
self.b = N.zeros(target_size)
self.stepsize = 0.01
#we call default_initialize afterwards, as we want the passed parameters to supersede the default values.
M.default_initialize(self,**init)
def __eq__(self, other):
if not isinstance(other.component, SoftmaxXERegression1) and not isinstance(other.component, SoftmaxXERegression2):
raise NotImplementedError
#we compare the members.
if (self.w==other.w).all() and (self.b==other.b).all() and self.stepsize == other.stepsize:
return True
return False
def __hash__(self):
raise NotImplementedError
def fit(self, train, test):
pass
class RegressionLayer1(M.Module):
InstanceType=Blah
def __init__(self, input = None, target = None, regularize = True):
super(RegressionLayer1, self).__init__() #boilerplate
# MODEL CONFIGURATION
self.regularize = regularize
# ACQUIRE/MAKE INPUT AND TARGET
if not input:
input = T.matrix('input')
if not target:
target = T.matrix('target')
# HYPER-PARAMETERS
self.stepsize = M.Member(T.scalar()) # a stepsize for gradient descent
# PARAMETERS
self.w = M.Member(T.matrix()) #the linear transform to apply to our input points
self.b = M.Member(T.vector()) #a vector of biases, which make our transform affine instead of linear
# REGRESSION MODEL
self.activation = T.dot(input, self.w) + self.b
self.prediction = self.build_prediction()
# CLASSIFICATION COST
self.classification_cost = self.build_classification_cost(target)
# REGULARIZATION COST
self.regularization = self.build_regularization()
# TOTAL COST
self.cost = self.classification_cost
if self.regularize:
self.cost = self.cost + self.regularization
# GET THE GRADIENTS NECESSARY TO FIT OUR PARAMETERS
self.grad_w, self.grad_b = T.grad(self.cost, [self.w, self.b])
# INTERFACE METHODS
self.update = M.Method([input, target],
self.cost,
w = self.w - self.stepsize * self.grad_w,
b = self.b - self.stepsize * self.grad_b)
self.apply = M.Method(input, self.prediction)
def params(self):
return self.w, self.b
def build_regularization(self):
return T.zero() # no regularization!
class RegressionLayer2(M.Module):
def __init__(self, input = None, target = None, regularize = True):
super(RegressionLayer2, self).__init__() #boilerplate
# MODEL CONFIGURATION
self.regularize = regularize
# ACQUIRE/MAKE INPUT AND TARGET
if not input:
input = T.matrix('input')
if not target:
target = T.matrix('target')
# HYPER-PARAMETERS
self.stepsize = M.Member(T.scalar()) # a stepsize for gradient descent
# PARAMETERS
self.w = M.Member(T.matrix()) #the linear transform to apply to our input points
self.b = M.Member(T.vector()) #a vector of biases, which make our transform affine instead of linear
# REGRESSION MODEL
self.activation = T.dot(input, self.w) + self.b
self.prediction = self.build_prediction()
# CLASSIFICATION COST
self.classification_cost = self.build_classification_cost(target)
# REGULARIZATION COST
self.regularization = self.build_regularization()
# TOTAL COST
self.cost = self.classification_cost
if self.regularize:
self.cost = self.cost + self.regularization
# GET THE GRADIENTS NECESSARY TO FIT OUR PARAMETERS
self.grad_w, self.grad_b = T.grad(self.cost, [self.w, self.b])
# INTERFACE METHODS
self.update = M.Method([input, target],
self.cost,
w = self.w - self.stepsize * self.grad_w,
b = self.b - self.stepsize * self.grad_b)
self.apply = M.Method(input, self.prediction)
def params(self):
return self.w, self.b
def _instance_initialize(self, obj, input_size = None, target_size = None,
seed = 1827, **init):
# obj is an "instance" of this module holding values for each member and
# functions for each method
if input_size and target_size:
# initialize w and b in a special way using input_size and target_size
sz = (input_size, target_size)
rng = N.random.RandomState(seed)
obj.w = rng.uniform(size = sz, low = -0.5, high = 0.5)
obj.b = N.zeros(target_size)
obj.stepsize = 0.01
# here we call the default_initialize method, which takes all the name: value
# pairs in init and sets the property with that name to the provided value
# this covers setting stepsize, l2_coef; w and b can be set that way too
# we call it afterwards, as we want the passed parameters to supersede the default values.
M.default_initialize(obj,**init)
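# A hypothetical make() call illustrating the flow above (a sketch mirroring
# the test further down, not part of the original code):
#   inst = SoftmaxXERegression2().make(input_size = 10, target_size = 1,
#                                      stepsize = 0.1)
# w and b are initialized from input_size/target_size, then
# default_initialize overrides the default stepsize of 0.01 with 0.1.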
def build_regularization(self):
return T.zero() # no regularization!
class SoftmaxXERegression1(RegressionLayer1):
""" XE mean cross entropy"""
def build_prediction(self):
return NN.softmax(self.activation)
def build_classification_cost(self, target):
#self.classification_cost_matrix = target * T.log(self.prediction) + (1 - target) * T.log(1 - self.prediction)
self.classification_cost_matrix = (target - self.prediction)**2
self.classification_costs = -T.sum(self.classification_cost_matrix, axis=1)
return T.sum(self.classification_costs)
def build_regularization(self):
self.l2_coef = M.Member(T.scalar()) # we can add a hyper parameter if we need to
return self.l2_coef * T.sum(self.w * self.w)
class SoftmaxXERegression2(RegressionLayer2):
""" XE mean cross entropy"""
def build_prediction(self):
return NN.softmax(self.activation)
def build_classification_cost(self, target):
#self.classification_cost_matrix = target * T.log(self.prediction) + (1 - target) * T.log(1 - self.prediction)
self.classification_cost_matrix = (target - self.prediction)**2
self.classification_costs = -T.sum(self.classification_cost_matrix, axis=1)
return T.sum(self.classification_costs)
def build_regularization(self):
self.l2_coef = M.Member(T.scalar()) # we can add a hyper parameter if we need to
return self.l2_coef * T.sum(self.w * self.w)
class T_test_wiki_module(unittest.TestCase):
def test_Module_basic_example1(self):
n, c = T.scalars('nc')
inc = theano.function([n, ((c, c + n), 0)], [])
dec = theano.function([n, ((c, c - n), inc.container[c])], []) # we need to pass inc's container in order to share
plus10 = theano.function([(c, inc.container[c])], c + 10)
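# Note: in this (old) theano.function interface, ((c, c + n), 0) declares c
# as an implicit state input with initial value 0 that is updated to c + n
# on each call; passing inc.container[c] instead of a value makes the other
# functions share inc's storage for c, as the asserts below verify.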
assert inc[c] == 0
inc(2)
assert inc[c] == 2 and dec[c] == inc[c]
dec(3)
assert inc[c] == -1 and dec[c] == inc[c]
assert plus10() == 9
def test_Module_basic_example2(self):
m = M.Module()
n = T.scalar('n')
m.c = M.Member(T.scalar()) # state variables must be wrapped with ModuleMember
m.inc = M.Method(n, [], c = m.c + n) # m.c <= m.c + n
m.dec = M.Method(n, [], c = m.c - n) # m.c <= m.c - n
m.dec = M.Method(n, [], updates = {m.c: m.c - n})
#m.dec = M.Method(n, [], updates = {c: m.c - n})#global c doesn't exist
#m.dec = M.Method(n, [], m.c = m.c - n) #python doesn't support this syntax
m.plus10 = M.Method([], m.c + 10) # m.c is always accessible since it is a member of this class
inst = m.make(c = 0) # here, we make an "instance" of the module with c initialized to 0
assert inst.c == 0
inst.inc(2)
assert inst.c == 2
inst.dec(3)
assert inst.c == -1
assert inst.plus10() == 9
inst = m.make(c = 5) # here, we make an "instance" of the module with c initialized to 5
assert inst.c == 5
inst.inc(2)
assert inst.c == 7
inst.dec(3)
assert inst.c == 4
assert inst.plus10() == 14
def test_Module_nesting_example1(self):
def make_incdec_function():
n, c = T.scalars('nc')
inc = theano.function([n, ((c, c + n), 0)], [])
dec = theano.function([n, ((c, c - n), inc.container[c])], [])
return inc,dec
inc1, dec1 = make_incdec_function()
inc2, dec2 = make_incdec_function()
a, b = T.scalars('ab')
sum = theano.function([(a, inc1.container['c']), (b, inc2.container['c'])], a + b)
inc1(2)
dec1(4)
inc2(6)
assert inc1['c'] == -2 and inc2['c'] == 6
assert sum() == 4 # -2 + 6
def test_Module_nesting_example2(self):
def make_incdec_module():
m = M.Module()
n = T.scalar('n')
m.c = M.Member(T.scalar()) # state variables must be wrapped with ModuleMember
m.inc = M.Method(n, [], c = m.c + n) # m.c <= m.c + n
m.dec = M.Method(n, [], c = m.c - n) # m.c <= m.c - n
return m
m = M.Module()
m.incdec1 = make_incdec_module()
m.incdec2 = make_incdec_module()
m.sum = M.Method([], m.incdec1.c + m.incdec2.c)
inst = m.make(incdec1 = dict(c=0), incdec2 = dict(c=0))
assert inst.incdec1.c==0 and inst.incdec2.c==0
inst.incdec1.inc(2)
inst.incdec1.dec(4)
inst.incdec2.inc(6)
assert inst.incdec1.c == -2 and inst.incdec2.c == 6
assert inst.sum() == 4 # -2 + 6
def test_Module_Advanced_example(self):
data_x = N.random.randn(4, 10)
data_y = [ [int(x)] for x in N.random.randn(4) > 0]
def test(model):
model = model.make(input_size = 10,
target_size = 1,
stepsize = 0.1)
print model.stepsize
self.failUnless( model.w.shape == (10,1) and model.b.shape == (1,))
assert model.stepsize == 0.1
for i in xrange(1000):
xe = model.update(data_x, data_y)
if i % 100 == 0:
print i, xe
pass
#for inputs, targets in my_training_set():
#print "cost:", model.update(inputs, targets)
print "final weights:", model.w
print "final biases:", model.b
#Print "some prediction:", model.prediction(some_inputs)
return model
m1=test(SoftmaxXERegression1(regularize = False))
m2=test(SoftmaxXERegression2(regularize = False))
print "m1",m1
print "m2",m2
print m2==m1
print m1==m2
assert m2==m1 and m1==m2
def test_Module_extending_module_methods(self):
model_module = SoftmaxXERegression1(regularize = False)
model_module.sum = M.Member(T.scalar()) # we add a module member to hold the sum
model_module.update.updates.update(sum = model_module.sum + model_module.cost) # now update will also update sum!
model = model_module.make(input_size = 4,
target_size = 2,
stepsize = 0.1,
sum = 0) # we mustn't forget to initialize the sum
print model.stepsize
self.failUnless( model.w.shape == (4,2) and model.b.shape == (2,))
assert model.stepsize == 0.1
test = model.update([[0,0,1,0]], [[0,1]])
test += model.update([[0,1,0,0]], [[1,0]])
assert model.sum == test
def test_Module_basic_example2_more(self):
m = M.Module()
m2 = M.Module()
m2.name="m2" # for better error
#top level don't have name, but other have auto name.
n = T.scalar('n')
m.c = M.Member(T.scalar()) # state variables must be wrapped with ModuleMember
m2.c = M.Member(T.scalar()) # state variables must be wrapped with ModuleMember
m.dec = M.Method(n, [], c = m.c - n)
m.inc = M.Method(n, [], c = m.c + n) # m.c <= m.c + n
# m.inc = M.Method(n, [], c = c + n)#fail c not defined
#syntax error
# m.inc = M.Method(n, [], m.c = m.c + n)#fail
m.inc = M.Method(n, [], updates={m.c: m.c + n})
# m.inc = M.Method(n, [], updates={c: m.c + n})#fail with NameError
# m.inc = M.Method(n, [], updates={m.c: c + n})#fail with NameError
# m.inc = M.Method(n, [], updates={c: c + n})#fail with NameError
m.inc = M.Method(n, [], updates={m.c: m2.c + n})#works! should this be allowed?
a = M.Module()
a.m1 = m
a.m2 = m2
a.make()#should work.
# self.assertRaises(m.make(c = 0), Error)
m.inc = M.Method(n, [], updates={m2.c: m.c + n})#works! should this be allowed?
# self.assertRaises(m.make(c = 0), Error)
# m.inc = M.Method(n, [], updates={m.c: m2.c + m.c+ n})#works! should this be allowed?
m2.inc = M.Method(n, [], updates={m2.c: m2.c + 2*m.c+ n})#works! should this be allowed?
# self.assertRaises(m.make(c = 0), Error)
if __name__ == '__main__':
from theano.tests import main
main("test_wiki")
@@ -4,6 +4,7 @@ from theano.gof.link import WrapLinkerMany
from theano.gof.cutils import run_cthunk
from theano.compile.mode import Mode, register_mode, predefined_modes, predefined_linkers, predefined_optimizers, default_linker, default_optimizer
from theano.gof.cc import OpWiseCLinker
from theano.gof.python25 import any
from theano import gof
import theano.config as config
@@ -13,46 +13,58 @@ def getFilterOutShp(inshp, kshp, (dx,dy)=(1,1), mode='valid'):
return N.int64(N.ceil((inshp[1:] + s*kshp - s*1)/\
N.array([dx,dy], dtype='float')))
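# A worked example of the formula above (assuming s=-1 for 'valid' and s=+1
# for 'full', as in standard convolution): with inshp=(1,28,28), kshp=(5,5)
# and (dx,dy)=(1,1), 'valid' gives ceil((28-5+1)/1) = 24 rows and cols,
# while 'full' gives ceil((28+5-1)/1) = 32.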
def conv(border_mode, subsample=(1,1), imshp=None, kshp=None, **kargs):
"""
This function returns an instantiated ConvOp, but gives better names to some parameters.
We do this instead of changing the ConvOp interface so that existing code
does not have to change.
:type border_mode: string
:param border_mode: 'valid' (only apply the kernel over complete patches of the image)
or 'full' (pad the image with 0s and apply the kernel over every full and partial patch of the image)
:type subsample: tuple of len 2
:param subsample: how many pixels we move in the (row, col) directions of the image when moving from one patch to the next
:type imshp: tuple of len 4
:param imshp: (batch size, stack size, nb row, nb col)
:type kshp: tuple of len 4
:param kshp: (nb kernel, stack size, nb row, nb col)
"""
if imshp is not None and kshp is not None:
assert imshp[1]==kshp[1]
nkern = kshp[0]
bsize = imshp[0]
kshp = kshp[:2]
imshp = imshp[1:]
else:
nkern, bsize = None, None
return ConvOp(output_mode=border_mode, dx=subsample[0], dy=subsample[1],
imshp=imshp, kshp=kshp, nkern=nkern, bsize=bsize,**kargs)
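# A minimal usage sketch of the wrapper above (hypothetical shapes; assumes
# 4d image/kernel variables such as theano.tensor.tensor4):
#   img = T.tensor4()   # (batch size, stack size, nb row, nb col)
#   kern = T.tensor4()  # (nb kernel, stack size, nb row, nb col)
#   out = conv('valid', subsample=(1,1),
#              imshp=(8, 1, 28, 28), kshp=(4, 1, 5, 5))(img, kern)
#   # out has shape (8, 4, 24, 24): 8 images, 4 kernels, 24x24 'valid' maps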
class ConvOp(Op):
"""
A convolution op that should mimic scipy.signal.convolve2d, but faster!
In development.
A convolution op that should extend scipy.signal.convolve2d, but much faster!
"""
__attrnames = ['imshp', 'kshp', 'nkern', 'bsize', 'dx', 'dy', 'out_mode',
'unroll_batch', 'unroll_kern',
'unroll_batch', 'unroll_kern', 'unroll_patch',
'imshp_logical', 'kshp_logical', 'kshp_logical_top_aligned']
#FRED: I added both unroll attributes, as we don't want ops with different values to be merged. Otherwise, the unroll tests don't work correctly.
"""These attributes uniquely identify the behaviour of this op for given inputs"""
#TODO: make the stacksize its own parameter, and make imshp a pair
def __init__(self, imshp=None, kshp=None, nkern=None, bsize=None, dx=None, dy=None, output_mode='valid',
unroll_batch=0,
unroll_kern=0,
unroll_patch=False,
unroll_patch=True,
imshp_logical=None,
kshp_logical=None,
kshp_logical_top_aligned=True,
verbose=0,
version=-1):
"""
imshp - image shape tuple of 2 or 3: 2 for a 2d image, 3 for a stack of 2d images.
kshp - kernel shape 2
nkern - # kernels
bsize - batch size
dx - patch stride rows
dy - patch stride cols
out_mode - 'valid', 'full'
unroll_patch - c code generation option
unroll_batch - c code generation option
unroll_kern - c code generation option
verbose - passed to GpuConv
version - passed to GpuConv
This Op implements the convolution of a stack of kernels (a 4d tensor, (nkern, stacksize, nb row, nb col)) on a stack of images (a 4d tensor, (batchsize, stacksize, nb row, nb col)). The batch size is the number of images over which we want to apply the same kernels. nkern is the number of kernels that we want to apply to each image. The stack size is mostly used when there are multiple layers in the network; the output is the sum of the convolutions of the multiple 2d images and kernels in the stack.
The reason that this op does the summation over convolutions within the 'stack' is that
it allows us to be memory-efficient about how gradients are calculated. If, for
@@ -62,24 +74,67 @@ class ConvOp(Op):
gradient on the filters.
unroll_patch. If True, will use a version that unrolls the patch loop, which is faster than the version without unrolling.
unroll_batch. If >0, will use a version that unrolls the batch loop by the value of the option. By default this version of the code is not used.
unroll_kern. Same as unroll_batch, but unrolls the kernel loop.
If imshp, kshp, nkern and bsize are provided, we can generate more optimized code. This makes a significant difference for the full mode with the unroll_patch version.
The most frequently fastest code currently available on x86_64 computers uses unroll_batch=4, unroll_kern=4, unroll_patch=False, and this requires that all the optional shape information be given. Those numbers were tested empirically and are backed up by the article: Anatomy of High-Performance Matrix Multiplication by Kazushige Goto and Robert A. Van De Geijn, ACM Transactions on Mathematical Software, vol 34, No. 3, article 12, May 2008. In figure 12, it gives the value mr x nr; those values are the optimum to use for unroll_batch and unroll_kern. For x86_64 computers it is 4x4. Other architectures can have different values (2x4 for x86, 8x8 for Itanium, ...).
The fastest version uses unroll_batch=4 and unroll_kern=4 when possible (currently this doesn't support logical shape != physical shape), as this is what gives the best performance in practice. This also means that for the best performance, you should have a batch size and a number of kernels that are multiples of 4. In the article:
Anatomy of High-Performance Matrix Multiplication by Kazushige Goto and Robert A. Van De Geijn, ACM Transactions on Mathematical Software, vol 34, No. 3, article 12, May 2008.
In figure 12, it gives the value mr x nr; those values are the optimum to use for unroll_batch and unroll_kern. For x86_64 computers it is 4x4. Other architectures can have different values (2x4 for x86, 8x8 for Itanium, ...).
"""
imshp = tuple(imshp)
if len(imshp)==2:
self.imshp = (1,)+imshp
elif len(imshp)==3:
self.imshp = imshp
else:
raise Exception("bad len for imshp")
del imshp
:type out_mode: string
:param out_mode: 'valid' (gives an output smaller than the image) or 'full' (gives an output bigger than the image)
self.kshp = tuple(kshp)
optional parameters (if provided, they will be used to generate more optimized c code):
:type imshp: tuple of len 2 or 3: 2 for 2d image, 3 for a stack of 2d images.
:param imshp: (stacksize, nb image row, nb image col)
:type kshp: tuple of len 2
:param kshp: (nb kernel row, nb kernel col)
:type nkern: int
:param nkern: the number of kernels
:type bsize: int
:param bsize: the size of the minibatch
:type dx: int
:param dx: patch stride rows
:type dy: int
:param dy: patch stride cols
parameters to select the version of the code used:
:type unroll_patch: bool
:param unroll_patch: use a version of c_code that unrolls the patch loop. It does not require all the shape information in order to work, but if all the shape information is present, it will be used to hardcode the values in the generated code, making it faster.
:type unroll_batch: int
:param unroll_batch: use a version of c_code that unrolls the batch loop (by unroll_batch) and the kernel loop (by unroll_kern). bsize and nkern must be multiples of unroll_batch and unroll_kern respectively.
:type unroll_kern: int
:param unroll_kern: use a version of c_code that unrolls the batch loop (by unroll_batch) and the kernel loop (by unroll_kern). bsize and nkern must be multiples of unroll_batch and unroll_kern respectively.
:type verbose: int
:param verbose: passed to GpuConv
:type version: int
:param version: passed to GpuConv
:param imshp_logical: used internally when we generate the gradient when dx!=1 or dy!=1
:param kshp_logical: idem
:param kshp_logical_top_aligned: idem
"""
all_shape = imshp is not None and kshp is not None and nkern is not None and bsize is not None
if (unroll_batch>0 or unroll_kern>0) and not all_shape:
raise Exception("In ConvOp, when using unroll_batch and unroll_nkern, all shape are needed")
if not all_shape and (imshp is not None or kshp is not None or nkern is not None or bsize is not None):
print "OPTIMISATION WARNING: passing only a few shape to ConvOp for faster code is useless. We use all of them or none."
if not all_shape:
unroll_patch = True
if imshp is not None:
imshp = tuple(imshp)
if len(imshp)==2:
imshp = (1,)+imshp
elif len(imshp)==3:
imshp = imshp
else:
raise Exception("bad len for imshp")
self.imshp = imshp
if kshp is not None:
kshp = tuple(kshp)
self.kshp = kshp
self.nkern = nkern
self.bsize=bsize
self.dx=dx
@@ -89,7 +144,7 @@ class ConvOp(Op):
# a triple
self.imshp_logical = self.imshp
if imshp_logical is not None: self.imshp_logical = tuple(imshp_logical)
assert len(self.imshp) == len(self.imshp_logical)
assert (self.imshp is None and self.imshp_logical is None) or (len(self.imshp) == len(self.imshp_logical))
# a pair
self.kshp_logical = self.kshp
@@ -123,13 +178,17 @@ class ConvOp(Op):
new-=1
print "OPTIMISATION WARNING: in ConvOp.__init__() unroll_kern(%s) should be 0 or a divisor of nkern(%s)We revert it to %d. This won't change the result, but may make it slower."%(str(self.unroll_kern),str(self.nkern),new)
self.unroll_kern=new
self.outshp = getFilterOutShp(self.imshp_logical, self.kshp_logical, (dx,dy), output_mode)
self.fulloutshp = getFilterOutShp(self.imshp_logical, self.kshp_logical, (1,1), output_mode)
if all_shape:
self.outshp = getFilterOutShp(self.imshp_logical, self.kshp_logical, (dx,dy), output_mode)
self.fulloutshp = getFilterOutShp(self.imshp_logical, self.kshp_logical, (1,1), output_mode)
else:
self.outshp = None
self.fulloutshp = None
self.out_mode = output_mode
if not self.out_mode in ["valid", "full"]:
raise Exception("Mode %s not implemented"%self.out_mode)
if not (self.outshp > 0).all():
if all_shape and not (self.outshp > 0).all():
raise Exception(("Bad size for the output shape. Verify that [post-supersampling] input shape (%s)"
"and kern shape(%s) are ok. (hint: kerns must fit inside image in"
"'valid' mode)")%(self.imshp_logical,self.kshp_logical))
@@ -220,47 +279,69 @@ class ConvOp(Op):
# TODO: move these back out to global scope when they no longer cause an atexit error
from scipy.signal.signaltools import _valfrommode, _bvalfromboundary
from scipy.signal.sigtools import _convolve2d
#print 'img2d (%s)'%str(self.imshp_logical), img2d
#print 'filtersflipped (%s)'%str(self.kshp_logical), filtersflipped
imshp = self.imshp
if imshp is None:
imshp = tuple(img2d.shape[1:])
kshp = self.kshp
if kshp is None:
kshp = tuple(filtersflipped.shape[2:])
bsize = self.bsize
if bsize is None:
bsize = img2d.shape[0]
nkern = self.nkern
if nkern is None:
nkern = filtersflipped.shape[0]
imshp_logical = self.imshp_logical
if imshp_logical is None:
imshp_logical = imshp
kshp_logical = self.kshp_logical
if kshp_logical is None:
kshp_logical = kshp
if self.fulloutshp is not None:
fulloutshp = tuple(self.fulloutshp)
else:
fulloutshp = tuple(getFilterOutShp(imshp_logical, kshp_logical, (1,1), self.out_mode))
if z[0] is None:
z[0] = N.zeros((self.bsize,)+(self.nkern,)+tuple(self.fulloutshp),
z[0] = N.zeros((bsize,)+(nkern,)+fulloutshp,
dtype=img2d.dtype)
zz=z[0]
val = _valfrommode(self.out_mode)
bval = _bvalfromboundary('fill')
batchsize = self.bsize
stacklen = self.imshp[0]
stacklen = imshp[0]
img2d = img2d.reshape((batchsize,)+ self.imshp)
filtersflipped = filtersflipped.reshape((self.nkern,stacklen)+self.kshp)
img2d = img2d.reshape((bsize,)+ imshp)
filtersflipped = filtersflipped.reshape((nkern,stacklen)+kshp)
if self.imshp != self.imshp_logical:
# assuming that to get from imshp to imshp logical we insert zeros in missing spots
rstride = int(N.ceil(self.imshp_logical[1] / float(self.imshp[1])))
cstride = int(N.ceil(self.imshp_logical[2] / float(self.imshp[2])))
buf = N.zeros((batchsize,)+ self.imshp_logical, dtype=img2d.dtype)
rstride = int(N.ceil(imshp_logical[1] / float(imshp[1])))
cstride = int(N.ceil(imshp_logical[2] / float(imshp[2])))
buf = N.zeros((bsize,)+ imshp_logical, dtype=img2d.dtype)
buf[:,:,::rstride, ::cstride] = img2d
img2d = buf
del buf, rstride, cstride
if self.kshp != self.kshp_logical:
rstride = int(N.ceil(self.kshp_logical[0] / float(self.kshp[0])))
cstride = int(N.ceil(self.kshp_logical[1] / float(self.kshp[1])))
buf = N.zeros((self.nkern,stacklen)+ self.kshp_logical, dtype=filtersflipped.dtype)
if self.kshp_logical_top_aligned:
if kshp != kshp_logical:
rstride = int(N.ceil(kshp_logical[0] / float(kshp[0])))
cstride = int(N.ceil(kshp_logical[1] / float(kshp[1])))
buf = N.zeros((nkern,stacklen)+ self.kshp_logical, dtype=filtersflipped.dtype)
if kshp_logical_top_aligned:
roffset=coffset=0
else:
roffset=(self.kshp_logical[0] - (self.kshp[0]*rstride) - 1+rstride) % rstride
coffset=(self.kshp_logical[1] - (self.kshp[1]*cstride) - 1+cstride) % cstride
roffset=(kshp_logical[0] - (kshp[0]*rstride) - 1+rstride) % rstride
coffset=(kshp_logical[1] - (kshp[1]*cstride) - 1+cstride) % cstride
assert roffset >= 0
assert coffset >= 0
buf[:,:,roffset::rstride, coffset::cstride] = filtersflipped
filtersflipped = buf
del buf, rstride, cstride
for b in range(batchsize):
for n in range(self.nkern):
for b in range(bsize):
for n in range(nkern):
zz[b,n,...].fill(0)
for im0 in range(stacklen):
zz[b,n,...] += _convolve2d(\
@@ -271,7 +352,7 @@ class ConvOp(Op):
#execute the c version which is much faster.
if self.dx>1 or self.dy>1:
zz = zz[:,:,0::self.dx,0::self.dy].copy()
#print 'zz (%s)'%str((self.dx, self.dy)), zz
z[0]=zz
@@ -286,6 +367,11 @@ class ConvOp(Op):
if self.imshp != self.imshp_logical or self.kshp != self.kshp_logical:
raise NotImplementedError('todo')
all_shape = self.imshp is not None and self.kshp is not None and self.nkern is not None and self.bsize is not None
if not all_shape and (self.dx!=1 or self.dy!=1):
raise Exception("ConvOp.grad when dx!=1 or dy!=1 we must have all the optional shape information")
grad_hack_necessary = False
if grad_hack_necessary:
if self.dx!=1 or self.dy!=1:
@@ -301,25 +387,31 @@ class ConvOp(Op):
newin = inputs.dimshuffle((1,0,2,3))
newgz = gz.dimshuffle((1,0,2,3))
(bsize, nkern) = None, None
imshp = None
kshp = None
un_p = self.unroll_patch
imshp_logical = None
if self.out_mode == 'valid':
(img, filters) = (newin, newgz)
imshp_logical = None
kshp_logical = self.fulloutshp
kshp_logical_top_aligned=False
(bsize, nkern) = (self.imshp[0], self.nkern)
imshp = (self.bsize, self.imshp[1], self.imshp[2])
if all_shape:
(bsize, nkern) = (self.imshp[0], self.nkern)
imshp = (self.bsize, self.imshp[1], self.imshp[2])
kshp = self.outshp
un_b = self.unroll_batch
un_k = self.unroll_kern
#print 'dw_valid', imshp, kshp, nkern, bsize
elif self.out_mode == 'full':
(img, filters) = (newgz, newin)
imshp_logical = (self.bsize, self.fulloutshp[0], self.fulloutshp[1])
kshp_logical = None
kshp_logical_top_aligned=True
(bsize, nkern) = (self.nkern, self.imshp[0])
imshp = (self.bsize, self.outshp[0], self.outshp[1])
kshp = self.imshp[1:]
if all_shape:
imshp_logical = (self.bsize, self.fulloutshp[0], self.fulloutshp[1])
(bsize, nkern) = (self.nkern, self.imshp[0])
imshp = (self.bsize, self.outshp[0], self.outshp[1])
kshp = self.imshp[1:]
un_b = self.unroll_kern
un_k = self.unroll_batch
#print 'dw_full', imshp, kshp, nkern, bsize
@@ -329,7 +421,7 @@ class ConvOp(Op):
filters = filters[:,:,::-1,::-1] #flip them
#find good value for the unroll
if un_b!=0 and bsize%un_b!=0:
if all_shape and un_b!=0 and bsize%un_b!=0:
if bsize<un_b:
un_b = bsize
else:
@@ -343,7 +435,7 @@ class ConvOp(Op):
print "OPTIMISATION WARNING: in ConvOp.grad() we can't determine a good unroll value for the kernel. Maybe you can optimize this!"
dw = ConvOp(imshp, kshp, nkern, bsize, 1,1, output_mode='valid',
unroll_batch=un_b, unroll_kern=un_k,
unroll_batch=un_b, unroll_kern=un_k, unroll_patch=un_p,
imshp_logical=imshp_logical,
kshp_logical=kshp_logical,
kshp_logical_top_aligned=kshp_logical_top_aligned,
@@ -352,7 +444,8 @@ class ConvOp(Op):
if hasattr(self,'flops'):
dw.set_flops()
dw = dw(img,filters)
assert (dw.owner.op.outshp==self.kshp).all()
if all_shape:
assert (dw.owner.op.outshp==self.kshp).all()
if self.out_mode == 'valid':
# before DimShuffle, dw is of shape visdim x nkern x kshp[0] x kshp[1]
dw = dw.dimshuffle((1,0,2,3))
@@ -363,26 +456,34 @@ class ConvOp(Op):
if not self.out_mode == 'full': mode = 'full'
filters = kerns.dimshuffle((1,0,2,3))
filters = filters[:,:,::-1,::-1]
nkern = self.imshp[0]
imshp = (self.nkern, self.outshp[0], self.outshp[1])
nkern = None
imshp = None
imshp_logical = None
kshp = None
if all_shape:
nkern = self.imshp[0]
imshp = (self.nkern, self.outshp[0], self.outshp[1])
imshp_logical=(self.nkern, self.fulloutshp[0], self.fulloutshp[1])
#print 'din', imshp, self.kshp, nkern
din = ConvOp(imshp, self.kshp, nkern, self.bsize,
1,1, output_mode=mode,
unroll_batch=un_b, unroll_kern=un_k,
imshp_logical=(self.nkern, self.fulloutshp[0], self.fulloutshp[1]),
unroll_batch=un_b, unroll_kern=un_k, unroll_patch=un_p,
imshp_logical=imshp_logical,
kshp_logical=None,
version=-1,#when we change the mode, we don't forward the version.
verbose=self.verbose)
if hasattr(self,'flops'):
din.set_flops()
din = din(gz,filters)
assert (din.owner.op.outshp==self.imshp[1:]).all()
assert (din.owner.op.outshp is None and self.imshp is None) or (din.owner.op.outshp==self.imshp[1:]).all()
return [din, dw]
#def c():
def c_headers(self):
return ['<numpy/noprefix.h>', '<iostream>', '<sstream>' ]
def c_code_cache_version(self):
return (1,)
def c_support_code(self):
return """
#define STRIDES(arr) ((arr)->strides)
@@ -400,24 +501,50 @@ using namespace std;
assert node.inputs[0].type.dtype == node.inputs[1].type.dtype
d=locals()
d.update(sub)
all_shape = self.imshp is not None and self.kshp is not None and self.nkern is not None and self.bsize is not None
d["self_out_mode"]=self.out_mode
d["self_bsize"]=self.bsize
d["self_nkern"]=self.nkern
d["self_dx"]=self.dx
d["self_dy"]=self.dy
d["mode"]=self.out_mode.upper()
d["self_outshp0"]=self.outshp[0]
d["self_outshp1"]=self.outshp[1]
d["self_imshp0"]=self.imshp[0]
d["self_imshp1"]=self.imshp[1]
d["self_imshp2"]=self.imshp[2]
d["mode"]=self.out_mode.upper()
d["self_kshp0"]=self.kshp[0]
d["self_kshp1"]=self.kshp[1]
d["self_kshp_logical_r"] = self.kshp_logical[0]
d["self_kshp_logical_c"] = self.kshp_logical[1]
d["self_kshp_logical_stride_r"] = int(N.ceil(self.kshp_logical[0] / float(self.kshp[0])))
d["self_kshp_logical_stride_c"] = int(N.ceil(self.kshp_logical[1] / float(self.kshp[1])))
d["affectation"]="="
if all_shape:
d["self_bsize"]=self.bsize
d["self_nkern"]=self.nkern
d["self_outshp0"]=self.outshp[0]
d["self_outshp1"]=self.outshp[1]
d["self_imshp0"]=self.imshp[0]
d["self_imshp1"]=self.imshp[1]
d["self_imshp2"]=self.imshp[2]
d["self_kshp0"]=self.kshp[0]
d["self_kshp1"]=self.kshp[1]
d["self_kshp_logical_r"] = self.kshp_logical[0]
d["self_kshp_logical_c"] = self.kshp_logical[1]
d["self_kshp_logical_stride_r"] = int(N.ceil(self.kshp_logical[0] / float(self.kshp[0])))
d["self_kshp_logical_stride_c"] = int(N.ceil(self.kshp_logical[1] / float(self.kshp[1])))
d["self_imshp_logical_r"] = self.imshp_logical[1] #N.B. 1 not 0
d["self_imshp_logical_c"] = self.imshp_logical[2]#N.B. 2 not 1
d["self_imshp_logical_stride_r"] = int(N.ceil(self.imshp_logical[1] / float(self.imshp[1])))
d["self_imshp_logical_stride_c"] = int(N.ceil(self.imshp_logical[2] / float(self.imshp[2])))
if not self.imshp[0]==1: d["affectation"]="+="
d["all_shape"]=1
d["dim_zz_const"]="const"
else:
d["self_bsize"]="%(img2d)s->dimensions[0]"%d
d["self_nkern"]="%(filtersflipped)s->dimensions[0]"%d
d["self_outshp0"]="-1"
d["self_outshp1"]="-1"
d["self_imshp0"]="%(img2d)s->dimensions[1]"%d
d["self_imshp1"]="%(img2d)s->dimensions[2]"%d
d["self_imshp2"]="%(img2d)s->dimensions[3]"%d
d["self_kshp0"]="%(filtersflipped)s->dimensions[2]"%d
d["self_kshp1"]="%(filtersflipped)s->dimensions[3]"%d
d["affectation"]="+="
d["all_shape"]=0
d["dim_zz_const"]=""
if self.kshp_logical_top_aligned:
d["self_kshp_logical_offset_r"] = 0
d["self_kshp_logical_offset_c"] = 0
@@ -427,42 +554,38 @@ using namespace std;
d["self_kshp_logical_offset_r"] = (self.kshp_logical[0] - (self.kshp[0]*rstride) - 1+rstride) % rstride
d["self_kshp_logical_offset_c"] = (self.kshp_logical[1] - (self.kshp[1]*cstride) - 1+cstride) % cstride
del rstride, cstride
d["self_imshp_logical_r"] = self.imshp_logical[1] #N.B. 1 not 0
d["self_imshp_logical_c"] = self.imshp_logical[2]#N.B. 2 not 1
d["self_imshp_logical_stride_r"] = int(N.ceil(self.imshp_logical[1] / float(self.imshp[1])))
d["self_imshp_logical_stride_c"] = int(N.ceil(self.imshp_logical[2] / float(self.imshp[2])))
d["affectation"]="="
if not self.imshp[0]==1: d["affectation"]="+="
if node.inputs[0].type.dtype=="float32": d["type"]="float"
elif node.inputs[0].type.dtype=="float64": d["type"]="double"
else: raise Exception("Type %s not implemented"%node.inputs[0].type.dtype)
d["gemm"]='dgemm_'
if not d["type"]=="double":d["gemm"]='sgemm_'
#print 'LOGICAL OFFSET', self.kshp_logical_top_aligned, d["self_kshp_logical_r"],
#print d["self_kshp0"], d["self_kshp_logical_offset_r"], d["self_kshp_logical_stride_r"],
#print self.out_mode, d["self_imshp_logical_stride_r"]
if self.imshp != self.imshp_logical or self.kshp != self.kshp_logical:
# print "return imshp!=imshp_logical or self.kshp != self.kshp_logical shape version"
if self.verbose:
print "return imshp!=imshp_logical or self.kshp != self.kshp_logical shape version"
return _conv_op_code_a % d
if self.unroll_patch:
# print "return unroll patch version",self.dx,self.dy
if self.verbose:
print "return unroll patch version",self.dx,self.dy
return _conv_op_code_unroll_patch%d
if self.unroll_batch>0 or self.unroll_kern>0:
if self.unroll_batch<=0: self.unroll_batch=1
if self.unroll_kern<=0: self.unroll_kern=1
# print "return unrolled batch and kern code by",self.unroll_batch, self.unroll_kern
if self.verbose:
print "return unrolled batch and kern code by",self.unroll_batch, self.unroll_kern
return gen_conv_code_unroll_batch_kern(d, self.unroll_batch,
self.unroll_kern)
#TODO: should we choose the unroll size automatically with the bigger divisor under 5?
if self.out_mode == 'valid' and self.dx==0 and self.dy==0:
# print "return gemm version"
if self.verbose:
print "return gemm version"
return _conv_op_code_valid_gemm % d
else:
# print "return no gemm version"
if self.verbose:
print "return no gemm version"
return _conv_op_code_a % d
def convolve2(kerns, kshp, nkern, images, imshp, bsize, step=(1,1),
@@ -626,9 +749,6 @@ if ((!%(z)s)
int Os[2];
Os[0]=%(self_outshp0)s;
Os[1]=%(self_outshp1)s;
//I keep the formula to calculate Os in case we need it in the future.
//if (mode == FULL) {Os[0] = (int)ceil((dim_im[0]+dim_ker[0]-1)/float(%(self_dx)s)); Os[1] = ceil((dim_im[1]+dim_ker[1]-1)/float(%(self_dy)s));}
//else {Os[0] = (int)ceil((dim_im[0]-dim_ker[0]+1)/float(%(self_dx)s)); Os[1] = (int)ceil((dim_im[1]-dim_ker[1]+1)/float(%(self_dy)s));}
for(int b=0;b< %(self_bsize)s;b++){
for(int n_kern=0;n_kern<%(self_nkern)s;n_kern++){
@@ -1110,9 +1230,6 @@ if ((!%(z)s)
int Os[2];
Os[0]=%(self_outshp0)s;
Os[1]=%(self_outshp1)s;
//I keep the formula to calculate Os in case we need it in the future.
//if (mode == FULL) {Os[0] = (int)ceil((dim_im[0]+dim_ker[0]-1)/float(%(self_dx)s)); Os[1] = ceil((dim_im[1]+dim_ker[1]-1)/float(%(self_dy)s));}
//else {Os[0] = (int)ceil((dim_im[0]-dim_ker[0]+1)/float(%(self_dx)s)); Os[1] = (int)ceil((dim_im[1]-dim_ker[1]+1)/float(%(self_dy)s));}
for(int b=0;b< %(self_bsize)s ;b+=%(unroll_bsize)s){
for(int n_kern=0;n_kern<%(self_nkern)s;n_kern+=%(unroll_ksize)s){
@@ -1226,15 +1343,23 @@ _conv_op_code_unroll_patch = """
const int mode=%(mode)s;
int typenum=0, typenum_f=0;
PyArrayObject *ain1=NULL, *ain2=NULL, *filtersflipped_arr=NULL, *img2d_arr=NULL;
const %(type)s fill_value = 0;
const %(type)s fill_value = 0;//only a value of 0 is currently tested and correctly implemented
int type_im=PyArray_TYPE(%(img2d)s);
int type_ker=PyArray_TYPE(%(filtersflipped)s);
npy_intp dim_zz[2]={%(self_outshp0)s,%(self_outshp1)s};
npy_intp dim_im[2]={%(self_imshp1)s,%(self_imshp2)s};
npy_intp dim_ker[2]={%(self_kshp0)s,%(self_kshp1)s};
const npy_intp dim_im[2]={%(self_imshp1)s,%(self_imshp2)s};
const npy_intp dim_ker[2]={%(self_kshp0)s,%(self_kshp1)s};
%(dim_zz_const)s npy_intp dim_zz[2]={%(self_outshp0)s,%(self_outshp1)s};
#if !%(all_shape)s
if (mode == FULL) {
dim_zz[0] = (int)ceil((dim_im[0]+dim_ker[0]-1)/float(%(self_dx)s));
dim_zz[1] = (int)ceil((dim_im[1]+dim_ker[1]-1)/float(%(self_dy)s));
} else {
dim_zz[0] = (int)ceil((dim_im[0]-dim_ker[0]+1)/float(%(self_dx)s));
dim_zz[1] = (int)ceil((dim_im[1]-dim_ker[1]+1)/float(%(self_dy)s));
}
#endif
PyArray_Dims img2d_shape;
npy_intp img2d_dim[4]={1,1,0,0};
img2d_shape.ptr=img2d_dim;
@@ -1259,7 +1384,8 @@ if(%(img2d)s->nd==2){
img2d_dim[1]=%(img2d)s->dimensions[1];
img2d_dim[0]=%(img2d)s->dimensions[0];
}else {
PyErr_SetString(PyExc_ValueError, "img don't have a good shape");
PyErr_Format(PyExc_ValueError,
"image doesn't have a valid number of dimensions: %%d. ", %(img2d)s->nd);
%(fail)s;
}
@@ -1273,11 +1399,8 @@ if(%(filtersflipped)s->nd==3){
kerns_dim[1]=%(filtersflipped)s->dimensions[1];
kerns_dim[0]=%(filtersflipped)s->dimensions[0];
}else{
std:stringstream temp;
temp << "nddim="<<%(filtersflipped)s->nd;
std::string param = temp.str();
PyErr_SetString(PyExc_ValueError,
("kernel don't have a good shape. " + param).c_str());
PyErr_Format(PyExc_ValueError,
"kernel doesn't have a valid number of dimensions: %%d. ", %(filtersflipped)s->nd);
%(fail)s;
}
@@ -1312,6 +1435,13 @@ filtersflipped_arr = (PyArrayObject*)filtersflipped;
if(mode != VALID && mode != FULL){
PyErr_SetString(PyExc_ValueError, "invalid mode, only full and valid are supported"); %(fail)s;
}
if(dim_zz[0]<=0 || dim_zz[1]<=0){
PyErr_Format(PyExc_ValueError,
"Output dimensions are not valid %%dx%%d",dim_zz[0],dim_zz[1]);
%(fail)s;
}
typenum = PyArray_ObjectType((PyObject*)%(img2d)s, 0);
typenum_f = PyArray_ObjectType((PyObject*)%(filtersflipped)s, 0);
if (typenum < 0) {PyErr_SetString(PyExc_ValueError, "Invalid type"); %(fail)s;}
@@ -1339,13 +1469,6 @@ if ((!%(z)s)
//PyArray_FILLWBYTE((PyObject*)%(z)s,0);
}
int Os[2];
Os[0]=%(self_outshp0)s;
Os[1]=%(self_outshp1)s;
//I keep the formula to calculate Os in case we need it in the future.
//if (mode == FULL) {Os[0] = (int)ceil((dim_im[0]+dim_ker[0]-1)/float(%(self_dx)s)); Os[1] = ceil((dim_im[1]+dim_ker[1]-1)/float(%(self_dy)s));}
//else {Os[0] = (int)ceil((dim_im[0]-dim_ker[0]+1)/float(%(self_dx)s)); Os[1] = (int)ceil((dim_im[1]-dim_ker[1]+1)/float(%(self_dy)s));}
for(int b=0;b< %(self_bsize)s;b++){
for(int n_kern=0;n_kern<%(self_nkern)s;n_kern++){
@@ -1365,13 +1488,13 @@ for(int b=0;b< %(self_bsize)s;b++){
int new_m;
for (int iter_m=0; iter_m < Os[0]; iter_m++) {
for (int iter_m=0; iter_m < dim_zz[0]; iter_m++) {
// Reposition index into input image based on requested output size
int pos_m = iter_m*%(self_dx)s;//The position of the patch in the image
if (mode == FULL) new_m = pos_m ;
else new_m = (pos_m+dim_ker[0]-1);
for (int iter_n=0; iter_n < Os[1]; iter_n++) { // loop over columns
for (int iter_n=0; iter_n < dim_zz[1]; iter_n++) { // loop over columns
int pos_n=iter_n*%(self_dy)s;
%(type)s sum=0;
%(type)s sum2=0;
@@ -1392,7 +1515,6 @@ for(int b=0;b< %(self_bsize)s;b++){
}
}else{
//do the part where kernel is to the right of the img
//TODO: implement unroll patch for fill_value!=0
int k=0,max_k=max((int)(pos_n-dim_im[1])+1,0);
if(fill_value!=0){
@@ -1405,34 +1527,25 @@ for(int b=0;b< %(self_bsize)s;b++){
max_k=min(pos_n+1,(int)dim_ker[1]);
const %(type)s * idx_in=&in[ind0*dim_im[1]];
if(iter_n + 4*%(self_dy)s < Os[1]
if(iter_n + 4*%(self_dy)s < dim_zz[1]
&& iter_n>dim_ker[1]-1+3
&& iter_n<dim_im[1]-dim_ker[1]+1-3){
nb_sum=4;
//cout<<4<<endl;
for (int ind1=pos_n-k; k<max_k; k++,ind1--) {
sum+=idx_hvals[k]*idx_in[ind1];
sum2+=idx_hvals[k]*idx_in[ind1+%(self_dy)s];
sum3+=idx_hvals[k]*idx_in[ind1+2*%(self_dy)s];
sum4+=idx_hvals[k]*idx_in[ind1+3*%(self_dy)s];
}
}else if(iter_n + 2*%(self_dy)s < Os[1]
}else if(iter_n + 2*%(self_dy)s < dim_zz[1]
&& iter_n>dim_ker[1]-1
&& iter_n<dim_im[1]-dim_ker[1]+1){
//cout<<2<<endl;
nb_sum=2;
// if(iter_n==dim_ker[1]-1){//k-1<min(pos_n+%(self_dy)s,(int)dim_ker[1])){
// sum2+=idx_hvals[k-1]*idx_in[pos_n-k-%(self_dy)s];
// }
for (int ind1=pos_n-k; k<max_k; k++,ind1--) {
sum+=idx_hvals[k]*idx_in[ind1];
sum2+=idx_hvals[k]*idx_in[ind1+%(self_dy)s];
}
// sum2+=idx_hvals[k]*idx_in[pos_n-k+%(self_dy)s];
// sum+=idx_hvals[k]*idx_in[pos_n-k];
// k++;
}else{
//cout<<1<<endl;
nb_sum=1;
/*
%(type)s sum_=0;
@@ -1456,7 +1569,7 @@ for(int b=0;b< %(self_bsize)s;b++){
}else{//valid mode
const %(type)s* idx_in=&in[ind0*dim_im[1]];
const %(type)s* idx_hvals=&hvals[j*dim_ker[1]];
if(iter_n + 4*%(self_dy)s < Os[1]){
if(iter_n + 4*%(self_dy)s < dim_zz[1]){
nb_sum=4;
for (int k=dim_ker[1]-1,im_idx=pos_n; k >=0; k--,im_idx++) {
sum+=idx_hvals[k]*idx_in[im_idx];
@@ -1464,7 +1577,7 @@ for(int b=0;b< %(self_bsize)s;b++){
sum3+=idx_hvals[k]*idx_in[im_idx+2*%(self_dy)s];
sum4+=idx_hvals[k]*idx_in[im_idx+3*%(self_dy)s];
}
}else if(iter_n + 2*%(self_dy)s < Os[1]){
}else if(iter_n + 2*%(self_dy)s < dim_zz[1]){
nb_sum=2;
for (int k=dim_ker[1]-1,im_idx=pos_n; k >=0; k--,im_idx++) {
sum+=idx_hvals[k]*idx_in[im_idx];
@@ -1485,29 +1598,9 @@ for(int b=0;b< %(self_bsize)s;b++){
case 1: out[iter_m*dim_zz[1]+iter_n] %(affectation)s sum;
}
iter_n+=nb_sum-1;
/*
out[iter_m*dim_zz[1]+iter_n] %(affectation)s sum;
if(nb_sum>=2){
iter_n++;
out[iter_m*dim_zz[1]+iter_n] %(affectation)s sum2;
}
if(nb_sum>=3){
iter_n++;
out[iter_m*dim_zz[1]+iter_n] %(affectation)s sum3;
}
if(nb_sum>=4){
iter_n++;
out[iter_m*dim_zz[1]+iter_n] %(affectation)s sum4;
}
*/
}//for iter_n
}//for iter_m
}//for stack_size
if (0 && (mode==FULL)){
for (int i = 0; i < dim_zz[0]*dim_zz[1]; ++i)
std::cout << " " << out[i];
std::cout << "\\n";
}
}//for n_kern
}//for b
Py_XDECREF(img2d);
import numpy
import theano
import theano.sandbox.scan
# generator network, only one output , type scalar ; no sequence or
# non sequence arguments
def test_1():
def f_pow2(x_tm1):
return (2*x_tm1, {})
s = theano.tensor.dvector()
n_steps = theano.tensor.dscalar()
Y = theano.sandbox.scan.scan(f_pow2, [],s, [],n_steps = n_steps)
f1 = theano.function([s,n_steps], Y)
assert( numpy.allclose(f1([1],3), [2,4,8]) )
# simple rnn, one input, one state, weights for each; input/state are
# vectors, weights are scalars
def test_2():
def f_rnn(u_t,x_tm1,W_in, W):
return (u_t*W_in+x_tm1*W, {})
u = theano.tensor.dvector()
x0 = theano.tensor.dvector()
W_in = theano.tensor.dscalar()
W = theano.tensor.dscalar()
Y = theano.sandbox.scan.scan(f_rnn, u,x0,[W_in,W])
f2 = theano.function([u,x0,W_in,W], Y)
assert(numpy.allclose(f2([1,2,3,4],[1],.1,1), numpy.array([1.1,1.3,1.6,2.])))
# simple rnn, one input, one state, weights for each; input/state are
# vectors, weights are scalars; using shared variables
def test_3():
u = theano.tensor.dvector()
x0 = theano.tensor.dvector()
W_in = theano.shared(.1, name = 'w_in')
W = theano.shared(1., name ='w')
def f_rnn_shared(u_t,x_tm1):
return (u_t*W_in+x_tm1*W, {})
Y = theano.sandbox.scan.scan(f_rnn_shared, u,x0,[])
f3 = theano.function([u,x0], Y)
assert(numpy.allclose(f3([1,2,3,4],[1]), numpy.array([1.1,1.3,1.6,2.])))
# some rnn with multiple outputs and multiple inputs; other dimension
# instead of scalars/vectors
def test_4():
W_in2 = theano.shared(numpy.array([1.,2.]), name='win2')
W = theano.shared(numpy.array([[2.,1.],[1.,1.]]), name='w')
W_out = theano.shared(numpy.array([.5,1.]), name = 'wout')
W_in1 = theano.tensor.dmatrix('win')
u1 = theano.tensor.dmatrix('u1')
u2 = theano.tensor.dvector('u2')
x0 = theano.tensor.dmatrix('x0')
y0 = theano.tensor.dvector('y0')
## Why doesn't dot work with scalars!??
## Why * doesn't support SharedVariable and TensorVariable
def f_rnn_cmpl(u1_t, u2_t, x_tm1, y_tm1, W_in1):
return ({}, [theano.dot(u1_t,W_in1) + u2_t* W_in2 + \
theano.dot(x_tm1, W), theano.dot(x_tm1, W_out)])
Y = theano.sandbox.scan.scan(f_rnn_cmpl,[u1,u2],[x0,y0],W_in1)
f4 = theano.function([u1,u2,x0,y0,W_in1], Y)
(x,y) = f4( numpy.array([[1,2],[1,2],[1,2]]), \
numpy.array([1,2,3]), \
numpy.array([[0,0]]), \
numpy.array([1]), \
numpy.array([[1,1],[1,1]]))
assert( numpy.all(x == numpy.array([[4.,5.],[18.,16.],[58.,43.]])))
assert( numpy.all(y == numpy.array([0.,7.,25.])))
# basic ESN using updates
def test_5():
W_in = theano.shared(numpy.array([1.,1.]), name='win')
W = theano.shared(numpy.array([[.1,0.],[.0,.1]]),name='w')
W_out= theano.shared(numpy.array([.5,1.]), name='wout')
u = theano.tensor.dvector('u')
x = theano.shared(numpy.array([0.,0.]),'x')
y0 = theano.tensor.dvector('y0')
def f_ESN(u_t):
return ( theano.dot(x,W_out), \
{ x: W_in*u_t + theano.dot(x,W) } )
Y = theano.sandbox.scan.scan(f_ESN,u,y0,[],outputs_taps={0:[]})
f5 = theano.function([u,y0],Y)
assert( numpy.allclose( f5( numpy.array([1,2,3]), numpy.array([0])), \
numpy.array([0.,1.4,3.15])))
# basic ESN using updates ; moving backwards
def test_6():
W_in = theano.shared(numpy.array([1.,1.]), name='win')
W = theano.shared(numpy.array([[.1,0.],[.0,.1]]),name='w')
W_out= theano.shared(numpy.array([.5,1.]), name='wout')
u = theano.tensor.dvector('u')
x = theano.shared(numpy.array([0.,0.]),'x')
y0 = theano.tensor.dvector('y0')
def f_ESN(u_t):
return ( theano.dot(x,W_out), \
{ x: W_in*u_t + theano.dot(x,W) } )
Y = theano.sandbox.scan.scan(f_ESN,u,y0,[],outputs_taps={0:[]}, \
go_backwards = True)
f6 = theano.function([u,y0],Y)
assert( numpy.allclose( f6( numpy.array([1,2,3]), numpy.array([0])), \
numpy.array([0., 4.5, 3.45])))
'''
TO TEST:
- test taps (for sequences and outputs )
- test gradient (one output)
- test gradient (multiple outputs)
- test gradient (go_backwards)
- test gradient (multiple outputs / some uncomputable )
- test gradient (truncate_gradient)
- test gradient (force_gradient)
- test inplace map
'''
if __name__=='__main__':
test_1()
test_2()
test_3()
test_4()
test_5()
test_6()
@@ -13,29 +13,24 @@ def info(*msg):
_logger.info('INFO theano.scan: '+' '.join(msg))
# Hashing a list; lists used by scan are lists of numbers, therefore a list
# can be hashed by hashing all the elements in the list
def hash_list(list):
# Hashing a dictionary or a list or a tuple or any type that is hashable with
# the hash() function
def hash_listsDictsTuples(x):
hash_value = 0
for v in list:
hash_value ^= hash(v)
return hash_value
# Hashing a dictionary; the dictionaries used by scan have numbers as keys
# and either numbers or lists of numbers as values
def hash_dict(dictionary):
hash_value = 0
for k,v in dictionary.iteritems():
# hash key
hash_value ^= hash(k)
if type(v) in (list,tuple):
hash_value ^= hash_list(v)
else:
hash_value ^= hash(v)
if type(x) == dict :
for k,v in x.iteritems():
hash_value ^= hash_listsDictsTuples(k)
hash_value ^= hash_listsDictsTuples(v)
elif type(x) in (list,tuple):
for v in x:
hash_value ^= hash_listsDictsTuples(v)
else:
try:
hash_value ^= hash(x)
except:
pass
return hash_value
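# For example, hash_listsDictsTuples({0: [-2], 1: (3, 4)}) xors the hashes
# of the keys 0 and 1 with the hashes of -2, 3 and 4; unhashable leaves are
# silently skipped and contribute nothing to the result.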
def scan(fn, sequences, initial_states, non_sequences, inplace_map={},
sequences_taps={}, outputs_taps = {},
n_steps = theano.tensor.zero(), force_gradient = False,
@@ -174,7 +169,8 @@ class Scan(theano.Op):
self.destroy_map = {}
if inplace:
self.destroy_map = inplace_map
for i in inplace_map.keys():
self.destroy_map.update({i: [inplace_map[i]] } )
self.seqs_taps = seqs_taps
self.outs_taps = outs_taps
@@ -192,13 +188,25 @@ class Scan(theano.Op):
self.fn = theano.function(inputs,outputs, \
updates = updates, mode = mode)
g_y = [outputs[0].type()]
g_args = theano.tensor.grad(outputs[0],inputs, g_cost = g_y[-1])
def compute_gradient(y, g_y):
gmap = theano.gradient.grad_sources_inputs( \
[(y,g_y)], theano.gof.graph.inputs([y]), False)
def zero(p):
return theano.tensor.TensorConstant(theano.tensor.TensorType(\
dtype=p.type.dtype, broadcastable=[]),
numpy.asarray(0,dtype = p.type.dtype))
return [gmap.get(p, zero(p)) for p in inputs]
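# i.e. compute_gradient returns the gradient of y w.r.t. every input, with
# a zero scalar constant substituted for inputs that y does not depend on,
# so that the per-output gradients can always be summed below.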
g_args = compute_gradient( outputs[0], g_y[-1])
# for all outputs compute gradients and then sum them up
for y in outputs[1:]:
g_y += [y.type()]
g_args_y = theano.tensor.grad(y,inputs, g_cost=g_y[-1])
g_args_y = compute_gradient( y,g_y[-1])
for i in xrange(len(g_args)):
g_args[i] += g_args_y[i]
@@ -244,6 +252,7 @@ class Scan(theano.Op):
(self.n_outs == other.n_outs) and\
(self.n_args == other.n_args)
return rval
def __hash__(self):
return hash(type(self)) ^ \
@@ -254,13 +263,13 @@ class Scan(theano.Op):
hash(self.go_backwards) ^\
hash(self.truncate_gradient) ^\
hash(self.n_args) ^ \
hash_list(self.outputs) ^ \
hash_list(self.inputs) ^ \
hash_list(self.g_ins) ^ \
hash_list(self.g_outs) ^ \
hash_dict(self.seqs_taps) ^\
hash_dict(self.outs_taps) ^\
hash_dict(self.updates)
hash_listsDictsTuples(self.outputs) ^ \
hash_listsDictsTuples(self.inputs) ^ \
hash_listsDictsTuples(self.g_ins) ^ \
hash_listsDictsTuples(self.g_outs) ^ \
hash_listsDictsTuples(self.seqs_taps) ^\
hash_listsDictsTuples(self.outs_taps) ^\
hash_listsDictsTuples(self.updates)
@@ -121,7 +121,12 @@ def exec_multilayer_conv_nnet(conv_mode, ss, bsize, imshp, kshps, nkerns, unroll
hidval1=outval.copy()
# ConvOp
conv_op = ConvOp(imshp, kshp, nkern, bsize, ss[0],ss[1], conv_mode, unroll_batch=unroll_batch, unroll_kern=unroll_kern, unroll_patch=unroll_patch)(inputs4, kerns4)
if unroll_patch:
conv_op = ConvOp(dx=ss[0],dy=ss[1], output_mode=conv_mode,
unroll_patch=unroll_patch)(inputs4, kerns4)
else:
conv_op = ConvOp(imshp, kshp, nkern, bsize, ss[0],ss[1], conv_mode,
unroll_batch=unroll_batch, unroll_kern=unroll_kern, unroll_patch=unroll_patch)(inputs4, kerns4)
l1shp=N.hstack((nkern,
getFilterOutShp(imshp, kshp, ss, conv_mode)))
propup2 = function([inputs4, kerns4], conv_op)
@@ -328,7 +333,7 @@ class TestConvOp(unittest.TestCase):
ssizess = [[(1,1),(1,2)],[(1,1),(2,2)]]
convmodes = ['valid','full']
do_convolve2=True
unroll = [(0,0,False),(0,0,True),(1,1,False),(2,2,False),(3,2,False)]#(batch,kern,patch)
unroll = [(0,0,True),(0,0,False),(1,1,False),(2,2,False),(3,2,False)]#(batch,kern,patch)
do_speed_test = False
# TODO: this version show a bug that was fixed
@@ -515,23 +520,32 @@ class TestConvOp(unittest.TestCase):
        for un_b, un_k, un_p in unroll:
            for ss in ssizes:
                print 'test_ConvOpGrad'
                print 'mode type:', mode, typ
                print 'imshp:', imshp
                print 'kshp:', kshp
                print 'un_b:', un_b
                print 'un_k:', un_k
                print 'ss:', ss
                print 'bsize:', bsize
                print 'nkern:', 4
                # print 'mode:', mode, 'type:', typ
                # print 'imshp:', imshp,
                # print 'kshp:', kshp
                # print 'un_b:', un_b,
                # print 'un_k:', un_k,
                # print 'un_p:', un_p
                # print 'ss:', ss,
                # print 'bsize:', bsize,
                # print 'nkern:', nkern

                def test_i(imgs):
                    if un_p and ss[0] == 1 and ss[1] == 1:
                        convop = ConvOp(dx=ss[0], dy=ss[1],
                                        output_mode=mode, unroll_patch=un_p)
                    else:
                        convop = ConvOp(imshp, kshp, nkern, bsize,
                                        ss[0], ss[1], output_mode=mode,
                                        unroll_batch=un_b, unroll_kern=un_k,
                                        unroll_patch=un_p)
                    return convop(imgs, kernvals)

                def test_k(kerns):
                    if un_p and ss[0] == 1 and ss[1] == 1:
                        convop = ConvOp(dx=ss[0], dy=ss[1],
                                        output_mode=mode, unroll_patch=un_p)
                    else:
                        convop = ConvOp(imshp, kshp, nkern, bsize,
                                        ss[0], ss[1], output_mode=mode,
                                        unroll_batch=un_b, unroll_kern=un_k,
                                        unroll_patch=un_p)
                    return convop(imgvals, kerns)

                print mode, imshp, kshp, un_b, un_k, ss
                # TODO: the tolerance needed to pass is very high for
                # float32 (0.17). Is this acceptable? Expected?
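
                # One hedged way to read the 0.17 figure: as a relative error
                # between analytic and numeric gradients (stand-in values,
                # not measured here):
                #     analytic, numeric = 1.0, 1.2
                #     abs(analytic - numeric) / max(abs(analytic),
                #                                   abs(numeric))  # ~0.17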
......
......@@ -91,7 +91,6 @@ class T_Scan(unittest.TestCase):
        utt.seed_rng()

    # generator network: only one output, type scalar; no sequence or
    # non-sequence arguments
    def test_1(self):
......@@ -243,9 +242,11 @@ class T_Scan(unittest.TestCase):
        Y = theano.sandbox.scan.scan(f_rnn_shared, u, x0, [], \
                                     sequences_taps={0: [-2]},
                                     outputs_taps={0: [-1, -2]})
        f7 = theano.function([u, x0], Y)
        #print f7([1,2,3,4],[1,2])
        v_u = numpy.asarray([1., 2., 3., 4.])
        v_x0 = numpy.asarray([1., 2.])
        out = numpy.asarray([3.1, 5.3])
        assert (compareArrays(out, f7(v_u, v_x0)))
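
        # compareArrays is a helper defined earlier in this test file; a
        # hypothetical stand-in with the presumed semantics would be:
        #     def compareArrays(a, b, rtol=1e-5, atol=1e-8):
        #         return numpy.allclose(numpy.asarray(a), numpy.asarray(b),
        #                               rtol=rtol, atol=atol)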
# simple rnn, one input, one state, weights for each; input/state are
# vectors, weights are scalars; using shared variables and past
......@@ -263,16 +264,46 @@ class T_Scan(unittest.TestCase):
        Y = theano.sandbox.scan.scan(f_rnn_shared, u, x0, [], \
                                     sequences_taps={0: [-2, 2]},
                                     outputs_taps={0: [-1, -2]})
        f8 = theano.function([u, x0], Y)
        v_u = numpy.array([1., 2., 3., 4., 5., 6.])
        v_x0 = numpy.array([1., 2.])
        out = numpy.array([3.6, 6.4])
        assert (compareArrays(out, f8(v_u, v_x0)))
        #print f8([1,2,3,4,5,6],[1,2])
    '''
    # simple rnn ; compute inplace
    def test_9(self):
        u = theano.tensor.dvector()
        mu = theano.Param(u, mutable=True)
        x0 = theano.tensor.dvector()
        W_in = theano.shared(.1)
        W = theano.shared(1.)
        def f_rnn_shared(u_t, x_tm1):
            return (u_t*W_in + x_tm1*W, {})
        Y = theano.sandbox.scan.scan(f_rnn_shared, u, x0, [], \
                                     inplace_map={0: 0})
        f9 = theano.function([mu, x0], Y,  # mode = 'FAST_RUN'
                             mode='DEBUG_MODE')
        v_u = numpy.array([1., 2., 3.])
        v_x0 = numpy.array([1.])
        out = f9(v_u, v_x0)
        v_out = numpy.array([1.1, 1.3, 1.6])
        assert (compareArrays(out, v_out))
        print v_u
        assert (compareArrays(v_u, out))
    '''
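
    # What the disabled test above exercises: with inplace_map={0:0}, output
    # 0 reuses input 0's buffer, so v_u itself should hold the result. A
    # plain-numpy sketch of that aliasing (hypothetical, not the Op itself):
    #     v_u = numpy.array([1., 2., 3.]); out = v_u     # aliased buffers
    #     for t in xrange(len(v_u)):
    #         out[t] = v_u[t]*0.1 + (out[t-1] if t > 0 else 1.0)*1.0
    #     # v_u now holds [1.1, 1.3, 1.6] -- the scan output, in place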
    # test gradient simple network
    def test_10(self):
        pass

    '''
    TO TEST:
        - test taps (for sequences and outputs)
        - test gradient (one output)
        - test gradient (multiple outputs)
        - test gradient (go_backwards)
......@@ -280,7 +311,6 @@ class T_Scan(unittest.TestCase):
        - test gradient (truncate_gradient)
        - test gradient (force_gradient)
        - test gradient (taps past/future)
        - test inplace map
    '''
......
......@@ -343,12 +343,29 @@ class CSM(gof.Op):
"""
        data = tensor.as_tensor_variable(data)

        # Note that we use `view(numpy.int32)` in addition to providing the
        # 'int32' dtype to `numpy.asarray`. This is because on some computers
        # (e.g. a Windows 32 bits machine), we can have the following assert
        # fail:
        #   x = numpy.array([0], dtype=numpy.intc)
        #   y = numpy.asarray(x, dtype=numpy.int32)
        #   assert y.dtype.num == numpy.dtype(numpy.int32).num
        # while the assert does *not* fail when replacing the second line by:
        #   y = numpy.asarray(x, dtype='int32').view(numpy.int32)
        # This is a known defect in Numpy. For more information see ticket
        # http://projects.scipy.org/numpy/ticket/870
        # Note also that it is important to keep "dtype='int32'" when calling
        # `numpy.asarray`. This is because `view` is only some kind of cast to
        # the exact data type we want to use. If a conversion is required (e.g.
        # from int64 to int32), it must be done in the call to `numpy.asarray`.
        if not isinstance(indices, tensor.TensorVariable):
            indices = numpy.asarray(indices, dtype='int32')
            indices = numpy.asarray(indices, dtype='int32').view(numpy.int32)
        if not isinstance(indptr, tensor.TensorVariable):
            indptr = numpy.asarray(indptr, dtype='int32')
            indptr = numpy.asarray(indptr, dtype='int32').view(numpy.int32)
        if not isinstance(shape, tensor.TensorVariable):
            shape = numpy.asarray(shape, dtype='int32')
            shape = numpy.asarray(shape, dtype='int32').view(numpy.int32)
        indices = tensor.as_tensor_variable(indices)
        indptr = tensor.as_tensor_variable(indptr)
        shape = tensor.as_tensor_variable(shape)
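
        # The workaround is easy to check in isolation; a minimal sketch (on
        # unaffected platforms both forms agree, so the assert below only
        # discriminates on the 32-bit builds described above):
        #     x = numpy.array([0, 1, 2], dtype=numpy.intc)
        #     y = numpy.asarray(x, dtype='int32').view(numpy.int32)
        #     assert y.dtype.num == numpy.dtype(numpy.int32).num
        #     assert (y == x).all()   # values are unchanged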
......
......@@ -169,15 +169,17 @@ class test_structureddot(unittest.TestCase):
        # iterate for a few different random graph patterns
        for i in range(10):
            spmat = sp.csc_matrix((4, 6), dtype=sparse_dtype)
            for i in range(5):
            for k in range(5):
                # set non-zeros in random locations (row x, col y)
                x = numpy.floor(numpy.random.rand()*spmat.shape[0])
                y = numpy.floor(numpy.random.rand()*spmat.shape[1])
                spmat[x, y] = numpy.random.rand()*10
            spmat = sp.csc_matrix(spmat)

            kerns = tensor.Tensor(broadcastable=[False],
                                  dtype=sparse_dtype)('kerns')
            images = tensor.Tensor(broadcastable=[False, False],
                                   dtype=dense_dtype)('images')

            output_dtype = theano.scalar.upcast(sparse_dtype, dense_dtype)
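
            # upcast presumably follows numpy's promotion rules, e.g.:
            #     theano.scalar.upcast('float32', 'float64')  # -> 'float64'
            #     theano.scalar.upcast('int32', 'float32')    # -> 'float64'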
##
......@@ -186,7 +188,8 @@ class test_structureddot(unittest.TestCase):
        # build symbolic theano graph
        def buildgraphCSC(kerns, images):
            csc = CSC(kerns, spmat.indices[:spmat.size],
                      spmat.indptr, spmat.shape)
            assert csc.type.dtype == sparse_dtype
            rval = structured_dot(csc, images.T)
            assert rval.type.dtype == output_dtype
......@@ -197,8 +200,12 @@ class test_structureddot(unittest.TestCase):
        # compute theano outputs
        kernvals = spmat.data[:spmat.size]
        imvals = 1.0 + 1.0 * numpy.array(
                numpy.arange(bsize*spmat.shape[1]).reshape(
                    bsize, spmat.shape[1]), dtype=dense_dtype)
        #print('dense_dtype=%s' % dense_dtype)
        #print('sparse_dtype=%s' % sparse_dtype)
        #print('i=%s' % i)
        print 'kerntype', str(kernvals.dtype), kernvals.dtype.num
        outvals = f(kernvals, imvals)
        print 'YAY'
......@@ -210,9 +217,10 @@ class test_structureddot(unittest.TestCase):
        assert _is_dense(c)
        assert str(outvals.dtype) == output_dtype
        assert numpy.all(numpy.abs(outvals -
                numpy.array(c, dtype=output_dtype)) < 1e-4)

        if (sparse_dtype.startswith('float') and
                dense_dtype.startswith('float')):
            utt.verify_grad(buildgraphCSC,
                            [kernvals, imvals])
......
......@@ -12,6 +12,7 @@ import blas
import xlogx
import raw_random, randomstreams
import shared_randomstreams
from randomstreams import RandomStreams
......