fixed some bug in scan_test

cca41015 · Razvan Pascanu · 668f41e0 · 668f41e0 · cca41015 · cca41015
--- a/theano/sandbox/my_test_scan.py
+++ b/theano/sandbox/my_test_scan.py
-import numpy
-import theano
-import theano.sandbox.scan
-
-
-
-# generator network, only one output , type scalar ; no sequence or 
-# non sequence arguments
-def test_1():
-  def f_pow2(x_tm1):
-    return (2*x_tm1, {})
-
-  s = theano.tensor.dvector()
-  n_steps = theano.tensor.dscalar()
-  Y = theano.sandbox.scan.scan(f_pow2, [],s, [],n_steps = n_steps)
-
-  f1 = theano.function([s,n_steps], Y)
-  assert( numpy.any(f1([1],3)== [2,4,8])  )
-
-# simple rnn, one input, one state, weights for each; input/state are 
-# vectors, weights are scalars
-def test_2():
-    def f_rnn(u_t,x_tm1,W_in, W):
-        return (u_t*W_in+x_tm1*W, {})
-
-    u    = theano.tensor.dvector()
-    x0   = theano.tensor.dvector()
-    W_in = theano.tensor.dscalar()
-    W    = theano.tensor.dscalar()
-
-    Y = theano.sandbox.scan.scan(f_rnn, u,x0,[W_in,W])
-
-    f2 = theano.function([u,x0,W_in,W], Y)
-    
-    assert(numpy.any(f2([1,2,3,4],[1],.1,1)== numpy.array([1.1,1.3,1.6,2.])))
-
-# simple rnn, one input, one state, weights for each; input/state are 
-# vectors, weights are scalars; using shared variables
-def test_3():
-
-    u    = theano.tensor.dvector()
-    x0   = theano.tensor.dvector()
-    W_in = theano.shared(.1, name = 'w_in')
-    W    = theano.shared(1., name ='w')
-
-    def f_rnn_shared(u_t,x_tm1):
-        return (u_t*W_in+x_tm1*W, {})
-
-    Y = theano.sandbox.scan.scan(f_rnn_shared, u,x0,[])
-
-    f3 = theano.function([u,x0], Y)
-    
-    assert(numpy.any(f3([1,2,3,4],[1])== numpy.array([1.1,1.3,1.6,2.])))
-
-
-# some rnn with multiple outputs and multiple inputs; other dimension 
-# instead of scalars/vectors
-def test_4():
-
-    W_in2 = theano.shared(numpy.array([1.,2.]), name='win2')
-    W     = theano.shared(numpy.array([[2.,1.],[1.,1.]]), name='w')
-    W_out = theano.shared(numpy.array([.5,1.]), name = 'wout')
-    W_in1 = theano.tensor.dmatrix('win')
-    u1    = theano.tensor.dmatrix('u1')
-    u2    = theano.tensor.dvector('u2')
-    x0    = theano.tensor.dmatrix('x0')
-    y0    = theano.tensor.dvector('y0')
-
-## Why dot doesn;t work with scalars !??
-## Why  *  doesn't support SharedVariable and TensorVariable
-
-    def f_rnn_cmpl(u1_t, u2_t, x_tm1, y_tm1, W_in1):
-        return ({}, [theano.dot(u1_t,W_in1) + u2_t* W_in2 + \
-                    theano.dot(x_tm1, W), theano.dot(x_tm1, W_out)])
-
-    Y = theano.sandbox.scan.scan(f_rnn_cmpl,[u1,u2],[x0,y0],W_in1)
-
-    f4 = theano.function([u1,u2,x0,y0,W_in1], Y)
-
-    (x,y) =  f4( numpy.array([[1,2],[1,2],[1,2]]), \
-              numpy.array([1,2,3]),             \
-              numpy.array([[0,0]]),             \
-              numpy.array([1]),                 \
-              numpy.array([[1,1],[1,1]]))
-
-    assert( numpy.all(x == numpy.array([[4.,5.],[18.,16.],[58.,43.]])))
-    assert( numpy.all(y == numpy.array([0.,7.,25.])))
-
-
-# basic ESN using updates 
-def test_5(): 
-    W_in = theano.shared(numpy.array([1.,1.]), name='win')
-    W    = theano.shared(numpy.array([[.1,0.],[.0,.1]]),name='w')
-    W_out= theano.shared(numpy.array([.5,1.]), name='wout')
-
-    u  = theano.tensor.dvector('u')
-    x  = theano.shared(numpy.array([0.,0.]),'x')
-    y0 = theano.tensor.dvector('y0')
-
-    def f_ESN(u_t):
-        return ( theano.dot(x,W_out), \
-         { x: W_in*u_t + theano.dot(x,W) } )
-
-    Y = theano.sandbox.scan.scan(f_ESN,u,y0,[],outputs_taps={0:[]})
-
-    f5 = theano.function([u,y0],Y)
-    assert( f5( numpy.array([1,2,3]), numpy.array([0])) == \
-             numpy.array([0.,1.4,3.15]))
-
-# basic ESN using updates ; moving backwards
-def test_6(): 
-    W_in = theano.shared(numpy.array([1.,1.]), name='win')
-    W    = theano.shared(numpy.array([[.1,0.],[.0,.1]]),name='w')
-    W_out= theano.shared(numpy.array([.5,1.]), name='wout')
-
-    u  = theano.tensor.dvector('u')
-    x  = theano.shared(numpy.array([0.,0.]),'x')
-    y0 = theano.tensor.dvector('y0')
-
-    def f_ESN(u_t):
-        return ( theano.dot(x,W_out), \
-         { x: W_in*u_t + theano.dot(x,W) } )
-
-    Y = theano.sandbox.scan.scan(f_ESN,u,y0,[],outputs_taps={0:[]}, \
-                                 go_backwards = True)
-
-    f6 = theano.function([u,y0],Y)
-    assert( f6( numpy.array([1,2,3]), numpy.array([0])) == \
-             numpy.array([0., 4.5, 3.45]))
-
-
-'''
- TO TEST: 
-    - test taps (for sequences and outputs )
-    - test gradient (one output)
-    - test gradient (multiple outputs)
-    - test gradient (go_bacwards) 
-    - test gradient (multiple outputs / some uncomputable )
-    - test gradient (truncate_gradient)
-    - test gradient (force_gradient) 
-    - test inplace map
-'''
-
-
-if __name__=='__main__':
-
-    test_1()
-    test_2()
-    test_3()
-    test_4()
-    test_5()
-    test_6()
-
-
-
-
--- a/theano/sandbox/scan.py
+++ b/theano/sandbox/scan.py
@@ -174,7 +174,8 @@ class Scan(theano.Op):

        self.destroy_map = {}
        if inplace:
-            self.destroy_map = inplace_map
+            for i in inplace_map.keys():
+                self.destroy_map.update({i: [inplace_map[i]] } )

        self.seqs_taps      = seqs_taps
        self.outs_taps      = outs_taps
@@ -192,13 +193,25 @@ class Scan(theano.Op):
   
        self.fn = theano.function(inputs,outputs, \
                                   updates = updates, mode = mode)
-
+        
        g_y = [outputs[0].type()]
-        g_args = theano.tensor.grad(outputs[0],inputs, g_cost = g_y[-1])
+
+        def compute_gradient(y, g_y):
+            gmap = theano.gradient.grad_sources_inputs( \
+                        [(y,g_y)], theano.gof.graph.inputs([y]), False)
+            def zero(p):
+              return theano.tensor.TensorConstant(theano.tensor.TensorType(\
+                      dtype=p.type.dtype, broadcastable=[]),
+                      numpy.asarray(0,dtype = p.type.dtype))
+
+            return [gmap.get(p, zero(p)) for p in inputs]
+
+
+        g_args = compute_gradient( outputs[0], g_y[-1]) 
        # for all outputs compute gradients and then sum them up
        for y in outputs[1:]:
            g_y += [y.type()]
-            g_args_y = theano.tensor.grad(y,inputs, g_cost=g_y[-1])
+            g_args_y = compute_gradient( y,g_y[-1])
            for i in xrange(len(g_args)):
                g_args[i] += g_args_y[i]

@@ -244,6 +257,7 @@ class Scan(theano.Op):
               (self.n_outs == other.n_outs) and\
               (self.n_args == other.n_args)
      return rval
+      

    def __hash__(self):
      return hash(type(self)) ^ \

--- a/theano/sandbox/test_scan.py
+++ b/theano/sandbox/test_scan.py
@@ -91,7 +91,6 @@ class T_Scan(unittest.TestCase):
        utt.seed_rng()


-
    # generator network, only one output , type scalar ; no sequence or 
    # non sequence arguments
    def test_1(self):
@@ -243,9 +242,11 @@ class T_Scan(unittest.TestCase):
        Y = theano.sandbox.scan.scan(f_rnn_shared, u,x0, [], \
                 sequences_taps = {0:[-2]}, outputs_taps = {0:[-1,-2]})

-        f7 = theano.function([u,x0], Y)
-        
-        #print f7([1,2,3,4],[1,2])
+        f7   = theano.function([u,x0], Y)
+        v_u  = numpy.asarray([1.,2.,3.,4.])
+        v_x0 = numpy.asarray([1.,2.])
+        out  = numpy.asarray([3.1,5.3])
+        assert (compareArrays( out, f7(v_u, v_x0)))
        
    # simple rnn, one input, one state, weights for each; input/state are 
    # vectors, weights are scalars; using shared variables and past 
@@ -263,16 +264,48 @@ class T_Scan(unittest.TestCase):
        Y = theano.sandbox.scan.scan(f_rnn_shared, u,x0, [], \
                 sequences_taps = {0:[-2,2]}, outputs_taps = {0:[-1,-2]})

-        f8 = theano.function([u,x0], Y)
-        
-        #print f8([1,2,3,4,5,6],[1,2])
-        
+        f8   = theano.function([u,x0], Y)
+        v_u  = numpy.array([1.,2.,3.,4.,5.,6.])
+        v_x0 = numpy.array([1.,2.])
+        out  = numpy.array([3.6, 6.4])

+        assert (compareArrays( out, f8(v_u, v_x0) ) )
+        
+    '''
+    NOTE : BROKEN .. inplace doesn't work due to a stochasticOptimization
+    TODO : talk to James

+    # simple rnn ; compute inplace
+    def test_9(self):
+        
+        u    = theano.tensor.dvector()
+        mu   = theano.Param( u, mutable = True)
+        x0   = theano.tensor.dvector()
+        W_in = theano.shared(.1)
+        W    = theano.shared(1.)
+
+        def f_rnn_shared(u_t, x_tm1):
+            return (u_t*W_in + x_tm1*W, {})
+        Y = theano.sandbox.scan.scan(f_rnn_shared, u, x0,[], \
+                    inplace_map={0:0} )
+        f9   = theano.function([mu,x0], Y , #mode = 'FAST_RUN')
+                                mode = 'DEBUG_MODE')
+        v_u  = numpy.array([1.,2.,3.])
+        v_x0 = numpy.array([1.])
+
+        out = f9(v_u, v_x0)
+        v_out = numpy.array([1.1,1.3,1.6])
+
+        assert (compareArrays(out, v_out))
+        print v_u
+        assert (compareArrays(v_u, out))
+     '''
+    # test gradient of a simple network (placeholder: not implemented yet)
+    def test_10(self):
+        pass

    '''
     TO TEST: 
-        - test taps (for sequences and outputs )
        - test gradient (one output)
        - test gradient (multiple outputs)
        - test gradient (go_bacwards) 
@@ -280,7 +313,6 @@ class T_Scan(unittest.TestCase):
        - test gradient (truncate_gradient)
        - test gradient (force_gradient)
        - test_gradient (taps past/future)
-        - test inplace map
    '''