imported patch dx

9d786083 · Frederic Bastien · 544ab6c3 · 9d786083 · 9d786083
--- a/theano/sandbox/conv.py
+++ b/theano/sandbox/conv.py
@@ -8,7 +8,7 @@ def getFilterOutShp(inshp, kshp, (dx,dy)=(1,1), mode='valid'):
    s = -1 if mode=='valid' else 1
    inshp, kshp = N.array(inshp), N.array(kshp)
    return  N.int64(N.ceil((inshp[1:] + s*kshp - s*1)/\
-            N.array([dy,dx], dtype='float')))
+            N.array([dx,dy], dtype='float')))

 class ConvOp(Op):
    """
@@ -55,9 +55,12 @@ class ConvOp(Op):
            else:
                print "OPTIMISATION WARNING: in ConvOp.__init__() unroll_kern(%s) should be 0 or a multiple of nkern(%s)We revert it to 1. This won't change the result, but may make it slower."%(str(self.unroll_kern),str(self.nkern))
                self.unroll_kern=1
-        if self.dx!=1 or self.dy!=1:
-            print "Warning, dx!=1 or dy!=1 only supported in python mode!"
-            raise NotImplementedError()
+                print "OPTIMISATION WARNING: in ConvOp.__init__() unroll_kern(%s) should be 0 or a multiple of nkern(%s)We revert it to 1. This won't change the result, but may make it slower."%(str(self.unroll_kern),str(self.nkern))
+        if (self.dx!=1 or self.dy!=1) and self.unroll_batch==0 and self.unroll_kern==0:
+            print "WARNING: dx!=1 or dy!=1 is only supported with unrolling! We will unroll by 1"
+            self.unroll_kern=1
+            self.unroll_batch=1
+
        self.outshp = getFilterOutShp(self.imshp, kshp, (dx,dy), output_mode)
        self.out_mode = output_mode
        if not self.out_mode in ["valid", "full"]:
@@ -132,6 +135,8 @@ class ConvOp(Op):
        * will crash if filter the same size as input image
        """

+        assert self.dx==1 and self.dy==1#We didn't implemented the grad for that case. Can this be done?
+        
        ####### Determine gradient on kernels ########
        if inputs.ndim == 3:
            inputs = tensor.shape_padleft(inputs,1)
@@ -846,8 +851,12 @@ if ((!%(z)s)
 }

 int Os[2];
-if (mode == FULL) {Os[0] = dim_im[0]+dim_ker[0]-1; Os[1] = dim_im[1]+dim_ker[1]-1;}
-else {Os[0] = dim_im[0]-dim_ker[0]+1; Os[1] = dim_im[1]-dim_ker[1]+1;}
+Os[0]=%(self_outshp0)s;
+Os[1]=%(self_outshp1)s;
+//I keep the formula to calculte Os in case we need it in the futur.
+//if (mode == FULL) {Os[0] = (int)ceil((dim_im[0]+dim_ker[0]-1)/float(%(self_dx)s)); Os[1] = ceil((dim_im[1]+dim_ker[1]-1)/float(%(self_dy)s));}
+//else {Os[0] = (int)ceil((dim_im[0]-dim_ker[0]+1)/float(%(self_dx)s)); Os[1] = (int)ceil((dim_im[1]-dim_ker[1]+1)/float(%(self_dy)s));}
+
 for(int b=0;b< %(self_bsize)s ;b+=%(unroll_bsize)s){
  for(int n_kern=0;n_kern<%(self_nkern)s;n_kern+=%(unroll_ksize)s){

@@ -868,12 +877,14 @@ for(int b=0;b< %(self_bsize)s ;b+=%(unroll_bsize)s){

      int new_m;

-      for (int m=0; m < Os[0]; m++) {
+      for (int iter_m=0; iter_m < Os[0]; iter_m++) {
        // Reposition index into input image based on requested output size
-        if (mode == FULL) new_m = m ;
-        else new_m = (m+dim_ker[0]-1);
+        int pos_m = iter_m*%(self_dx)s;//The position of the patch in the image
+        if (mode == FULL) new_m = pos_m ;
+        else new_m = (pos_m+dim_ker[0]-1);

-        for (int n=0; n < Os[1]; n++) {  // loop over columns 
+        for (int iter_n=0; iter_n < Os[1]; iter_n++) {  // loop over columns 
+          int pos_n=iter_n*%(self_dy)s;
        """%d
    ret+=my_dup("%(type)s sum%(unroll_iter)s=0;", unroll_bsize*unroll_ksize)
    ret+="""
@@ -897,7 +908,7 @@ for(int b=0;b< %(self_bsize)s ;b+=%(unroll_bsize)s){
              }else{
                //do the part where kernel is to the right of the img

-                int k=0,max_k=max((int)(n-dim_im[1])+1,0);
+                int k=0,max_k=max((int)(pos_n-dim_im[1])+1,0);
                if(fill_value!=0){ 
                
                  for(k=0;k<max_k;k++){
@@ -908,11 +919,11 @@ for(int b=0;b< %(self_bsize)s ;b+=%(unroll_bsize)s){
                }else {k=max_k;}
                
                //do the part where the kernel is on the img
-                max_k=min(n+1,(int)dim_ker[1]);
+                max_k=min(pos_n+1,(int)dim_ker[1]);
 """%d
    ret+=my_dup("const %(type)s * idx_in%(unroll_iter)s=&in%(unroll_iter)s[ind0*dim_im[1]];", unroll_bsize)
    ret+="""
-                for (int ind1=n-k; k<max_k; k++,ind1--) {
+                for (int ind1=pos_n-k; k<max_k; k++,ind1--) {

 """%d
    ret+=my_dup2("sum%(unroll_iter)s+= idx_hvals%(unroll_kiter)s[k] * idx_in%(unroll_biter)s[ind1];")
@@ -931,7 +942,7 @@ for(int b=0;b< %(self_bsize)s ;b+=%(unroll_bsize)s){
    ret+=my_dup("const %(type)s* idx_in%(unroll_iter)s=&in%(unroll_iter)s[ind0*dim_im[1]];", unroll_bsize)
    ret+=my_dup("const %(type)s* idx_hvals%(unroll_iter)s=&hvals%(unroll_iter)s[j*dim_ker[1]];",unroll_ksize)
    ret+="""
-              int new_n = (n+dim_ker[1]-1);
+              int new_n = (pos_n+dim_ker[1]-1);

              for (int k=0,last=new_n; k < dim_ker[1]; k++,last--) {
 """%d
@@ -942,7 +953,7 @@ for(int b=0;b< %(self_bsize)s ;b+=%(unroll_bsize)s){

          }//for j
 """%d
-    ret+=my_dup("out%(unroll_iter)s[m*dim_zz[1]+n] %(affectation)s sum%(unroll_iter)s;", unroll_bsize*unroll_ksize)
+    ret+=my_dup("out%(unroll_iter)s[iter_m*dim_zz[1]+iter_n] %(affectation)s sum%(unroll_iter)s;", unroll_bsize*unroll_ksize)
    ret+="""
        }//for n
      }//for m

--- a/theano/sandbox/test_conv.py
+++ b/theano/sandbox/test_conv.py
@@ -90,16 +90,18 @@ def exec_multilayer_conv_nnet(conv_mode, ss, bsize, imshp, kshps, nkerns, unroll
                ####### test with new sp.convolve2 function ######
                time1 = time.time()
                hid, outshp2 = convolve2(kern, kshp, nkern, img, imshp,  
-                                         bsize, (1,1), mode=conv_mode)
+                                         bsize, (ss[0],ss[1]), mode=conv_mode)
                propup = function([kern, img], hid)
                propup1 = function([kern, img], hid,mode=Mode(linker="py"))

                hidval  = propup(w_flip.reshape(nkern,-1), imgval.reshape(bsize,-1))
-                hidval  = hidval.reshape(bsize,nkern,outshp2[-2],outshp2[-1])[:,:,::ss[0],::ss[1]]
+                hidval  = hidval.reshape(bsize,nkern,outshp2[-2],outshp2[-1])
+#                hidval = hidval[:,:,::ss[0],::ss[1]]
                hidval = hidval.reshape(bsize, -1)
                for i in range(repeat):
                    hidval1 = propup1(w_flip.reshape(nkern,-1), imgval.reshape(bsize,-1))
-                hidval1  = hidval1.reshape(bsize,nkern,outshp2[-2],outshp2[-1])[:,:,::ss[0],::ss[1]]
+                hidval1  = hidval1.reshape(bsize,nkern,outshp2[-2],outshp2[-1])
+#                hidval1  = hidval1[:,:,::ss[0],::ss[1]]
                hidval1 = hidval1.reshape(bsize, -1)

                assert (N.abs(hidval-hidval1)<1e-5).all()
@@ -113,7 +115,7 @@ def exec_multilayer_conv_nnet(conv_mode, ss, bsize, imshp, kshps, nkerns, unroll
                hidval1=outval.copy()

            # ConvOp
-            conv_op = ConvOp(imshp, kshp, nkern, bsize, 1,1, conv_mode, unroll_batch=unroll_batch, unroll_kern=unroll_kern)(inputs4, kerns4)
+            conv_op = ConvOp(imshp, kshp, nkern, bsize, ss[0],ss[1], conv_mode, unroll_batch=unroll_batch, unroll_kern=unroll_kern)(inputs4, kerns4)
            l1shp=N.hstack((nkern,
                            getFilterOutShp(imshp, kshp, ss, conv_mode)))
            propup2 = function([inputs4, kerns4], conv_op)
@@ -122,14 +124,14 @@ def exec_multilayer_conv_nnet(conv_mode, ss, bsize, imshp, kshps, nkerns, unroll
            time1 = time.time()
            for i in range(repeat):
                hidval2_ = propup2(imgval,w_flip)
-            hidval2 = hidval2_[:,:,0::ss[0],0::ss[1]]
+            hidval2 = hidval2_#[:,:,0::ss[0],0::ss[1]]
            tctot += time.time() - time1

            if conv_op_py:
                time1 = time.time()
                for i in range(repeat):
                    hidval3_ = propup3(imgval,w_flip)
-                hidval3 = hidval3_[:,:,0::ss[0],0::ss[1]]
+                hidval3 = hidval3_#[:,:,0::ss[0],0::ss[1]]
                tpytot += time.time() - time1
                assert (N.abs(hidval2-hidval3)<1e-5).all()
            else:
@@ -235,7 +237,7 @@ class TestConvOp(unittest.TestCase):

                    # compute with new convolve2 (no timing info)
                    output4, outshp4  = convolve2(kerns, kshp, nkern, input,\
-                            imshp, bsize, (1,1), bias=bias, mode=conv_mode)
+                            imshp, bsize, (ss[0],ss[1]), bias=bias, mode=conv_mode)
 #                    print 'output4', output4

                    ttime1 = time.time()
@@ -244,7 +246,7 @@ class TestConvOp(unittest.TestCase):
 #                    print 'out4', out4, img1d, filtersflipped
                    tconv2 += [time.time() - ttime1]
                    out4 = out4.reshape(bsize, nkern, outshp4[1], outshp4[2])
-                    out4 = out4[:,:,0::ss[0],0::ss[1]]
+                    out4 = out4#[:,:,0::ss[0],0::ss[1]]
                    out4 = out4.reshape(bsize, -1)

                    # compute with ConvOp
@@ -252,18 +254,18 @@ class TestConvOp(unittest.TestCase):
                    inputs=dmatrix3()
                    kerns3=dmatrix3()
                    bia=T.dscalar()
-                    conv_op = ConvOp(imshp, kshp, nkern, bsize, 1,1, conv_mode)(inputs, kerns3)
+                    conv_op = ConvOp(imshp, kshp, nkern, bsize, ss[0],ss[1], conv_mode)(inputs, kerns3)
                    f2 = function([inputs, kerns3], conv_op, mode=Mode(linker="c"))
                    f3 = function([inputs, kerns3], conv_op, mode=Mode(linker="py"))

                    ttime1 = time.time()
                    out2_ = f2(img2d, filtersflipped)
-                    out2__ = out2_[:,:,0::ss[0],0::ss[1]]
+                    out2__ = out2_#[:,:,0::ss[0],0::ss[1]]
                    tconvop += [time.time() - ttime1]
                    out2___ = out2__.copy()
                    out2 = out2___ + biasvals.reshape(1,nkern,1,1)
                    out3_ = f3(img2d, filtersflipped)
-                    out3__ = out3_[:,:,0::ss[0],0::ss[1]]
+                    out3__ = out3_#[:,:,0::ss[0],0::ss[1]]
                    out3___ = out3__.copy()
                    out3 = out3___ + biasvals.reshape(1,nkern,1,1)
                    assert (N.abs(out2_-out3_)<1e-5).all()
@@ -303,6 +305,7 @@ class TestConvOp(unittest.TestCase):

    def test_multilayer_conv(self):
        # fixed parameters
+        # test multiple configurations at the same time
        bsizes = [6,6] # batch size
        imshp_starts = [(1,13,14),(1,4,3)]
        kshpss = ([[5,6],[7,4]],[[2,2],[2,2]])
@@ -311,6 +314,7 @@ class TestConvOp(unittest.TestCase):
        convmodes = ['valid','full']
        do_convolve2=True
        unroll = [(0,0),(1,1),(2,2),(3,2)]#(batch,kern)
+        do_speed_test = False

        # TODO: this version show a bug that was fixed
        # the test is included in the upper test.
@@ -319,15 +323,6 @@ class TestConvOp(unittest.TestCase):
 #        nkerns = [2,2] # per output pixel
 #        ssizes = [(1,1),(2,2)]#2,2)]

-        #test speed
-#        bsize = 10 # batch size
-#        imshp_start = (1,50,49)#un square shape to test more corner case.
-#        kshps = ([11,12],[12,11])#un square shape to test more corner case.
-#        nkerns = [20,20] # per output pixel
-#        ssizes = [(1,1),]#(1,1)]#(2,2) bugged
-#        convmodes = ['valid','full']
-#        do_convolve2=False
-
        N.set_printoptions(threshold=N.nan)

        # symbolic stuff
@@ -338,7 +333,7 @@ class TestConvOp(unittest.TestCase):
        for i in range(len(kshpss)):
            assert len(kshpss[i])==len(nkernss[i])==len(kerns)

-        if False:
+        if do_speed_test:
            # calculate the speed up of different combination of unroll
            # put the paramter to the same you will try. 
            
@@ -420,9 +415,46 @@ class TestConvOp(unittest.TestCase):


    def test_ConvOpGrad(self):
+        """Verify the gradient of convolve2 in 'valid' and 'full' modes."""
+        nkern = 3
+        bsize = 2
+        imgs  = T.dmatrix('imgs')
+        kerns = T.dmatrix('kerns')
+        kshps = [(3,3), (5,5)]
+
+        for mode in 'valid', 'full':
+
+            for imshp in (5,5),(2,3,3),(3,6,6): # (12,10), (3,12,11):
+                # 'full' mode should support kernels bigger than the input
+                if mode == 'valid' and (kshps[0] > imshp[1]):
+                    continue
+
+                visdim = 1 if len(imshp)!=3 else imshp[0]
+                for kshp in kshps:
+                    imgvals = N.random.random(N.hstack((bsize,imshp)))
+#                    print 'imgvals.shape = ', imgvals.shape
+                    imgvals = imgvals.reshape(bsize,-1)
+
+                    if visdim == 1: 
+                        kernvals = N.random.rand(nkern,kshp[0],kshp[1])
+                    else:
+                        kernvals = N.random.rand(nkern,visdim,kshp[0],kshp[1])
+                    kernvals = kernvals.reshape(nkern,-1)
+
+                    def testf(imgs, kerns):
+                        out, outshp = convolve2(kerns, kshp, nkern, imgs, 
+                                                   imshp, bsize, mode=mode)
+                        return out
+
+                    try:
+                        utt.verify_grad(testf, [imgvals, kernvals])
+                    except NotImplementedError, e:
+                        print e
+
+    def test_ConvOpGrad32(self):
        """
        test the gradient in float and double
        """
        nkern = 4
        bsize = 3
        types = ["float32", "float64"]
@@ -468,19 +500,22 @@ class TestConvOp(unittest.TestCase):
                                                tol=None if typ!="float32" else 0.16)

 if __name__ == '__main__':
-#    t = TestConvOp("test_convolution")
+    t = TestConvOp("test_convolution")
 #    t.test_convolution()
-#    t.test_multilayer_conv()
+    t.test_multilayer_conv()
 #    from theano.tests import main
 #    main("test_sp")
-    bsize = 20 # batch size
-    imshp_start = (1,100,100)#un square shape to test more corner case.
-    kshps = ([11,12],[12,11])#un square shape to test more corner case.
-    nkerns = [20,20] # per output pixel
-    ssizes = [(1,1),]#(1,1)]#(2,2) bugged
-    convmodes = ['valid','full']
-    unroll_batch = 5
-    unroll_kern = 2
-    ctot=0
-    tctot, tpytot, ntot = exec_multilayer_conv_nnet(convmodes[1], ssizes[0], bsize, imshp_start, kshps, nkerns, unroll_batch=unroll_batch, unroll_kern=unroll_kern, validate=False, do_print=False,repeat=5)
-    print "total exec time %.3fs"%tctot
+    if False:
+        #used to launch 8 jobs at the same time.
+        bsize = 20 # batch size
+        imshp_start = (1,100,100)#un square shape to test more corner case.
+        kshps = ([11,12],[12,11])#un square shape to test more corner case.
+        nkerns = [20,20] # per output pixel
+        ssizes = [(1,1),]#(1,1)]#(2,2) bugged
+        convmodes = ['valid','full']
+        unroll_batch = 5
+        unroll_kern = 2
+        ctot=0
+        tctot, tpytot, ntot = exec_multilayer_conv_nnet(convmodes[1], ssizes[0], bsize, imshp_start, kshps, nkerns, unroll_batch=unroll_batch, unroll_kern=unroll_kern, validate=False, do_print=False,repeat=5)
+        print "total exec time %.3fs"%tctot
+