提交 ee1dbd1c authored 作者: Frederic Bastien's avatar Frederic Bastien

in ConvOp.__init__, if the unroll value are not good, we try to find a good one.…

in ConvOp.__init__, if the unroll values are not good, we try to find good ones. Otherwise we set them to 1 and print an OPTIMISATION WARNING. We do this because it is probably safer to fall back to default values for the optimisation parameters when they are bad and keep executing, than to fail outright.
上级 96b7efa1
...@@ -43,9 +43,17 @@ class ConvOp(Op): ...@@ -43,9 +43,17 @@ class ConvOp(Op):
self.unroll_kern=unroll_kern self.unroll_kern=unroll_kern
if self.unroll_batch>0 and self.bsize % self.unroll_batch!=0: if self.unroll_batch>0 and self.bsize % self.unroll_batch!=0:
raise Exception("unroll_batch(%s) should be 0 or a multiple of bsize(%s)"%(str(self.unroll_batch),str(self.bsize))) if self.bsize<self.unroll_batch:
self.unroll_batch = self.bsize
else:
self.unroll_batch=1
print "OPTIMISATION WARNING: in ConvOp.__init__() unroll_batch(%s) must be 0 or a multiple of bsize(%s). We revert it to 1. This won't change the result, but may make it slower."%(str(self.unroll_batch),str(self.bsize))
if self.unroll_kern>0 and self.nkern % unroll_kern!=0: if self.unroll_kern>0 and self.nkern % unroll_kern!=0:
raise Exception("unroll_kern(%s) should be 0 or a multiple of nkern(%s)"%(str(self.unroll_kern),str(self.nkern))) if self.nkern<self.unroll_kern:
self.unroll_kern = self.nkern
else:
self.unroll_kern=1
print "OPTIMISATION WARNING: in ConvOp.__init__() unroll_kern(%s) should be 0 or a multiple of nkern(%s)We revert it to 1. This won't change the result, but may make it slower."%(str(self.unroll_kern),str(self.nkern))
if self.dx!=1 or self.dy!=1: if self.dx!=1 or self.dy!=1:
print "Warning, dx!=1 or dy!=1 only supported in python mode!" print "Warning, dx!=1 or dy!=1 only supported in python mode!"
raise NotImplementedError() raise NotImplementedError()
...@@ -146,13 +154,13 @@ class ConvOp(Op): ...@@ -146,13 +154,13 @@ class ConvOp(Op):
un_b = bsize un_b = bsize
else: else:
un_b = 1 un_b = 1
print "WARNING, can't determine a good unroll value for the batch in the gradient. Maybe you can optimize this!" print "OPTIMISATION WARNING: in ConvOp.grad() we can't determine a good unroll value for the batch. Maybe you can optimize this!"
if un_k!=0 and nkern%un_k!=0: if un_k!=0 and nkern%un_k!=0:
if nkern<un_k: if nkern<un_k:
un_k = nkern un_k = nkern
else: else:
un_k = 1 un_k = 1
print "WARNING, can't determine a good unroll value for the kerner in the gradient. Maybe you can optimize this!" print "OPTIMISATION WARNING: in ConvOp.grad() we can't determine a good unroll value for the kernel. Maybe you can optimize this!"
dw = ConvOp(imshp, kshp, nkern, bsize, 1,1, output_mode='valid', dw = ConvOp(imshp, kshp, nkern, bsize, 1,1, output_mode='valid',
unroll_batch=un_b, unroll_kern=un_k)(img,filters) unroll_batch=un_b, unroll_kern=un_k)(img,filters)
......
...@@ -32,6 +32,123 @@ def flip(kern, kshp): ...@@ -32,6 +32,123 @@ def flip(kern, kshp):
return flip return flip
# Fixed-seed RNG shared by the tests below so runs are reproducible.
global_rng = N.random.RandomState(3423489)
# 4-D float64 tensor type (batch, channel, row, col) used for ConvOp inputs/kernels.
dmatrix4=T.TensorType('float64', (False, False, False, False))
def exec_multilayer_conv_nnet(conv_mode, ss, bsize, imshp, kshps, nkerns, unroll_batch=0, unroll_kern=0, img=T.dmatrix(), validate=True, conv_op_py=False, do_convolve2=False, do_print=True, repeat=1):
    """Run a multi-layer convolution benchmark/validation pass.

    For each layer described by (kshps[i], nkerns[i]) this builds a reference
    convolution with scipy's private ``_convolve2d`` (when ``validate``),
    optionally the ``convolve2`` sparse implementation (when ``do_convolve2``),
    and the ``ConvOp`` implementation (C linker, and the python linker too when
    ``conv_op_py``), asserting all results agree to within 1e-5.

    conv_mode      -- 'valid' or 'full'
    ss             -- (row, col) subsampling steps applied to the outputs
    bsize          -- batch size
    imshp          -- input image shape (nchannels, rows, cols)
    kshps, nkerns  -- per-layer kernel shapes and kernel counts (same length)
    unroll_batch, unroll_kern -- unroll parameters forwarded to ConvOp
    img            -- symbolic image input used by convolve2
                      # NOTE(review): default is evaluated once at def time and
                      # shared across calls — presumably fine for a symbolic
                      # variable, but confirm.
    validate       -- compare every implementation against the scipy reference
    conv_op_py     -- also time/check ConvOp under the python linker
    do_convolve2   -- also time/check the convolve2 code path
    do_print       -- print a per-layer banner
    repeat         -- number of timed repetitions of each compiled function

    Returns (tctot, tpytot, ntot): accumulated wall-clock seconds for the
    ConvOp C path, the ConvOp python path, and the numpy/scipy reference.
    """
    # build actual input images
    imgval = global_rng.rand(bsize, imshp[0], imshp[1], imshp[2])

    a=T.dmatrix()
    # NOTE(review): every layer shares the same symbolic kernel variable `a`;
    # looks intentional since each layer compiles its own function.
    kerns = [a for i in nkerns]
    inputs4=dmatrix4()
    kerns4=dmatrix4()
    # per-implementation time accumulators
    ntot=0      # numpy/scipy reference time
    tctot=0     # ConvOp (C linker) time
    tpytot=0    # ConvOp (python linker) time

    # for each layer
    for kshp, kern, nkern, n_layer in zip(kshps, kerns, nkerns, range(len(nkerns))):
        if do_print:
            print '************* layer %i ***************' % n_layer
            print conv_mode, ss, n_layer, kshp, nkern

        # actual kernel values; flipped copy gives true convolution when the
        # implementation computes correlation
        w = global_rng.random_sample(N.r_[nkern,imshp[0],kshp])
        w_flip = flip(w,kshp).reshape(w.shape)

        ## manual reference implementation
        # 'full' mode: zero-pad the image by kshp-1 on each side
        padimg = imgval
        if conv_mode == 'full':
            padimg_shp = N.array(imshp[1:]) + 2*(N.array(kshp) - N.array([1,1]))
            padimg = N.zeros(N.r_[bsize,imshp[0],padimg_shp])
            padimg[:, :, kshp[0]-1:-kshp[0]+1,
                   kshp[1]-1:-kshp[1]+1] = imgval
        outshp = N.hstack((nkern, getFilterOutShp(imshp, kshp, ss, conv_mode)))

        time1 = time.time()
        outval = N.zeros(N.r_[bsize,outshp])
        if validate:
            # importing scipy's private helpers here causes an atexit problem
            # if done at module level
            from scipy.signal.sigtools import _convolve2d
            from scipy.signal.signaltools import _valfrommode, _bvalfromboundary
            val = _valfrommode(conv_mode)
            bval = _bvalfromboundary('fill')
            for b in range(bsize): # loop over batches
                for n in range(nkern): # loop over filters
                    for i in range(imshp[0]): # loop over input feature maps
                        # sum over input maps, then subsample by ss
                        outval[b,n,...] += _convolve2d(\
                            imgval[b,i,...], w_flip[n,i,...],1,val, bval, 0)[0::ss[0],0::ss[1]]
        ntot += time.time() - time1

        if do_convolve2:
            ####### test with new sp.convolve2 function ######
            time1 = time.time()
            # convolve2 works on flattened (bsize, -1) images/kernels
            hid, outshp2 = convolve2(kern, kshp, nkern, img, imshp,
                                     bsize, (1,1), mode=conv_mode)
            propup = function([kern, img], hid)
            propup1 = function([kern, img], hid,mode=Mode(linker="py"))

            hidval = propup(w_flip.reshape(nkern,-1), imgval.reshape(bsize,-1))
            hidval = hidval.reshape(bsize,nkern,outshp2[-2],outshp2[-1])[:,:,::ss[0],::ss[1]]
            hidval = hidval.reshape(bsize, -1)
            # python-linker version, repeated for timing; only the last result
            # is checked
            for i in range(repeat):
                hidval1 = propup1(w_flip.reshape(nkern,-1), imgval.reshape(bsize,-1))
            hidval1 = hidval1.reshape(bsize,nkern,outshp2[-2],outshp2[-1])[:,:,::ss[0],::ss[1]]
            hidval1 = hidval1.reshape(bsize, -1)
            assert (N.abs(hidval-hidval1)<1e-5).all()

            temp = N.abs(outval.reshape(bsize,-1) - hidval)
            if validate:
                assert (temp < 1e-5).all()
        else:
            hid = img #we don't need it, but it makes the flow easier
            hidval=outval.copy()#to keep the same memory
            hidval1=outval.copy()

        # ConvOp
        conv_op = ConvOp(imshp, kshp, nkern, bsize, 1,1, conv_mode, unroll_batch=unroll_batch, unroll_kern=unroll_kern)(inputs4, kerns4)
        l1shp=N.hstack((nkern,
                        getFilterOutShp(imshp, kshp, ss, conv_mode)))
        propup2 = function([inputs4, kerns4], conv_op)
        propup3 = function([inputs4, kerns4], conv_op, mode=Mode(linker="py"))

        # C-linker timing: subsampling is done post-hoc on the full output
        time1 = time.time()
        for i in range(repeat):
            hidval2_ = propup2(imgval,w_flip)
        hidval2 = hidval2_[:,:,0::ss[0],0::ss[1]]
        tctot += time.time() - time1

        if conv_op_py:
            time1 = time.time()
            for i in range(repeat):
                hidval3_ = propup3(imgval,w_flip)
            hidval3 = hidval3_[:,:,0::ss[0],0::ss[1]]
            tpytot += time.time() - time1
            assert (N.abs(hidval2-hidval3)<1e-5).all()
        else:
            tpytot += 0
        if validate:
            temp = N.abs(outval - hidval2)
            assert (temp < 1e-5).all()
        if validate and conv_op_py:
            temp = N.abs(outval - hidval3)
            assert (temp < 1e-5).all()

        # chain: this layer's output becomes the next layer's input
        img, imshp = hid, tuple(outshp)
        imgval = outval.reshape(bsize,outshp[0],outshp[1],outshp[2])
    return tctot, tpytot, ntot
class TestConvOp(unittest.TestCase): class TestConvOp(unittest.TestCase):
def setUp(self): def setUp(self):
utt.seed_rng() utt.seed_rng()
...@@ -185,24 +302,22 @@ class TestConvOp(unittest.TestCase): ...@@ -185,24 +302,22 @@ class TestConvOp(unittest.TestCase):
print 'speed up ConvOp vs convolve2d: %.3f'%d.mean(),d print 'speed up ConvOp vs convolve2d: %.3f'%d.mean(),d
def test_multilayer_conv(self): def test_multilayer_conv(self):
# causes an atexit problem
from scipy.signal.sigtools import _convolve2d
from scipy.signal.signaltools import _valfrommode, _bvalfromboundary
# fixed parameters # fixed parameters
bsize = 1 # batch size bsizes = [6,6] # batch size
imshp_start = (1,28,28) imshp_starts = [(1,28,28),(1,4,4)]
kshps = ([5,6],[7,4]) kshpss = ([[5,6],[7,4]],[[2,2],[2,2]])
nkerns = [20,40] # per output pixel nkernss = [[20,40],[2,2]] # per output pixel
ssizes = [(1,1),(2,2)] ssizess = [[(1,1),(2,2)],[(1,1),(2,2)]]
convmodes = ['valid','full'] convmodes = ['valid','full']
do_theano=True do_convolve2=True
unroll = [(0,0),(1,1),(2,2),(3,2)]#(batch,kern)
# TODO: this version show a bug. # TODO: this version show a bug that was fixed
imshp_start = (1,4,4) # the test is included in the upper test.
kshps = ([2,2],[2,2])#,[7,4]) # imshp_start = (1,4,4)
nkerns = [2,2] # per output pixel # kshps = ([2,2],[2,2])#,[7,4])
ssizes = [(1,1),(2,2)]#2,2)] # nkerns = [2,2] # per output pixel
# ssizes = [(1,1),(2,2)]#2,2)]
#test speed #test speed
# bsize = 10 # batch size # bsize = 10 # batch size
...@@ -211,7 +326,7 @@ class TestConvOp(unittest.TestCase): ...@@ -211,7 +326,7 @@ class TestConvOp(unittest.TestCase):
# nkerns = [20,20] # per output pixel # nkerns = [20,20] # per output pixel
# ssizes = [(1,1),]#(1,1)]#(2,2) bugged # ssizes = [(1,1),]#(1,1)]#(2,2) bugged
# convmodes = ['valid','full'] # convmodes = ['valid','full']
# do_theano=False # do_convolve2=False
N.set_printoptions(threshold=N.nan) N.set_printoptions(threshold=N.nan)
...@@ -220,113 +335,8 @@ class TestConvOp(unittest.TestCase): ...@@ -220,113 +335,8 @@ class TestConvOp(unittest.TestCase):
img = T.dmatrix() img = T.dmatrix()
rng = N.random.RandomState(3423489) rng = N.random.RandomState(3423489)
tctot, tpytot, ntot = [], [], [] tctot, tpytot, ntot = [], [], []
for i in range(len(kshpss)):
dmatrix4=T.TensorType('float64', (False, False, False, False)) assert len(kshpss[i])==len(nkernss[i])==len(kerns)
inputs4=dmatrix4()
kerns4=dmatrix4()
assert len(kshps)==len(nkerns)==len(kerns)
def do_test(conv_mode, ss, unroll_batch=0, unroll_kern=0, img=img, validate=True,conv_op_py=False):
# build actual input images
imgval = rng.rand(bsize, imshp_start[0], imshp_start[1], imshp_start[2])
imshp=imshp_start
# for each layer
ntot=0
tctot=0
tpytot=0
for kshp, kern, nkern, n_layer in zip(kshps, kerns, nkerns, range(len(kerns))):
print '************* layer %i ***************' % n_layer
print conv_mode, ss, n_layer, kshp, nkern
# actual values
w = rng.random_sample(N.r_[nkern,imshp[0],kshp])
w_flip = flip(w,kshp).reshape(w.shape)
## manual implementation
# check first stage
padimg = imgval
if conv_mode == 'full':
padimg_shp = N.array(imshp[1:]) + 2*(N.array(kshp) - N.array([1,1]))
padimg = N.zeros(N.r_[bsize,imshp[0],padimg_shp])
padimg[:, :, kshp[0]-1:-kshp[0]+1,
kshp[1]-1:-kshp[1]+1] = imgval
outshp = N.hstack((nkern, getFilterOutShp(imshp, kshp, ss, conv_mode)))
time1 = time.time()
outval = N.zeros(N.r_[bsize,outshp])
if validate:
val = _valfrommode(conv_mode)
bval = _bvalfromboundary('fill')
for b in range(bsize): # loop over batches
for n in range(nkern): # loop over filters
for i in range(imshp[0]): # loop over input feature maps
outval[b,n,...] += _convolve2d(\
imgval[b,i,...], w_flip[n,i,...],1,val, bval, 0)[0::ss[0],0::ss[1]]
ntot += time.time() - time1
if do_theano:
####### test with new sp.convolve2 function ######
time1 = time.time()
hid, outshp2 = convolve2(kern, kshp, nkern, img, imshp,
bsize, (1,1), mode=conv_mode)
propup = function([kern, img], hid)
propup1 = function([kern, img], hid,mode=Mode(linker="py"))
hidval = propup(w_flip.reshape(nkern,-1), imgval.reshape(bsize,-1))
hidval = hidval.reshape(bsize,nkern,outshp2[-2],outshp2[-1])[:,:,::ss[0],::ss[1]]
hidval = hidval.reshape(bsize, -1)
hidval1 = propup1(w_flip.reshape(nkern,-1), imgval.reshape(bsize,-1))
hidval1 = hidval1.reshape(bsize,nkern,outshp2[-2],outshp2[-1])[:,:,::ss[0],::ss[1]]
hidval1 = hidval1.reshape(bsize, -1)
assert (N.abs(hidval-hidval1)<1e-5).all()
temp = N.abs(outval.reshape(bsize,-1) - hidval)
if validate:
assert (temp < 1e-5).all()
else:
hid = img #we don't need it, but it make the flow easier flow
hidval=outval.copy()#to keep the same memory
hidval1=outval.copy()
# ConvOp
conv_op = ConvOp(imshp, kshp, nkern, bsize, 1,1, conv_mode, unroll_batch=unroll_batch, unroll_kern=unroll_kern)(inputs4, kerns4)
l1shp=N.hstack((nkern,
getFilterOutShp(imshp, kshp, ss, conv_mode)))
propup2 = function([inputs4, kerns4], conv_op)
propup3 = function([inputs4, kerns4], conv_op, mode=Mode(linker="py"))
time1 = time.time()
hidval2_ = propup2(imgval,w_flip)
hidval2 = hidval2_[:,:,0::ss[0],0::ss[1]]
tctot += time.time() - time1
if conv_op_py:
time1 = time.time()
hidval3_ = propup3(imgval,w_flip)
hidval3 = hidval3_[:,:,0::ss[0],0::ss[1]]
tpytot += time.time() - time1
assert (N.abs(hidval2-hidval3)<1e-5).all()
else:
tpytot += 0
if validate:
temp = N.abs(outval - hidval2)
assert (temp < 1e-5).all()
if validate and conv_op_py:
temp = N.abs(outval - hidval3)
assert (temp < 1e-5).all()
img, imshp = hid, tuple(outshp)
imgval = outval.reshape(bsize,outshp[0],outshp[1],outshp[2])
return tctot, tpytot, ntot
if False: if False:
# calculate the speed up of different combination of unroll # calculate the speed up of different combination of unroll
...@@ -336,16 +346,16 @@ class TestConvOp(unittest.TestCase): ...@@ -336,16 +346,16 @@ class TestConvOp(unittest.TestCase):
unroll_batch = [1,2,4,5,10,20] unroll_batch = [1,2,4,5,10,20]
unroll_kern = [1,2,4,5,10,20] unroll_kern = [1,2,4,5,10,20]
# unroll_batch = [1,2,5] unroll_batch = [1,2,5]
# unroll_kern = [1,2,5] unroll_kern = [1,2,5]
bsize = 20 # batch size bsize = 20 # batch size
imshp_start = (1,50,49)#un square shape to test more corner case. imshp_start = (1,48,48)#un square shape to test more corner case.
kshps = ([11,12],[12,11])#un square shape to test more corner case. kshps = ([11,12],[12,11])#un square shape to test more corner case.
nkerns = [20,20] # per output pixel nkerns = [20,20] # per output pixel
ssizes = [(1,1),]#(1,1)]#(2,2) bugged ssizes = [(1,1),]#(1,1)]#(2,2) bugged
convmodes = ['valid','full'] convmodes = ['valid','full']
do_theano=False do_convolve2=False
a=T.dmatrix() a=T.dmatrix()
kerns = [a for i in nkerns] kerns = [a for i in nkerns]
...@@ -360,7 +370,7 @@ class TestConvOp(unittest.TestCase): ...@@ -360,7 +370,7 @@ class TestConvOp(unittest.TestCase):
tctot, tpytot, ntot=[],[],[] tctot, tpytot, ntot=[],[],[]
for conv_mode, n_mode in zip(convmodes,range(len(convmodes))): for conv_mode, n_mode in zip(convmodes,range(len(convmodes))):
for ss, n_ss in zip(ssizes,range(len(ssizes))): for ss, n_ss in zip(ssizes,range(len(ssizes))):
tctot_, tpytot_, ntot_ = do_test(conv_mode, ss, unroll_batch=unroll_b, unroll_kern=unroll_k, validate=validate) tctot_, tpytot_, ntot_ = exec_multilayer_conv_nnet(conv_mode, ss, bsize, imshp_start, kshps, nkerns, unroll_batch=unroll_b, unroll_kern=unroll_k, validate=validate)
tctot+=[tctot_] tctot+=[tctot_]
tpytot+=[tpytot_] tpytot+=[tpytot_]
ntot+=[ntot_] ntot+=[ntot_]
...@@ -370,7 +380,7 @@ class TestConvOp(unittest.TestCase): ...@@ -370,7 +380,7 @@ class TestConvOp(unittest.TestCase):
tctot,tpytot,ntot=0,0,0 tctot,tpytot,ntot=0,0,0
for conv_mode, n_mode in zip(convmodes,range(len(convmodes))): for conv_mode, n_mode in zip(convmodes,range(len(convmodes))):
for ss, n_ss in zip(ssizes,range(len(ssizes))): for ss, n_ss in zip(ssizes,range(len(ssizes))):
tctot_, tpytot_, ntot_ = do_test(conv_mode, ss, unroll_batch=0, unroll_kern=0, validate=validate) tctot_, tpytot_, ntot_ = exec_multilayer_conv_nnet(conv_mode, ss, bsize, imshp_start, kshps, nkerns, unroll_batch=0, unroll_kern=0, validate=validate)
tctot+=tctot_ tctot+=tctot_
tpytot+=tpytot_ tpytot+=tpytot_
ntot+=ntot_ ntot+=ntot_
...@@ -385,13 +395,19 @@ class TestConvOp(unittest.TestCase): ...@@ -385,13 +395,19 @@ class TestConvOp(unittest.TestCase):
print "min %.3fs"%t.min(), "min param(batch unloop size/kernel unloop size)", t_b_k[t.argmin()] print "min %.3fs"%t.min(), "min param(batch unloop size/kernel unloop size)", t_b_k[t.argmin()]
print "speedup vs (1/1)%.3fx, vs old %.3fx"% (t.max()/t.min(),tctot/t.min()) print "speedup vs (1/1)%.3fx, vs old %.3fx"% (t.max()/t.min(),tctot/t.min())
return return
for conv_mode, n_mode in zip(convmodes,range(len(convmodes))): for i in range(len(kshpss)):
for ss, n_ss in zip(ssizes,range(len(ssizes))): for conv_mode, n_mode in zip(convmodes,range(len(convmodes))):
tctot_, tpytot_, ntot_ = do_test(conv_mode, ss) for ss, n_ss in zip(ssizess[i],range(len(ssizess[i]))):
tctot+=[tctot_] for un_b, un_k in unroll:
tpytot+=[tpytot_] tctot_, tpytot_, ntot_ = exec_multilayer_conv_nnet(
ntot+=[ntot_] conv_mode, ss, bsizes[i], imshp_starts[i],
kshpss[i], nkernss[i],
img=img, unroll_batch=un_b, unroll_kern=un_k,
validate=True)
tctot+=[tctot_]
tpytot+=[tpytot_]
ntot+=[ntot_]
print '**** Multilayer Convolution Profiling Results ****' print '**** Multilayer Convolution Profiling Results ****'
print 'Numpy convolve2d processing time: %.3fs'%sum(ntot),ntot print 'Numpy convolve2d processing time: %.3fs'%sum(ntot),ntot
...@@ -465,8 +481,19 @@ class TestConvOp(unittest.TestCase): ...@@ -465,8 +481,19 @@ class TestConvOp(unittest.TestCase):
utt.verify_grad(testf, [imgvals, kernvals]) utt.verify_grad(testf, [imgvals, kernvals])
if __name__ == '__main__': if __name__ == '__main__':
t = TestConvOp("test_convolution") # t = TestConvOp("test_convolution")
t.test_convolution() # t.test_convolution()
# t.test_multilayer_conv() # t.test_multilayer_conv()
# from theano.tests import main # from theano.tests import main
# main("test_sp") # main("test_sp")
bsize = 20 # batch size
imshp_start = (1,100,100)#un square shape to test more corner case.
kshps = ([11,12],[12,11])#un square shape to test more corner case.
nkerns = [20,20] # per output pixel
ssizes = [(1,1),]#(1,1)]#(2,2) bugged
convmodes = ['valid','full']
unroll_batch = 5
unroll_kern = 2
ctot=0
tctot, tpytot, ntot = exec_multilayer_conv_nnet(convmodes[1], ssizes[0], bsize, imshp_start, kshps, nkerns, unroll_batch=unroll_batch, unroll_kern=unroll_kern, validate=False, do_print=False,repeat=5)
print "total exec time %.3fs"%tctot
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论