Move the speed test function into its own method and update it to time every unroll_patch case

f93b1c29 · Frederic Bastien · 304cbe3f · f93b1c29
--- a/theano/sandbox/test_conv.py
+++ b/theano/sandbox/test_conv.py
@@ -41,7 +41,7 @@ def flip(kern, kshp):
 global_rng = N.random.RandomState(3423489)
 dmatrix4=T.TensorType('float64', (False, False, False, False))
-def exec_multilayer_conv_nnet(conv_mode, ss, bsize, imshp, kshps, nkerns, unroll_batch=0, unroll_kern=0, img=T.dmatrix(), validate=True, conv_op_py=False, do_convolve2=False, do_print=True, repeat=1, unroll_patch=0):
+def exec_multilayer_conv_nnet(conv_mode, ss, bsize, imshp, kshps, nkerns, unroll_batch=0, unroll_kern=0, img=T.dmatrix(), validate=True, conv_op_py=False, do_convolve2=False, do_print=True, repeat=1, unroll_patch=False, unroll_patch_size=False, verbose=0):
        # build actual input images
        imgval = global_rng.rand(bsize, imshp[0], imshp[1], imshp[2])
@@ -121,12 +121,12 @@ def exec_multilayer_conv_nnet(conv_mode, ss, bsize, imshp, kshps, nkerns, unroll
                hidval1=outval.copy()
            # ConvOp
-            if unroll_patch:
+            if unroll_patch and not unroll_patch_size:
                conv_op = ConvOp(dx=ss[0],dy=ss[1], output_mode=conv_mode,
-                                 unroll_patch=unroll_patch)(inputs4, kerns4)
+                                 unroll_patch=unroll_patch, verbose=verbose)(inputs4, kerns4)
            else:
                conv_op = ConvOp(imshp, kshp, nkern, bsize, ss[0],ss[1], conv_mode,
-                                 unroll_batch=unroll_batch, unroll_kern=unroll_kern, unroll_patch=unroll_patch)(inputs4, kerns4)
+                                 unroll_batch=unroll_batch, unroll_kern=unroll_kern, unroll_patch=unroll_patch, verbose=verbose)(inputs4, kerns4)
            l1shp=N.hstack((nkern,
                            getFilterOutShp(imshp, kshp, ss, conv_mode)))
            propup2 = function([inputs4, kerns4], conv_op)
@@ -319,55 +319,17 @@ class TestConvOp(unittest.TestCase):
        d=N.asarray(tscipy)/tconvop
        print 'speed up ConvOp vs convolve2d: %.3f'%d.mean(),d
-    def test_multilayer_conv(self):
+    def speed_multilayer_conv(self):
-        print '\n\n*************************************************'
-        print '           TEST MULTILAYER CONVOLUTION' 
-        print '*************************************************'
-        # fixed parameters
-        # test multiple configuration at the same time
-        bsizes = [6,6] # batch size
-        imshp_starts = [(1,13,14),(1,4,5)]
-        kshpss = ([[5,6],[7,4]],[[2,2],[2,2]])
-        nkernss = [[20,40],[2,2]] # per output pixel
-        ssizess = [[(1,1),(1,2)],[(1,1),(2,2)]]
-        convmodes = ['valid','full']
-        do_convolve2=True
-        unroll = [(0,0,True),(0,0,False),(1,1,False),(2,2,False),(3,2,False)]#(batch,kern,patch)
-        do_speed_test = False
-        # TODO: this version show a bug that was fixed
-        # the test is included in the upper test.
-#        imshp_start = (1,4,4)
-#        kshps = ([2,2],[2,2])#,[7,4])
-#        nkerns = [2,2] # per output pixel
-#        ssizes = [(1,1),(2,2)]#2,2)]
-#        bsizes = [1,1] # batch size
-#        imshp_starts = [(1,10,10),(1,5,6)]
-#        kshpss = ([[2,3],[3,2]],[[2,2],[2,2]])
-#        nkernss = [[1,1],[1,1]] # per output pixel
-        N.set_printoptions(threshold=N.nan)
-        # symbolic stuff
-        kerns = [T.matrix(),T.dmatrix()]
-        img = T.dmatrix()
-        rng = N.random.RandomState(3423489)
-        tctot, tpytot, ntot = [], [], []
-        for i in range(len(kshpss)):
-            assert len(kshpss[i])==len(nkernss[i])==len(kerns)
-        if do_speed_test:
            # calculate the speed up of different combination of unroll
            # put the paramter to the same you will try. 
            validate=False# we don't validate the result to have it much faster!
+            verbose=1
            unroll_batch = [1,2,4,5,10,20]
            unroll_kern = [1,2,4,5,10,20]
            unroll_batch = [1,4,5]
            unroll_kern = [1,4,5]
+            unroll_patch = [True, False]
            bsize = 20 # batch size
            imshp_start = (1,48,48)#un square shape to test more corner case.
@@ -381,15 +343,16 @@ class TestConvOp(unittest.TestCase):
            assert len(kshps)==len(nkerns)==len(kerns)
-            timing = N.zeros((len(unroll_batch),len(unroll_kern),3))
+            timing = N.zeros((len(unroll_batch),len(unroll_kern),3,len(convmodes)*len(ssizes)))
            t_b_k=[]
            #calculate the timing with unrolling
+            print 'time unroll batch kern'
            t_=[[ 7.60572791,  3.95069814,  3.74271464], [ 4.05631089,  2.90384555,  2.93613672], [ 3.90551591,  2.92595196,  3.00102282]]
-            best=[]
-            worst=[]
            best=[0.52690219879150391, 2.4266397953033447]
            worst=[0.92042708396911621, 6.8822150230407715]
+            best=[]
+            worst=[]
            t_=[]
            for unroll_b, n_b in zip(unroll_batch,range(len(unroll_batch))):
                for unroll_k, n_k in zip(unroll_kern,range(len(unroll_kern))):
@@ -398,30 +361,31 @@ class TestConvOp(unittest.TestCase):
                        tctot, tpytot, ntot=[],[],[]
                        for conv_mode, n_mode in zip(convmodes,range(len(convmodes))):
                            for ss, n_ss in zip(ssizes,range(len(ssizes))):
-                                tctot_, tpytot_, ntot_ = exec_multilayer_conv_nnet(conv_mode, ss, bsize, imshp_start, kshps, nkerns, unroll_batch=unroll_b, unroll_kern=unroll_k, validate=validate)
+                                tctot_, tpytot_, ntot_ = exec_multilayer_conv_nnet(conv_mode, ss, bsize, imshp_start, kshps, nkerns, unroll_batch=unroll_b, unroll_kern=unroll_k, validate=validate, verbose=verbose,do_print=False)
                                tctot+=[tctot_]
                                tpytot+=[tpytot_]
                                ntot+=[ntot_]
                        if unroll_b==4 and unroll_k==4:
-                            print "unroll 4/4",tctot
+                            #print "unroll 4/4",tctot
                            best=tctot
                        if unroll_b==1 and unroll_k==1:
-                            print "unroll 1/1",tctot
+                            #print "unroll 1/1",tctot
                            worst=tctot
-                        timing[n_b,n_k]=[sum(tctot), sum(tpytot), sum(ntot)]
+                        timing[n_b,n_k]=[tctot, tpytot, ntot]#[sum(tctot), sum(tpytot), sum(ntot)]
            if not t_:
-                t=timing[:,:,0]#We select only the c timing.
+                t=timing[:,:,0,:]#We select only the c timing.
            else:
                t=t_
            t=N.asarray(t)
            #calculate the old timing
+            print 'time old version'
            tctot_=[0.52555489540100098, 6.6634182929992676]
-#            tctot_=[]
            tctot,tpytot,ntot=[],[],[]
+            tctot_=[]
            if not tctot_:
                for conv_mode, n_mode in zip(convmodes,range(len(convmodes))):
                    for ss, n_ss in zip(ssizes,range(len(ssizes))):
-                        tctot_, tpytot_, ntot_ = exec_multilayer_conv_nnet(conv_mode, ss, bsize, imshp_start, kshps, nkerns, unroll_batch=0, unroll_kern=0, validate=validate)
+                        tctot_, tpytot_, ntot_ = exec_multilayer_conv_nnet(conv_mode, ss, bsize, imshp_start, kshps, nkerns, unroll_batch=0, unroll_kern=0, validate=validate, verbose=verbose,do_print=False)
                        tctot+=[tctot_]
                        tpytot+=[tpytot_]
                        ntot+=[ntot_]
@@ -432,29 +396,73 @@ class TestConvOp(unittest.TestCase):
            print "timing for unrolled version"
            print t_b_k
            print t
+            t_detail=t
+            t = t.sum(axis=2)
            print "max %.3fs"%t.max(), "max param(batch unloop size/kernel unloop size)", t_b_k[t.argmax()]
            print "min %.3fs"%t.min(), "min param(batch unloop size/kernel unloop size)", t_b_k[t.argmin()]
            print "speedup vs (1/1)%.3fx, vs old %.3fx"% (t.max()/t.min(),sum(tctot)/t.min())
            print worst/best,tctot/best
+            #calculate the timing of unroll_patch
+            print 'time unroll_patch'
            tctot_patch = []
+            tctot_patch_size = []
            for conv_mode, n_mode in zip(convmodes,range(len(convmodes))):
                for ss, n_ss in zip(ssizes,range(len(ssizes))):
-                     tctot_, tpytot_, ntot_ = exec_multilayer_conv_nnet(conv_mode, ss, bsize, imshp_start, kshps, nkerns, unroll_batch=0, unroll_kern=0, validate=validate,unroll_patch=2)
+                    tctot_, tpytot_, ntot_ = exec_multilayer_conv_nnet(conv_mode, ss, bsize, imshp_start, kshps, nkerns, unroll_batch=0, unroll_kern=0, validate=validate,unroll_patch=True,verbose=verbose,do_print=False)
-                     tctot_patch += [tctot_]
+                    tctot_patch += [tctot_]
+                    tctot_, tpytot_, ntot_ = exec_multilayer_conv_nnet(conv_mode, ss, bsize, imshp_start, kshps, nkerns, unroll_batch=0, unroll_kern=0, validate=validate,unroll_patch=True,verbose=verbose,do_print=False,unroll_patch_size=True)
+                    tctot_patch_size += [tctot_]
            t_patch=sum(tctot_patch)
-            print "unroll_patch time", tctot_patch
+            print "unroll_patch without shape time", tctot_patch
            print "speedup vs (1/1)%.3fx, vs old %.3fx"% (t.max()/t_patch,sum(tctot)/t_patch)
            print best/tctot_patch, worst/tctot_patch
+            t_patch_size=sum(tctot_patch_size)
+            print "unroll_patch with shape time", tctot_patch_size
+            print "speedup vs (1/1)%.3fx, vs old %.3fx"% (t.max()/t_patch_size,sum(tctot)/t_patch_size)
+            print best/tctot_patch_size, worst/tctot_patch_size
-            print best
-            print worst
-            print tctot
-            print tctot_patch
            return
+    def test_multilayer_conv(self):
+        print '\n\n*************************************************'
+        print '           TEST MULTILAYER CONVOLUTION' 
+        print '*************************************************'
+        # fixed parameters
+        # test multiple configuration at the same time
+        bsizes = [6,6] # batch size
+        imshp_starts = [(1,13,14),(1,4,5)]
+        kshpss = ([[5,6],[7,4]],[[2,2],[2,2]])
+        nkernss = [[20,40],[2,2]] # per output pixel
+        ssizess = [[(1,1),(1,2)],[(1,1),(2,2)]]
+        convmodes = ['valid','full']
+        do_convolve2=True
+        unroll = [(0,0,True),(0,0,False),(1,1,False),(2,2,False),(3,2,False)]#(batch,kern,patch)
+        # TODO: this version show a bug that was fixed
+        # the test is included in the upper test.
+#        imshp_start = (1,4,4)
+#        kshps = ([2,2],[2,2])#,[7,4])
+#        nkerns = [2,2] # per output pixel
+#        ssizes = [(1,1),(2,2)]#2,2)]
+#        bsizes = [1,1] # batch size
+#        imshp_starts = [(1,10,10),(1,5,6)]
+#        kshpss = ([[2,3],[3,2]],[[2,2],[2,2]])
+#        nkernss = [[1,1],[1,1]] # per output pixel
+        N.set_printoptions(threshold=N.nan)
+        # symbolic stuff
+        kerns = [T.matrix(),T.dmatrix()]
+        img = T.dmatrix()
+        rng = N.random.RandomState(3423489)
+        tctot, tpytot, ntot = [], [], []
+        for i in range(len(kshpss)):
+            assert len(kshpss[i])==len(nkernss[i])==len(kerns)
        for i in range(len(kshpss)):
            for conv_mode, n_mode in zip(convmodes,range(len(convmodes))):
                for ss, n_ss in zip(ssizess[i],range(len(ssizess[i]))):