better implementation of the speed test of fusion.

9b8d4456 · Frederic Bastien · 1ffee9f0 · 9b8d4456
--- a/theano/tensor/tests/test_opt.py
+++ b/theano/tensor/tests/test_opt.py
@@ -1014,40 +1014,35 @@ class test_fusion(unittest.TestCase):
        param s: a slice to apply to the case to execute. If None, exec all case.
        """
-        import copy
        shp=(3000,3000)
-        #mode1=copy.copy(compile.mode.predefined_modes['FAST_RUN'])
+#        linker=gof.CLinker
-        linker=gof.CLinker
+#        linker=gof.OpWiseCLinker
-        linker=gof.OpWiseCLinker
-        mode1=compile.Mode(linker(), copy.copy(compile.mode.OPT_FAST_RUN))
+        mode1=cp(compile.get_default_mode())
+        mode1._optimizer=mode1._optimizer.including('local_elemwise_fusion')
        #TODO: clinker is much faster... but uses too much memory.
        #Possible cause: there is no deletion of intermediate values when we don't keep the fct.
        #More plausible cause: we keep a link to the output data?
        #Follow up: Clinker does the same... second cause?
-        mode2=compile.Mode(linker(), copy.copy(compile.mode.OPT_FAST_RUN))
+        mode2=cp(compile.get_default_mode())
-#        mode2=copy.copy(compile.mode.predefined_modes['FAST_RUN'])
+        mode2._optimizer=mode2._optimizer.excluding('local_elemwise_fusion')
-        old_optimizer = mode2._optimizer
+        if s is None:
-        try:
+            s=slice(0,49)
-            mode2._optimizer=mode2._optimizer.excluding('local_elemwise_fusion')
+            s=slice(0,10)
-    #        mode2=compile.Mode(gof.OpWiseCLinker(allow_gc=True), compile.mode.OPT_FAST_COMPILE)
+            #s=slice(49,59)
+        nb_repeat=10
-            if s is None:
+        print "test with linker", str(mode1.linker)
-                s=slice(0,49)
+        times1=self.do(mode1, shared_fn, shp, gpu=gpu, nb_repeat=nb_repeat, assert_len_topo=False,slice=s)
-                #s=slice(49,59)
+        times2=self.do(mode2, shared_fn, shp, gpu=gpu, nb_repeat=nb_repeat, assert_len_topo=False,slice=s)
-            nb_repeat=10
+        print "times1 with local_elemwise_fusion"
-            print "test with linker", str(linker)
+        print times1, times1.min(), times1.max(), times1.sum()
-            times1=self.do(mode1, shared_fn, shp, gpu=gpu, nb_repeat=nb_repeat, assert_len_topo=False,slice=s)
+        print "times2 without local_elemwise_fusion"
-            times2=self.do(mode2, shared_fn, shp, gpu=gpu, nb_repeat=nb_repeat, assert_len_topo=False,slice=s)
+        print times2, times2.min(), times2.max(), times2.sum()
-            print "times1 FAST_RUN optimisation"
+        d=times2/times1
-            print times1, times1.min(), times1.max(), times1.sum()
-            print "times2 FAST_RUN optimisation without local_elemwise_fusion"
+        print "times2/times1"
-            print times2, times2.min(), times2.max(), times2.sum()
+        print d
-            d=times2/times1
+        print "min", d.min(), "argmin", d.argmin(), "max", d.max(), "mean", d.mean(), "std", d.std()
-    #        d.sort()
-            print "times2/times1",d
-            print "min", d.min(), "argmin", d.argmin(), "max", d.max(), "mean", d.mean(), "std", d.std()
-        finally:
-            mode2._optimizer = old_optimizer
    def speed_fusion_gpu(self):
        import theano_cuda_ndarray as tcn