When only 1 core or OMP_NUM_THREADS==1, don't enable openmp by default.

aa71b758 · Frederic · b6dc1b14 · aa71b758 · aa71b758 · aa71b758
--- a/doc/library/config.txt
+++ b/doc/library/config.txt
@@ -173,7 +173,8 @@ import theano and print the config variable, as in:

    Bool value: either True or False

-    Default: True
+    Default: True if the environment variable OMP_NUM_THREADS is set and !=1,
+             or if it is unset and we detect more than 1 CPU core. Otherwise False.

    Enable or not parallel computation on the CPU with OpenMP.
    It is the default value used when creating an Op that support it.

--- a/theano/configdefaults.py
+++ b/theano/configdefaults.py
@@ -5,7 +5,7 @@ import subprocess
 from theano.configparser import (
        AddConfigVar, BoolParam, ConfigParam, EnumStr, IntParam,
        TheanoConfigParser)
-
+from theano.misc.cpucount import cpuCount

 _logger = logging.getLogger('theano.configdefaults')

@@ -16,12 +16,35 @@ AddConfigVar('floatX',
        EnumStr('float64', 'float32'),
        )

+#http://pyprocessing.berlios.de/
+#True if the environment variable OMP_NUM_THREADS is set and !=1, or
+#if it is unset and we detect more than 1 CPU core. Otherwise False.
+default_openmp = True
+var = os.getenv('OMP_NUM_THREADS', None)
+if var:
+    try:
+        int(var)
+    except ValueError:
+        raise TypeError("The environment variable OMP_NUM_THREADS"
+                        " should be a number, got '%s'." % var)
+    else:
+        default_openmp = not int(var) == 1
+else:
+    count = cpuCount()
+    if count == -1:
+        _logger.warning("We are not able to detect the number of CPU cores."
+                        " We disable openmp by default. To remove this"
+                        " warning, set the environment variable"
+                        " OMP_NUM_THREADS to the number of threads you"
+                        " want theano to use.")
+    default_openmp = count > 1
+
 AddConfigVar('openmp',
             "Enable or not parallel computation on the CPU with OpenMP. "
             "It is the default value used when creating an Op that support it"
             ". The best is to define it via Theano configuration "
             "file or with the environment variable THEANO_FLAGS.",
-             BoolParam(True),
+             BoolParam(default_openmp),
             in_c_key=False,
         )


--- a/theano/tensor/nnet/conv.py
+++ b/theano/tensor/nnet/conv.py
@@ -321,10 +321,11 @@ class ConvOp(Op):
        if (unroll_batch>0 or unroll_kern>0) and not all_shape:
            raise Exception("In ConvOp, when using unroll_batch and unroll_nkern, all shape are needed")

+        if openmp is None:
+            openmp = theano.config.openmp

        if not all_shape or config.openmp:
-            #TODO: check number of core available when we set the default for openmp
-            #http://bytes.com/topic/python/answers/825616-how-can-i-check-nbr-cores-computer
+            # Only this version is parallelized
            unroll_patch = True

        if imshp is not None:

--- a/theano/tensor/nnet/tests/test_conv.py
+++ b/theano/tensor/nnet/tests/test_conv.py
@@ -374,30 +374,28 @@ class TestConv2D(unittest.TestCase):
        self.validate((1, 10, 213, 129), (46, 10, 212, 1), 'valid', verify_grad=False)

    def speed(self):
-        filter_shape = (5, 10, 8, 8)
-        for image_shape in [(10, 10, 10, 10), (10, 10, 16, 16), (10, 10, 64, 64)]:
-            print image_shape
-            for border_mode in ['valid', 'full']:
-
-                input = theano.shared(numpy.random.random(image_shape))
-                filters = theano.shared(numpy.random.random(filter_shape))
-
-                output = conv.conv2d(input, filters, image_shape, filter_shape,
-                                     border_mode,
-                                     unroll_patch=True)
-                mode = theano.Mode(linker=theano.gof.vm.VM_Linker(
-                    allow_gc=False,
-                    use_cloop=True))
-                theano_conv = theano.function([], output, mode=mode)
-                theano_conv.fn(n_calls=10)
-                theano_conv.fn.update_profile(theano_conv.profile)
-                print border_mode, theano_conv.profile.apply_time.values(),
-                print theano_conv.profile.apply_callcount.values()
-
-        """
-        shape: (10, 10, 16, 16), (5, 10, 8, 8)
-        num threads       1          2          4
-        // kern      5.54e-03s  3.12e-03s  1.99e-03s
-        // batch     4.22e-03s  1.59e-03s  1.25e-03s
-        // kern_batch3-5-03s    2.51e-03s  9.15e-04s
-        """
+        for filter_shape in [(1, 5, 4, 4), (5, 5, 4, 4)]:
+            print filter_shape
+            for image_shape in [(1, 5, 6, 6),
+                                #(10, 10, 10, 10),
+                                #(10, 10, 16, 16),
+                                #(10, 10, 32, 32)
+                                ]:
+                print image_shape
+                for border_mode in ['valid', 'full']:
+
+                    input = theano.shared(numpy.random.random(image_shape))
+                    filters = theano.shared(numpy.random.random(filter_shape))
+
+                    output = conv.conv2d(input, filters,
+                                         image_shape, filter_shape,
+                                         border_mode,
+                                         unroll_patch=True)
+                    mode = theano.Mode(linker=theano.gof.vm.VM_Linker(
+                        allow_gc=False,
+                        use_cloop=True))
+                    theano_conv = theano.function([], output, mode=mode)
+                    t1 = time.time()
+                    theano_conv.fn(n_calls=500)
+                    t2 = time.time()
+                    print border_mode, t2 - t1, 100