提交 aa71b758 authored 作者: Frederic's avatar Frederic

When only 1 core or OMP_NUM_THREADS==1, don't enable openmp by default.

上级 b6dc1b14
...@@ -173,7 +173,8 @@ import theano and print the config variable, as in: ...@@ -173,7 +173,8 @@ import theano and print the config variable, as in:
Bool value: either True or False Bool value: either True or False
Default: True Default: True if the environment variable OMP_NUM_THREADS!=1 or
if we detect more than 1 CPU core. Otherwise False.
Enable or not parallel computation on the CPU with OpenMP. Enable or not parallel computation on the CPU with OpenMP.
It is the default value used when creating an Op that supports it. It is the default value used when creating an Op that supports it.
......
...@@ -5,7 +5,7 @@ import subprocess ...@@ -5,7 +5,7 @@ import subprocess
from theano.configparser import ( from theano.configparser import (
AddConfigVar, BoolParam, ConfigParam, EnumStr, IntParam, AddConfigVar, BoolParam, ConfigParam, EnumStr, IntParam,
TheanoConfigParser) TheanoConfigParser)
from theano.misc.cpucount import cpuCount
_logger = logging.getLogger('theano.configdefaults') _logger = logging.getLogger('theano.configdefaults')
...@@ -16,12 +16,35 @@ AddConfigVar('floatX', ...@@ -16,12 +16,35 @@ AddConfigVar('floatX',
EnumStr('float64', 'float32'), EnumStr('float64', 'float32'),
) )
# Reference: http://pyprocessing.berlios.de/
# Choose the default value of the `openmp` flag:
#  * OMP_NUM_THREADS set and non-empty -> enabled unless it equals 1;
#  * otherwise -> enabled only if more than 1 CPU core is detected
#    (cpuCount() returning -1 means the detection failed).
default_openmp = True
var = os.getenv('OMP_NUM_THREADS', None)
if not var:
    # Variable unset (or empty): fall back to the number of CPU cores.
    count = cpuCount()
    if count == -1:
        _logger.warning("We are not able to detect the number of CPU cores."
                        " We disable openmp by default. To remove this"
                        " warning, set the environment variable"
                        " OMP_NUM_THREADS to the number of threads you"
                        " want theano to use.")
    # A failed detection (-1) also ends up here and disables openmp.
    default_openmp = count > 1
else:
    try:
        # A single requested thread means there is nothing to parallelize.
        default_openmp = int(var) != 1
    except ValueError:
        raise TypeError("The environment variable OMP_NUM_THREADS"
                        " should be a number, got '%s'." % var)
AddConfigVar('openmp', AddConfigVar('openmp',
"Enable or not parallel computation on the CPU with OpenMP. " "Enable or not parallel computation on the CPU with OpenMP. "
"It is the default value used when creating an Op that support it" "It is the default value used when creating an Op that support it"
". The best is to define it via Theano configuration " ". The best is to define it via Theano configuration "
"file or with the environment variable THEANO_FLAGS.", "file or with the environment variable THEANO_FLAGS.",
BoolParam(True), BoolParam(default_openmp),
in_c_key=False, in_c_key=False,
) )
......
...@@ -321,10 +321,11 @@ class ConvOp(Op): ...@@ -321,10 +321,11 @@ class ConvOp(Op):
if (unroll_batch>0 or unroll_kern>0) and not all_shape: if (unroll_batch>0 or unroll_kern>0) and not all_shape:
raise Exception("In ConvOp, when using unroll_batch and unroll_nkern, all shape are needed") raise Exception("In ConvOp, when using unroll_batch and unroll_nkern, all shape are needed")
if openmp is None:
openmp = theano.config.openmp
if not all_shape or config.openmp: if not all_shape or config.openmp:
#TODO: check number of core available when we set the default for openmp # Only this version is parallelized
#http://bytes.com/topic/python/answers/825616-how-can-i-check-nbr-cores-computer
unroll_patch = True unroll_patch = True
if imshp is not None: if imshp is not None:
......
...@@ -374,30 +374,28 @@ class TestConv2D(unittest.TestCase): ...@@ -374,30 +374,28 @@ class TestConv2D(unittest.TestCase):
self.validate((1, 10, 213, 129), (46, 10, 212, 1), 'valid', verify_grad=False) self.validate((1, 10, 213, 129), (46, 10, 212, 1), 'valid', verify_grad=False)
def speed(self): def speed(self):
filter_shape = (5, 10, 8, 8) for filter_shape in [(1, 5, 4, 4), (5, 5, 4, 4)]:
for image_shape in [(10, 10, 10, 10), (10, 10, 16, 16), (10, 10, 64, 64)]: print filter_shape
print image_shape for image_shape in [(1, 5, 6, 6),
for border_mode in ['valid', 'full']: #(10, 10, 10, 10),
#(10, 10, 16, 16),
input = theano.shared(numpy.random.random(image_shape)) #(10, 10, 32, 32)
filters = theano.shared(numpy.random.random(filter_shape)) ]:
print image_shape
output = conv.conv2d(input, filters, image_shape, filter_shape, for border_mode in ['valid', 'full']:
border_mode,
unroll_patch=True) input = theano.shared(numpy.random.random(image_shape))
mode = theano.Mode(linker=theano.gof.vm.VM_Linker( filters = theano.shared(numpy.random.random(filter_shape))
allow_gc=False,
use_cloop=True)) output = conv.conv2d(input, filters,
theano_conv = theano.function([], output, mode=mode) image_shape, filter_shape,
theano_conv.fn(n_calls=10) border_mode,
theano_conv.fn.update_profile(theano_conv.profile) unroll_patch=True)
print border_mode, theano_conv.profile.apply_time.values(), mode = theano.Mode(linker=theano.gof.vm.VM_Linker(
print theano_conv.profile.apply_callcount.values() allow_gc=False,
use_cloop=True))
""" theano_conv = theano.function([], output, mode=mode)
shape: (10, 10, 16, 16), (5, 10, 8, 8) t1 = time.time()
num threads 1 2 4 theano_conv.fn(n_calls=500)
// kern 5.54e-03s 3.12e-03s 1.99e-03s t2 = time.time()
// batch 4.22e-03s 1.59e-03s 1.25e-03s print border_mode, t2 - t1, 100
// kern_batch3-5-03s 2.51e-03s 9.15e-04s
"""
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论