提交 aa71b758 authored 作者: Frederic's avatar Frederic

When only 1 core or OMP_NUM_THREADS==1, don't enable openmp by default.

上级 b6dc1b14
......@@ -173,7 +173,8 @@ import theano and print the config variable, as in:
Bool value: either True or False
Default: True
Default: if the environment variable OMP_NUM_THREADS is set, True unless
it is equal to 1. Otherwise, True if we detect more than 1 CPU core, else False.
Enable or disable parallel computation on the CPU with OpenMP.
This is the default value used when creating an Op that supports it.
......
......@@ -5,7 +5,7 @@ import subprocess
from theano.configparser import (
AddConfigVar, BoolParam, ConfigParam, EnumStr, IntParam,
TheanoConfigParser)
from theano.misc.cpucount import cpuCount
_logger = logging.getLogger('theano.configdefaults')
......@@ -16,12 +16,35 @@ AddConfigVar('floatX',
EnumStr('float64', 'float32'),
)
#http://pyprocessing.berlios.de/
# If the environment variable OMP_NUM_THREADS is set, True unless it is 1.
# Otherwise, True if we detect more than 1 CPU core, else False.
# Work out the default value of the `openmp` config flag.
#
# An explicit, non-empty OMP_NUM_THREADS takes precedence: OpenMP is enabled
# by default unless it asks for exactly one thread.  When the variable is
# unset or empty, the decision is based on the detected number of CPU cores.
default_openmp = True
var = os.getenv('OMP_NUM_THREADS')
if not var:
    count = cpuCount()
    if count == -1:
        # cpuCount() returned -1, which this code treats as "could not detect
        # the core count"; the comparison below then leaves OpenMP disabled.
        _logger.warning("We are not able to detect the number of CPU cores."
                        " We disable openmp by default. To remove this"
                        " warning, set the environment variable"
                        " OMP_NUM_THREADS to the number of threads you"
                        " want theano to use.")
    default_openmp = count > 1
else:
    try:
        # Only int() can raise here; a non-integer value is reported to the
        # user as a TypeError naming the offending value.
        default_openmp = int(var) != 1
    except ValueError:
        raise TypeError("The environment variable OMP_NUM_THREADS"
                        " should be a number, got '%s'." % var)
AddConfigVar('openmp',
"Enable or not parallel computation on the CPU with OpenMP. "
"It is the default value used when creating an Op that support it"
". The best is to define it via Theano configuration "
"file or with the environment variable THEANO_FLAGS.",
BoolParam(True),
BoolParam(default_openmp),
in_c_key=False,
)
......
......@@ -321,10 +321,11 @@ class ConvOp(Op):
if (unroll_batch>0 or unroll_kern>0) and not all_shape:
raise Exception("In ConvOp, when using unroll_batch and unroll_nkern, all shape are needed")
if openmp is None:
openmp = theano.config.openmp
if not all_shape or config.openmp:
#TODO: check number of core available when we set the default for openmp
#http://bytes.com/topic/python/answers/825616-how-can-i-check-nbr-cores-computer
# Only this version is parallelized
unroll_patch = True
if imshp is not None:
......
......@@ -374,30 +374,28 @@ class TestConv2D(unittest.TestCase):
self.validate((1, 10, 213, 129), (46, 10, 212, 1), 'valid', verify_grad=False)
def speed(self):
filter_shape = (5, 10, 8, 8)
for image_shape in [(10, 10, 10, 10), (10, 10, 16, 16), (10, 10, 64, 64)]:
print image_shape
for border_mode in ['valid', 'full']:
input = theano.shared(numpy.random.random(image_shape))
filters = theano.shared(numpy.random.random(filter_shape))
output = conv.conv2d(input, filters, image_shape, filter_shape,
border_mode,
unroll_patch=True)
mode = theano.Mode(linker=theano.gof.vm.VM_Linker(
allow_gc=False,
use_cloop=True))
theano_conv = theano.function([], output, mode=mode)
theano_conv.fn(n_calls=10)
theano_conv.fn.update_profile(theano_conv.profile)
print border_mode, theano_conv.profile.apply_time.values(),
print theano_conv.profile.apply_callcount.values()
"""
shape: (10, 10, 16, 16), (5, 10, 8, 8)
num threads 1 2 4
// kern 5.54e-03s 3.12e-03s 1.99e-03s
// batch 4.22e-03s 1.59e-03s 1.25e-03s
// kern_batch3-5-03s 2.51e-03s 9.15e-04s
"""
for filter_shape in [(1, 5, 4, 4), (5, 5, 4, 4)]:
print filter_shape
for image_shape in [(1, 5, 6, 6),
#(10, 10, 10, 10),
#(10, 10, 16, 16),
#(10, 10, 32, 32)
]:
print image_shape
for border_mode in ['valid', 'full']:
input = theano.shared(numpy.random.random(image_shape))
filters = theano.shared(numpy.random.random(filter_shape))
output = conv.conv2d(input, filters,
image_shape, filter_shape,
border_mode,
unroll_patch=True)
mode = theano.Mode(linker=theano.gof.vm.VM_Linker(
allow_gc=False,
use_cloop=True))
theano_conv = theano.function([], output, mode=mode)
t1 = time.time()
theano_conv.fn(n_calls=500)
t2 = time.time()
print border_mode, t2 - t1, 100
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论