提交 223a6ab1 authored 作者: Frederic's avatar Frederic

Move the compiler support for openmp to compilation.

We don't want to do it at import time.
上级 c3b833d2
......@@ -52,7 +52,7 @@ from gof import \
Container, \
InconsistencyError, FunctionGraph, \
Apply, Variable, Constant, \
Op, \
Op, OpenMPOp,\
opt, \
toolbox, \
Type, Generic, generic, \
......@@ -106,9 +106,6 @@ if config.device.startswith('gpu') or config.init_gpu_device.startswith('gpu'):
import theano.sandbox.cuda.tests.test_driver
theano.sandbox.cuda.tests.test_driver.test_nvidia_driver1()
import configdefaults_late
# Use config.numpy to call numpy.seterr
import numpy
if config.numpy.seterr_all == 'None':
......
......@@ -376,3 +376,39 @@ AddConfigVar('exception_verbosity',
C. log_likelihood_h""",
EnumStr('low', 'high'),
in_c_key=False)
#Test if the env variable is set
var = os.getenv('OMP_NUM_THREADS', None)
if var:
try:
int(var)
except ValueError:
raise TypeError("The environment variable OMP_NUM_THREADS"
" should be a number, got '%s'." % var)
else:
default_openmp = not int(var) == 1
else:
#Check the number of cores availables.
count = cpuCount()
if count == -1:
_logger.warning("We are not able to detect the number of CPU cores."
" We disable openmp by default. To remove this"
" warning, set the environment variable"
" OMP_NUM_THREADS to the number of threads you"
" want theano to use.")
default_openmp = count > 1
AddConfigVar('openmp',
"Allow (or not) parallel computation on the CPU with OpenMP. "
"This is the default value used when creating an Op that "
"supports OpenMP parallelization. It is preferable to define it "
"via the Theano configuration file ~/.theanorc or with the "
"environment variable THEANO_FLAGS. Parallelization is only "
"done for some operations that implement it, and even for "
"operations that implement parallelism, each operation is free "
"to respect this flag or not. You can control the number of "
"threads used with the environment variable OMP_NUM_THREADS."
" If it is set to 1, we disable openmp in Theano by default.",
BoolParam(default_openmp),
in_c_key=False,
)
"""
This file defines Theano flags which need to be defined late in import order.
This is needed as they rely on the values of other previously-defined flags.
"""
import os
import logging
import subprocess
import tempfile
import theano
from theano.configparser import (
AddConfigVar, BoolParam, ConfigParam, EnumStr, IntParam,
TheanoConfigParser)
from theano.misc.cpucount import cpuCount
_logger = logging.getLogger('theano.configdefaults_late')
config = TheanoConfigParser()
#http://pyprocessing.berlios.de/
#True if the environment variable (OMP_NUM_THREADS!=1 or
#if we detect more then 1 CPU core) and g++ support OpenMP
#Otherwise False.
default_openmp = True
#Test if the env variable is set
var = os.getenv('OMP_NUM_THREADS', None)
if var:
try:
int(var)
except ValueError:
raise TypeError("The environment variable OMP_NUM_THREADS"
" should be a number, got '%s'." % var)
else:
default_openmp = not int(var) == 1
else:
#Check the number of cores availables.
count = cpuCount()
if count == -1:
_logger.warning("We are not able to detect the number of CPU cores."
" We disable openmp by default. To remove this"
" warning, set the environment variable"
" OMP_NUM_THREADS to the number of threads you"
" want theano to use.")
default_openmp = count > 1
dummy_stdin = open(os.devnull)
if default_openmp and theano.configdefaults.gxx_avail:
#check if g++ supports openmp. We need to compile a file as the EPD
#version has openmp enabled in the specs file but does not include
#the OpenMP files.
try:
code = """
#include <omp.h>
int main( int argc, const char* argv[] )
{
int res[10];
for(int i=0; i < 10; i++){
res[i] = i;
}
}
"""
fd, path = tempfile.mkstemp(suffix='.c', prefix='test_omp_')
try:
os.write(fd, code)
os.close(fd)
fd = None
proc = subprocess.Popen(['g++', '-fopenmp', path],
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
stdin=dummy_stdin.fileno())
proc.wait()
if proc.returncode != 0:
default_openmp = False
finally:
# Ensure `fd` is closed before we remove the temporary file.
try:
if fd is not None:
os.close(fd)
finally:
os.remove(path)
except OSError, e:
default_openmp = False
del dummy_stdin
AddConfigVar('openmp',
"Enable (or not) parallel computation on the CPU with OpenMP. "
"This is the default value used when creating an Op that "
"supports OpenMP parallelization. It is preferable to define it "
"via the Theano configuration file ~/.theanorc or with the "
"environment variable THEANO_FLAGS. Parallelization is only "
"done for some operations that implement it, and even for "
"operations that implement parallelism, each operation is free "
"to respect this flag or not.",
BoolParam(default_openmp),
in_c_key=False,
)
......@@ -55,7 +55,7 @@ from link import \
Container, Linker, LocalLinker, PerformLinker, WrapLinker, WrapLinkerMany
from op import \
Op, PureOp, ops_with_inner_function
Op, OpenMPOp, PureOp, ops_with_inner_function
from opt import (Optimizer, optimizer, SeqOptimizer,
MergeOptimizer, MergeOptMerge,
......
......@@ -11,8 +11,11 @@ __contact__ = "theano-dev <theano-dev@googlegroups.com>"
__docformat__ = "restructuredtext en"
import copy
import logging
import os
import subprocess
import tempfile
import warnings
import theano
......@@ -781,3 +784,90 @@ We need that to be able not to run debug checks a number of times that is
exponential in the nesting level of those ops.
For instance, Scan will be registered here.
"""
class OpenMPOp(Op):
"""All op using OpenMP code should inherit from this Op.
This op will check that the compiler support correctly OpenMP code.
If not, it will print a warning and disable openmp for this Op.
Then it will generate the not OpenMP code.
This is needed as EPD on Windows g++ version spec information tell
it support OpenMP, but does not include the OpenMP files.
We also add the correct compiler flags in c_compile_args.
"""
gxx_support_openmp = None
"""
True/False after we tested this.
"""
def __init__(self, openmp=None):
if openmp is None:
openmp = theano.config.openmp
def c_compile_args(self):
if self.openmp:
return ['-fopenmp']
return []
@staticmethod
def test_gxx_support():
try:
code = """
#include <omp.h>
int main( int argc, const char* argv[] )
{
int res[10];
for(int i=0; i < 10; i++){
res[i] = i;
}
}
"""
fd, path = tempfile.mkstemp(suffix='.c', prefix='test_omp_')
dummy_stdin = open(os.devnull)
try:
os.write(fd, code)
os.close(fd)
fd = None
proc = subprocess.Popen(['g++', '-fopenmp', path],
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
stdin=dummy_stdin.fileno())
proc.wait()
if proc.returncode != 0:
default_openmp = False
finally:
del dummy_stdin
# Ensure `fd` is closed before we remove the temporary file.
try:
if fd is not None:
os.close(fd)
finally:
os.remove(path)
except OSError, e:
return False
return True
def make_thunk(self, node, storage_map, compute_map, no_recycling):
op = self
if self.openmp:
if OpenMPOp.gxx_support_openmp is None:
OpenMPOp.gxx_support_openmp = OpenMPOp.test_gxx_support()
if not OpenMPOp.gxx_support_openmp:
#We want to warn only once.
warnings.warn(
"Your g++ compiler fails to compile OpenMP code. We"
" know this happen with some version of the EPD mingw"
" compiler. We disable openmp everywhere in Theano."
" To remove this warning set the theano flags `openmp`"
" to False.")
if OpenMPOp.gxx_support_openmp is False:
op = copy.copy(self)
op.openmp = False
theano.config.openmp = False
return super(OpenMPOp, op).make_thunk(node, storage_map,
compute_map, no_recycling)
......@@ -17,7 +17,7 @@ import numpy
import theano
from theano.tensor import (as_tensor_variable, blas, get_constant_value,
patternbroadcast)
from theano import Op, config
from theano import OpenMPOp, config
from theano.gof import Apply
from theano.gof.python25 import any
......@@ -128,7 +128,7 @@ def conv2d(input, filters, image_shape=None, filter_shape=None,
return op(input, filters)
class ConvOp(Op):
class ConvOp(OpenMPOp):
"""
This Op serves a dual purpose: it can implement a vanilla 2D convolution
(as taught in any signal processing class) or implement the
......@@ -358,10 +358,10 @@ class ConvOp(Op):
raise Exception("In ConvOp, when using unroll_batch and"
" unroll_nkern, all shape are needed")
if openmp is None:
openmp = theano.config.openmp
#Init the openmp attribute
super(ConvOp, self).__init__(openmp=openmp)
if not all_shape or config.openmp:
if not all_shape or self.openmp:
# Only this version is parallelized
unroll_patch = True
......@@ -386,9 +386,6 @@ class ConvOp(Op):
self.dy = dy
self.verbose = verbose
self.version = version
if openmp is None:
openmp = config.openmp
self.openmp = openmp
# a triple
self.imshp_logical = self.imshp
......@@ -1010,8 +1007,8 @@ using namespace std;
if (theano.gof.cmodule.gcc_version() in ['4.3.0'] and
self.kshp == (1, 1)):
ret += ['-O2']
if self.openmp:
ret += ['-fopenmp']
#Add the -fopenmp flags
ret += super(ConvOp, self).c_compile_args()
return ret
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论