提交 f374e21e authored 作者: lamblin's avatar lamblin

Merge pull request #465 from nouiz/compiler

Compiler
...@@ -11,6 +11,14 @@ Since 0.5rc2 ...@@ -11,6 +11,14 @@ Since 0.5rc2
* Fix a bug with Gemv and Ger on CPU, when used on vectors with negative * Fix a bug with Gemv and Ger on CPU, when used on vectors with negative
strides. Data was read from incorrect (and possibly uninitialized) strides. Data was read from incorrect (and possibly uninitialized)
memory space. This bug was probably introduced in 0.5rc1. memory space. This bug was probably introduced in 0.5rc1.
* The Theano flag "nvcc.flags" is now included in the hard part of the key.
This means that we now recompile all modules for each value of "nvcc.flags".
This does not change the default, but if you used this flag, it was ignored
for modules that were already compiled.
* The Theano flag "nvcc.fastmath" is now also used for the cuda_ndarray.cu file.
* Add the header_dirs to the hard part of the compilation key. This is
currently used only by cuda, but if we use libraries that are header-only,
this can be useful.
============= =============
Release Notes Release Notes
...@@ -189,7 +197,7 @@ Crashes fixed: ...@@ -189,7 +197,7 @@ Crashes fixed:
* "Interactive debugger" crash fix. (Ian, Frederic) * "Interactive debugger" crash fix. (Ian, Frederic)
* Do not call gemm with strides 0, some blas refuse it. (Pascal Lamblin) * Do not call gemm with strides 0, some blas refuse it. (Pascal Lamblin)
* Optimization crash with gemm and complex. (Frederic) * Optimization crash with gemm and complex. (Frederic)
* GPU crash with elemwise. (Frederic) * GPU crash with elemwise. (Frederic, some reported by Chris Currivan)
* Compilation crash with amdlibm and the GPU. (Frederic) * Compilation crash with amdlibm and the GPU. (Frederic)
* IfElse crash. (Frederic) * IfElse crash. (Frederic)
* Execution crash fix in AdvancedSubtensor1 on 32 bit computers. (Pascal) * Execution crash fix in AdvancedSubtensor1 on 32 bit computers. (Pascal)
......
...@@ -367,7 +367,7 @@ correctly (for example, for MKL this might be ``-lmkl -lguide -lpthread`` or ...@@ -367,7 +367,7 @@ correctly (for example, for MKL this might be ``-lmkl -lguide -lpthread`` or
a .dll, and on OS-X it might be either a .dylib or a .so.) a .dll, and on OS-X it might be either a .dylib or a .so.)
This might be just a problem with the way Theano passes compilation This might be just a problem with the way Theano passes compilation
arguments to gcc, but the problem is not fixed yet. arguments to g++, but the problem is not fixed yet.
.. _gpu_linux: .. _gpu_linux:
......
...@@ -84,12 +84,12 @@ AddConfigVar('mode', ...@@ -84,12 +84,12 @@ AddConfigVar('mode',
'FAST_COMPILE', 'PROFILE_MODE', 'DEBUG_MODE'), 'FAST_COMPILE', 'PROFILE_MODE', 'DEBUG_MODE'),
in_c_key=False) in_c_key=False)
# Test whether or not gcc is present: disable C code if it is not. # Test whether or not g++ is present: disable C code if it is not.
# Using the dummy file descriptor below is a workaround for a crash experienced # Using the dummy file descriptor below is a workaround for a crash experienced
# in an unusual Python 2.4.4 Windows environment with the default stdin=None. # in an unusual Python 2.4.4 Windows environment with the default stdin=None.
dummy_stdin = open(os.devnull) dummy_stdin = open(os.devnull)
try: try:
subprocess.Popen('gcc', stdout=subprocess.PIPE, stderr=subprocess.PIPE, subprocess.Popen('g++', stdout=subprocess.PIPE, stderr=subprocess.PIPE,
stdin=dummy_stdin.fileno()) stdin=dummy_stdin.fileno())
# Keep the default linker the same as the one for the mode FAST_RUN # Keep the default linker the same as the one for the mode FAST_RUN
AddConfigVar('linker', AddConfigVar('linker',
...@@ -98,13 +98,13 @@ try: ...@@ -98,13 +98,13 @@ try:
'vm', 'cvm', 'vm_nogc', 'cvm_nogc'), 'vm', 'cvm', 'vm_nogc', 'cvm_nogc'),
in_c_key=False) in_c_key=False)
except OSError: except OSError:
# gcc is not present, linker should default to python only # g++ is not present, linker should default to python only
AddConfigVar('linker', AddConfigVar('linker',
"Default linker used if the theano flags mode is Mode or ProfileMode", "Default linker used if the theano flags mode is Mode or ProfileMode",
EnumStr('py', 'c|py', 'c', 'c|py_nogc', 'c&py', EnumStr('py', 'c|py', 'c', 'c|py_nogc', 'c&py',
'vm', 'cvm', 'vm_nogc', 'cvm_nogc'), 'vm', 'cvm', 'vm_nogc', 'cvm_nogc'),
in_c_key=False) in_c_key=False)
_logger.warning('GCC not detected ! Theano will be unable to execute ' _logger.warning('g++ not detected ! Theano will be unable to execute '
'optimized C-implementations (for both CPU and GPU) and will ' 'optimized C-implementations (for both CPU and GPU) and will '
'default to Python implementations. Performance will be severely ' 'default to Python implementations. Performance will be severely '
'degraded.') 'degraded.')
......
...@@ -622,6 +622,10 @@ class CLinker(link.Linker): ...@@ -622,6 +622,10 @@ class CLinker(link.Linker):
for x in [y.type for y in self.variables] + [y.op for y in self.node_order]: for x in [y.type for y in self.variables] + [y.op for y in self.node_order]:
try: ret += x.c_compile_args() try: ret += x.c_compile_args()
except utils.MethodNotDefined: pass except utils.MethodNotDefined: pass
c_compiler = self.c_compiler()
ret += c_compiler.compile_args()
ret=list(set(ret))#to remove duplicate ret=list(set(ret))#to remove duplicate
for x in [y.type for y in self.variables] + [y.op for y in self.node_order]: for x in [y.type for y in self.variables] + [y.op for y in self.node_order]:
try: try:
...@@ -661,7 +665,7 @@ class CLinker(link.Linker): ...@@ -661,7 +665,7 @@ class CLinker(link.Linker):
raise Exception('Nodes have requested specific different compilers', raise Exception('Nodes have requested specific different compilers',
(c_compiler, x_compiler)) (c_compiler, x_compiler))
if (c_compiler is None): if (c_compiler is None):
return cmodule.gcc_module_compile_str return cmodule.GCC_compiler
else: return c_compiler else: return c_compiler
def header_dirs(self): def header_dirs(self):
...@@ -797,7 +801,8 @@ class CLinker(link.Linker): ...@@ -797,7 +801,8 @@ class CLinker(link.Linker):
The key returned by this function is of the form (version, signature) The key returned by this function is of the form (version, signature)
The signature has the following form: The signature has the following form:
{{{ {{{
'CLinker.cmodule_key', compilation args, libraries, config md5, 'CLinker.cmodule_key', compilation args, libraries,
header_dirs, config md5,
(op0, input_signature0, output_signature0), (op0, input_signature0, output_signature0),
(op1, input_signature1, output_signature1), (op1, input_signature1, output_signature1),
... ...
...@@ -857,11 +862,12 @@ class CLinker(link.Linker): ...@@ -857,11 +862,12 @@ class CLinker(link.Linker):
""" """
return self.cmodule_key_(self.env, self.no_recycling, return self.cmodule_key_(self.env, self.no_recycling,
compile_args=self.compile_args(), compile_args=self.compile_args(),
libraries=self.libraries() libraries=self.libraries(),
header_dirs=self.header_dirs(),
) )
@staticmethod @staticmethod
def cmodule_key_(env, no_recycling, compile_args=[], libraries=[], def cmodule_key_(env, no_recycling, compile_args=[], libraries=[],
insert_config_md5=True): header_dirs=[], insert_config_md5=True):
""" """
Do the actual computation of cmodule_key in a static method Do the actual computation of cmodule_key in a static method
to allow it to be reused in scalar.Composite.__eq__ to allow it to be reused in scalar.Composite.__eq__
...@@ -877,8 +883,24 @@ class CLinker(link.Linker): ...@@ -877,8 +883,24 @@ class CLinker(link.Linker):
# First we put the header, compile_args, library names and config md5 # First we put the header, compile_args, library names and config md5
# into the signature. # into the signature.
sig = ['CLinker.cmodule_key'] # will be cast to tuple on return sig = ['CLinker.cmodule_key'] # will be cast to tuple on return
if compile_args is not None: sig.append(tuple(compile_args)) if compile_args is not None:
if libraries is not None: sig.append(tuple(libraries)) # We must sort it, as the order of elements in a set is not guaranteed.
# In particular, 2 sets with the same content can give different
# orders depending on the order in which the data was put in them.
# Sets are used to remove duplicate elements.
args = sorted(compile_args)
args = tuple(args)
sig.append(args)
if libraries is not None:
# see comments for compile_args
args = sorted(libraries)
args = tuple(args)
sig.append(args)
if header_dirs is not None:
args = sorted(header_dirs)
args = tuple(args)
sig.append(args)
# IMPORTANT: The 'md5' prefix is used to isolate the compilation # IMPORTANT: The 'md5' prefix is used to isolate the compilation
# parameters from the rest of the key. If you want to add more key # parameters from the rest of the key. If you want to add more key
...@@ -889,12 +911,6 @@ class CLinker(link.Linker): ...@@ -889,12 +911,6 @@ class CLinker(link.Linker):
else: else:
sig.append('md5: <omitted>') sig.append('md5: <omitted>')
# technically this should only be appended for gcc-compiled Ops
# and the flags of other compilers should be inserted here... but it's not clear how to
# do this.
if config.gcc.cxxflags:
sig.append(config.gcc.cxxflags)
error_on_play = [False] error_on_play = [False]
def in_sig(i, topological_pos, i_idx): def in_sig(i, topological_pos, i_idx):
# assert that every input to every node is one of' # assert that every input to every node is one of'
...@@ -1007,7 +1023,7 @@ class CLinker(link.Linker): ...@@ -1007,7 +1023,7 @@ class CLinker(link.Linker):
libs = self.libraries() libs = self.libraries()
preargs = self.compile_args() preargs = self.compile_args()
compiler_name = c_compiler.__name__ compiler_name = c_compiler.__name__
if compiler_name == 'nvcc_module_compile_str' and config.lib.amdlibm: if compiler_name == 'NVCC_compiler' and config.lib.amdlibm:
# This lib does not work correctly with nvcc in device code. # This lib does not work correctly with nvcc in device code.
# and newer version of g++ as 4.5.1. # and newer version of g++ as 4.5.1.
# example of errors: "/usr/lib/gcc/x86_64-redhat-linux/4.5.1/include/mmintrin.h(49): error: identifier "__builtin_ia32_emms" is undefined" # example of errors: "/usr/lib/gcc/x86_64-redhat-linux/4.5.1/include/mmintrin.h(49): error: identifier "__builtin_ia32_emms" is undefined"
...@@ -1024,7 +1040,7 @@ class CLinker(link.Linker): ...@@ -1024,7 +1040,7 @@ class CLinker(link.Linker):
try: try:
_logger.debug("LOCATION %s", str(location)) _logger.debug("LOCATION %s", str(location))
try: try:
module = c_compiler( module = c_compiler.compile_str(
module_name=mod.name, module_name=mod.name,
src_code=src_code, src_code=src_code,
location=location, location=location,
......
...@@ -1312,140 +1312,145 @@ def gcc_version(): ...@@ -1312,140 +1312,145 @@ def gcc_version():
return gcc_version_str return gcc_version_str
def gcc_module_compile_str(module_name, src_code, location=None, class GCC_compiler():
include_dirs=[], lib_dirs=[], libs=[], preargs=[]): @staticmethod
""" def compile_args():
:param module_name: string (this has been embedded in the src_code cxxflags = [flag for flag in config.gcc.cxxflags.split(' ') if flag]
return cxxflags
:param src_code: a complete c or c++ source listing for the module
@staticmethod
:param location: a pre-existing filesystem directory where the cpp file and def compile_str(module_name, src_code, location=None,
.so will be written include_dirs=[], lib_dirs=[], libs=[], preargs=[]):
"""
:param module_name: string (this has been embedded in the src_code
:param include_dirs: a list of include directory names (each gets prefixed :param src_code: a complete c or c++ source listing for the module
with -I)
:param lib_dirs: a list of library search path directory names (each gets :param location: a pre-existing filesystem directory where the
prefixed with -L) cpp file and .so will be written
:param libs: a list of libraries to link with (each gets prefixed with -l) :param include_dirs: a list of include directory names (each
gets prefixed with -I)
:param preargs: a list of extra compiler arguments :param lib_dirs: a list of library search path directory names
(each gets prefixed with -L)
:returns: dynamically-imported python module of the compiled code. :param libs: a list of libraries to link with (each gets
""" prefixed with -l)
#TODO: Do not do the dlimport in this function
if preargs is None: :param preargs: a list of extra compiler arguments
preargs = []
else:
preargs = list(preargs)
if sys.platform != 'win32': :returns: dynamically-imported python module of the compiled code.
# Under Windows it looks like fPIC is useless. Compiler warning: """
# '-fPIC ignored for target (all code is position independent)' #TODO: Do not do the dlimport in this function
preargs.append('-fPIC')
no_opt = False
include_dirs = include_dirs + std_include_dirs() if preargs is None:
libs = std_libs() + libs preargs = []
lib_dirs = std_lib_dirs() + lib_dirs else:
preargs = list(preargs)
if sys.platform != 'win32':
# Under Windows it looks like fPIC is useless. Compiler warning:
# '-fPIC ignored for target (all code is position independent)'
preargs.append('-fPIC')
no_opt = False
include_dirs = include_dirs + std_include_dirs()
libs = std_libs() + libs
lib_dirs = std_lib_dirs() + lib_dirs
#DSE Patch 1 for supporting OSX frameworks; add -framework Python
if sys.platform == 'darwin':
preargs.extend(['-undefined', 'dynamic_lookup'])
python_inc = distutils.sysconfig.get_python_inc()
# link with the framework library *if specifically requested*
# config.mac_framework_link is by default False, since on some mac
# installs linking with -framework causes a Bus Error
if (python_inc.count('Python.framework') > 0 and
config.cmodule.mac_framework_link):
preargs.extend(['-framework', 'Python'])
# Figure out whether the current Python executable is 32
# or 64 bit and compile accordingly.
n_bits = local_bitwidth()
preargs.extend(['-m%s' % n_bits])
_logger.debug("OS X: compiling for %s bit architecture", n_bits)
# sometimes, the linker cannot find -lpython so we need to tell it
# explicitly where it is located
# this returns somepath/lib/python2.x
python_lib = distutils.sysconfig.get_python_lib(plat_specific=1, \
standard_lib=1)
python_lib = os.path.dirname(python_lib)
if python_lib not in lib_dirs:
lib_dirs.append(python_lib)
workdir = location
cppfilename = os.path.join(location, 'mod.cpp')
cppfile = file(cppfilename, 'w')
_logger.debug('Writing module C++ code to %s', cppfilename)
ofiles = []
rval = None
#DSE Patch 1 for supporting OSX frameworks; add -framework Python cppfile.write(src_code)
if sys.platform == 'darwin': # Avoid gcc warning "no newline at end of file".
preargs.extend(['-undefined', 'dynamic_lookup']) if not src_code.endswith('\n'):
python_inc = distutils.sysconfig.get_python_inc() cppfile.write('\n')
# link with the framework library *if specifically requested* cppfile.close()
# config.mac_framework_link is by default False, since on some mac
# installs linking with -framework causes a Bus Error
if (python_inc.count('Python.framework') > 0 and
config.cmodule.mac_framework_link):
preargs.extend(['-framework', 'Python'])
# Figure out whether the current Python executable is 32 or 64 bit and
# compile accordingly.
n_bits = local_bitwidth()
preargs.extend(['-m%s' % n_bits])
_logger.debug("OS X: compiling for %s bit architecture", n_bits)
# sometimes, the linker cannot find -lpython so we need to tell it
# explicitly where it is located
# this returns somepath/lib/python2.x
python_lib = distutils.sysconfig.get_python_lib(plat_specific=1, \
standard_lib=1)
python_lib = os.path.dirname(python_lib)
if python_lib not in lib_dirs:
lib_dirs.append(python_lib)
workdir = location
cppfilename = os.path.join(location, 'mod.cpp')
cppfile = file(cppfilename, 'w')
_logger.debug('Writing module C++ code to %s', cppfilename)
ofiles = []
rval = None
cppfile.write(src_code) lib_filename = os.path.join(location, '%s.%s' %
# Avoid gcc warning "no newline at end of file". (module_name, get_lib_extension()))
if not src_code.endswith('\n'):
cppfile.write('\n')
cppfile.close()
lib_filename = os.path.join(location, '%s.%s' % _logger.debug('Generating shared lib %s', lib_filename)
(module_name, get_lib_extension())) cmd = ['g++', get_gcc_shared_library_arg(), '-g']
if no_opt:
cmd.extend(p for p in preargs if not p.startswith('-O'))
else:
cmd.extend(preargs)
cmd.extend('-I%s' % idir for idir in include_dirs)
cmd.extend(['-o', lib_filename])
cmd.append(cppfilename)
cmd.extend(['-L%s' % ldir for ldir in lib_dirs])
cmd.extend(['-l%s' % l for l in libs])
#print >> sys.stderr, 'COMPILING W CMD', cmd
_logger.debug('Running cmd: %s', ' '.join(cmd))
def print_command_line_error():
# Print command line when a problem occurred.
print >> sys.stderr, ("Problem occurred during compilation with the "
"command line below:")
print >> sys.stderr, ' '.join(cmd)
_logger.debug('Generating shared lib %s', lib_filename) try:
cmd = ['g++', get_gcc_shared_library_arg(), '-g'] p = subprocess.Popen(cmd, stderr=subprocess.PIPE)
if no_opt: compile_stderr = p.communicate()[1]
cmd.extend(p for p in preargs if not p.startswith('-O')) except Exception:
else: # An exception can occur e.g. if `g++` is not found.
cmd.extend(preargs) print_command_line_error()
cxxflags = [flag for flag in config.gcc.cxxflags.split(' ') if flag] raise
#print >> sys.stderr, config.gcc.cxxflags.split(' ')
cmd.extend(cxxflags)
cmd.extend('-I%s' % idir for idir in include_dirs)
cmd.extend(['-o', lib_filename])
cmd.append(cppfilename)
cmd.extend(['-L%s' % ldir for ldir in lib_dirs])
cmd.extend(['-l%s' % l for l in libs])
#print >> sys.stderr, 'COMPILING W CMD', cmd
_logger.debug('Running cmd: %s', ' '.join(cmd))
def print_command_line_error():
# Print command line when a problem occurred.
print >> sys.stderr, ("Problem occurred during compilation with the "
"command line below:")
print >> sys.stderr, ' '.join(cmd)
try: status = p.returncode
p = subprocess.Popen(cmd, stderr=subprocess.PIPE)
compile_stderr = p.communicate()[1] if status:
except Exception: print '==============================='
# An exception can occur e.g. if `g++` is not found. for i, l in enumerate(src_code.split('\n')):
print_command_line_error() #gcc put its messages to stderr, so we add ours now
raise print >> sys.stderr, '%05i\t%s' % (i + 1, l)
print '==============================='
status = p.returncode print_command_line_error()
# Print errors just below the command line.
if status: print compile_stderr
print '===============================' # We replace '\n' by '. ' in the error message because when Python
for i, l in enumerate(src_code.split('\n')): # prints the exception, having '\n' in the text makes it more difficult
#gcc put its messages to stderr, so we add ours now # to read.
print >> sys.stderr, '%05i\t%s' % (i + 1, l) raise Exception('Compilation failed (return status=%s): %s' %
print '===============================' (status, compile_stderr.replace('\n', '. ')))
print_command_line_error()
# Print errors just below the command line. #touch the __init__ file
print compile_stderr file(os.path.join(location, "__init__.py"), 'w').close()
# We replace '\n' by '. ' in the error message because when Python return dlimport(lib_filename)
# prints the exception, having '\n' in the text makes it more difficult
# to read.
raise Exception('Compilation failed (return status=%s): %s' %
(status, compile_stderr.replace('\n', '. ')))
#touch the __init__ file
file(os.path.join(location, "__init__.py"), 'w').close()
return dlimport(lib_filename)
def icc_module_compile_str(*args): def icc_module_compile_str(*args):
......
...@@ -70,7 +70,9 @@ except ImportError: ...@@ -70,7 +70,9 @@ except ImportError:
if not os.path.exists(loc): if not os.path.exists(loc):
os.mkdir(loc) os.mkdir(loc)
cmodule.gcc_module_compile_str('cutils_ext', code, location=loc) args = cmodule.GCC_compiler.compile_args()
cmodule.GCC_compiler.compile_str('cutils_ext', code, location=loc,
preargs=args)
from cutils_ext.cutils_ext import * from cutils_ext.cutils_ext import *
finally: finally:
......
...@@ -53,7 +53,9 @@ except ImportError: ...@@ -53,7 +53,9 @@ except ImportError:
loc = os.path.join(config.compiledir, dirname) loc = os.path.join(config.compiledir, dirname)
if not os.path.exists(loc): if not os.path.exists(loc):
os.mkdir(loc) os.mkdir(loc)
cmodule.gcc_module_compile_str(dirname, code, location=loc) args = cmodule.GCC_compiler.compile_args()
cmodule.GCC_compiler.compile_str(dirname, code, location=loc,
preargs=args)
# Save version into the __init__.py file. # Save version into the __init__.py file.
init_py = os.path.join(loc, '__init__.py') init_py = os.path.join(loc, '__init__.py')
open(init_py, 'w').write('_version = %s\n' % version) open(init_py, 'w').write('_version = %s\n' % version)
......
...@@ -87,7 +87,7 @@ libcuda_ndarray_so = os.path.join(cuda_ndarray_loc, ...@@ -87,7 +87,7 @@ libcuda_ndarray_so = os.path.join(cuda_ndarray_loc,
# Add the theano cache directory's cuda_ndarray subdirectory to the # Add the theano cache directory's cuda_ndarray subdirectory to the
# list of places that are hard-coded into compiled modules' runtime # list of places that are hard-coded into compiled modules' runtime
# library search list. This works in conjunction with # library search list. This works in conjunction with
# nvcc_compiler.nvcc_module_compile_str which adds this folder during # nvcc_compiler.NVCC_compiler.compile_str which adds this folder during
# compilation with -L and also adds -lcuda_ndarray when compiling # compilation with -L and also adds -lcuda_ndarray when compiling
# modules. # modules.
nvcc_compiler.add_standard_rpath(cuda_ndarray_loc) nvcc_compiler.add_standard_rpath(cuda_ndarray_loc)
...@@ -117,11 +117,13 @@ try: ...@@ -117,11 +117,13 @@ try:
if not os.path.exists(cuda_ndarray_loc): if not os.path.exists(cuda_ndarray_loc):
os.makedirs(cuda_ndarray_loc) os.makedirs(cuda_ndarray_loc)
nvcc_compiler.nvcc_module_compile_str( compiler = nvcc_compiler.NVCC_compiler()
compiler.compile_str(
'cuda_ndarray', 'cuda_ndarray',
code, code,
location=cuda_ndarray_loc, location=cuda_ndarray_loc,
include_dirs=[cuda_path], libs=['cublas']) include_dirs=[cuda_path], libs=['cublas'],
preargs=compiler.compile_args())
from cuda_ndarray.cuda_ndarray import * from cuda_ndarray.cuda_ndarray import *
except Exception, e: except Exception, e:
_logger.error("Failed to compile cuda_ndarray.cu: %s", str(e)) _logger.error("Failed to compile cuda_ndarray.cu: %s", str(e))
...@@ -130,7 +132,7 @@ except Exception, e: ...@@ -130,7 +132,7 @@ except Exception, e:
if cuda_available: if cuda_available:
# If necessary, # If necessary,
# create a symlink called libcuda_ndarray.so # create a symlink called libcuda_ndarray.so
# which nvcc_module_compile_str uses when linking # which nvcc_compiler.NVCC_compiler uses when linking
# any module except "cuda_ndarray" itself. # any module except "cuda_ndarray" itself.
try: try:
open(libcuda_ndarray_so).close() open(libcuda_ndarray_so).close()
......
...@@ -7,6 +7,7 @@ import subprocess ...@@ -7,6 +7,7 @@ import subprocess
import sys import sys
import warnings import warnings
from theano.gof.cc import hash_from_file
from theano.gof.cmodule import (std_libs, std_lib_dirs, std_include_dirs, dlimport, from theano.gof.cmodule import (std_libs, std_lib_dirs, std_include_dirs, dlimport,
get_lib_extension, local_bitwidth) get_lib_extension, local_bitwidth)
...@@ -72,210 +73,226 @@ rpath_defaults = [] ...@@ -72,210 +73,226 @@ rpath_defaults = []
def add_standard_rpath(rpath): def add_standard_rpath(rpath):
rpath_defaults.append(rpath) rpath_defaults.append(rpath)
def nvcc_module_compile_str(
module_name, src_code,
location=None, include_dirs=[], lib_dirs=[], libs=[], preargs=[],
rpaths=rpath_defaults):
"""
:param module_name: string (this has been embedded in the src_code
:param src_code: a complete c or c++ source listing for the module
:param location: a pre-existing filesystem directory where the cpp file and .so will be written
:param include_dirs: a list of include directory names (each gets prefixed with -I)
:param lib_dirs: a list of library search path directory names (each gets prefixed with -L)
:param libs: a list of libraries to link with (each gets prefixed with -l)
:param preargs: a list of extra compiler arguments
:param rpaths: list of rpaths to use with Xlinker. Defaults to `rpath_defaults`.
:returns: dynamically-imported python module of the compiled code.
:note 1: On Windows 7 with nvcc 3.1 we need to compile in the real directory
Otherwise nvcc never finish.
"""
rpaths = list(rpaths) class NVCC_compiler():
@staticmethod
if sys.platform=="win32": def compile_args():
# Remove some compilation args that cl.exe does not understand. """
# cl.exe is the compiler used by nvcc on Windows. These args will be received by compile_str() in the preargs parameter.
for a in ["-Wno-write-strings","-Wno-unused-label", They will also be included in the "hard" part of the key module.
"-Wno-unused-variable", "-fno-math-errno"]: """
if a in preargs: flags = [flag for flag in config.nvcc.flags.split(' ') if flag]
preargs.remove(a) if config.nvcc.fastmath:
if preargs is None: flags.append('-use_fast_math')
preargs= [] cuda_ndarray_cuh_hash = hash_from_file(
else: preargs = list(preargs) os.path.join(os.path.split(__file__)[0], 'cuda_ndarray.cuh'))
if sys.platform!='win32': flags.append('-DCUDA_NDARRAY_CUH=' + cuda_ndarray_cuh_hash)
preargs.append('-fPIC') return flags
no_opt = False
cuda_root = config.cuda.root @staticmethod
def compile_str(
#The include dirs gived by the user should have precedence over module_name, src_code,
#the standards ones. location=None, include_dirs=[], lib_dirs=[], libs=[], preargs=[],
include_dirs = include_dirs + std_include_dirs() rpaths=rpath_defaults):
if os.path.abspath(os.path.split(__file__)[0]) not in include_dirs: """
include_dirs.append(os.path.abspath(os.path.split(__file__)[0])) :param module_name: string (this has been embedded in the src_code
:param src_code: a complete c or c++ source listing for the module
libs = std_libs() + libs :param location: a pre-existing filesystem directory where the cpp file and .so will be written
if 'cudart' not in libs: :param include_dirs: a list of include directory names (each gets prefixed with -I)
libs.append('cudart') :param lib_dirs: a list of library search path directory names (each gets prefixed with -L)
:param libs: a list of libraries to link with (each gets prefixed with -l)
lib_dirs = std_lib_dirs() + lib_dirs :param preargs: a list of extra compiler arguments
if cuda_root: :param rpaths: list of rpaths to use with Xlinker. Defaults to `rpath_defaults`.
lib_dirs.append(os.path.join(cuda_root, 'lib'))
:returns: dynamically-imported python module of the compiled code.
# from Benjamin Schrauwen April 14 2010
if sys.platform != 'darwin': :note 1: On Windows 7 with nvcc 3.1 we need to compile in the real directory
# No 64 bit CUDA libraries available on the mac, yet.. Otherwise nvcc never finish.
lib_dirs.append(os.path.join(cuda_root, 'lib64')) """
rpaths = list(rpaths)
if sys.platform == 'darwin':
# On the mac, nvcc is not able to link using -framework Python, so we have if sys.platform=="win32":
# manually add the correct library and paths # Remove some compilation args that cl.exe does not understand.
darwin_python_lib = commands.getoutput('python-config --ldflags') # cl.exe is the compiler used by nvcc on Windows.
else: for a in ["-Wno-write-strings","-Wno-unused-label",
# sometimes, the linker cannot find -lpython so we need to tell it "-Wno-unused-variable", "-fno-math-errno"]:
# explicitly where it is located if a in preargs:
# this returns somepath/lib/python2.x preargs.remove(a)
python_lib = distutils.sysconfig.get_python_lib(plat_specific=1, \ if preargs is None:
standard_lib=1) preargs= []
python_lib = os.path.dirname(python_lib) else: preargs = list(preargs)
if python_lib not in lib_dirs: if sys.platform!='win32':
lib_dirs.append(python_lib) preargs.append('-fPIC')
no_opt = False
cppfilename = os.path.join(location, 'mod.cu') cuda_root = config.cuda.root
cppfile = file(cppfilename, 'w')
#The include dirs gived by the user should have precedence over
_logger.debug('Writing module C++ code to %s', cppfilename) #the standards ones.
ofiles = [] include_dirs = include_dirs + std_include_dirs()
rval = None if os.path.abspath(os.path.split(__file__)[0]) not in include_dirs:
include_dirs.append(os.path.abspath(os.path.split(__file__)[0]))
cppfile.write(src_code)
cppfile.close() libs = std_libs() + libs
lib_filename = os.path.join(location, '%s.%s' % if 'cudart' not in libs:
(module_name, get_lib_extension())) libs.append('cudart')
_logger.debug('Generating shared lib %s', lib_filename) lib_dirs = std_lib_dirs() + lib_dirs
# TODO: Why do these args cause failure on gtx285 that has 1.3 compute capability? '--gpu-architecture=compute_13', '--gpu-code=compute_13', if cuda_root:
preargs1=[pa for pa in preargs if pa.startswith('-O') or pa.startswith('--maxrregcount=')]#nvcc argument lib_dirs.append(os.path.join(cuda_root, 'lib'))
preargs2=[pa for pa in preargs if pa not in preargs1]#other arguments
# from Benjamin Schrauwen April 14 2010
cmd = [nvcc_path, '-shared', '-g'] + preargs1 if sys.platform != 'darwin':
if config.nvcc.compiler_bindir: # No 64 bit CUDA libraries available on the mac, yet..
cmd.extend(['--compiler-bindir', config.nvcc.compiler_bindir]) lib_dirs.append(os.path.join(cuda_root, 'lib64'))
if sys.platform == 'win32':
# add flags for Microsoft compiler to create .pdb files if sys.platform == 'darwin':
preargs2.append('/Zi') # On the mac, nvcc is not able to link using -framework Python, so we have
cmd.extend(['-Xlinker', '/DEBUG']) # manually add the correct library and paths
darwin_python_lib = commands.getoutput('python-config --ldflags')
if sys.platform != 'win32':
if local_bitwidth() == 64:
cmd.append('-m64')
preargs2.append('-m64')
else: else:
cmd.append('-m32') # sometimes, the linker cannot find -lpython so we need to tell it
preargs2.append('-m32') # explicitly where it is located
# this returns somepath/lib/python2.x
if len(preargs2)>0: python_lib = distutils.sysconfig.get_python_lib(plat_specific=1, \
cmd.extend(['-Xcompiler', ','.join(preargs2)]) standard_lib=1)
python_lib = os.path.dirname(python_lib)
if config.cuda.root and os.path.exists(os.path.join(config.cuda.root,'lib')): if python_lib not in lib_dirs:
rpaths.append(os.path.join(config.cuda.root,'lib')) lib_dirs.append(python_lib)
if sys.platform != 'darwin':
# the 64bit CUDA libs are in the same files as are named by the function above cppfilename = os.path.join(location, 'mod.cu')
rpaths.append(os.path.join(config.cuda.root,'lib64')) cppfile = file(cppfilename, 'w')
if sys.platform != 'win32':
# the -rpath option is not understood by the Microsoft linker _logger.debug('Writing module C++ code to %s', cppfilename)
for rpath in rpaths: ofiles = []
cmd.extend(['-Xlinker',','.join(['-rpath',rpath])]) rval = None
cmd.extend([flag for flag in config.nvcc.flags.split(' ') if flag])
cmd.extend('-I%s'%idir for idir in include_dirs) cppfile.write(src_code)
cmd.extend(['-o',lib_filename]) cppfile.close()
cmd.append(os.path.split(cppfilename)[-1]) lib_filename = os.path.join(location, '%s.%s' %
cmd.extend(['-L%s'%ldir for ldir in lib_dirs]) (module_name, get_lib_extension()))
cmd.extend(['-l%s'%l for l in libs])
if module_name != 'cuda_ndarray': _logger.debug('Generating shared lib %s', lib_filename)
cmd.append("-lcuda_ndarray") # TODO: Why do these args cause failure on gtx285 that has 1.3 compute capability? '--gpu-architecture=compute_13', '--gpu-code=compute_13',
if sys.platform == 'darwin': preargs1=[pa for pa in preargs if pa.startswith('-O') or pa.startswith('--maxrregcount=')]#nvcc argument
cmd.extend(darwin_python_lib.split()) preargs2=[pa for pa in preargs if pa not in preargs1]#other arguments
if sys.platform == 'darwin': cmd = [nvcc_path, '-shared', '-g'] + preargs1
if config.nvcc.compiler_bindir:
cmd.extend(['--compiler-bindir', config.nvcc.compiler_bindir])
if sys.platform == 'win32':
# add flags for Microsoft compiler to create .pdb files
preargs2.append('/Zi')
cmd.extend(['-Xlinker', '/DEBUG'])
if sys.platform != 'win32':
if local_bitwidth() == 64:
cmd.append('-m64')
preargs2.append('-m64')
else:
cmd.append('-m32')
preargs2.append('-m32')
if len(preargs2)>0:
cmd.extend(['-Xcompiler', ','.join(preargs2)])
if config.cuda.root and os.path.exists(os.path.join(config.cuda.root,'lib')):
rpaths.append(os.path.join(config.cuda.root,'lib'))
if sys.platform != 'darwin':
# the 64bit CUDA libs are in the same files as are named by the function above
rpaths.append(os.path.join(config.cuda.root,'lib64'))
if sys.platform != 'win32':
# the -rpath option is not understood by the Microsoft linker
for rpath in rpaths:
cmd.extend(['-Xlinker',','.join(['-rpath',rpath])])
cmd.extend('-I%s'%idir for idir in include_dirs)
cmd.extend(['-o',lib_filename])
cmd.append(os.path.split(cppfilename)[-1])
cmd.extend(['-L%s'%ldir for ldir in lib_dirs])
cmd.extend(['-l%s'%l for l in libs])
if module_name != 'cuda_ndarray':
cmd.append("-lcuda_ndarray")
if sys.platform == 'darwin':
cmd.extend(darwin_python_lib.split())
if sys.platform == 'darwin':
done = False
while not done:
try:
indexof = cmd.index('-framework')
newarg = '-Xcompiler', ','.join(cmd[indexof:(indexof + 2)])
cmd.pop(indexof) # Remove -framework
cmd.pop(indexof) # Remove argument to -framework
cmd.extend(newarg)
except ValueError, e:
done = True
# Remove "-u Symbol" arguments, since they are usually not relevant
# for the new compilation, even if they were used for compiling python.
# If they are necessary, the nvcc syntax is "-U Symbol" with a capital U.
done = False done = False
while not done: while not done:
try: try:
indexof = cmd.index('-framework') indexof = cmd.index('-u')
newarg = '-Xcompiler', ','.join(cmd[indexof:(indexof + 2)]) cmd.pop(indexof) # Remove -u
cmd.pop(indexof) # Remove -framework cmd.pop(indexof) # Remove argument to -u
cmd.pop(indexof) # Remove argument to -framework
cmd.extend(newarg)
except ValueError, e: except ValueError, e:
done = True done = True
# Remove "-u Symbol" arguments, since they are usually not relevant # Fix for MacOS X.
# for the new compilation, even if they were used for compiling python. cmd = remove_python_framework_dir(cmd)
# If they are necessary, the nvcc syntax is "-U Symbol" with a capital U.
done = False #cmd.append("--ptxas-options=-v") #uncomment this to see register and shared-mem requirements
while not done: _logger.debug('Running cmd %s', ' '.join(cmd))
orig_dir = os.getcwd()
try: try:
indexof = cmd.index('-u') os.chdir(location)
cmd.pop(indexof) # Remove -u p = subprocess.Popen(
cmd.pop(indexof) # Remove argument to -u cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
except ValueError, e: nvcc_stdout, nvcc_stderr = p.communicate()[:2]
done = True finally:
os.chdir(orig_dir)
# Fix for MacOS X.
cmd = remove_python_framework_dir(cmd) if nvcc_stdout:
# this doesn't happen to my knowledge
#cmd.append("--ptxas-options=-v") #uncomment this to see register and shared-mem requirements print >> sys.stderr, "DEBUG: nvcc STDOUT", nvcc_stdout
_logger.debug('Running cmd %s', ' '.join(cmd))
orig_dir = os.getcwd() for eline in nvcc_stderr.split('\n'):
try: if not eline:
os.chdir(location) continue
p = subprocess.Popen( if 'skipping incompatible' in eline: #ld is skipping an incompatible library
cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
nvcc_stdout, nvcc_stderr = p.communicate()[:2]
finally:
os.chdir(orig_dir)
if nvcc_stdout:
# this doesn't happen to my knowledge
print >> sys.stderr, "DEBUG: nvcc STDOUT", nvcc_stdout
for eline in nvcc_stderr.split('\n'):
if not eline:
continue
if 'skipping incompatible' in eline: #ld is skipping an incompatible library
continue
if 'declared but never referenced' in eline:
continue
if 'statement is unreachable' in eline:
continue
_logger.info("NVCC: %s", eline)
if p.returncode:
# filter the output from the compiler
for l in nvcc_stderr.split('\n'):
if not l:
continue continue
# filter out the annoying declaration warnings if 'declared but never referenced' in eline:
continue
if 'statement is unreachable' in eline:
continue
_logger.info("NVCC: %s", eline)
try: if p.returncode:
if l[l.index(':'):].startswith(': warning: variable'): # filter the output from the compiler
continue for l in nvcc_stderr.split('\n'):
if l[l.index(':'):].startswith(': warning: label'): if not l:
continue continue
except Exception: # filter out the annoying declaration warnings
pass
print >> sys.stderr, l try:
print >> sys.stderr, '===============================' if l[l.index(':'):].startswith(': warning: variable'):
for i, l in enumerate(src_code.split('\n')): continue
print >> sys.stderr, i+1, l if l[l.index(':'):].startswith(': warning: label'):
raise Exception('nvcc return status', p.returncode, 'for cmd', ' '.join(cmd)) continue
except Exception:
#touch the __init__ file pass
file(os.path.join(location, "__init__.py"),'w').close() print >> sys.stderr, l
return dlimport(lib_filename) print >> sys.stderr, '==============================='
for i, l in enumerate(src_code.split('\n')):
print >> sys.stderr, i+1, l
raise Exception('nvcc return status', p.returncode, 'for cmd', ' '.join(cmd))
#touch the __init__ file
file(os.path.join(location, "__init__.py"),'w').close()
return dlimport(lib_filename)
def remove_python_framework_dir(cmd): def remove_python_framework_dir(cmd):
......
...@@ -12,7 +12,7 @@ try: ...@@ -12,7 +12,7 @@ try:
# We must do those import to be able to create the full doc when nvcc # We must do those import to be able to create the full doc when nvcc
# is not available # is not available
import cuda_ndarray.cuda_ndarray as cuda import cuda_ndarray.cuda_ndarray as cuda
from theano.sandbox.cuda.nvcc_compiler import nvcc_module_compile_str from theano.sandbox.cuda.nvcc_compiler import NVCC_compiler
import cuda_ndarray import cuda_ndarray
except ImportError: except ImportError:
pass pass
...@@ -370,13 +370,10 @@ class CudaNdarrayType(Type): ...@@ -370,13 +370,10 @@ class CudaNdarrayType(Type):
return (2,) # with assertion about refcounts return (2,) # with assertion about refcounts
def c_compiler(self): def c_compiler(self):
return nvcc_module_compile_str return NVCC_compiler
def c_compile_args(self): def c_compile_args(self):
ret = [] return []
if config.nvcc.fastmath:
ret.append('-use_fast_math')
return ret
# Register CudaNdarrayType to the OutputGuard list of known types # Register CudaNdarrayType to the OutputGuard list of known types
......
...@@ -50,10 +50,10 @@ except ImportError: ...@@ -50,10 +50,10 @@ except ImportError:
loc = os.path.join(config.compiledir, dirname) loc = os.path.join(config.compiledir, dirname)
if not os.path.exists(loc): if not os.path.exists(loc):
os.mkdir(loc) os.mkdir(loc)
cmodule.gcc_module_compile_str(dirname, code, location=loc, preargs = ['-pthread', '-fwrapv', '-O2', '-fno-strict-aliasing']
preargs = ['-pthread','-fwrapv', preargs += cmodule.GCC_compiler.compile_args()
'-O2', cmodule.GCC_compiler.compile_str(dirname, code, location=loc,
'-fno-strict-aliasing']) preargs=preargs)
# Save version into the __init__.py file. # Save version into the __init__.py file.
init_py = os.path.join(loc, '__init__.py') init_py = os.path.join(loc, '__init__.py')
open(init_py, 'w').write('_version = %s\n' % version) open(init_py, 'w').write('_version = %s\n' % version)
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论