提交 cff27c13 作者: Frederic

make {nvcc,gcc}_module_compile_str a class with another function compile_args…

Make {nvcc,gcc}_module_compile_str a class with an additional function, compile_args, whose return value gets added to the keys.
上级 2f2b424a
...@@ -622,6 +622,10 @@ class CLinker(link.Linker): ...@@ -622,6 +622,10 @@ class CLinker(link.Linker):
for x in [y.type for y in self.variables] + [y.op for y in self.node_order]: for x in [y.type for y in self.variables] + [y.op for y in self.node_order]:
try: ret += x.c_compile_args() try: ret += x.c_compile_args()
except utils.MethodNotDefined: pass except utils.MethodNotDefined: pass
c_compiler = self.c_compiler()
ret += c_compiler.compile_args()
ret=list(set(ret))#to remove duplicate ret=list(set(ret))#to remove duplicate
for x in [y.type for y in self.variables] + [y.op for y in self.node_order]: for x in [y.type for y in self.variables] + [y.op for y in self.node_order]:
try: try:
...@@ -661,7 +665,7 @@ class CLinker(link.Linker): ...@@ -661,7 +665,7 @@ class CLinker(link.Linker):
raise Exception('Nodes have requested specific different compilers', raise Exception('Nodes have requested specific different compilers',
(c_compiler, x_compiler)) (c_compiler, x_compiler))
if (c_compiler is None): if (c_compiler is None):
return cmodule.gcc_module_compile_str return cmodule.GCC_compiler
else: return c_compiler else: return c_compiler
def header_dirs(self): def header_dirs(self):
...@@ -1007,7 +1011,7 @@ class CLinker(link.Linker): ...@@ -1007,7 +1011,7 @@ class CLinker(link.Linker):
libs = self.libraries() libs = self.libraries()
preargs = self.compile_args() preargs = self.compile_args()
compiler_name = c_compiler.__name__ compiler_name = c_compiler.__name__
if compiler_name == 'nvcc_module_compile_str' and config.lib.amdlibm: if compiler_name == 'NVCC_compiler' and config.lib.amdlibm:
# This lib does not work correctly with nvcc in device code. # This lib does not work correctly with nvcc in device code.
# and newer version of g++ as 4.5.1. # and newer version of g++ as 4.5.1.
# example of errors: "/usr/lib/gcc/x86_64-redhat-linux/4.5.1/include/mmintrin.h(49): error: identifier "__builtin_ia32_emms" is undefined" # example of errors: "/usr/lib/gcc/x86_64-redhat-linux/4.5.1/include/mmintrin.h(49): error: identifier "__builtin_ia32_emms" is undefined"
...@@ -1024,7 +1028,7 @@ class CLinker(link.Linker): ...@@ -1024,7 +1028,7 @@ class CLinker(link.Linker):
try: try:
_logger.debug("LOCATION %s", str(location)) _logger.debug("LOCATION %s", str(location))
try: try:
module = c_compiler( module = c_compiler.compile_str(
module_name=mod.name, module_name=mod.name,
src_code=src_code, src_code=src_code,
location=location, location=location,
......
...@@ -1312,140 +1312,147 @@ def gcc_version(): ...@@ -1312,140 +1312,147 @@ def gcc_version():
return gcc_version_str return gcc_version_str
def gcc_module_compile_str(module_name, src_code, location=None, class GCC_compiler():
include_dirs=[], lib_dirs=[], libs=[], preargs=[]): @staticmethod
""" def compile_args():
:param module_name: string (this has been embedded in the src_code return []
:param src_code: a complete c or c++ source listing for the module @staticmethod
def compile_str(module_name, src_code, location=None,
:param location: a pre-existing filesystem directory where the cpp file and include_dirs=[], lib_dirs=[], libs=[], preargs=[]):
.so will be written """
:param module_name: string (this has been embedded in the src_code
:param include_dirs: a list of include directory names (each gets prefixed :param src_code: a complete c or c++ source listing for the module
with -I)
:param lib_dirs: a list of library search path directory names (each gets :param location: a pre-existing filesystem directory where the
prefixed with -L) cpp file and .so will be written
:param libs: a list of libraries to link with (each gets prefixed with -l) :param include_dirs: a list of include directory names (each
gets prefixed with -I)
:param preargs: a list of extra compiler arguments :param lib_dirs: a list of library search path directory names
(each gets prefixed with -L)
:returns: dynamically-imported python module of the compiled code. :param libs: a list of libraries to link with (each gets
""" prefixed with -l)
#TODO: Do not do the dlimport in this function
if preargs is None: :param preargs: a list of extra compiler arguments
preargs = []
else:
preargs = list(preargs)
if sys.platform != 'win32': :returns: dynamically-imported python module of the compiled code.
# Under Windows it looks like fPIC is useless. Compiler warning: """
# '-fPIC ignored for target (all code is position independent)' #TODO: Do not do the dlimport in this function
preargs.append('-fPIC')
no_opt = False
include_dirs = include_dirs + std_include_dirs() if preargs is None:
libs = std_libs() + libs preargs = []
lib_dirs = std_lib_dirs() + lib_dirs else:
preargs = list(preargs)
if sys.platform != 'win32':
# Under Windows it looks like fPIC is useless. Compiler warning:
# '-fPIC ignored for target (all code is position independent)'
preargs.append('-fPIC')
no_opt = False
include_dirs = include_dirs + std_include_dirs()
libs = std_libs() + libs
lib_dirs = std_lib_dirs() + lib_dirs
#DSE Patch 1 for supporting OSX frameworks; add -framework Python
if sys.platform == 'darwin':
preargs.extend(['-undefined', 'dynamic_lookup'])
python_inc = distutils.sysconfig.get_python_inc()
# link with the framework library *if specifically requested*
# config.mac_framework_link is by default False, since on some mac
# installs linking with -framework causes a Bus Error
if (python_inc.count('Python.framework') > 0 and
config.cmodule.mac_framework_link):
preargs.extend(['-framework', 'Python'])
# Figure out whether the current Python executable is 32
# or 64 bit and compile accordingly.
n_bits = local_bitwidth()
preargs.extend(['-m%s' % n_bits])
_logger.debug("OS X: compiling for %s bit architecture", n_bits)
# sometimes, the linker cannot find -lpython so we need to tell it
# explicitly where it is located
# this returns somepath/lib/python2.x
python_lib = distutils.sysconfig.get_python_lib(plat_specific=1, \
standard_lib=1)
python_lib = os.path.dirname(python_lib)
if python_lib not in lib_dirs:
lib_dirs.append(python_lib)
workdir = location
cppfilename = os.path.join(location, 'mod.cpp')
cppfile = file(cppfilename, 'w')
_logger.debug('Writing module C++ code to %s', cppfilename)
ofiles = []
rval = None
#DSE Patch 1 for supporting OSX frameworks; add -framework Python cppfile.write(src_code)
if sys.platform == 'darwin': # Avoid gcc warning "no newline at end of file".
preargs.extend(['-undefined', 'dynamic_lookup']) if not src_code.endswith('\n'):
python_inc = distutils.sysconfig.get_python_inc() cppfile.write('\n')
# link with the framework library *if specifically requested* cppfile.close()
# config.mac_framework_link is by default False, since on some mac
# installs linking with -framework causes a Bus Error
if (python_inc.count('Python.framework') > 0 and
config.cmodule.mac_framework_link):
preargs.extend(['-framework', 'Python'])
# Figure out whether the current Python executable is 32 or 64 bit and
# compile accordingly.
n_bits = local_bitwidth()
preargs.extend(['-m%s' % n_bits])
_logger.debug("OS X: compiling for %s bit architecture", n_bits)
# sometimes, the linker cannot find -lpython so we need to tell it
# explicitly where it is located
# this returns somepath/lib/python2.x
python_lib = distutils.sysconfig.get_python_lib(plat_specific=1, \
standard_lib=1)
python_lib = os.path.dirname(python_lib)
if python_lib not in lib_dirs:
lib_dirs.append(python_lib)
workdir = location
cppfilename = os.path.join(location, 'mod.cpp')
cppfile = file(cppfilename, 'w')
_logger.debug('Writing module C++ code to %s', cppfilename)
ofiles = []
rval = None
cppfile.write(src_code) lib_filename = os.path.join(location, '%s.%s' %
# Avoid gcc warning "no newline at end of file". (module_name, get_lib_extension()))
if not src_code.endswith('\n'):
cppfile.write('\n')
cppfile.close()
lib_filename = os.path.join(location, '%s.%s' % _logger.debug('Generating shared lib %s', lib_filename)
(module_name, get_lib_extension())) cmd = ['g++', get_gcc_shared_library_arg(), '-g']
if no_opt:
cmd.extend(p for p in preargs if not p.startswith('-O'))
else:
cmd.extend(preargs)
cxxflags = [flag for flag in config.gcc.cxxflags.split(' ') if flag]
#print >> sys.stderr, config.gcc.cxxflags.split(' ')
cmd.extend(cxxflags)
cmd.extend('-I%s' % idir for idir in include_dirs)
cmd.extend(['-o', lib_filename])
cmd.append(cppfilename)
cmd.extend(['-L%s' % ldir for ldir in lib_dirs])
cmd.extend(['-l%s' % l for l in libs])
#print >> sys.stderr, 'COMPILING W CMD', cmd
_logger.debug('Running cmd: %s', ' '.join(cmd))
def print_command_line_error():
# Print command line when a problem occurred.
print >> sys.stderr, ("Problem occurred during compilation with the "
"command line below:")
print >> sys.stderr, ' '.join(cmd)
_logger.debug('Generating shared lib %s', lib_filename) try:
cmd = ['g++', get_gcc_shared_library_arg(), '-g'] p = subprocess.Popen(cmd, stderr=subprocess.PIPE)
if no_opt: compile_stderr = p.communicate()[1]
cmd.extend(p for p in preargs if not p.startswith('-O')) except Exception:
else: # An exception can occur e.g. if `g++` is not found.
cmd.extend(preargs) print_command_line_error()
cxxflags = [flag for flag in config.gcc.cxxflags.split(' ') if flag] raise
#print >> sys.stderr, config.gcc.cxxflags.split(' ')
cmd.extend(cxxflags)
cmd.extend('-I%s' % idir for idir in include_dirs)
cmd.extend(['-o', lib_filename])
cmd.append(cppfilename)
cmd.extend(['-L%s' % ldir for ldir in lib_dirs])
cmd.extend(['-l%s' % l for l in libs])
#print >> sys.stderr, 'COMPILING W CMD', cmd
_logger.debug('Running cmd: %s', ' '.join(cmd))
def print_command_line_error():
# Print command line when a problem occurred.
print >> sys.stderr, ("Problem occurred during compilation with the "
"command line below:")
print >> sys.stderr, ' '.join(cmd)
try: status = p.returncode
p = subprocess.Popen(cmd, stderr=subprocess.PIPE)
compile_stderr = p.communicate()[1] if status:
except Exception: print '==============================='
# An exception can occur e.g. if `g++` is not found. for i, l in enumerate(src_code.split('\n')):
print_command_line_error() #gcc put its messages to stderr, so we add ours now
raise print >> sys.stderr, '%05i\t%s' % (i + 1, l)
print '==============================='
status = p.returncode print_command_line_error()
# Print errors just below the command line.
if status: print compile_stderr
print '===============================' # We replace '\n' by '. ' in the error message because when Python
for i, l in enumerate(src_code.split('\n')): # prints the exception, having '\n' in the text makes it more difficult
#gcc put its messages to stderr, so we add ours now # to read.
print >> sys.stderr, '%05i\t%s' % (i + 1, l) raise Exception('Compilation failed (return status=%s): %s' %
print '===============================' (status, compile_stderr.replace('\n', '. ')))
print_command_line_error()
# Print errors just below the command line. #touch the __init__ file
print compile_stderr file(os.path.join(location, "__init__.py"), 'w').close()
# We replace '\n' by '. ' in the error message because when Python return dlimport(lib_filename)
# prints the exception, having '\n' in the text makes it more difficult
# to read.
raise Exception('Compilation failed (return status=%s): %s' %
(status, compile_stderr.replace('\n', '. ')))
#touch the __init__ file
file(os.path.join(location, "__init__.py"), 'w').close()
return dlimport(lib_filename)
def icc_module_compile_str(*args): def icc_module_compile_str(*args):
......
...@@ -70,7 +70,7 @@ except ImportError: ...@@ -70,7 +70,7 @@ except ImportError:
if not os.path.exists(loc): if not os.path.exists(loc):
os.mkdir(loc) os.mkdir(loc)
cmodule.gcc_module_compile_str('cutils_ext', code, location=loc) cmodule.GCC_compiler.compile_str('cutils_ext', code, location=loc)
from cutils_ext.cutils_ext import * from cutils_ext.cutils_ext import *
finally: finally:
......
...@@ -53,7 +53,7 @@ except ImportError: ...@@ -53,7 +53,7 @@ except ImportError:
loc = os.path.join(config.compiledir, dirname) loc = os.path.join(config.compiledir, dirname)
if not os.path.exists(loc): if not os.path.exists(loc):
os.mkdir(loc) os.mkdir(loc)
cmodule.gcc_module_compile_str(dirname, code, location=loc) cmodule.GCC_compiler.compile_str(dirname, code, location=loc)
# Save version into the __init__.py file. # Save version into the __init__.py file.
init_py = os.path.join(loc, '__init__.py') init_py = os.path.join(loc, '__init__.py')
open(init_py, 'w').write('_version = %s\n' % version) open(init_py, 'w').write('_version = %s\n' % version)
......
...@@ -87,7 +87,7 @@ libcuda_ndarray_so = os.path.join(cuda_ndarray_loc, ...@@ -87,7 +87,7 @@ libcuda_ndarray_so = os.path.join(cuda_ndarray_loc,
# Add the theano cache directory's cuda_ndarray subdirectory to the # Add the theano cache directory's cuda_ndarray subdirectory to the
# list of places that are hard-coded into compiled modules' runtime # list of places that are hard-coded into compiled modules' runtime
# library search list. This works in conjunction with # library search list. This works in conjunction with
# nvcc_compiler.nvcc_module_compile_str which adds this folder during # nvcc_compiler.NVCC_compiler.compile_str which adds this folder during
# compilation with -L and also adds -lcuda_ndarray when compiling # compilation with -L and also adds -lcuda_ndarray when compiling
# modules. # modules.
nvcc_compiler.add_standard_rpath(cuda_ndarray_loc) nvcc_compiler.add_standard_rpath(cuda_ndarray_loc)
...@@ -117,7 +117,8 @@ try: ...@@ -117,7 +117,8 @@ try:
if not os.path.exists(cuda_ndarray_loc): if not os.path.exists(cuda_ndarray_loc):
os.makedirs(cuda_ndarray_loc) os.makedirs(cuda_ndarray_loc)
nvcc_compiler.nvcc_module_compile_str( compiler = nvcc_compiler.NVCC_compiler()
compiler.compile_str(
'cuda_ndarray', 'cuda_ndarray',
code, code,
location=cuda_ndarray_loc, location=cuda_ndarray_loc,
...@@ -130,7 +131,7 @@ except Exception, e: ...@@ -130,7 +131,7 @@ except Exception, e:
if cuda_available: if cuda_available:
# If necessary, # If necessary,
# create a symlink called libcuda_ndarray.so # create a symlink called libcuda_ndarray.so
# which nvcc_module_compile_str uses when linking # which nvcc_compiler.NVCC_compiler uses when linking
# any module except "cuda_ndarray" itself. # any module except "cuda_ndarray" itself.
try: try:
open(libcuda_ndarray_so).close() open(libcuda_ndarray_so).close()
......
...@@ -72,210 +72,226 @@ rpath_defaults = [] ...@@ -72,210 +72,226 @@ rpath_defaults = []
def add_standard_rpath(rpath): def add_standard_rpath(rpath):
rpath_defaults.append(rpath) rpath_defaults.append(rpath)
def nvcc_module_compile_str(
module_name, src_code,
location=None, include_dirs=[], lib_dirs=[], libs=[], preargs=[],
rpaths=rpath_defaults):
"""
:param module_name: string (this has been embedded in the src_code
:param src_code: a complete c or c++ source listing for the module
:param location: a pre-existing filesystem directory where the cpp file and .so will be written
:param include_dirs: a list of include directory names (each gets prefixed with -I)
:param lib_dirs: a list of library search path directory names (each gets prefixed with -L)
:param libs: a list of libraries to link with (each gets prefixed with -l)
:param preargs: a list of extra compiler arguments
:param rpaths: list of rpaths to use with Xlinker. Defaults to `rpath_defaults`.
:returns: dynamically-imported python module of the compiled code.
:note 1: On Windows 7 with nvcc 3.1 we need to compile in the real directory
Otherwise nvcc never finish.
"""
rpaths = list(rpaths) class NVCC_compiler():
@staticmethod
if sys.platform=="win32": def compile_args():
# Remove some compilation args that cl.exe does not understand. """
# cl.exe is the compiler used by nvcc on Windows. This args will be received by compile_str() in the preargs paramter.
for a in ["-Wno-write-strings","-Wno-unused-label", They will also be included in the "hard" part of the key module.
"-Wno-unused-variable", "-fno-math-errno"]: """
if a in preargs: return []
preargs.remove(a) # flags = [flag for flag in config.nvcc.flags.split(' ') if flag]
if preargs is None: # cuda_ndarray_cuh_hash = hash_from_file(
preargs= [] # os.path.join(os.path.split(__file__)[0], 'cuda_ndarray.cuh'))
else: preargs = list(preargs) # cuda_macro = '-DCUDA_NDARRAY_CUH=' + cuda_ndarray_cuh_hash
if sys.platform!='win32': # return [cuda_macro]
preargs.append('-fPIC')
no_opt = False @staticmethod
cuda_root = config.cuda.root def compile_str(
module_name, src_code,
#The include dirs gived by the user should have precedence over location=None, include_dirs=[], lib_dirs=[], libs=[], preargs=[],
#the standards ones. rpaths=rpath_defaults):
include_dirs = include_dirs + std_include_dirs() """
if os.path.abspath(os.path.split(__file__)[0]) not in include_dirs: :param module_name: string (this has been embedded in the src_code
include_dirs.append(os.path.abspath(os.path.split(__file__)[0])) :param src_code: a complete c or c++ source listing for the module
:param location: a pre-existing filesystem directory where the cpp file and .so will be written
libs = std_libs() + libs :param include_dirs: a list of include directory names (each gets prefixed with -I)
if 'cudart' not in libs: :param lib_dirs: a list of library search path directory names (each gets prefixed with -L)
libs.append('cudart') :param libs: a list of libraries to link with (each gets prefixed with -l)
:param preargs: a list of extra compiler arguments
lib_dirs = std_lib_dirs() + lib_dirs :param rpaths: list of rpaths to use with Xlinker. Defaults to `rpath_defaults`.
if cuda_root:
lib_dirs.append(os.path.join(cuda_root, 'lib')) :returns: dynamically-imported python module of the compiled code.
# from Benjamin Schrauwen April 14 2010 :note 1: On Windows 7 with nvcc 3.1 we need to compile in the real directory
if sys.platform != 'darwin': Otherwise nvcc never finish.
# No 64 bit CUDA libraries available on the mac, yet.. """
lib_dirs.append(os.path.join(cuda_root, 'lib64'))
rpaths = list(rpaths)
if sys.platform == 'darwin': if sys.platform=="win32":
# On the mac, nvcc is not able to link using -framework Python, so we have # Remove some compilation args that cl.exe does not understand.
# manually add the correct library and paths # cl.exe is the compiler used by nvcc on Windows.
darwin_python_lib = commands.getoutput('python-config --ldflags') for a in ["-Wno-write-strings","-Wno-unused-label",
else: "-Wno-unused-variable", "-fno-math-errno"]:
# sometimes, the linker cannot find -lpython so we need to tell it if a in preargs:
# explicitly where it is located preargs.remove(a)
# this returns somepath/lib/python2.x if preargs is None:
python_lib = distutils.sysconfig.get_python_lib(plat_specific=1, \ preargs= []
standard_lib=1) else: preargs = list(preargs)
python_lib = os.path.dirname(python_lib) if sys.platform!='win32':
if python_lib not in lib_dirs: preargs.append('-fPIC')
lib_dirs.append(python_lib) no_opt = False
cuda_root = config.cuda.root
cppfilename = os.path.join(location, 'mod.cu')
cppfile = file(cppfilename, 'w') #The include dirs gived by the user should have precedence over
#the standards ones.
_logger.debug('Writing module C++ code to %s', cppfilename) include_dirs = include_dirs + std_include_dirs()
ofiles = [] if os.path.abspath(os.path.split(__file__)[0]) not in include_dirs:
rval = None include_dirs.append(os.path.abspath(os.path.split(__file__)[0]))
cppfile.write(src_code) libs = std_libs() + libs
cppfile.close() if 'cudart' not in libs:
lib_filename = os.path.join(location, '%s.%s' % libs.append('cudart')
(module_name, get_lib_extension()))
lib_dirs = std_lib_dirs() + lib_dirs
_logger.debug('Generating shared lib %s', lib_filename) if cuda_root:
# TODO: Why do these args cause failure on gtx285 that has 1.3 compute capability? '--gpu-architecture=compute_13', '--gpu-code=compute_13', lib_dirs.append(os.path.join(cuda_root, 'lib'))
preargs1=[pa for pa in preargs if pa.startswith('-O') or pa.startswith('--maxrregcount=')]#nvcc argument
preargs2=[pa for pa in preargs if pa not in preargs1]#other arguments # from Benjamin Schrauwen April 14 2010
if sys.platform != 'darwin':
cmd = [nvcc_path, '-shared', '-g'] + preargs1 # No 64 bit CUDA libraries available on the mac, yet..
if config.nvcc.compiler_bindir: lib_dirs.append(os.path.join(cuda_root, 'lib64'))
cmd.extend(['--compiler-bindir', config.nvcc.compiler_bindir])
if sys.platform == 'win32': if sys.platform == 'darwin':
# add flags for Microsoft compiler to create .pdb files # On the mac, nvcc is not able to link using -framework Python, so we have
preargs2.append('/Zi') # manually add the correct library and paths
cmd.extend(['-Xlinker', '/DEBUG']) darwin_python_lib = commands.getoutput('python-config --ldflags')
if sys.platform != 'win32':
if local_bitwidth() == 64:
cmd.append('-m64')
preargs2.append('-m64')
else: else:
cmd.append('-m32') # sometimes, the linker cannot find -lpython so we need to tell it
preargs2.append('-m32') # explicitly where it is located
# this returns somepath/lib/python2.x
if len(preargs2)>0: python_lib = distutils.sysconfig.get_python_lib(plat_specific=1, \
cmd.extend(['-Xcompiler', ','.join(preargs2)]) standard_lib=1)
python_lib = os.path.dirname(python_lib)
if config.cuda.root and os.path.exists(os.path.join(config.cuda.root,'lib')): if python_lib not in lib_dirs:
rpaths.append(os.path.join(config.cuda.root,'lib')) lib_dirs.append(python_lib)
if sys.platform != 'darwin':
# the 64bit CUDA libs are in the same files as are named by the function above cppfilename = os.path.join(location, 'mod.cu')
rpaths.append(os.path.join(config.cuda.root,'lib64')) cppfile = file(cppfilename, 'w')
if sys.platform != 'win32':
# the -rpath option is not understood by the Microsoft linker _logger.debug('Writing module C++ code to %s', cppfilename)
for rpath in rpaths: ofiles = []
cmd.extend(['-Xlinker',','.join(['-rpath',rpath])]) rval = None
cmd.extend([flag for flag in config.nvcc.flags.split(' ') if flag])
cmd.extend('-I%s'%idir for idir in include_dirs) cppfile.write(src_code)
cmd.extend(['-o',lib_filename]) cppfile.close()
cmd.append(os.path.split(cppfilename)[-1]) lib_filename = os.path.join(location, '%s.%s' %
cmd.extend(['-L%s'%ldir for ldir in lib_dirs]) (module_name, get_lib_extension()))
cmd.extend(['-l%s'%l for l in libs])
if module_name != 'cuda_ndarray': _logger.debug('Generating shared lib %s', lib_filename)
cmd.append("-lcuda_ndarray") # TODO: Why do these args cause failure on gtx285 that has 1.3 compute capability? '--gpu-architecture=compute_13', '--gpu-code=compute_13',
if sys.platform == 'darwin': preargs1=[pa for pa in preargs if pa.startswith('-O') or pa.startswith('--maxrregcount=')]#nvcc argument
cmd.extend(darwin_python_lib.split()) preargs2=[pa for pa in preargs if pa not in preargs1]#other arguments
if sys.platform == 'darwin': cmd = [nvcc_path, '-shared', '-g'] + preargs1
if config.nvcc.compiler_bindir:
cmd.extend(['--compiler-bindir', config.nvcc.compiler_bindir])
if sys.platform == 'win32':
# add flags for Microsoft compiler to create .pdb files
preargs2.append('/Zi')
cmd.extend(['-Xlinker', '/DEBUG'])
if sys.platform != 'win32':
if local_bitwidth() == 64:
cmd.append('-m64')
preargs2.append('-m64')
else:
cmd.append('-m32')
preargs2.append('-m32')
if len(preargs2)>0:
cmd.extend(['-Xcompiler', ','.join(preargs2)])
if config.cuda.root and os.path.exists(os.path.join(config.cuda.root,'lib')):
rpaths.append(os.path.join(config.cuda.root,'lib'))
if sys.platform != 'darwin':
# the 64bit CUDA libs are in the same files as are named by the function above
rpaths.append(os.path.join(config.cuda.root,'lib64'))
if sys.platform != 'win32':
# the -rpath option is not understood by the Microsoft linker
for rpath in rpaths:
cmd.extend(['-Xlinker',','.join(['-rpath',rpath])])
cmd.extend([flag for flag in config.nvcc.flags.split(' ') if flag])
cmd.extend('-I%s'%idir for idir in include_dirs)
cmd.extend(['-o',lib_filename])
cmd.append(os.path.split(cppfilename)[-1])
cmd.extend(['-L%s'%ldir for ldir in lib_dirs])
cmd.extend(['-l%s'%l for l in libs])
if module_name != 'cuda_ndarray':
cmd.append("-lcuda_ndarray")
if sys.platform == 'darwin':
cmd.extend(darwin_python_lib.split())
if sys.platform == 'darwin':
done = False
while not done:
try:
indexof = cmd.index('-framework')
newarg = '-Xcompiler', ','.join(cmd[indexof:(indexof + 2)])
cmd.pop(indexof) # Remove -framework
cmd.pop(indexof) # Remove argument to -framework
cmd.extend(newarg)
except ValueError, e:
done = True
# Remove "-u Symbol" arguments, since they are usually not relevant
# for the new compilation, even if they were used for compiling python.
# If they are necessary, the nvcc syntax is "-U Symbol" with a capital U.
done = False done = False
while not done: while not done:
try: try:
indexof = cmd.index('-framework') indexof = cmd.index('-u')
newarg = '-Xcompiler', ','.join(cmd[indexof:(indexof + 2)]) cmd.pop(indexof) # Remove -u
cmd.pop(indexof) # Remove -framework cmd.pop(indexof) # Remove argument to -u
cmd.pop(indexof) # Remove argument to -framework
cmd.extend(newarg)
except ValueError, e: except ValueError, e:
done = True done = True
# Remove "-u Symbol" arguments, since they are usually not relevant # Fix for MacOS X.
# for the new compilation, even if they were used for compiling python. cmd = remove_python_framework_dir(cmd)
# If they are necessary, the nvcc syntax is "-U Symbol" with a capital U.
done = False #cmd.append("--ptxas-options=-v") #uncomment this to see register and shared-mem requirements
while not done: _logger.debug('Running cmd %s', ' '.join(cmd))
orig_dir = os.getcwd()
try: try:
indexof = cmd.index('-u') os.chdir(location)
cmd.pop(indexof) # Remove -u p = subprocess.Popen(
cmd.pop(indexof) # Remove argument to -u cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
except ValueError, e: nvcc_stdout, nvcc_stderr = p.communicate()[:2]
done = True finally:
os.chdir(orig_dir)
# Fix for MacOS X.
cmd = remove_python_framework_dir(cmd) if nvcc_stdout:
# this doesn't happen to my knowledge
#cmd.append("--ptxas-options=-v") #uncomment this to see register and shared-mem requirements print >> sys.stderr, "DEBUG: nvcc STDOUT", nvcc_stdout
_logger.debug('Running cmd %s', ' '.join(cmd))
orig_dir = os.getcwd() for eline in nvcc_stderr.split('\n'):
try: if not eline:
os.chdir(location) continue
p = subprocess.Popen( if 'skipping incompatible' in eline: #ld is skipping an incompatible library
cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
nvcc_stdout, nvcc_stderr = p.communicate()[:2]
finally:
os.chdir(orig_dir)
if nvcc_stdout:
# this doesn't happen to my knowledge
print >> sys.stderr, "DEBUG: nvcc STDOUT", nvcc_stdout
for eline in nvcc_stderr.split('\n'):
if not eline:
continue
if 'skipping incompatible' in eline: #ld is skipping an incompatible library
continue
if 'declared but never referenced' in eline:
continue
if 'statement is unreachable' in eline:
continue
_logger.info("NVCC: %s", eline)
if p.returncode:
# filter the output from the compiler
for l in nvcc_stderr.split('\n'):
if not l:
continue continue
# filter out the annoying declaration warnings if 'declared but never referenced' in eline:
continue
if 'statement is unreachable' in eline:
continue
_logger.info("NVCC: %s", eline)
try: if p.returncode:
if l[l.index(':'):].startswith(': warning: variable'): # filter the output from the compiler
continue for l in nvcc_stderr.split('\n'):
if l[l.index(':'):].startswith(': warning: label'): if not l:
continue continue
except Exception: # filter out the annoying declaration warnings
pass
print >> sys.stderr, l try:
print >> sys.stderr, '===============================' if l[l.index(':'):].startswith(': warning: variable'):
for i, l in enumerate(src_code.split('\n')): continue
print >> sys.stderr, i+1, l if l[l.index(':'):].startswith(': warning: label'):
raise Exception('nvcc return status', p.returncode, 'for cmd', ' '.join(cmd)) continue
except Exception:
#touch the __init__ file pass
file(os.path.join(location, "__init__.py"),'w').close() print >> sys.stderr, l
return dlimport(lib_filename) print >> sys.stderr, '==============================='
for i, l in enumerate(src_code.split('\n')):
print >> sys.stderr, i+1, l
raise Exception('nvcc return status', p.returncode, 'for cmd', ' '.join(cmd))
#touch the __init__ file
file(os.path.join(location, "__init__.py"),'w').close()
return dlimport(lib_filename)
def remove_python_framework_dir(cmd): def remove_python_framework_dir(cmd):
......
...@@ -12,7 +12,7 @@ try: ...@@ -12,7 +12,7 @@ try:
# We must do those import to be able to create the full doc when nvcc # We must do those import to be able to create the full doc when nvcc
# is not available # is not available
import cuda_ndarray.cuda_ndarray as cuda import cuda_ndarray.cuda_ndarray as cuda
from theano.sandbox.cuda.nvcc_compiler import nvcc_module_compile_str from theano.sandbox.cuda.nvcc_compiler import NVCC_compiler
import cuda_ndarray import cuda_ndarray
except ImportError: except ImportError:
pass pass
...@@ -370,7 +370,7 @@ class CudaNdarrayType(Type): ...@@ -370,7 +370,7 @@ class CudaNdarrayType(Type):
return (2,) # with assertion about refcounts return (2,) # with assertion about refcounts
def c_compiler(self): def c_compiler(self):
return nvcc_module_compile_str return NVCC_compiler
def c_compile_args(self): def c_compile_args(self):
ret = [] ret = []
......
...@@ -50,8 +50,8 @@ except ImportError: ...@@ -50,8 +50,8 @@ except ImportError:
loc = os.path.join(config.compiledir, dirname) loc = os.path.join(config.compiledir, dirname)
if not os.path.exists(loc): if not os.path.exists(loc):
os.mkdir(loc) os.mkdir(loc)
cmodule.gcc_module_compile_str(dirname, code, location=loc, cmodule.GCC_compiler.compile_str(dirname, code, location=loc,
preargs = ['-pthread','-fwrapv', preargs=['-pthread', '-fwrapv',
'-O2', '-O2',
'-fno-strict-aliasing']) '-fno-strict-aliasing'])
# Save version into the __init__.py file. # Save version into the __init__.py file.
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论