提交 7fe951d9 authored 作者: lamblin's avatar lamblin

Merge pull request #644 from nouiz/cache_key

Cache key
......@@ -10,10 +10,9 @@ from theano.gof.cc import get_module_cache
_logger = logging.getLogger('theano.bin.theano-cache')
_logger.setLevel(logging.WARN)
if len(sys.argv) == 1:
print config.compiledir
elif sys.argv[1] in ('clear'):
elif sys.argv[1] == 'clear':
# We skip the refresh on module cache creation because the refresh will
# be done when calling clear afterwards.
cache = get_module_cache(init_args=dict(do_refresh=False))
......@@ -29,9 +28,9 @@ elif sys.argv[1] in ('clear'):
config.compiledir)
_logger.debug('Remaining elements (%s): %s' %
(len(items), ', '.join(items)))
elif sys.argv[1] in ('list'):
elif sys.argv[1] == 'list':
theano.gof.compiledir.print_compiledir_content()
elif sys.argv[1] in ('cleanup'):
elif sys.argv[1] == 'cleanup':
theano.gof.compiledir.cleanup()
elif sys.argv[1] == 'unlock':
theano.gof.compilelock.force_unlock()
......
......@@ -81,7 +81,7 @@ Reference
Initialize object attributes.
.. function:: function(inputs, outputs, mode=None, updates=None, givens=None, accept_inplace=False, name=None)
.. function:: function(inputs, outputs, mode=None, updates=None, givens=None, no_default_updates=False, accept_inplace=False, name=None, rebuild_strict=True, allow_input_downcast=None, profile=None, on_unused_input='raise')
Return a callable object that will calculate `outputs` from `inputs`.
......@@ -121,6 +121,30 @@ Reference
:param name: an optional name for this function.
The profile mode will print the time spent in this function.
:param rebuild_strict: True (Default) is the safer and better tested setting, in which case
`givens` must substitute new variables with the same Type as the variables they replace.
False is a you-better-know-what-you-are-doing setting, that permits `givens` to replace
variables with new variables of any Type. The consequence of changing a Type is that all
results depending on that variable may have a different Type too (the graph is rebuilt from
inputs to outputs). If one of the new types does not make sense for one of the Ops in the
graph, an Exception will be raised.
:type allow_input_downcast: Boolean or None
:param allow_input_downcast: True means that the values passed as
inputs when calling the function can be silently downcasted to fit
the dtype of the corresponding Variable, which may lose precision.
False means that it will only be cast to a more general, or
precise, type. None (default) is almost like False, but allows
downcasting of Python float scalars to floatX.
:type profile: None, True, or ProfileStats instance
:param profile: accumulate profiling information into a given ProfileStats
instance. If argument is `True` then a new ProfileStats instance will be
used. This profiling object will be available via self.profile.
:param on_unused_input: What to do if a variable in the 'inputs' list is
not used in the graph. Possible values are 'raise', 'warn', and 'ignore'.
:rtype: Function instance
:returns: a callable object that will compute the outputs (given the inputs)
......
......@@ -44,10 +44,6 @@ def function(inputs, outputs=None, mode=None, updates=None, givens=None,
:param name: an optional name for this function. The profile mode will print the time spent in this function.
:rtype: Function instance
:returns: a callable object that will compute the outputs (given the inputs)
and update the implicit function arguments according to the `updates`.
:param rebuild_strict: True (Default) is the safer and better tested setting, in which case
`givens` must substitute new variables with the same Type as the variables they replace.
False is a you-better-know-what-you-are-doing setting, that permits `givens` to replace
......@@ -72,6 +68,10 @@ def function(inputs, outputs=None, mode=None, updates=None, givens=None,
:param on_unused_input: What to do if a variable in the 'inputs' list is
not used in the graph. Possible values are 'raise', 'warn', 'ignore' and None.
:rtype: Function instance
:returns: a callable object that will compute the outputs (given the inputs)
and update the implicit function arguments according to the `updates`.
:note: Regarding givens: Be careful to make sure that these substitutions are
independent--behaviour when Var1 of one pair appears in the graph leading to Var2 in
another expression is undefined. Replacements specified with givens are different from
......
......@@ -1106,8 +1106,8 @@ class FunctionMaker(object):
blockers=[i.variable for i in inputs])
msg = ("theano.function was asked to create a function computing "
"outputs given certain inputs, but one of the provided "
"input variables is not part of the computational graph "
"outputs given certain inputs, but the provided input "
"variable at index %i is not part of the computational graph "
"needed to compute the outputs: %s.\n%s")
warn_msg = ("To make this warning into an error, you can pass the "
"parameter on_unused_input='raise' to theano.function. "
......@@ -1119,9 +1119,9 @@ class FunctionMaker(object):
for i in inputs:
if ((i.variable not in used_inputs) and (i.update is None)):
if on_unused_input == 'warn':
warnings.warn(msg % (i.variable, warn_msg), stacklevel=6)
warnings.warn(msg % (inputs.index(i), i.variable, warn_msg), stacklevel=6)
elif on_unused_input == 'raise':
raise UnusedInputError(msg % (i.variable, err_msg))
raise UnusedInputError(msg % (inputs.index(i), i.variable, err_msg))
else:
raise ValueError(("Invalid value for keyword "
"on_unused_input of theano.function: '%s'. "
......
......@@ -11,6 +11,9 @@ import sys
from itertools import izip
import numpy
if sys.version_info[:2] >= (2, 5):
import hashlib
......@@ -918,7 +921,7 @@ class CLinker(link.Linker):
The signature has the following form:
{{{
'CLinker.cmodule_key', compilation args, libraries,
header_dirs, config md5,
header_dirs, numpy ABI version, config md5,
(op0, input_signature0, output_signature0),
(op1, input_signature1, output_signature1),
...
......@@ -986,11 +989,12 @@ class CLinker(link.Linker):
compile_args=self.compile_args(),
libraries=self.libraries(),
header_dirs=self.header_dirs(),
c_compiler=self.c_compiler(),
)
@staticmethod
def cmodule_key_(env, no_recycling, compile_args=None, libraries=None,
header_dirs=None, insert_config_md5=True):
header_dirs=None, insert_config_md5=True, c_compiler=None):
"""
Do the actual computation of cmodule_key in a static method
to allow it to be reused in scalar.Composite.__eq__
......@@ -1032,6 +1036,13 @@ class CLinker(link.Linker):
args = tuple(args)
sig.append(args)
# We must always add the numpy ABI version here, as
# DynamicModule always adds the include <numpy/arrayobject.h>
sig.append('NPY_ABI_VERSION=0x%X' %
numpy.core.multiarray._get_ndarray_c_version())
if c_compiler:
sig.append('c_compiler_str=' + c_compiler.version_str())
# IMPORTANT: The 'md5' prefix is used to isolate the compilation
# parameters from the rest of the key. If you want to add more key
# elements, they should be before this md5 hash if and only if they
......
......@@ -25,6 +25,7 @@ from theano.gof.cc import hash_from_code
# we will abuse the lockfile mechanism when reading and writing the registry
import compilelock
from compiledir import gcc_version_str
from theano.configparser import AddConfigVar, BoolParam
......@@ -314,6 +315,7 @@ def get_module_hash(src_code, key):
2. The version part of the key.
3. The compiler options defined in `key` (command line parameters and
libraries to link against).
4. The NumPy ABI version.
"""
# `to_hash` will contain any element such that we know for sure that if
# it changes, then the module hash should be different.
......@@ -347,6 +349,9 @@ def get_module_hash(src_code, key):
# This is the md5 hash of the config options. We can stop
# here.
break
elif (key_element.startswith('NPY_ABI_VERSION=0x') or
key_element.startswith('c_compiler_str=')):
to_hash.append(key_element)
else:
raise AssertionError(error_msg)
else:
......@@ -1403,30 +1408,15 @@ def std_lib_dirs():
return std_lib_dirs_and_libs()[1]
# Using the dummy file descriptors below is a workaround for a crash
# experienced in an unusual Python 2.4.4 Windows environment with the default
# None values.
dummy_in = open(os.devnull)
dummy_err = open(os.devnull, 'w')
p = None
try:
p = subprocess.Popen(['g++', '-dumpversion'], stdout=subprocess.PIPE,
stdin=dummy_in.fileno(), stderr=dummy_err.fileno())
p.wait()
gcc_version_str = p.stdout.readline().strip()
except OSError:
# Typically means gcc cannot be found.
gcc_version_str = 'GCC_NOT_FOUND'
del p
del dummy_in
del dummy_err
def gcc_version():
    """Return the `gcc_version_str` value visible in this module, unchanged."""
    version = gcc_version_str
    return version
class GCC_compiler(object):
@staticmethod
def version_str():
    """Return the compiler identification string: "g++ " followed by
    the `gcc_version_str` value visible in this module."""
    prefix = "g++ "
    return prefix + gcc_version_str
@staticmethod
def compile_args():
cxxflags = [flag for flag in config.gcc.cxxflags.split(' ') if flag]
......
......@@ -3,6 +3,7 @@ import errno
import os
import platform
import re
import subprocess
import shutil
import sys
import textwrap
......@@ -13,11 +14,30 @@ import theano
from theano.configparser import config, AddConfigVar, ConfigParam, StrParam
from theano.gof.utils import flatten
# Using the dummy file descriptors below is a workaround for a crash
# experienced in an unusual Python 2.4.4 Windows environment with the default
# None values.
dummy_in = open(os.devnull)
dummy_err = open(os.devnull, 'w')
p = None
try:
p = subprocess.Popen(['g++', '-dumpversion'], stdout=subprocess.PIPE,
stdin=dummy_in.fileno(), stderr=dummy_err.fileno())
p.wait()
gcc_version_str = p.stdout.readline().strip()
except OSError:
# Typically means gcc cannot be found.
gcc_version_str = 'GCC_NOT_FOUND'
del p
del dummy_in
del dummy_err
compiledir_format_dict = {"platform": platform.platform(),
"processor": platform.processor(),
"python_version": platform.python_version(),
"theano_version": theano.__version__,
"numpy_version": numpy.__version__,
"g++": gcc_version_str.replace(" ", "_"),
}
compiledir_format_keys = ", ".join(compiledir_format_dict.keys())
default_compiledir_format =\
......@@ -115,8 +135,11 @@ def cleanup():
"""
Delete keys in old format from the compiledir.
We define keys in old format as keys that have an ndarray in them.
The cleanup removes keys in old format, which are:
1) keys that have an ndarray in them.
Now we use a hash in the keys of the constant data.
2) keys that don't have the numpy ABI version in them
3) keys that don't have a compiler version string in them
If there is no key left for a compiled module, we delete the module.
"""
......@@ -131,10 +154,20 @@ def cleanup():
try:
keydata = cPickle.load(file)
for key in list(keydata.keys):
have_npy_abi_version = False
have_c_compiler = False
for obj in flatten(key):
if isinstance(obj, numpy.ndarray):
keydata.remove_key(key)
break
elif isinstance(obj, basestring):
if obj.startswith('NPY_ABI_VERSION=0x'):
have_npy_abi_version = True
elif obj.startswith('c_compiler_str='):
have_c_compiler = True
if not have_npy_abi_version or not have_c_compiler:
keydata.remove_key(key)
if len(keydata.keys) == 0:
shutil.rmtree(os.path.join(compiledir, directory))
......
......@@ -8,16 +8,18 @@ import sys
import warnings
from theano.gof.cc import hash_from_file
from theano.gof.cmodule import (std_libs, std_lib_dirs, std_include_dirs, dlimport,
from theano.gof.cmodule import (std_libs, std_lib_dirs,
std_include_dirs, dlimport,
get_lib_extension, local_bitwidth)
_logger=logging.getLogger("theano.sandbox.cuda.nvcc_compiler")
_logger = logging.getLogger("theano.sandbox.cuda.nvcc_compiler")
_logger.setLevel(logging.WARN)
from theano.configparser import config, AddConfigVar, StrParam, BoolParam
AddConfigVar('nvcc.compiler_bindir',
"If defined, nvcc compiler driver will seek g++ and gcc in this directory",
"If defined, nvcc compiler driver will seek g++ and gcc"
" in this directory",
StrParam(""))
AddConfigVar('cuda.nvccflags',
......@@ -40,6 +42,8 @@ AddConfigVar('nvcc.fastmath',
nvcc_path = 'nvcc'
nvcc_version = None
def is_nvcc_available():
"""Return True iff the nvcc compiler is found."""
try:
......@@ -47,34 +51,42 @@ def is_nvcc_available():
stderr=subprocess.PIPE)
p.wait()
s = p.stdout.readlines()[-1].split(',')[1].strip().split()
assert s[0]=='release'
assert s[0] == 'release'
global nvcc_version
nvcc_version = s[1]
return True
except Exception:
# try to find nvcc in cuda.root
p = os.path.join(config.cuda.root,'bin','nvcc')
p = os.path.join(config.cuda.root, 'bin', 'nvcc')
if os.path.exists(p):
global nvcc_path
nvcc_path = p
return True
else: return False
else:
return False
def set_cuda_root():
s = os.getenv("PATH")
if not s:
return
for dir in s.split(os.path.pathsep):
if os.path.exists(os.path.join(dir,"nvcc")):
if os.path.exists(os.path.join(dir, "nvcc")):
config.cuda.root = os.path.split(dir)[0]
return
# Module-level list of rpath entries; it is the default value of the
# `rpaths` argument of NVCC_compiler.compile_str.
rpath_defaults = []


def add_standard_rpath(rpath):
    """Append `rpath` to the module-level `rpath_defaults` list.

    :param rpath: the entry to add; it is stored as given.
    """
    rpath_defaults.append(rpath)
class NVCC_compiler(object):
@staticmethod
def version_str():
    """Return the compiler identification string, "nvcc <version>".

    :returns: "nvcc " followed by the module-level `nvcc_version`, or
        "nvcc unknown" when no version has been recorded.
    """
    # `nvcc_version` is initialised to None and is only filled in when
    # is_nvcc_available() manages to parse the output of the nvcc
    # command.  When nvcc is only located through config.cuda.root the
    # version stays None, and concatenating it to a string would raise
    # a TypeError.
    if nvcc_version is None:
        return "nvcc unknown"
    return "nvcc " + nvcc_version
@staticmethod
def compile_args():
"""
......@@ -94,35 +106,41 @@ class NVCC_compiler(object):
module_name, src_code,
location=None, include_dirs=[], lib_dirs=[], libs=[], preargs=[],
rpaths=rpath_defaults):
"""
:param module_name: string (this has been embedded in the src_code)
""":param module_name: string (this has been embedded in the src_code)
:param src_code: a complete c or c++ source listing for the module
:param location: a pre-existing filesystem directory where the cpp file and .so will be written
:param include_dirs: a list of include directory names (each gets prefixed with -I)
:param lib_dirs: a list of library search path directory names (each gets prefixed with -L)
:param libs: a list of libraries to link with (each gets prefixed with -l)
:param location: a pre-existing filesystem directory where the
cpp file and .so will be written
:param include_dirs: a list of include directory names
(each gets prefixed with -I)
:param lib_dirs: a list of library search path directory names
(each gets prefixed with -L)
:param libs: a list of libraries to link with
(each gets prefixed with -l)
:param preargs: a list of extra compiler arguments
:param rpaths: list of rpaths to use with Xlinker. Defaults to `rpath_defaults`.
:param rpaths: list of rpaths to use with Xlinker.
Defaults to `rpath_defaults`.
:returns: dynamically-imported python module of the compiled code.
:note 1: On Windows 7 with nvcc 3.1 we need to compile in the real directory,
otherwise nvcc never finishes.
:note 1: On Windows 7 with nvcc 3.1 we need to compile in the
real directory, otherwise nvcc never finishes.
"""
rpaths = list(rpaths)
if sys.platform=="win32":
if sys.platform == "win32":
# Remove some compilation args that cl.exe does not understand.
# cl.exe is the compiler used by nvcc on Windows.
for a in ["-Wno-write-strings","-Wno-unused-label",
for a in ["-Wno-write-strings", "-Wno-unused-label",
"-Wno-unused-variable", "-fno-math-errno"]:
if a in preargs:
preargs.remove(a)
if preargs is None:
preargs= []
else: preargs = list(preargs)
if sys.platform!='win32':
preargs = []
else:
preargs = list(preargs)
if sys.platform != 'win32':
preargs.append('-fPIC')
no_opt = False
cuda_root = config.cuda.root
......@@ -146,10 +164,10 @@ class NVCC_compiler(object):
# No 64 bit CUDA libraries available on the mac, yet..
lib_dirs.append(os.path.join(cuda_root, 'lib64'))
if sys.platform == 'darwin':
# On the mac, nvcc is not able to link using -framework Python, so we have
# to manually add the correct library and paths
# On the mac, nvcc is not able to link using -framework
# Python, so we have to manually add the correct library and
# paths
darwin_python_lib = commands.getoutput('python-config --ldflags')
else:
# sometimes, the linker cannot find -lpython so we need to tell it
......@@ -174,9 +192,14 @@ class NVCC_compiler(object):
(module_name, get_lib_extension()))
_logger.debug('Generating shared lib %s', lib_filename)
# TODO: Why do these args cause failure on gtx285 that has 1.3 compute capability? '--gpu-architecture=compute_13', '--gpu-code=compute_13',
preargs1=[pa for pa in preargs if pa.startswith('-O') or pa.startswith('--maxrregcount=')]#nvcc argument
preargs2=[pa for pa in preargs if pa not in preargs1]#other arguments
# TODO: Why do these args cause failure on gtx285 that has 1.3
# compute capability? '--gpu-architecture=compute_13',
# '--gpu-code=compute_13',
#nvcc argument
preargs1 = [pa for pa in preargs
if pa.startswith('-O') or pa.startswith('--maxrregcount=')]
preargs2 = [pa for pa in preargs
if pa not in preargs1] # other arguments
cmd = [nvcc_path, '-shared', '-g'] + preargs1
if config.nvcc.compiler_bindir:
......@@ -195,23 +218,25 @@ class NVCC_compiler(object):
cmd.append('-m32')
preargs2.append('-m32')
if len(preargs2)>0:
if len(preargs2) > 0:
cmd.extend(['-Xcompiler', ','.join(preargs2)])
if config.cuda.root and os.path.exists(os.path.join(config.cuda.root,'lib')):
rpaths.append(os.path.join(config.cuda.root,'lib'))
if config.cuda.root and os.path.exists(os.path.join(config.cuda.root,
'lib')):
rpaths.append(os.path.join(config.cuda.root, 'lib'))
if sys.platform != 'darwin':
# the 64bit CUDA libs are in the same files as are named by the function above
rpaths.append(os.path.join(config.cuda.root,'lib64'))
# the 64bit CUDA libs are in the same files as are
# named by the function above
rpaths.append(os.path.join(config.cuda.root, 'lib64'))
if sys.platform != 'win32':
# the -rpath option is not understood by the Microsoft linker
for rpath in rpaths:
cmd.extend(['-Xlinker',','.join(['-rpath',rpath])])
cmd.extend('-I%s'%idir for idir in include_dirs)
cmd.extend(['-o',lib_filename])
cmd.extend(['-Xlinker', ','.join(['-rpath', rpath])])
cmd.extend('-I%s' % idir for idir in include_dirs)
cmd.extend(['-o', lib_filename])
cmd.append(os.path.split(cppfilename)[-1])
cmd.extend(['-L%s'%ldir for ldir in lib_dirs])
cmd.extend(['-l%s'%l for l in libs])
cmd.extend(['-L%s' % ldir for ldir in lib_dirs])
cmd.extend(['-l%s' % l for l in libs])
if module_name != 'cuda_ndarray':
cmd.append("-lcuda_ndarray")
if sys.platform == 'darwin':
......@@ -229,9 +254,10 @@ class NVCC_compiler(object):
except ValueError, e:
done = True
# Remove "-u Symbol" arguments, since they are usually not relevant
# for the new compilation, even if they were used for compiling python.
# If they are necessary, the nvcc syntax is "-U Symbol" with a capital U.
# Remove "-u Symbol" arguments, since they are usually not
# relevant for the new compilation, even if they were used for
# compiling python. If they are necessary, the nvcc syntax is
# "-U Symbol" with a capital U.
done = False
while not done:
try:
......@@ -244,14 +270,15 @@ class NVCC_compiler(object):
# Fix for MacOS X.
cmd = remove_python_framework_dir(cmd)
# CUDA Toolkit v4.1 Known Issues:
# Host linker on Mac OS 10.7 (and 10.6 for me) passes -no_pie option to nvcc
# this option is not recognized and generates an error
# Host linker on Mac OS 10.7 (and 10.6 for me) passes -no_pie option
# to nvcc this option is not recognized and generates an error
# http://stackoverflow.com/questions/9327265/nvcc-unknown-option-no-pie
# Passing -Xlinker -pie stops -no_pie from getting passed
if sys.platform == 'darwin' and nvcc_version >= '4.1':
cmd.extend(['-Xlinker', '-pie'])
#cmd.append("--ptxas-options=-v") #uncomment this to see register and shared-mem requirements
#cmd.append("--ptxas-options=-v") #uncomment this to see
#register and shared-mem requirements
_logger.debug('Running cmd %s', ' '.join(cmd))
orig_dir = os.getcwd()
try:
......@@ -269,7 +296,8 @@ class NVCC_compiler(object):
for eline in nvcc_stderr.split('\n'):
if not eline:
continue
if 'skipping incompatible' in eline: #ld is skipping an incompatible library
if 'skipping incompatible' in eline:
#ld is skipping an incompatible library
continue
if 'declared but never referenced' in eline:
continue
......@@ -294,11 +322,12 @@ class NVCC_compiler(object):
print >> sys.stderr, l
print >> sys.stderr, '==============================='
for i, l in enumerate(src_code.split('\n')):
print >> sys.stderr, i+1, l
raise Exception('nvcc return status', p.returncode, 'for cmd', ' '.join(cmd))
print >> sys.stderr, i + 1, l
raise Exception('nvcc return status', p.returncode,
'for cmd', ' '.join(cmd))
#touch the __init__ file
file(os.path.join(location, "__init__.py"),'w').close()
file(os.path.join(location, "__init__.py"), 'w').close()
return dlimport(lib_filename)
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论