Merge pull request #644 from nouiz/cache_key

Cache key

Merge pull request #644 from nouiz/cache_key
7fe951d9 · lamblin · 95e9d0f5 · 5d427eab · 7fe951d9 · 7fe951d9
--- a/bin/theano-cache
+++ b/bin/theano-cache
@@ -10,10 +10,9 @@ from theano.gof.cc import get_module_cache
 _logger = logging.getLogger('theano.bin.theano-cache')
 _logger.setLevel(logging.WARN)

-
 if len(sys.argv) == 1:
    print config.compiledir
-elif sys.argv[1] in ('clear'):
+elif sys.argv[1] == 'clear':
    # We skip the refresh on module cache creation because the refresh will
    # be done when calling clear afterwards.
    cache = get_module_cache(init_args=dict(do_refresh=False))
@@ -29,9 +28,9 @@ elif sys.argv[1] in ('clear'):
                        config.compiledir)
        _logger.debug('Remaining elements (%s): %s' %
                      (len(items), ', '.join(items)))
-elif sys.argv[1] in ('list'):
+elif sys.argv[1] == 'list':
    theano.gof.compiledir.print_compiledir_content()
-elif sys.argv[1] in ('cleanup'):
+elif sys.argv[1] == 'cleanup':
    theano.gof.compiledir.cleanup()
 elif sys.argv[1] == 'unlock':
    theano.gof.compilelock.force_unlock()

--- a/doc/library/compile/function.txt
+++ b/doc/library/compile/function.txt
@@ -81,7 +81,7 @@ Reference
        Initialize object attributes.


-.. function:: function(inputs, outputs, mode=None, updates=None, givens=None, accept_inplace=False, name=None)
+.. function:: function(inputs, outputs, mode=None, updates=None, givens=None, no_default_updates=False, accept_inplace=False, name=None, rebuild_strict=True, allow_input_downcast=None, profile=None, on_unused_input='raise')

    Return a callable object that will calculate `outputs` from `inputs`.

@@ -121,6 +121,30 @@ Reference
    :param name: an optional name for this function.
      The profile mode will print the time spent in this function.

+    :param rebuild_strict: True (Default) is the safer and better tested setting, in which case
+    `givens` must substitute new variables with the same Type as the variables they replace.
+    False is a you-better-know-what-you-are-doing setting, that permits `givens` to replace
+    variables with new variables of any Type.  The consequence of changing a Type is that all
+    results depending on that variable may have a different Type too (the graph is rebuilt from
+    inputs to outputs).  If one of the new types does not make sense for one of the Ops in the
+    graph, an Exception will be raised.
+
+    :type allow_input_downcast: Boolean or None
+    :param allow_input_downcast: True means that the values passed as
+    inputs when calling the function can be silently downcasted to fit
+    the dtype of the corresponding Variable, which may lose precision.
+    False means that it will only be cast to a more general, or
+    precise, type. None (default) is almost like False, but allows
+    downcasting of Python float scalars to floatX.
+
+    :type profile: None, True, or ProfileStats instance
+    :param profile: accumulate profiling information into a given ProfileStats
+    instance. If argument is `True` then a new ProfileStats instance will be
+    used.  This profiling object will be available via self.profile.
+
+    :param on_unused_input: What to do if a variable in the 'inputs' list is
+    not used in the graph. Possible values are 'raise', 'warn', and 'ignore'.
+
    :rtype: Function instance

    :returns: a callable object that will compute the outputs (given the inputs)

--- a/theano/compile/function.py
+++ b/theano/compile/function.py
@@ -44,10 +44,6 @@ def function(inputs, outputs=None, mode=None, updates=None, givens=None,

    :param name: an optional name for this function. The profile mode will print the time spent in this function.

-    :rtype: Function instance
-    :returns: a callable object that will compute the outputs (given the inputs)
-    and update the implicit function arguments according to the `updates`.
-
    :param rebuild_strict: True (Default) is the safer and better tested setting, in which case
    `givens` must substitute new variables with the same Type as the variables they replace.
    False is a you-better-know-what-you-are-doing setting, that permits `givens` to replace
@@ -72,6 +68,10 @@ def function(inputs, outputs=None, mode=None, updates=None, givens=None,
    :param on_unused_input: What to do if a variable in the 'inputs' list is
    not used in the graph. Possible values are 'raise', 'warn', 'ignore' and None.

+    :rtype: Function instance
+    :returns: a callable object that will compute the outputs (given the inputs)
+    and update the implicit function arguments according to the `updates`.
+
    :note: Regarding givens: Be careful to make sure that these substitutions are
    independent--behaviour when Var1 of one pair appears in the graph leading to Var2 in
    another expression is undefined.  Replacements specified with givens are different from

--- a/theano/compile/function_module.py
+++ b/theano/compile/function_module.py
@@ -1106,8 +1106,8 @@ class FunctionMaker(object):
                blockers=[i.variable for i in inputs])

        msg = ("theano.function was asked to create a function computing "
-                "outputs given certain inputs, but one of the provided "
-                "input variables is not part of the computational graph "
+                "outputs given certain inputs, but the provided input "
+                "variable at index %i is not part of the computational graph "
                "needed to compute the outputs: %s.\n%s")
        warn_msg = ("To make this warning into an error, you can pass the "
                "parameter on_unused_input='raise' to theano.function. "
@@ -1119,9 +1119,9 @@ class FunctionMaker(object):
        for i in inputs:
            if ((i.variable not in used_inputs) and (i.update is None)):
                if on_unused_input == 'warn':
-                    warnings.warn(msg % (i.variable, warn_msg), stacklevel=6)
+                    warnings.warn(msg % (inputs.index(i), i.variable, warn_msg), stacklevel=6)
                elif on_unused_input == 'raise':
-                    raise UnusedInputError(msg % (i.variable, err_msg))
+                    raise UnusedInputError(msg % (inputs.index(i), i.variable, err_msg))
                else:
                    raise ValueError(("Invalid value for keyword "
                        "on_unused_input of theano.function: '%s'. "

--- a/theano/gof/cc.py
+++ b/theano/gof/cc.py
@@ -11,6 +11,9 @@ import sys
 from itertools import izip


+import numpy
+
+
 if sys.version_info[:2] >= (2, 5):
    import hashlib

@@ -918,7 +921,7 @@ class CLinker(link.Linker):
        The signature has the following form:
        {{{
            'CLinker.cmodule_key', compilation args, libraries,
-            header_dirs, config md5,
+            header_dirs, numpy ABI version, config md5,
            (op0, input_signature0, output_signature0),
            (op1, input_signature1, output_signature1),
            ...
@@ -986,11 +989,12 @@ class CLinker(link.Linker):
                          compile_args=self.compile_args(),
                          libraries=self.libraries(),
                          header_dirs=self.header_dirs(),
+                          c_compiler=self.c_compiler(),
                          )

    @staticmethod
    def cmodule_key_(env, no_recycling, compile_args=None, libraries=None,
-                     header_dirs=None, insert_config_md5=True):
+                     header_dirs=None, insert_config_md5=True, c_compiler=None):
        """
        Do the actual computation of cmodule_key in a static method
        to allow it to be reused in scalar.Composite.__eq__
@@ -1032,6 +1036,13 @@ class CLinker(link.Linker):
            args = tuple(args)
            sig.append(args)

+        # We must always add the numpy ABI version here, as
+        # DynamicModule always adds the include <numpy/arrayobject.h>
+        sig.append('NPY_ABI_VERSION=0x%X' %
+                   numpy.core.multiarray._get_ndarray_c_version())
+        if c_compiler:
+            sig.append('c_compiler_str=' + c_compiler.version_str())
+
        # IMPORTANT: The 'md5' prefix is used to isolate the compilation
        # parameters from the rest of the key. If you want to add more key
        # elements, they should be before this md5 hash if and only if they

--- a/theano/gof/cmodule.py
+++ b/theano/gof/cmodule.py
@@ -25,6 +25,7 @@ from theano.gof.cc import hash_from_code

 # we will abuse the lockfile mechanism when reading and writing the registry
 import compilelock
+from compiledir import gcc_version_str

 from theano.configparser import AddConfigVar, BoolParam

@@ -314,6 +315,7 @@ def get_module_hash(src_code, key):
        2. The version part of the key.
        3. The compiler options defined in `key` (command line parameters and
           libraries to link against).
+        4. The NumPy ABI version and the C compiler version string.
    """
    # `to_hash` will contain any element such that we know for sure that if
    # it changes, then the module hash should be different.
@@ -347,6 +349,9 @@ def get_module_hash(src_code, key):
                # This is the md5 hash of the config options. We can stop
                # here.
                break
+            elif (key_element.startswith('NPY_ABI_VERSION=0x') or
+                  key_element.startswith('c_compiler_str=')):
+                to_hash.append(key_element)
            else:
                raise AssertionError(error_msg)
        else:
@@ -1403,30 +1408,15 @@ def std_lib_dirs():
    return std_lib_dirs_and_libs()[1]


-# Using the dummy file descriptors below is a workaround for a crash
-# experienced in an unusual Python 2.4.4 Windows environment with the default
-# None values.
-dummy_in = open(os.devnull)
-dummy_err = open(os.devnull, 'w')
-p = None
-try:
-    p = subprocess.Popen(['g++', '-dumpversion'], stdout=subprocess.PIPE,
-                         stdin=dummy_in.fileno(), stderr=dummy_err.fileno())
-    p.wait()
-    gcc_version_str = p.stdout.readline().strip()
-except OSError:
-    # Typically means gcc cannot be found.
-    gcc_version_str = 'GCC_NOT_FOUND'
-del p
-del dummy_in
-del dummy_err
-
-
 def gcc_version():
    return gcc_version_str


 class GCC_compiler(object):
+    @staticmethod
+    def version_str():
+        return "g++ " + gcc_version_str
+
    @staticmethod
    def compile_args():
        cxxflags = [flag for flag in config.gcc.cxxflags.split(' ') if flag]

--- a/theano/gof/compiledir.py
+++ b/theano/gof/compiledir.py
@@ -3,6 +3,7 @@ import errno
 import os
 import platform
 import re
+import subprocess
 import shutil
 import sys
 import textwrap
@@ -13,11 +14,30 @@ import theano
 from theano.configparser import config, AddConfigVar, ConfigParam, StrParam
 from theano.gof.utils import flatten

+# Using the dummy file descriptors below is a workaround for a crash
+# experienced in an unusual Python 2.4.4 Windows environment with the default
+# None values.
+dummy_in = open(os.devnull)
+dummy_err = open(os.devnull, 'w')
+p = None
+try:
+    p = subprocess.Popen(['g++', '-dumpversion'], stdout=subprocess.PIPE,
+                         stdin=dummy_in.fileno(), stderr=dummy_err.fileno())
+    p.wait()
+    gcc_version_str = p.stdout.readline().strip()
+except OSError:
+    # Typically means gcc cannot be found.
+    gcc_version_str = 'GCC_NOT_FOUND'
+del p
+del dummy_in
+del dummy_err
+
 compiledir_format_dict = {"platform": platform.platform(),
                          "processor": platform.processor(),
                          "python_version": platform.python_version(),
                          "theano_version": theano.__version__,
                          "numpy_version": numpy.__version__,
+                          "g++": gcc_version_str.replace(" ", "_"),
                         }
 compiledir_format_keys = ", ".join(compiledir_format_dict.keys())
 default_compiledir_format =\
@@ -115,8 +135,11 @@ def cleanup():
    """
    Delete keys in old format from the compiledir.

-    We define keys in old format as keys that have an ndarray in them.
+    The following keys are considered to be in an old format:
+    1) keys that have an ndarray in them.
       Now we use a hash in the keys of the constant data.
+    2) keys that don't have the numpy ABI version in them.
+    3) keys that don't have a compiler version string in them.

    If there is no key left for a compiled module, we delete the module.
    """
@@ -131,10 +154,20 @@ def cleanup():
                try:
                    keydata = cPickle.load(file)
                    for key in list(keydata.keys):
+                        have_npy_abi_version = False
+                        have_c_compiler = False
                        for obj in flatten(key):
                            if isinstance(obj, numpy.ndarray):
                                keydata.remove_key(key)
                                break
+                            elif isinstance(obj, basestring):
+                                if obj.startswith('NPY_ABI_VERSION=0x'):
+                                    have_npy_abi_version = True
+                                elif obj.startswith('c_compiler_str='):
+                                    have_c_compiler = True
+
+                        if not have_npy_abi_version or not have_c_compiler:
+                            keydata.remove_key(key)
                    if len(keydata.keys) == 0:
                        shutil.rmtree(os.path.join(compiledir, directory))


--- a/theano/sandbox/cuda/nvcc_compiler.py
+++ b/theano/sandbox/cuda/nvcc_compiler.py
@@ -8,16 +8,18 @@ import sys
 import warnings

 from theano.gof.cc import hash_from_file
-from theano.gof.cmodule import (std_libs, std_lib_dirs, std_include_dirs, dlimport,
+from theano.gof.cmodule import (std_libs, std_lib_dirs,
+                                std_include_dirs, dlimport,
                                get_lib_extension, local_bitwidth)

-_logger=logging.getLogger("theano.sandbox.cuda.nvcc_compiler")
+_logger = logging.getLogger("theano.sandbox.cuda.nvcc_compiler")
 _logger.setLevel(logging.WARN)

 from theano.configparser import config, AddConfigVar, StrParam, BoolParam

 AddConfigVar('nvcc.compiler_bindir',
-        "If defined, nvcc compiler driver will seek g++ and gcc in this directory",
+             "If defined, nvcc compiler driver will seek g++ and gcc"
+             " in this directory",
        StrParam(""))

 AddConfigVar('cuda.nvccflags',
@@ -40,6 +42,8 @@ AddConfigVar('nvcc.fastmath',

 nvcc_path = 'nvcc'
 nvcc_version = None
+
+
 def is_nvcc_available():
    """Return True iff the nvcc compiler is found."""
    try:
@@ -47,34 +51,42 @@ def is_nvcc_available():
                             stderr=subprocess.PIPE)
        p.wait()
        s = p.stdout.readlines()[-1].split(',')[1].strip().split()
-        assert s[0]=='release'
+        assert s[0] == 'release'
        global nvcc_version
        nvcc_version = s[1]
        return True
    except Exception:
        #try to find nvcc into cuda.root
-        p = os.path.join(config.cuda.root,'bin','nvcc')
+        p = os.path.join(config.cuda.root, 'bin', 'nvcc')
        if os.path.exists(p):
            global nvcc_path
            nvcc_path = p
            return True
-        else: return False
+        else:
+            return False
+

 def set_cuda_root():
    s = os.getenv("PATH")
    if not s:
        return
    for dir in s.split(os.path.pathsep):
-        if os.path.exists(os.path.join(dir,"nvcc")):
+        if os.path.exists(os.path.join(dir, "nvcc")):
            config.cuda.root = os.path.split(dir)[0]
            return

 rpath_defaults = []
+
+
 def add_standard_rpath(rpath):
    rpath_defaults.append(rpath)


 class NVCC_compiler(object):
+    @staticmethod
+    def version_str():
+        return "nvcc " + nvcc_version
+
    @staticmethod
    def compile_args():
        """
@@ -94,35 +106,41 @@ class NVCC_compiler(object):
            module_name, src_code,
            location=None, include_dirs=[], lib_dirs=[], libs=[], preargs=[],
            rpaths=rpath_defaults):
-        """
-        :param module_name: string (this has been embedded in the src_code
+        """:param module_name: string (this has been embedded in the src_code
        :param src_code: a complete c or c++ source listing for the module
-        :param location: a pre-existing filesystem directory where the cpp file and .so will be written
-        :param include_dirs: a list of include directory names (each gets prefixed with -I)
-        :param lib_dirs: a list of library search path directory names (each gets prefixed with -L)
-        :param libs: a list of libraries to link with (each gets prefixed with -l)
+        :param location: a pre-existing filesystem directory where the
+                         cpp file and .so will be written
+        :param include_dirs: a list of include directory names
+                             (each gets prefixed with -I)
+        :param lib_dirs: a list of library search path directory names
+                         (each gets prefixed with -L)
+        :param libs: a list of libraries to link with
+                     (each gets prefixed with -l)
        :param preargs: a list of extra compiler arguments
-        :param rpaths: list of rpaths to use with Xlinker. Defaults to `rpath_defaults`.
+        :param rpaths: list of rpaths to use with Xlinker.
+                       Defaults to `rpath_defaults`.

        :returns: dynamically-imported python module of the compiled code.

-        :note 1: On Windows 7 with nvcc 3.1 we need to compile in the real directory
-                 Otherwise nvcc never finish.
+        :note 1: On Windows 7 with nvcc 3.1 we need to compile in the
+                 real directory, otherwise nvcc never finishes.
+
        """

        rpaths = list(rpaths)

-        if sys.platform=="win32":
+        if sys.platform == "win32":
            # Remove some compilation args that cl.exe does not understand.
            # cl.exe is the compiler used by nvcc on Windows.
-            for a in ["-Wno-write-strings","-Wno-unused-label",
+            for a in ["-Wno-write-strings", "-Wno-unused-label",
                      "-Wno-unused-variable", "-fno-math-errno"]:
                if a in preargs:
                    preargs.remove(a)
        if preargs is None:
-            preargs= []
-        else: preargs = list(preargs)
-        if sys.platform!='win32':
+            preargs = []
+        else:
+            preargs = list(preargs)
+        if sys.platform != 'win32':
            preargs.append('-fPIC')
        no_opt = False
        cuda_root = config.cuda.root
@@ -146,10 +164,10 @@ class NVCC_compiler(object):
                # No 64 bit CUDA libraries available on the mac, yet..
                lib_dirs.append(os.path.join(cuda_root, 'lib64'))

-
        if sys.platform == 'darwin':
-            # On the mac, nvcc is not able to link using -framework Python, so we have
-            # manually add the correct library and paths
+            # On the mac, nvcc is not able to link using -framework
+            # Python, so we have to manually add the correct library and
+            # paths.
            darwin_python_lib = commands.getoutput('python-config --ldflags')
        else:
            # sometimes, the linker cannot find -lpython so we need to tell it
@@ -174,9 +192,14 @@ class NVCC_compiler(object):
                (module_name, get_lib_extension()))

        _logger.debug('Generating shared lib %s', lib_filename)
-        # TODO: Why do these args cause failure on gtx285 that has 1.3 compute capability? '--gpu-architecture=compute_13', '--gpu-code=compute_13',
-        preargs1=[pa for pa in preargs if pa.startswith('-O') or pa.startswith('--maxrregcount=')]#nvcc argument
-        preargs2=[pa for pa in preargs if pa not in preargs1]#other arguments
+        # TODO: Why do these args cause failure on gtx285 that has 1.3
+        # compute capability? '--gpu-architecture=compute_13',
+        # '--gpu-code=compute_13',
+        #nvcc argument
+        preargs1 = [pa for pa in preargs
+                    if pa.startswith('-O') or pa.startswith('--maxrregcount=')]
+        preargs2 = [pa for pa in preargs
+                    if pa not in preargs1]  # other arguments

        cmd = [nvcc_path, '-shared', '-g'] + preargs1
        if config.nvcc.compiler_bindir:
@@ -195,23 +218,25 @@ class NVCC_compiler(object):
                cmd.append('-m32')
                preargs2.append('-m32')

-        if len(preargs2)>0:
+        if len(preargs2) > 0:
            cmd.extend(['-Xcompiler', ','.join(preargs2)])

-        if config.cuda.root and os.path.exists(os.path.join(config.cuda.root,'lib')):
-            rpaths.append(os.path.join(config.cuda.root,'lib'))
+        if config.cuda.root and os.path.exists(os.path.join(config.cuda.root,
+                                                            'lib')):
+            rpaths.append(os.path.join(config.cuda.root, 'lib'))
            if sys.platform != 'darwin':
-                # the 64bit CUDA libs are in the same files as are named by the function above
-                rpaths.append(os.path.join(config.cuda.root,'lib64'))
+                # the 64bit CUDA libs are in the same files as are
+                # named by the function above
+                rpaths.append(os.path.join(config.cuda.root, 'lib64'))
        if sys.platform != 'win32':
            # the -rpath option is not understood by the Microsoft linker
            for rpath in rpaths:
-                cmd.extend(['-Xlinker',','.join(['-rpath',rpath])])
-        cmd.extend('-I%s'%idir for idir in include_dirs)
-        cmd.extend(['-o',lib_filename])
+                cmd.extend(['-Xlinker', ','.join(['-rpath', rpath])])
+        cmd.extend('-I%s' % idir for idir in include_dirs)
+        cmd.extend(['-o', lib_filename])
        cmd.append(os.path.split(cppfilename)[-1])
-        cmd.extend(['-L%s'%ldir for ldir in lib_dirs])
-        cmd.extend(['-l%s'%l for l in libs])
+        cmd.extend(['-L%s' % ldir for ldir in lib_dirs])
+        cmd.extend(['-l%s' % l for l in libs])
        if module_name != 'cuda_ndarray':
            cmd.append("-lcuda_ndarray")
        if sys.platform == 'darwin':
@@ -229,9 +254,10 @@ class NVCC_compiler(object):
                except ValueError, e:
                    done = True

-        # Remove "-u Symbol" arguments, since they are usually not relevant
-        # for the new compilation, even if they were used for compiling python.
-        # If they are necessary, the nvcc syntax is "-U Symbol" with a capital U.
+        # Remove "-u Symbol" arguments, since they are usually not
+        # relevant for the new compilation, even if they were used for
+        # compiling python.  If they are necessary, the nvcc syntax is
+        # "-U Symbol" with a capital U.
        done = False
        while not done:
            try:
@@ -244,14 +270,15 @@ class NVCC_compiler(object):
        # Fix for MacOS X.
        cmd = remove_python_framework_dir(cmd)
        # CUDA Toolkit v4.1 Known Issues:
-        # Host linker on Mac OS 10.7 (and 10.6 for me) passes -no_pie option to nvcc
-        # this option is not recognized and generates an error
+        # Host linker on Mac OS 10.7 (and 10.6 for me) passes -no_pie option
+        # to nvcc; this option is not recognized and generates an error.
        # http://stackoverflow.com/questions/9327265/nvcc-unknown-option-no-pie
        # Passing -Xlinker -pie stops -no_pie from getting passed
        if sys.platform == 'darwin' and nvcc_version >= '4.1':
            cmd.extend(['-Xlinker', '-pie'])

-        #cmd.append("--ptxas-options=-v")  #uncomment this to see register and shared-mem requirements
+        #cmd.append("--ptxas-options=-v") #uncomment this to see
+        #register and shared-mem requirements
        _logger.debug('Running cmd %s', ' '.join(cmd))
        orig_dir = os.getcwd()
        try:
@@ -269,7 +296,8 @@ class NVCC_compiler(object):
        for eline in nvcc_stderr.split('\n'):
            if not eline:
                continue
-            if 'skipping incompatible' in eline: #ld is skipping an incompatible library
+            if 'skipping incompatible' in eline:
+                #ld is skipping an incompatible library
                continue
            if 'declared but never referenced' in eline:
                continue
@@ -294,11 +322,12 @@ class NVCC_compiler(object):
                print >> sys.stderr, l
            print >> sys.stderr, '==============================='
            for i, l in enumerate(src_code.split('\n')):
-                print >> sys.stderr,  i+1, l
-            raise Exception('nvcc return status', p.returncode, 'for cmd', ' '.join(cmd))
+                print >> sys.stderr,  i + 1, l
+            raise Exception('nvcc return status', p.returncode,
+                            'for cmd', ' '.join(cmd))

        #touch the __init__ file
-        file(os.path.join(location, "__init__.py"),'w').close()
+        file(os.path.join(location, "__init__.py"), 'w').close()
        return dlimport(lib_filename)