Merge pull request #465 from nouiz/compiler

Compiler

Merge pull request #465 from nouiz/compiler
f374e21e · lamblin · 1c1d7642 · ca895ef3 · f374e21e · f374e21e
--- a/NEWS.txt
+++ b/NEWS.txt
@@ -11,6 +11,14 @@ Since 0.5rc2
 * Fix a bug with Gemv and Ger on CPU, when used on vectors with negative
   strides. Data was read from incorrect (and possibly uninitialized)
   memory space. This bug was probably introduced in 0.5rc1.
+ * The Theano flag "nvcc.flags" is now included in the hard part of the key.
+   This means that we now recompile all modules for each value of "nvcc.flags".
+   This does not change the default, but if you used this flag, it was
+   previously ignored for modules already compiled.
+ * The Theano flag "nvcc.fastmath" is now also used for the cuda_ndarray.cu file.
+ * Add the header_dirs to the hard part of the compilation key. This is
+   currently used only by cuda, but if we use libraries that are only headers,
+   this can be useful.
 =============
 Release Notes
@@ -189,7 +197,7 @@ Crashes fixed:
 * "Interactive debugger" crash fix. (Ian, Frederic)
 * Do not call gemm with strides 0, some blas refuse it. (Pascal Lamblin)
 * Optimization crash with gemm and complex. (Frederic)
- * GPU crash with elemwise. (Frederic)
+ * GPU crash with elemwise. (Frederic, some reported by Chris Currivan)
 * Compilation crash with amdlibm and the GPU. (Frederic)
 * IfElse crash. (Frederic)
 * Execution crash fix in AdvancedSubtensor1 on 32 bit computers. (Pascal)

--- a/doc/install.txt
+++ b/doc/install.txt
@@ -367,7 +367,7 @@ correctly (for example, for MKL this might be ``-lmkl -lguide -lpthread`` or
    a .dll, and on OS-X it might be either a .dylib or a .so.)
    This might be just a problem with the way Theano passes compilation
-    arguments to gcc, but the problem is not fixed yet.
+    arguments to g++, but the problem is not fixed yet.
 .. _gpu_linux:

--- a/theano/configdefaults.py
+++ b/theano/configdefaults.py
@@ -84,12 +84,12 @@ AddConfigVar('mode',
                'FAST_COMPILE', 'PROFILE_MODE', 'DEBUG_MODE'),
        in_c_key=False)
-# Test whether or not gcc is present: disable C code if it is not.
+# Test whether or not g++ is present: disable C code if it is not.
 # Using the dummy file descriptor below is a workaround for a crash experienced
 # in an unusual Python 2.4.4 Windows environment with the default stdin=None.
 dummy_stdin = open(os.devnull)
 try:
-    subprocess.Popen('gcc', stdout=subprocess.PIPE, stderr=subprocess.PIPE,
+    subprocess.Popen('g++', stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                     stdin=dummy_stdin.fileno())
    # Keep the default linker the same as the one for the mode FAST_RUN
    AddConfigVar('linker',
@@ -98,13 +98,13 @@ try:
                     'vm', 'cvm', 'vm_nogc', 'cvm_nogc'),
                 in_c_key=False)
 except OSError:
-    # gcc is not present, linker should default to python only
+    # g++ is not present, linker should default to python only
    AddConfigVar('linker',
                 "Default linker used if the theano flags mode is Mode or ProfileMode",
                 EnumStr('py', 'c|py', 'c', 'c|py_nogc', 'c&py',
                     'vm', 'cvm', 'vm_nogc', 'cvm_nogc'),
                 in_c_key=False)
-    _logger.warning('GCC not detected ! Theano will be unable to execute '
+    _logger.warning('g++ not detected ! Theano will be unable to execute '
            'optimized C-implementations (for both CPU and GPU) and will '
            'default to Python implementations. Performance will be severely '
            'degraded.')

--- a/theano/gof/cc.py
+++ b/theano/gof/cc.py
@@ -622,6 +622,10 @@ class CLinker(link.Linker):
        for x in [y.type for y in self.variables] + [y.op for y in self.node_order]:
            try: ret += x.c_compile_args()
            except utils.MethodNotDefined: pass
+        c_compiler = self.c_compiler()
+        ret += c_compiler.compile_args()
        ret=list(set(ret))#to remove duplicate
        for x in [y.type for y in self.variables] + [y.op for y in self.node_order]:
            try:
@@ -661,7 +665,7 @@ class CLinker(link.Linker):
                    raise Exception('Nodes have requested specific different compilers',
                            (c_compiler, x_compiler))
        if (c_compiler is None):
-            return cmodule.gcc_module_compile_str
+            return cmodule.GCC_compiler
        else: return c_compiler
    def header_dirs(self):
@@ -797,7 +801,8 @@ class CLinker(link.Linker):
        The key returned by this function is of the form (version, signature)
        The signature has the following form:
        {{{
-            'CLinker.cmodule_key', compilation args, libraries, config md5,
+            'CLinker.cmodule_key', compilation args, libraries,
+            header_dirs, config md5,
            (op0, input_signature0, output_signature0),
            (op1, input_signature1, output_signature1),
            ...
@@ -857,11 +862,12 @@ class CLinker(link.Linker):
        """
        return self.cmodule_key_(self.env, self.no_recycling,
                          compile_args=self.compile_args(),
-                          libraries=self.libraries()
+                          libraries=self.libraries(),
+                          header_dirs=self.header_dirs(),
                          )
    @staticmethod
    def cmodule_key_(env, no_recycling, compile_args=[], libraries=[],
-            insert_config_md5=True):
+                     header_dirs=[], insert_config_md5=True):
        """
        Do the actual computation of cmodule_key in a static method
        to allow it to be reused in scalar.Composite.__eq__
@@ -877,8 +883,24 @@ class CLinker(link.Linker):
        # First we put the header, compile_args, library names and config md5
        # into the signature.
        sig = ['CLinker.cmodule_key'] # will be cast to tuple on return
-        if compile_args is not None: sig.append(tuple(compile_args))
+        if compile_args is not None:
-        if libraries is not None: sig.append(tuple(libraries))
+            # We must sort it, as the order of elements in a set is not
+            # guaranteed. In particular, 2 sets with the same content can give
+            # different orders depending on the order the data was put in them.
+            # Sets are used to remove duplicate elements.
+            args = sorted(compile_args)
+            args = tuple(args)
+            sig.append(args)
+        if libraries is not None:
+            # see comments for compile_args
+            args = sorted(libraries)
+            args = tuple(args)
+            sig.append(args)
+        if header_dirs is not None:
+            args = sorted(header_dirs)
+            args = tuple(args)
+            sig.append(args)
        # IMPORTANT: The 'md5' prefix is used to isolate the compilation
        # parameters from the rest of the key. If you want to add more key
@@ -889,12 +911,6 @@ class CLinker(link.Linker):
        else:
            sig.append('md5: <omitted>')
-        # technically this should only be appended for gcc-compiled Ops
-        # and the flags of other compilers should be inserted here... but it's not clear how to
-        # do this.
-        if config.gcc.cxxflags:
-            sig.append(config.gcc.cxxflags)
        error_on_play = [False]
        def in_sig(i, topological_pos, i_idx):
            # assert that every input to every node is one of'
@@ -1007,7 +1023,7 @@ class CLinker(link.Linker):
        libs = self.libraries()
        preargs = self.compile_args()
        compiler_name = c_compiler.__name__
-        if compiler_name == 'nvcc_module_compile_str' and config.lib.amdlibm:
+        if compiler_name == 'NVCC_compiler' and config.lib.amdlibm:
            # This lib does not work correctly with nvcc in device code.
            # and newer version of g++ as 4.5.1.
            # example of errors: "/usr/lib/gcc/x86_64-redhat-linux/4.5.1/include/mmintrin.h(49): error: identifier "__builtin_ia32_emms" is undefined"
@@ -1024,7 +1040,7 @@ class CLinker(link.Linker):
        try:
            _logger.debug("LOCATION %s", str(location))
            try:
-                module = c_compiler(
+                module = c_compiler.compile_str(
                    module_name=mod.name,
                    src_code=src_code,
                    location=location,

--- a/theano/gof/cmodule.py
+++ b/theano/gof/cmodule.py
--- a/theano/gof/cutils.py
+++ b/theano/gof/cutils.py
@@ -70,7 +70,9 @@ except ImportError:
            if not os.path.exists(loc):
                os.mkdir(loc)
-            cmodule.gcc_module_compile_str('cutils_ext', code, location=loc)
+            args = cmodule.GCC_compiler.compile_args()
+            cmodule.GCC_compiler.compile_str('cutils_ext', code, location=loc,
+                                             preargs=args)
            from cutils_ext.cutils_ext import *
    finally:

--- a/theano/gof/lazylinker_c.py
+++ b/theano/gof/lazylinker_c.py
@@ -53,7 +53,9 @@ except ImportError:
            loc = os.path.join(config.compiledir, dirname)
            if not os.path.exists(loc):
                os.mkdir(loc)
-            cmodule.gcc_module_compile_str(dirname, code, location=loc)
+            args = cmodule.GCC_compiler.compile_args()
+            cmodule.GCC_compiler.compile_str(dirname, code, location=loc,
+                                             preargs=args)
            # Save version into the __init__.py file.
            init_py = os.path.join(loc, '__init__.py')
            open(init_py, 'w').write('_version = %s\n' % version)

--- a/theano/sandbox/cuda/__init__.py
+++ b/theano/sandbox/cuda/__init__.py
@@ -87,7 +87,7 @@ libcuda_ndarray_so = os.path.join(cuda_ndarray_loc,
 # Add the theano cache directory's cuda_ndarray subdirectory to the
 # list of places that are hard-coded into compiled modules' runtime
 # library search list.  This works in conjunction with
-# nvcc_compiler.nvcc_module_compile_str which adds this folder during
+# nvcc_compiler.NVCC_compiler.compile_str which adds this folder during
 # compilation with -L and also adds -lcuda_ndarray when compiling
 # modules.
 nvcc_compiler.add_standard_rpath(cuda_ndarray_loc)
@@ -117,11 +117,13 @@ try:
            if not os.path.exists(cuda_ndarray_loc):
                os.makedirs(cuda_ndarray_loc)
-            nvcc_compiler.nvcc_module_compile_str(
+            compiler = nvcc_compiler.NVCC_compiler()
+            compiler.compile_str(
                    'cuda_ndarray',
                    code,
                    location=cuda_ndarray_loc,
-                    include_dirs=[cuda_path], libs=['cublas'])
+                    include_dirs=[cuda_path], libs=['cublas'],
+                    preargs=compiler.compile_args())
            from cuda_ndarray.cuda_ndarray import *
 except Exception, e:
    _logger.error("Failed to compile cuda_ndarray.cu: %s", str(e))
@@ -130,7 +132,7 @@ except Exception, e:
 if cuda_available:
    # If necessary,
    # create a symlink called libcuda_ndarray.so
-    # which nvcc_module_compile_str uses when linking
+    # which nvcc_compiler.NVCC_compiler uses when linking
    # any module except "cuda_ndarray" itself.
    try:
        open(libcuda_ndarray_so).close()

--- a/theano/sandbox/cuda/nvcc_compiler.py
+++ b/theano/sandbox/cuda/nvcc_compiler.py
--- a/theano/sandbox/cuda/type.py
+++ b/theano/sandbox/cuda/type.py
@@ -12,7 +12,7 @@ try:
    # We must do those import to be able to create the full doc when nvcc
    # is not available
    import cuda_ndarray.cuda_ndarray as cuda
-    from theano.sandbox.cuda.nvcc_compiler import nvcc_module_compile_str
+    from theano.sandbox.cuda.nvcc_compiler import NVCC_compiler
    import cuda_ndarray
 except ImportError:
    pass
@@ -370,13 +370,10 @@ class CudaNdarrayType(Type):
        return (2,) # with assertion about refcounts
    def c_compiler(self):
-        return nvcc_module_compile_str
+        return NVCC_compiler
    def c_compile_args(self):
-        ret = []
+        return []
-        if config.nvcc.fastmath:
-            ret.append('-use_fast_math')
-        return ret
 # Register CudaNdarrayType to the OutputGuard list of known types

--- a/theano/scan_module/scan_perform_ext.py
+++ b/theano/scan_module/scan_perform_ext.py
@@ -50,10 +50,10 @@ except ImportError:
            loc = os.path.join(config.compiledir, dirname)
            if not os.path.exists(loc):
                os.mkdir(loc)
-            cmodule.gcc_module_compile_str(dirname, code, location=loc,
+            preargs = ['-pthread', '-fwrapv', '-O2', '-fno-strict-aliasing']
-                                           preargs = ['-pthread','-fwrapv',
+            preargs += cmodule.GCC_compiler.compile_args()
-                                                      '-O2',
+            cmodule.GCC_compiler.compile_str(dirname, code, location=loc,
-                                                      '-fno-strict-aliasing'])
+                                             preargs=preargs)
            # Save version into the __init__.py file.
            init_py = os.path.join(loc, '__init__.py')
            open(init_py, 'w').write('_version = %s\n' % version)