Faster compilation and import with cuda backend

0dfc7a51 · f0k · Jan Schlüter · 76cfd7e8 · 0dfc7a51 · 0dfc7a51
--- a/theano/configdefaults.py
+++ b/theano/configdefaults.py
@@ -239,6 +239,16 @@ AddConfigVar('nvcc.fastmath',
             # if theano.sandbox.cuda is loaded or not.
             in_c_key=False)

+AddConfigVar('nvcc.cudafe',
+             "If 'always' (the default), cudafe will be called for every GPU"
+             " Op compilation. If 'heuristic', it will only be called if the"
+             " source code appears to contain CUDA code. This can speed up"
+             " compilation and importing theano, but might fail to compile"
+             " some custom GPU Ops.",
+             EnumStr('always', 'heuristic'),
+             # Not in the c key: it does not affect the compilation result.
+             in_c_key=False)
+
 AddConfigVar('gpuarray.sync',
             """If True, every op will make sure its work is done before
                returning.  Setting this to True will slow down execution,

--- a/theano/sandbox/cuda/nvcc_compiler.py
+++ b/theano/sandbox/cuda/nvcc_compiler.py
@@ -230,7 +230,19 @@ class NVCC_compiler(Compiler):
            if python_lib not in lib_dirs:
                lib_dirs.append(python_lib)

+        if (config.nvcc.cudafe == 'heuristic' and not
+            any(marker in src_code for marker in ("__global__", "__device__",
+                                                  "__host__", "<<<",
+                                                  "nvmatrix.cuh"))):
+            # only calls existing CUDA functions, so a plain .cpp compiles much faster
+            cppfilename = os.path.join(location, 'mod.cpp')
+            src_code = ("#include <cuda.h>\n"
+                        "#include <cuda_runtime_api.h>\n" +
+                        src_code)
+        else:
+            # contains CUDA host code or device functions, so it needs the .cu extension
            cppfilename = os.path.join(location, 'mod.cu')
+
        with open(cppfilename, 'w') as cppfile:

            _logger.debug('Writing module C++ code to %s', cppfilename)