提交 0dfc7a51 作者: f0k 提交者: Jan Schlüter

Faster compilation and import with cuda backend

上级 76cfd7e8
......@@ -239,6 +239,16 @@ AddConfigVar('nvcc.fastmath',
# if theano.sandbox.cuda is loaded or not.
in_c_key=False)
# Register the `nvcc.cudafe` option, which controls whether the CUDA front
# end (cudafe) is run for every GPU Op compilation or only when the source
# looks like it contains CUDA code.
# NOTE(review): the help text states that 'always' is the default;
# presumably EnumStr treats its first value as the default -- confirm.
AddConfigVar('nvcc.cudafe',
"If 'always' (the default), cudafe will be called for every GPU"
" Op compilation. If 'heuristic', it will only be called if the"
" source code appears to contain CUDA code. This can speed up"
" compilation and importing theano, but might fail to compile"
" some custom GPU Ops.",
# The only two accepted values for this option.
EnumStr('always', 'heuristic'),
# Not needed in c key, does not affect the compilation result.
in_c_key=False)
AddConfigVar('gpuarray.sync',
"""If True, every op will make sure its work is done before
returning. Setting this to True will slow down execution,
......
......@@ -230,7 +230,19 @@ class NVCC_compiler(Compiler):
if python_lib not in lib_dirs:
lib_dirs.append(python_lib)
# Pick the file name (and thereby the extension) for the generated module
# source. When config.nvcc.cudafe is 'heuristic', a plain substring search
# of src_code for CUDA function qualifiers, the kernel-launch token "<<<",
# and the "nvmatrix.cuh" header decides whether the source is treated as
# CUDA code. Because this is a substring test, a marker that occurs only
# inside a comment or string literal also selects the .cu path.
if (config.nvcc.cudafe == 'heuristic' and not
any(marker in src_code for marker in ("__global__", "__device__",
"__host__", "<<<",
"nvmatrix.cuh"))):
# No marker found: the source only calls existing CUDA functions, so it
# is written as a plain C++ file, which can compile much faster.
cppfilename = os.path.join(location, 'mod.cpp')
# The CUDA headers are prepended explicitly to the source.
# NOTE(review): presumably needed because nvcc does not include them
# implicitly for .cpp inputs -- confirm against the nvcc documentation.
src_code = ("#include <cuda.h>\n"
"#include <cuda_runtime_api.h>\n" +
src_code)
else:
# Either cudafe is set to 'always' or a marker was found: the source
# contains CUDA host code or device functions and needs the .cu
# extension.
cppfilename = os.path.join(location, 'mod.cu')
with open(cppfilename, 'w') as cppfile:
_logger.debug('Writing module C++ code to %s', cppfilename)
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论