提交 138dfcb9 authored 作者: Arnaud Bergeron's avatar Arnaud Bergeron

Add an option for the gpuarray cache path and allow overriding

preallocate with a direct call.
上级 e984b7d2
...@@ -1681,13 +1681,22 @@ def default_compiledir(): ...@@ -1681,13 +1681,22 @@ def default_compiledir():
AddConfigVar( AddConfigVar(
'compiledir', 'compiledir',
"platform-dependent cache directory for compiled modules", "platform-dependent cache directory for compiled modules",
ConfigParam( ConfigParam(
default_compiledir, default_compiledir,
filter=filter_compiledir, filter=filter_compiledir,
allow_override=False), allow_override=False),
in_c_key=False) in_c_key=False)
# Register the on-disk cache location for pre-compiled gpuarray kernels.
# Defaults to a 'gpuarray_kernels' subdirectory of the compiledir (computed
# lazily via the lambda so config.compiledir is resolved at access time).
# allow_override=False: like compiledir, this cannot be changed after startup.
# in_c_key=False: the cache path does not affect generated C code, so it is
# excluded from the compilation cache key.
AddConfigVar(
    'gpuarray.cache_path',
    'Directory to cache pre-compiled kernels for the gpuarray backend.',
    ConfigParam(
        lambda: os.path.join(config.compiledir, 'gpuarray_kernels'),
        filter=filter_base_compiledir,
        allow_override=False),  # fixed: original was missing this comma (SyntaxError)
    in_c_key=False)
# Check if there are remaining flags provided by the user through THEANO_FLAGS. # Check if there are remaining flags provided by the user through THEANO_FLAGS.
for key in THEANO_FLAGS_DICT.keys(): for key in THEANO_FLAGS_DICT.keys():
warnings.warn('Theano does not recognise this flag: {0}'.format(key)) warnings.warn('Theano does not recognise this flag: {0}'.format(key))
...@@ -41,7 +41,7 @@ def transfer(x, target): ...@@ -41,7 +41,7 @@ def transfer(x, target):
register_transfer(transfer) register_transfer(transfer)
def init_dev(dev, name=None): def init_dev(dev, name=None, preallocate=None):
global pygpu_activated global pygpu_activated
if not config.cxx: if not config.cxx:
raise RuntimeError("The new gpu-backend need a c++ compiler.") raise RuntimeError("The new gpu-backend need a c++ compiler.")
...@@ -53,9 +53,13 @@ def init_dev(dev, name=None): ...@@ -53,9 +53,13 @@ def init_dev(dev, name=None):
raise ValueError( raise ValueError(
"Your installed libgpuarray is not in sync, please make sure to have the appropriate version") "Your installed libgpuarray is not in sync, please make sure to have the appropriate version")
if dev not in init_dev.devmap: if dev not in init_dev.devmap:
if config.gpuarray.cache_path != '':
os.environ['GPUARRAY_CACHE_PATH'] = config.gpuarray.cache_path
if preallocate is None:
preallocate = config.gpuarray.preallocate
context = pygpu.init( context = pygpu.init(
dev, dev,
disable_alloc_cache=config.gpuarray.preallocate < 0, disable_alloc_cache=preallocate < 0,
single_stream=config.gpuarray.single_stream, single_stream=config.gpuarray.single_stream,
sched=config.gpuarray.sched) sched=config.gpuarray.sched)
context.dev = dev context.dev = dev
...@@ -73,14 +77,14 @@ def init_dev(dev, name=None): ...@@ -73,14 +77,14 @@ def init_dev(dev, name=None):
else: else:
print("Can not use cuDNN on context %s: %s" % (name, dnn.dnn_available.msg), print("Can not use cuDNN on context %s: %s" % (name, dnn.dnn_available.msg),
file=sys.stderr) file=sys.stderr)
if config.gpuarray.preallocate < 0: if preallocate < 0:
print("Disabling allocation cache on %s" % (dev,)) print("Disabling allocation cache on %s" % (dev,))
elif config.gpuarray.preallocate > 0: elif preallocate > 0:
MB = (1024 * 1024) MB = (1024 * 1024)
if config.gpuarray.preallocate <= 1: if preallocate <= 1:
gmem = min(config.gpuarray.preallocate, 0.95) * context.total_gmem gmem = min(preallocate, 0.95) * context.total_gmem
else: else:
gmem = config.gpuarray.preallocate * MB gmem = preallocate * MB
if gmem > context.free_gmem - 50 * MB: if gmem > context.free_gmem - 50 * MB:
print( print(
"WARNING: Preallocating too much memory can prevent cudnn and cublas from working properly") "WARNING: Preallocating too much memory can prevent cudnn and cublas from working properly")
...@@ -122,7 +126,8 @@ init_dev.devmap = {} ...@@ -122,7 +126,8 @@ init_dev.devmap = {}
def use(device, def use(device,
force=False, force=False,
default_to_move_computation_to_gpu=True, default_to_move_computation_to_gpu=True,
move_shared_to_gpu=True): move_shared_to_gpu=True,
preallocate=None):
""" """
Error and warning about CUDA should be displayed only when this Error and warning about CUDA should be displayed only when this
function is called. We need to be able to load this module only function is called. We need to be able to load this module only
...@@ -140,17 +145,20 @@ def use(device, ...@@ -140,17 +145,20 @@ def use(device,
computations to the gpu. computations to the gpu.
move_shared_to_gpu move_shared_to_gpu
If gpu init succeeded, put new shared variables on the gpu. If gpu init succeeded, put new shared variables on the gpu.
preallocate
If specified, will use this value for preallocation instead of
gpuarray.preallocate.
""" """
if force: if force:
if not device.startswith('cuda'): if not (device.startswith('cuda') or device.startswith('opencl')):
raise Exception("forced the init and bad device provided: " + raise Exception("forced the init and bad device provided: " +
device) device)
else: else:
# If we force, the device should not already be initialized. # If we force, the device should not already be initialized.
assert device not in init_dev.devmap assert device not in init_dev.devmap
if device: if device:
init_dev(device) init_dev(device, preallocate=preallocate)
if default_to_move_computation_to_gpu: if default_to_move_computation_to_gpu:
optdb.add_tags('gpuarray_opt', 'fast_run', 'fast_compile') optdb.add_tags('gpuarray_opt', 'fast_run', 'fast_compile')
optdb.add_tags('gpua_scanOp_make_inplace', 'fast_run') optdb.add_tags('gpua_scanOp_make_inplace', 'fast_run')
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论