提交 4897207a authored 作者: Frederic's avatar Frederic

Add fast_compile to the GPU optimizations that move computation to the GPU.

上级 f02a596e
......@@ -414,6 +414,7 @@ def use(device,
if default_to_move_computation_to_gpu:
optdb.add_tags('gpu_opt',
'fast_compile',
'fast_run',
'inplace')
optdb.add_tags('gpu_after_fusion',
......
......@@ -55,10 +55,10 @@ gpu_optimizer = EquilibriumDB(ignore_newtrees=False)
gpu_cut_copies = EquilibriumDB()
gpu_seqopt = SequenceDB()
gpu_seqopt.register('gpu_local_optimizations', gpu_optimizer, 1,
'fast_run', 'inplace', 'gpu')
'fast_run', 'fast_compile', 'inplace', 'gpu')
gpu_seqopt.register('gpu_cut_transfers', gpu_cut_copies, 2,
'fast_run', 'gpu')
# DO NOT PUT fast_run in gpu_opt! This will ALWAYS enable the GPU!
'fast_run', 'fast_compile', 'gpu')
# DO NOT PUT fast_run or fast_compile in gpu_opt! This will ALWAYS enable the GPU!
optdb.register('gpu_opt',
gpu_seqopt,
optdb.__position__.get('add_destroy_handler', 49.5) - 1,
......@@ -72,13 +72,15 @@ optdb.register('gpu_after_fusion',
'gpu')
## Register merge_optimizer as a global opt
gpu_optimizer.register('gpu_merge', theano.gof.opt.merge_optimizer, 'fast_run')
gpu_optimizer.register('gpu_merge', theano.gof.opt.merge_optimizer,
'fast_run', 'fast_compile')
def register_opt(*tags, **kwargs):
def f(local_opt):
name = (kwargs and kwargs.pop('name')) or local_opt.__name__
gpu_optimizer.register(name, local_opt, 'fast_run', 'gpu', *tags)
gpu_optimizer.register(name, local_opt, 'fast_run', 'fast_compile',
'gpu', *tags)
return local_opt
return f
......@@ -163,14 +165,15 @@ def local_cut_gpu_host_gpu(node):
return [node.inputs[0].owner.inputs[0]]
return False
gpu_cut_copies.register('cut_gpu_host_transfers', local_cut_gpu_host_gpu,
'fast_run', 'gpu')
'fast_run', 'fast_compile', 'gpu')
gpu_cut_copies.register('cut_gpu_constant_transfers',
tensor.opt.constant_folding,
'fast_run', 'gpu')
'fast_run', 'fast_compile', 'gpu')
# Register it into canonicalize to allow other optimizations to work without
# bothering with this useless pattern.
optdb['canonicalize'].register('local_cut_gpu_host_gpu',
local_cut_gpu_host_gpu, 'fast_run', 'gpu')
local_cut_gpu_host_gpu,
'fast_run', 'fast_compile', 'gpu')
# 'float64', 'complex128' and 'complex64' are not supported in elemwise
# on the gpu.
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论