提交 0414f66e 作者: Frederic Bastien

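Correctly tag the GPU scan inplace optimizations: they are no longer registered under 'fast_run' directly; the 'fast_run' tag is added only when the GPU is enabled.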

上级 d4acc241
...@@ -493,6 +493,8 @@ def use(device, ...@@ -493,6 +493,8 @@ def use(device,
'fast_run') 'fast_run')
optdb.add_tags('gpu_after_fusion', optdb.add_tags('gpu_after_fusion',
'fast_run') 'fast_run')
optdb.add_tags('gpu_scanOp_make_inplace',
'fast_run')
if force: if force:
try: try:
......
...@@ -2535,12 +2535,13 @@ def local_gpu_allocempty(node): ...@@ -2535,12 +2535,13 @@ def local_gpu_allocempty(node):
def typeInfer(node): def typeInfer(node):
return typeConstructor return typeConstructor
# Do not register in fast_run or fast_compile.
# It will be added to fast_run if the GPU is enabled.
optdb.register('gpu_scanOp_make_inplace', optdb.register('gpu_scanOp_make_inplace',
scan_opt.ScanInplaceOptimizer(typeInfer=typeInfer, scan_opt.ScanInplaceOptimizer(typeInfer=typeInfer,
gpu_flag=True), gpu_flag=True),
75, 75,
'gpu', 'gpu',
'fast_run',
'inplace', 'inplace',
'scan') 'scan')
......
...@@ -78,6 +78,7 @@ if pygpu: ...@@ -78,6 +78,7 @@ if pygpu:
import theano.compile import theano.compile
theano.compile.shared_constructor(gpuarray_shared_constructor) theano.compile.shared_constructor(gpuarray_shared_constructor)
optdb.add_tags('gpuarray_opt', 'fast_run', 'fast_compile') optdb.add_tags('gpuarray_opt', 'fast_run', 'fast_compile')
optdb.add_tags('gpua_scanOp_make_inplace', 'fast_run')
elif (config.init_gpu_device.startswith('cuda') or elif (config.init_gpu_device.startswith('cuda') or
config.init_gpu_device.startswith('opencl')): config.init_gpu_device.startswith('opencl')):
if config.device != 'cpu': if config.device != 'cpu':
...@@ -91,6 +92,7 @@ if pygpu: ...@@ -91,6 +92,7 @@ if pygpu:
import theano.compile import theano.compile
theano.compile.shared_constructor(gpuarray_shared_constructor) theano.compile.shared_constructor(gpuarray_shared_constructor)
optdb.add_tags('gpuarray_opt', 'fast_run', 'fast_compile') optdb.add_tags('gpuarray_opt', 'fast_run', 'fast_compile')
optdb.add_tags('gpua_scanOp_make_inplace', 'fast_run')
from .basic_ops import (GpuAlloc, GpuAllocEmpty, GpuContiguous, GpuEye, from .basic_ops import (GpuAlloc, GpuAllocEmpty, GpuContiguous, GpuEye,
GpuFromHost, GpuJoin, GpuReshape, GpuSplit, GpuFromHost, GpuJoin, GpuReshape, GpuSplit,
......
...@@ -977,11 +977,12 @@ def _scan_type_infer(node): ...@@ -977,11 +977,12 @@ def _scan_type_infer(node):
context_name=context_name) context_name=context_name)
return typebuild return typebuild
# Do not register in fast_run or fast_compile.
# It will be added to fast_run if the GPU is enabled.
optdb.register('gpua_scanOp_make_inplace', optdb.register('gpua_scanOp_make_inplace',
scan_opt.ScanInplaceOptimizer(typeInfer=_scan_type_infer, scan_opt.ScanInplaceOptimizer(typeInfer=_scan_type_infer,
gpua_flag=True), gpua_flag=True),
75, 75,
'gpuarray', 'gpuarray',
'fast_run',
'inplace', 'inplace',
'scan') 'scan')
Markdown 格式
0%
您即将添加 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论