提交 0414f66e 作者: Frederic Bastien

Correctly tag the scan inplace optimizations for the GPU back-ends.

上级 d4acc241
......@@ -493,6 +493,8 @@ def use(device,
'fast_run')
optdb.add_tags('gpu_after_fusion',
'fast_run')
optdb.add_tags('gpu_scanOp_make_inplace',
'fast_run')
if force:
try:
......
......@@ -2535,12 +2535,13 @@ def local_gpu_allocempty(node):
def typeInfer(node):
return typeConstructor
# Do not register in fast_run or fast_compile.
# It will be added to fast_run if the GPU is enabled.
optdb.register('gpu_scanOp_make_inplace',
scan_opt.ScanInplaceOptimizer(typeInfer=typeInfer,
gpu_flag=True),
75,
'gpu',
'fast_run',
'inplace',
'scan')
......
......@@ -78,6 +78,7 @@ if pygpu:
import theano.compile
theano.compile.shared_constructor(gpuarray_shared_constructor)
optdb.add_tags('gpuarray_opt', 'fast_run', 'fast_compile')
optdb.add_tags('gpua_scanOp_make_inplace', 'fast_run')
elif (config.init_gpu_device.startswith('cuda') or
config.init_gpu_device.startswith('opencl')):
if config.device != 'cpu':
......@@ -91,6 +92,7 @@ if pygpu:
import theano.compile
theano.compile.shared_constructor(gpuarray_shared_constructor)
optdb.add_tags('gpuarray_opt', 'fast_run', 'fast_compile')
optdb.add_tags('gpua_scanOp_make_inplace', 'fast_run')
from .basic_ops import (GpuAlloc, GpuAllocEmpty, GpuContiguous, GpuEye,
GpuFromHost, GpuJoin, GpuReshape, GpuSplit,
......
......@@ -977,11 +977,12 @@ def _scan_type_infer(node):
context_name=context_name)
return typebuild
# Do not register in fast_run or fast_compile.
# It will be added to fast_run if the GPU is enabled.
optdb.register('gpua_scanOp_make_inplace',
scan_opt.ScanInplaceOptimizer(typeInfer=_scan_type_infer,
gpua_flag=True),
75,
'gpuarray',
'fast_run',
'inplace',
'scan')
Markdown 格式
0%
您即将添加 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论