提交 b8aeac34 作者: Razvan Pascanu

remove scan to gpu optimization from scan_opt

This optimization has been moved in sandbox/cuda
上级 d1d9a849
...@@ -1123,122 +1123,4 @@ optdb.register('scanOp_merge_inouts'
                                  , 'fast_run'
                                  , 'scan')
from theano.sandbox import cuda
if cuda.cuda_available:
from theano.sandbox.cuda.basic_ops import gpu_from_host, host_from_gpu
from theano.sandbox.cuda.type import CudaNdarrayType
from theano.sandbox.cuda.opt import register_opt, local_optimizer
def safe_to_gpu(x):
    """Return a GPU-transferred view of `x` when it is a float32
    TensorType variable; any other variable is returned untouched."""
    if not isinstance(x.type, TensorType):
        return x
    if x.type.dtype != 'float32':
        return x
    return gpu_from_host(x)
def safe_to_cpu(x):
    """Return a host-transferred view of `x` when it lives on the GPU
    (CudaNdarrayType); any other variable is returned untouched."""
    on_gpu = isinstance(x.type, CudaNdarrayType)
    return host_from_gpu(x) if on_gpu else x
def tensor_to_cuda(x):
    """Build a fresh CudaNdarrayType variable mirroring a float32
    TensorType `x` (same broadcastable pattern, name suffixed with
    '[cuda]'). Variables of any other type are returned untouched."""
    matches = isinstance(x.type, TensorType) and x.type.dtype == 'float32'
    if not matches:
        return x
    cuda_var = CudaNdarrayType(broadcastable=x.type.broadcastable)()
    if x.name:
        cuda_var.name = x.name + '[cuda]'
    return cuda_var
@register_opt('scan')
@local_optimizer([])
def gpuScanOptimization(node):
    """Local optimizer that moves a CPU Scan op onto the GPU.

    Two graph rewrites are handled:

        scan(host_from_gpu) -> host_from_gpu(GPUscan)
        gpu_from_host(scan) -> GPUscan(gpu_from_host)

    :param node: the Apply node currently being considered by the
        optimizer.
    :return: a list of replacement output variables when the rewrite
        applies, ``False`` otherwise.
    """
    # gpu_from_host(scan) -> GPUscan(gpu_from_host)
    if node.op == gpu_from_host:
        host_input = node.inputs[0]
        if (host_input.owner and
            isinstance(host_input.owner.op, scan_op.Scan) and
            not host_input.owner.op.info['gpu'] and
            len(host_input.owner.outputs) == 1):
            # Note that we are not doing the right thing here !!
            # This is because the local optimizer expects only one
            # output that corresponds to the input of ``node``
            # If we do this for each output seperately we will have
            # multiple scan ops in the graph ( as many as outputs )
            # and I'm not sure they will get merged into one again
            # So for now I will just cover a limited case when there
            # is only one output and the local optimizer can be used
            # TODO (fix) : either make sure the different scans get
            # merged or implement this optimization as a global
            # optimization
            thescan = host_input.owner.op
            # Rebuild the Scan with the same metadata but flagged as GPU.
            info = thescan.info.copy()
            info['gpu'] = True
            inputs = host_input.owner.inputs
            # inputs[0] is the number of steps; it stays on the host.
            nw_ins = [inputs[0]]
            # inputs[1:e] are the sequences and the mit-mot/mit-sot/
            # sit-sot/shared-output initial states, which go to the GPU.
            e = (1 + thescan.n_seqs
                 + thescan.n_mit_mot
                 + thescan.n_mit_sot
                 + thescan.n_sit_sot
                 + thescan.n_shared_outs)
            nw_ins += [safe_to_gpu(x) for x in inputs[1:e]]
            b = e
            e = e + thescan.n_nit_sot
            # inputs[b:e] are the nit-sot lengths (plain integers) and
            # stay on the host; the remaining inputs go to the GPU.
            nw_ins += inputs[b:e]
            nw_ins += [safe_to_gpu(x) for x in inputs[e:]]
            # Rebuild the inner function: cuda input variables, GPU
            # outputs, with the old inner inputs substituted by
            # host views of the new cuda inputs.
            scan_ins = [tensor_to_cuda(x) for x in thescan.inputs]
            scan_outs = [safe_to_gpu(x) for x in thescan.outputs]
            scan_outs = scan_utils.clone(
                scan_outs
                , replace=zip(thescan.inputs,
                              [safe_to_cpu(x) for x in scan_ins]))
            nw_op = scan_op.Scan(scan_ins
                                 , scan_outs
                                 , info).make_node(*nw_ins)
            _outputs = nw_op.outputs
            return _outputs

    # scan(host_from_gpu) -> host_from_gpu(GPUscan)
    if (type(node.op) == scan_op.Scan
        and not node.op.info['gpu']):
        # Only fire when at least one input already comes from the GPU.
        if numpy.any([(i.owner and i.owner.op == host_from_gpu)
                      for i in node.inputs]):
            thescan = node.op
            info = thescan.info.copy()
            info['gpu'] = True
            inputs = node.inputs
            # Same input partitioning as above: step count stays on the
            # host, sequences/initial states go to the GPU, nit-sot
            # lengths stay on the host, the rest goes to the GPU.
            nw_ins = [inputs[0]]
            e = (1 + thescan.n_seqs
                 + thescan.n_mit_mot
                 + thescan.n_mit_sot
                 + thescan.n_sit_sot
                 + thescan.n_shared_outs)
            nw_ins += [safe_to_gpu(x) for x in inputs[1:e]]
            b = e
            e = e + thescan.n_nit_sot
            nw_ins += inputs[b:e]
            nw_ins += [safe_to_gpu(x) for x in inputs[e:]]
            scan_ins = [tensor_to_cuda(x) for x in thescan.inputs]
            scan_outs = [safe_to_gpu(x) for x in thescan.outputs]
            scan_outs = scan_utils.clone(
                scan_outs
                , replace=zip(thescan.inputs
                              , [safe_to_cpu(x) for x in scan_ins]))
            _outputs = scan_op.Scan(
                scan_ins
                , scan_outs
                , info).make_node(*nw_ins).outputs
            # Bring the GPU outputs back to the host so downstream
            # consumers see the same (CPU) types as before.
            outputs = [safe_to_cpu(x) for x in _outputs]
            return outputs
    # Neither pattern matched: tell the optimizer nothing changed.
    return False
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论