提交 30131c73 作者: Frederic

Fix IfElse gpu optimization that used the old ifelse interface.

Also make it work with multiple outputs.
上级 fcb37509
...@@ -11,6 +11,7 @@ import theano ...@@ -11,6 +11,7 @@ import theano
from theano.scan_module import scan_utils, scan_op, scan_opt from theano.scan_module import scan_utils, scan_op, scan_opt
from theano import scalar as scal from theano import scalar as scal
from theano import tensor, compile, gof from theano import tensor, compile, gof
import theano.ifelse
from theano.compile import optdb from theano.compile import optdb
from theano.gof import (local_optimizer, EquilibriumDB, SequenceDB, ProxyDB, from theano.gof import (local_optimizer, EquilibriumDB, SequenceDB, ProxyDB,
...@@ -366,36 +367,45 @@ def local_gpu_lazy_ifelse(node): ...@@ -366,36 +367,45 @@ def local_gpu_lazy_ifelse(node):
ifelse(host_from_gpu) -> host_from_gpu(ifelse) ifelse(host_from_gpu) -> host_from_gpu(ifelse)
""" """
if hasattr(theano, "lazycond"): if isinstance(node.op, theano.ifelse.IfElse) and not node.op.gpu:
gpu_ifelse = theano.lazycond.IfElse(gpu=True) gpu_ifelse = theano.ifelse.IfElse(node.op.n_outs, gpu=True)
if numpy.any([(i.owner and i.owner.op == host_from_gpu)
if node.op == gpu_from_host: for i in node.inputs]) or numpy.any(
host_input = node.inputs[0] [c != 'output' and c.op == gpu_from_host for c, idx
if (host_input.owner in node.outputs[0].clients]):
and host_input.owner.op == theano.lazycond.ifelse):
c, t, f = host_input.owner.inputs c = node.inputs[0]
if not isinstance(f.type, CudaNdarrayType): outs = node.inputs[1:]
f = gpu_from_host(f) # Should not happen, but just in case
if not isinstance(t.type, CudaNdarrayType): if isinstance(c.type, CudaNdarrayType):
t = gpu_from_host(t) c = host_from_gpu(c)
if isinstance(c.type, CudaNdarrayType):
c = host_from_gpu(c) for i in range(len(outs)):
if not isinstance(outs[i], CudaNdarrayType):
return [gpu_ifelse(c, t, f)] outs[i] = gpu_from_host(outs[i])
return [host_from_gpu(out) for out in
if node.op == theano.lazycond.ifelse: gpu_ifelse.make_node(c, *outs).outputs]
if numpy.any([(i.owner and i.owner.op == host_from_gpu)
for i in node.inputs]): if node.op == gpu_from_host:
c, t, f = node.inputs host_input = node.inputs[0]
if (host_input.owner and
if not isinstance(f.type, CudaNdarrayType): isinstance(host_input.owner.op, theano.ifelse.IfElse) and
f = gpu_from_host(f) not host_input.owner.op.gpu):
if not isinstance(t.type, CudaNdarrayType): gpu_ifelse = theano.ifelse.IfElse(host_input.owner.op.n_outs,
t = gpu_from_host(t) gpu=True)
if isinstance(c.type, CudaNdarrayType):
c = host_from_gpu(c) c = host_input.owner.inputs[0]
outs = host_input.owner.inputs[1:]
return [host_from_gpu(gpu_ifelse(c, t, f))] # Should not happen, but just in case
if isinstance(c.type, CudaNdarrayType):
c = host_from_gpu(c)
for i in range(len(outs)):
if not isinstance(outs[i], CudaNdarrayType):
outs[i] = gpu_from_host(outs[i])
outs = gpu_ifelse.make_node(c, *outs).outputs
return outs
return False return False
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论