提交 30131c73 作者: Frederic

Fix the IfElse GPU optimization, which was still using the old ifelse interface.

Also make the optimization work with IfElse nodes that have multiple outputs.
上级 fcb37509
...@@ -11,6 +11,7 @@ import theano ...@@ -11,6 +11,7 @@ import theano
from theano.scan_module import scan_utils, scan_op, scan_opt from theano.scan_module import scan_utils, scan_op, scan_opt
from theano import scalar as scal from theano import scalar as scal
from theano import tensor, compile, gof from theano import tensor, compile, gof
import theano.ifelse
from theano.compile import optdb from theano.compile import optdb
from theano.gof import (local_optimizer, EquilibriumDB, SequenceDB, ProxyDB, from theano.gof import (local_optimizer, EquilibriumDB, SequenceDB, ProxyDB,
...@@ -366,36 +367,45 @@ def local_gpu_lazy_ifelse(node): ...@@ -366,36 +367,45 @@ def local_gpu_lazy_ifelse(node):
ifelse(host_from_gpu) -> host_from_gpu(ifelse) ifelse(host_from_gpu) -> host_from_gpu(ifelse)
""" """
if hasattr(theano, "lazycond"): if isinstance(node.op, theano.ifelse.IfElse) and not node.op.gpu:
gpu_ifelse = theano.lazycond.IfElse(gpu=True) gpu_ifelse = theano.ifelse.IfElse(node.op.n_outs, gpu=True)
if numpy.any([(i.owner and i.owner.op == host_from_gpu)
for i in node.inputs]) or numpy.any(
[c != 'output' and c.op == gpu_from_host for c, idx
in node.outputs[0].clients]):
if node.op == gpu_from_host: c = node.inputs[0]
host_input = node.inputs[0] outs = node.inputs[1:]
if (host_input.owner # Should not happen, but just in case
and host_input.owner.op == theano.lazycond.ifelse):
c, t, f = host_input.owner.inputs
if not isinstance(f.type, CudaNdarrayType):
f = gpu_from_host(f)
if not isinstance(t.type, CudaNdarrayType):
t = gpu_from_host(t)
if isinstance(c.type, CudaNdarrayType): if isinstance(c.type, CudaNdarrayType):
c = host_from_gpu(c) c = host_from_gpu(c)
return [gpu_ifelse(c, t, f)] for i in range(len(outs)):
if not isinstance(outs[i], CudaNdarrayType):
if node.op == theano.lazycond.ifelse: outs[i] = gpu_from_host(outs[i])
if numpy.any([(i.owner and i.owner.op == host_from_gpu) return [host_from_gpu(out) for out in
for i in node.inputs]): gpu_ifelse.make_node(c, *outs).outputs]
c, t, f = node.inputs
if not isinstance(f.type, CudaNdarrayType): if node.op == gpu_from_host:
f = gpu_from_host(f) host_input = node.inputs[0]
if not isinstance(t.type, CudaNdarrayType): if (host_input.owner and
t = gpu_from_host(t) isinstance(host_input.owner.op, theano.ifelse.IfElse) and
not host_input.owner.op.gpu):
gpu_ifelse = theano.ifelse.IfElse(host_input.owner.op.n_outs,
gpu=True)
c = host_input.owner.inputs[0]
outs = host_input.owner.inputs[1:]
# Should not happen, but just in case
if isinstance(c.type, CudaNdarrayType): if isinstance(c.type, CudaNdarrayType):
c = host_from_gpu(c) c = host_from_gpu(c)
return [host_from_gpu(gpu_ifelse(c, t, f))] for i in range(len(outs)):
if not isinstance(outs[i], CudaNdarrayType):
outs[i] = gpu_from_host(outs[i])
outs = gpu_ifelse.make_node(c, *outs).outputs
return outs
return False return False
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论