Commit 6b4c592f authored by Frederic

Add back the backward flow in the local_gpu_split opt

Parent commit 102fb5e1
......@@ -304,7 +304,11 @@ def local_gpu_elemwise_1(node):
def local_gpu_split(node):
    """Local optimizer step: replace a CPU ``tensor.Split`` with ``GpuSplit``.

    The rewrite fires in either direction across the host/GPU boundary:

    * forward  -- the split input is the result of a ``HostFromGpu``
      transfer (the data already lives on the GPU), or
    * backward -- at least one client of any split output is a
      ``GpuFromHost`` transfer (a consumer wants the result on the GPU).

    Returns a list of host-side outputs (``host_from_gpu`` applied to each
    ``GpuSplit`` result) when the rewrite applies, so the replacement
    keeps the same variable types as the original node; returns ``None``
    implicitly otherwise, leaving the node untouched.

    NOTE(review): reconstructed from a flattened diff in which the
    removed pre-change condition (forward direction only) appeared
    alongside the added combined condition; this body is the post-change
    version the commit intended ("Add back the backward flow").
    """
    if isinstance(node.op, tensor.Split):
        input = node.inputs[0]
        # Collect every (client_node, input_index) pair across all of the
        # node's outputs.  A client may also be the string 'output' when
        # the variable is a graph output -- hence the c != 'output' guard
        # below before touching c.op.
        outs_clients = reduce(list.__add__,
                              [out.clients for out in node.outputs])
        if (input.owner and isinstance(input.owner.op, HostFromGpu) or
                any([c != 'output' and isinstance(c.op, GpuFromHost)
                     for c, idx in outs_clients])):
            new_op = GpuSplit(node.op.len_splits)
            # Run the split on the GPU, then transfer each piece back to
            # the host so the replacement outputs type-match the originals.
            split_res = new_op(gpu_from_host(input), *node.inputs[1:])
            return [host_from_gpu(o) for o in split_res]
......
......@@ -289,7 +289,7 @@ def test_local_gpu_subtensor():
assert any([isinstance(node.op, cuda.GpuElemwise) for node in topo])
# NOTE(review): flattened diff fragment -- the next two lines are the
# removed (old) and added (new) names of the same test function; the
# lines between the two hunks (construction of x, splits, ra/rb/rc,
# cpu_res and the first GPU run) are not visible here.
def test_local_split():
def test_local_gpu_split():
    """ Test that the GpuSplit op is being applied and works """
    # Construct symbolic split
    x = tensor.fvector()
# Hunk boundary: diff context between the old and new assertions is missing.
......@@ -310,6 +310,17 @@ def test_local_split():
    # Check equality
    assert all([(cpu == gpu).all() for cpu, gpu in zip(cpu_res, gpu_res)])
    # Test the other path of the optimizer, when it is the output that
    # is moved to the GPU.
    ra = cuda.gpu_from_host(ra)
    f = theano.function([x, splits], [ra, rb, rc],
                   mode=mode_with_gpu.excluding("InputToGpuOptimizer"))
    gpu_res = f([0, 1, 2, 3, 4, 5], [3, 2, 1])
    l = f.maker.fgraph.toposort()
    # The backward-flow path must still place a GpuSplit in the graph.
    assert any([isinstance(o.op, theano.sandbox.cuda.GpuSplit) for o in l])
    # Check equality
    assert all([(cpu == gpu).all() for cpu, gpu in zip(cpu_res, gpu_res)])
def test_print_op():
""" Test that print ops don't block gpu optimization"""
......
Markdown 格式
0%
You are adding 0 people to this discussion. Proceed with caution.
请先完成此评论的编辑!
Register or sign in to post a comment.