提交 6b4c592f 作者: Frederic

Add back the backward flow in the local_gpu_split opt

上级 102fb5e1
...@@ -304,7 +304,11 @@ def local_gpu_elemwise_1(node): ...@@ -304,7 +304,11 @@ def local_gpu_elemwise_1(node):
def local_gpu_split(node): def local_gpu_split(node):
if isinstance(node.op, tensor.Split): if isinstance(node.op, tensor.Split):
input = node.inputs[0] input = node.inputs[0]
if input.owner and isinstance(input.owner.op, HostFromGpu): outs_clients = reduce(list.__add__,
[out.clients for out in node.outputs])
if (input.owner and isinstance(input.owner.op, HostFromGpu) or
any([c != 'output' and isinstance(c.op, GpuFromHost) for c, idx
in outs_clients])):
new_op = GpuSplit(node.op.len_splits) new_op = GpuSplit(node.op.len_splits)
split_res = new_op(gpu_from_host(input), *node.inputs[1:]) split_res = new_op(gpu_from_host(input), *node.inputs[1:])
return [host_from_gpu(o) for o in split_res] return [host_from_gpu(o) for o in split_res]
......
...@@ -289,7 +289,7 @@ def test_local_gpu_subtensor(): ...@@ -289,7 +289,7 @@ def test_local_gpu_subtensor():
assert any([isinstance(node.op, cuda.GpuElemwise) for node in topo]) assert any([isinstance(node.op, cuda.GpuElemwise) for node in topo])
def test_local_split(): def test_local_gpu_split():
""" Test that the GpuSplit op is being applied and works """ """ Test that the GpuSplit op is being applied and works """
# Construct symbolic split # Construct symbolic split
x = tensor.fvector() x = tensor.fvector()
...@@ -310,6 +310,17 @@ def test_local_split(): ...@@ -310,6 +310,17 @@ def test_local_split():
# Check equality # Check equality
assert all([(cpu == gpu).all() for cpu, gpu in zip(cpu_res, gpu_res)]) assert all([(cpu == gpu).all() for cpu, gpu in zip(cpu_res, gpu_res)])
# Test the other path of the optimizer, when it is the output that
# is moved to the GPU.
ra = cuda.gpu_from_host(ra)
f = theano.function([x, splits], [ra, rb, rc],
mode=mode_with_gpu.excluding("InputToGpuOptimizer"))
gpu_res = f([0, 1, 2, 3, 4, 5], [3, 2, 1])
l = f.maker.fgraph.toposort()
assert any([isinstance(o.op, theano.sandbox.cuda.GpuSplit) for o in l])
# Check equality
assert all([(cpu == gpu).all() for cpu, gpu in zip(cpu_res, gpu_res)])
def test_print_op(): def test_print_op():
""" Test that print ops don't block gpu optimization""" """ Test that print ops don't block gpu optimization"""
......
Markdown 格式
0%
您即将添加 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论