提交 b1b05e86 authored 作者: Frederic Bastien's avatar Frederic Bastien 提交者: sentient07

Rework the subtensor lifter. If it has only 1 client, don't move it to the…

Rework the subtensor lifter. If it has only 1 client, don't move it to the GPU. This is a simpler condition that does what we want.
上级 fbc384cf
...@@ -913,10 +913,7 @@ def local_gpua_subtensor(op, context_name, inputs, outputs): ...@@ -913,10 +913,7 @@ def local_gpua_subtensor(op, context_name, inputs, outputs):
isinstance(gpu_x.owner.op, GpuFromHost) and isinstance(gpu_x.owner.op, GpuFromHost) and
# And it is a shared var or an input of the graph. # And it is a shared var or an input of the graph.
not gpu_x.owner.inputs[0].owner): not gpu_x.owner.inputs[0].owner):
if len(x.clients) == 1: if len(x.clients) == 1 and len(outputs[0].clients) == 1:
if any([n == 'output' or any([isinstance(v.type, GpuArrayType)
for v in n.inputs + n.outputs])
for n, _ in outputs[0].clients]):
return return
# Here is the condition for the GraphToGPU opt. inputs is the # Here is the condition for the GraphToGPU opt. inputs is the
# inputs we want to use for the new node # inputs we want to use for the new node
...@@ -945,6 +942,11 @@ def local_gpua_subtensor_graph(op, context_name, inputs, outputs): ...@@ -945,6 +942,11 @@ def local_gpua_subtensor_graph(op, context_name, inputs, outputs):
# and is used by only 1 node. # and is used by only 1 node.
# x is in the new graph, so we can't tests its number of clients. # x is in the new graph, so we can't tests its number of clients.
if not cpu_x.owner and len(cpu_x.clients) == 1: if not cpu_x.owner and len(cpu_x.clients) == 1:
c = outputs[0].clients
# If the subtensor has only 1 client, do it on the CPU.
# We let the other optimizations decide whether or not to move
# the next node.
if len(c) == 1:
return return
return GpuSubtensor(op.idx_list) return GpuSubtensor(op.idx_list)
......
...@@ -355,7 +355,10 @@ def test_local_gpu_subtensor(): ...@@ -355,7 +355,10 @@ def test_local_gpu_subtensor():
topo = f.maker.fgraph.toposort() topo = f.maker.fgraph.toposort()
assert any([type(node.op) is tensor.Subtensor for node in topo]) assert any([type(node.op) is tensor.Subtensor for node in topo])
assert not any([isinstance(node.op, GpuSubtensor) for node in topo]) assert not any([isinstance(node.op, GpuSubtensor) for node in topo])
assert any([isinstance(node.op, GpuElemwise) for node in topo]) # Our optimizer isn't smart enough to move to the GPU Elemwise.
# If it were just a little bit smarter, it could wrongly move it to the GPU.
# If it were super smart, it would know it should not move it to the GPU.
assert any([isinstance(node.op, tensor.Elemwise) for node in topo])
def test_local_gpu_elemwise(): def test_local_gpu_elemwise():
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论