Rework the subtensor lifter. If it has only 1 client, don't move it to the…

Rework the subtensor lifter. If it has only 1 client, don't move it to the GPU. This is a simpler condition that does what we want.

Rework the subtensor lifter. If it has only 1 client, don't move it to the…
b1b05e86 · Frederic Bastien · sentient07 · fbc384cf · b1b05e86 · b1b05e86
--- a/theano/gpuarray/opt.py
+++ b/theano/gpuarray/opt.py
@@ -913,11 +913,8 @@ def local_gpua_subtensor(op, context_name, inputs, outputs):
                isinstance(gpu_x.owner.op, GpuFromHost) and
                # And it is a shared var or an input of the graph.
                not gpu_x.owner.inputs[0].owner):
-            if len(x.clients) == 1:
+            if len(x.clients) == 1 and len(outputs[0].clients) == 1:
-                if any([n == 'output' or any([isinstance(v.type, GpuArrayType)
+                return
-                                              for v in n.inputs + n.outputs])
-                        for n, _ in outputs[0].clients]):
-                    return
    # Here is the condition for the GraphToGPU opt. inputs is the
    # inputs we want to use for the new node
    if (x.owner and isinstance(x.owner.op, GpuFromHost)):
@@ -945,7 +942,12 @@ def local_gpua_subtensor_graph(op, context_name, inputs, outputs):
        # and is used by only 1 node.
        # x is in the new graph, so we can't tests its number of clients.
        if not cpu_x.owner and len(cpu_x.clients) == 1:
-            return
+            c = outputs[0].clients
+            # If the subtensor has only 1 client, do it on the CPU.
+            # We let the other optimizations decide whether to move
+            # the next node or not.
+            if len(c) == 1:
+                return
    return GpuSubtensor(op.idx_list)

--- a/theano/gpuarray/tests/test_opt.py
+++ b/theano/gpuarray/tests/test_opt.py
@@ -355,7 +355,10 @@ def test_local_gpu_subtensor():
    topo = f.maker.fgraph.toposort()
    assert any([type(node.op) is tensor.Subtensor for node in topo])
    assert not any([isinstance(node.op, GpuSubtensor) for node in topo])
-    assert any([isinstance(node.op, GpuElemwise) for node in topo])
+    # Our optimizer isn't smart enough to move the Elemwise to the GPU.
+    # If it were just a little bit smarter, it could wrongly move it to the GPU.
+    # If it were super smart, it would know it should not move it to the GPU.
+    assert any([isinstance(node.op, tensor.Elemwise) for node in topo])
 def test_local_gpu_elemwise():