提交 c7034860 authored 作者: Frederic's avatar Frederic

Do not move some Subtensor ops to the GPU when doing so does not make sense.

When the Subtensor is applied to an input/shared variable and that is its only use in the graph, we should transfer to the GPU only the result of the small computation, not the full input.
上级 dbb03761
......@@ -839,6 +839,11 @@ def local_gpu_subtensor(node):
if (x.owner and
isinstance(x.owner.op, HostFromGpu) and
x.dtype == "float32"):
gpu_x = x.owner.inputs[0]
if gpu_x.owner and isinstance(gpu_x.owner.op, GpuFromHost) and not gpu_x.owner.inputs[0].owner:
if len(x.clients) == 1:
return
gpu_x, = x.owner.inputs
coords = node.inputs[1:]
return [host_from_gpu(GpuSubtensor(
......
......@@ -203,6 +203,34 @@ def test_opt_gpujoin_joinvectors_elemwise_then_minusone():
assert numpy.allclose(numpy.asarray(f()), concat)
def test_local_gpu_subtensor():
    """Check where the local_gpu_subtensor optimization places Subtensor.

    When the sliced variable's only use is the slice itself (CPU-forced
    shared variable or plain graph input), the Subtensor must stay on the
    CPU so that only the small result is transferred to the GPU.  When the
    variable has other uses on the GPU, the Subtensor must move to the GPU
    to avoid a second host-to-device transfer.
    """
    # Shared variable forced on the CPU: slice on CPU, transfer the result.
    t = tensor._shared(numpy.zeros(20, "float32"))
    f = theano.function([], t[3:4], mode=mode_with_gpu)
    topo = f.maker.fgraph.toposort()
    assert any(type(node.op) is tensor.Subtensor for node in topo)
    assert not any(isinstance(node.op, cuda.GpuSubtensor) for node in topo)

    # Plain graph input with a single use: same expectation as above.
    t = tensor.fmatrix()
    f = theano.function([t], t[3:4], mode=mode_with_gpu)
    topo = f.maker.fgraph.toposort()
    assert any(type(node.op) is tensor.Subtensor for node in topo)
    assert not any(isinstance(node.op, cuda.GpuSubtensor) for node in topo)

    # Multiple uses of the input: the subtensor should run on the GPU
    # to prevent transferring the (large) input more than once.
    t = tensor.fmatrix()
    f = theano.function([t], [t[3:4], t + 1], mode=mode_with_gpu)
    topo = f.maker.fgraph.toposort()
    assert not any(type(node.op) is tensor.Subtensor for node in topo)
    assert any(isinstance(node.op, cuda.GpuSubtensor) for node in topo)
def test_print_op():
""" Test that print ops don't block gpu optimization"""
b = tensor.fmatrix()
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论