提交 fbc384cf authored 作者: Frederic Bastien's avatar Frederic Bastien 提交者: sentient07

Special Alloc and AllocEmpty to move them to the GPU in one pass.

上级 12fa1c5b
...@@ -323,7 +323,20 @@ class GraphToGPU(NavigatorOptimizer): ...@@ -323,7 +323,20 @@ class GraphToGPU(NavigatorOptimizer):
context_name = i.type.context_name context_name = i.type.context_name
move_to_GPU = True move_to_GPU = True
break break
if (not move_to_GPU and
    isinstance(node.op, (theano.tensor.Alloc,
                         theano.tensor.AllocEmpty))):
    # If the Alloc[Empty] have a client that will be moved
    # to the GPU, we should move the Alloc* on the GPU.
    # We approximate this by supposing that if we have an
    # optimization for one of the clients op, then we will
    # move the client to the GPU.
    for c, _ in node.outputs[0].clients:
        # Both lookups must default to [] — without the default,
        # a client op type missing from local_optimizers_map makes
        # .get() return None and `list + None` raises TypeError.
        if (c != 'output' and
            (self.local_optimizers_map.get(c.op, []) +
             self.local_optimizers_map.get(type(c.op), []))):
            move_to_GPU = True
new_ops = None new_ops = None
outputs = [] outputs = []
# Apply the lifter # Apply the lifter
......
...@@ -138,11 +138,21 @@ def test_local_gpualloc_memset_0(): ...@@ -138,11 +138,21 @@ def test_local_gpualloc_memset_0():
ones = numpy.ones((2,), dtype='float32') ones = numpy.ones((2,), dtype='float32')
# Test with 0 from CPU op. # Test with 0 from CPU op.
# Should not be transferred as the only client is the output
a = tensor.alloc(z, i) a = tensor.alloc(z, i)
f = theano.function([i], a, mode=mode_with_gpu) f = theano.function([i], a, mode=mode_with_gpu)
topo = f.maker.fgraph.toposort() topo = f.maker.fgraph.toposort()
assert len(topo) == 2 assert len(topo) == 1
assert isinstance(topo[0].op, GpuAlloc) and topo[0].op.memset_0 assert isinstance(topo[0].op, theano.tensor.Alloc)
assert (numpy.asarray(f(6)) == 0).all()
# Test with 0 from CPU op.
# Should be transferred as it is used by another op.
a = tensor.alloc(z, i)
f = theano.function([i], a.cumsum(), mode=mode_with_gpu)
topo = f.maker.fgraph.toposort()
assert len(topo) == 3
assert isinstance(topo[0].op, GpuAlloc)
assert (numpy.asarray(f(6)) == 0).all() assert (numpy.asarray(f(6)) == 0).all()
# Test with 0 # Test with 0
...@@ -177,19 +187,30 @@ def test_local_gpualloc_empty(): ...@@ -177,19 +187,30 @@ def test_local_gpualloc_empty():
ii = theano.tensor.iscalar() ii = theano.tensor.iscalar()
# Test with vector # Test with vector
# Should not be moved as the only client is the output
a = tensor.AllocEmpty('float32')(i) a = tensor.AllocEmpty('float32')(i)
f = theano.function([i], a, mode=mode_with_gpu) f = theano.function([i], a, mode=mode_with_gpu)
topo = f.maker.fgraph.toposort() topo = f.maker.fgraph.toposort()
assert len(topo) == 2 assert len(topo) == 1
assert isinstance(topo[0].op, theano.tensor.AllocEmpty)
# This returns uninitialized data, so we can only check the shape
assert f(3).shape == (3,)
# Test with vector
# Should be moved
a = tensor.AllocEmpty('float32')(i)
f = theano.function([i], a.cumsum(), mode=mode_with_gpu)
topo = f.maker.fgraph.toposort()
assert len(topo) == 3
assert isinstance(topo[0].op, GpuAllocEmpty) assert isinstance(topo[0].op, GpuAllocEmpty)
# This return not initilized data, so we can only check the shape # This return not initilized data, so we can only check the shape
assert f(3).shape == (3,) assert f(3).shape == (3,)
# Test with matrix # Test with matrix
a = tensor.AllocEmpty('float32')(i, ii) a = tensor.AllocEmpty('float32')(i, ii)
f = theano.function([i, ii], a, mode=mode_with_gpu) f = theano.function([i, ii], a.cumsum(axis=0), mode=mode_with_gpu)
topo = f.maker.fgraph.toposort() topo = f.maker.fgraph.toposort()
assert len(topo) == 2 assert len(topo) == 3
assert isinstance(topo[0].op, GpuAllocEmpty) assert isinstance(topo[0].op, GpuAllocEmpty)
# This return not initilized data, so we can only check the shape # This return not initilized data, so we can only check the shape
assert f(3, 4).shape == (3, 4) assert f(3, 4).shape == (3, 4)
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论