提交 9c8561fc · 作者: Frederic Bastien

Add opt that move CpuContiguous to the GPU. fix gh-4400

上级 189b1352
...@@ -2280,6 +2280,17 @@ def local_gpu_contiguous_gpu_contiguous(node): ...@@ -2280,6 +2280,17 @@ def local_gpu_contiguous_gpu_contiguous(node):
return [inp] return [inp]
@register_opt('fast_compile')
@local_optimizer([tensor.extra_ops.CpuContiguous])
def local_gpu_contiguous(node):
    """Lift a host-side ``CpuContiguous`` onto the GPU.

    When ``CpuContiguous`` is applied to the host copy of a GPU variable
    (i.e. the output of a ``HostFromGpu``), replace it with
    ``gpu_contiguous`` applied directly to the GPU variable, wrapped back
    into a host tensor so the replacement has the same variable type as
    the original output.

    Returns the single-element replacement list on success, ``False``
    when the pattern does not match.
    """
    if not isinstance(node.op, tensor.extra_ops.CpuContiguous):
        return False
    inp, = node.inputs
    if inp.owner is None or not isinstance(inp.owner.op, HostFromGpu):
        return False
    gpu_inp, = inp.owner.inputs
    return [tensor.as_tensor_variable(gpu_contiguous(gpu_inp))]
@register_opt() @register_opt()
@local_optimizer([gpu_from_host, tensor.Eye]) @local_optimizer([gpu_from_host, tensor.Eye])
def local_gpu_eye(node): def local_gpu_eye(node):
......
...@@ -108,6 +108,15 @@ def test_local_gpu_contiguous_gpu_contiguous(): ...@@ -108,6 +108,15 @@ def test_local_gpu_contiguous_gpu_contiguous():
if isinstance(node.op, basic_ops.GpuContiguous)]) if isinstance(node.op, basic_ops.GpuContiguous)])
def test_local_gpu_contiguous():
    """cpu_contiguous on a host matrix must be lifted to one GpuContiguous."""
    x = tensor.fmatrix()
    out = tensor.extra_ops.cpu_contiguous(x)
    fn = theano.function([x], out, mode=mode_with_gpu)
    topo = fn.maker.fgraph.toposort()
    n_gpu_contig = sum(isinstance(n.op, basic_ops.GpuContiguous)
                       for n in topo)
    assert 1 == n_gpu_contig
    fn([[2.]])
def test_local_assert_no_cpu_op(): def test_local_assert_no_cpu_op():
numpy.random.seed(1) numpy.random.seed(1)
m = numpy.random.uniform(-1, 1, (10, 10)).astype("float32") m = numpy.random.uniform(-1, 1, (10, 10)).astype("float32")
......
...@@ -28,7 +28,7 @@ from .type import (GpuArrayType, GpuArrayConstant, get_context, ...@@ -28,7 +28,7 @@ from .type import (GpuArrayType, GpuArrayConstant, get_context,
from .basic_ops import (as_gpuarray_variable, infer_context_name, from .basic_ops import (as_gpuarray_variable, infer_context_name,
host_from_gpu, GpuToGpu, host_from_gpu, GpuToGpu,
HostFromGpu, GpuFromHost, HostFromGpu, GpuFromHost,
GpuSplit, GpuContiguous, GpuSplit, GpuContiguous, gpu_contiguous,
GpuAlloc, GpuAllocEmpty, GpuReshape, GpuAlloc, GpuAllocEmpty, GpuReshape,
GpuEye, gpu_join, GpuJoin) GpuEye, gpu_join, GpuJoin)
from .blas import (gpu_dot22, GpuGemv, GpuGemm, GpuGer, from .blas import (gpu_dot22, GpuGemv, GpuGemm, GpuGer,
...@@ -330,6 +330,12 @@ def local_gpu_contiguous_gpu_contiguous(node): ...@@ -330,6 +330,12 @@ def local_gpu_contiguous_gpu_contiguous(node):
return [inp] return [inp]
@register_opt('fast_compile')
@op_lifter([tensor.extra_ops.CpuContiguous])
def local_gpu_contiguous(node, context_name):
    # op_lifter handles the transfer pattern itself: returning the
    # gpu_contiguous Op tells it what to substitute for a CpuContiguous
    # node whose input lives on the GPU.  `node` and `context_name` are
    # unused here because gpu_contiguous needs no per-node configuration.
    return gpu_contiguous
@register_opt('fast_compile') @register_opt('fast_compile')
@op_lifter([tensor.Reshape]) @op_lifter([tensor.Reshape])
def local_gpureshape(node, context_name): def local_gpureshape(node, context_name):
......
...@@ -64,6 +64,15 @@ def test_local_gpu_contiguous_gpu_contiguous(): ...@@ -64,6 +64,15 @@ def test_local_gpu_contiguous_gpu_contiguous():
if isinstance(node.op, basic_ops.GpuContiguous)]) if isinstance(node.op, basic_ops.GpuContiguous)])
def test_local_gpu_contiguous():
    """Check that cpu_contiguous is replaced by a single GpuContiguous."""
    mat = tensor.fmatrix()
    contig = tensor.extra_ops.cpu_contiguous(mat)
    func = theano.function([mat], contig, mode=mode_with_gpu)
    gpu_nodes = [node for node in func.maker.fgraph.toposort()
                 if isinstance(node.op, basic_ops.GpuContiguous)]
    assert len(gpu_nodes) == 1
    func([[2.]])
def test_flatten(): def test_flatten():
m = theano.tensor.fmatrix() m = theano.tensor.fmatrix()
f = theano.function([m], m.flatten(), mode=mode_with_gpu) f = theano.function([m], m.flatten(), mode=mode_with_gpu)
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论