提交 0ad8e57b · 作者: Frederic

Opt GpuContiguous(GpuContiguous(x))

上级 f1b0fac7
...@@ -20,6 +20,7 @@ from theano.gof.opt import LocalMetaOptimizer ...@@ -20,6 +20,7 @@ from theano.gof.opt import LocalMetaOptimizer
from theano.sandbox.cuda.basic_ops import ( from theano.sandbox.cuda.basic_ops import (
gpu_eye, gpu_contiguous, gpu_eye, gpu_contiguous,
gpu_from_host, host_from_gpu, GpuFromHost, HostFromGpu, gpu_from_host, host_from_gpu, GpuFromHost, HostFromGpu,
GpuContiguous,
GpuElemwise, GpuDimShuffle, GpuReshape, GpuCAReduce, GpuFlatten, GpuElemwise, GpuDimShuffle, GpuReshape, GpuCAReduce, GpuFlatten,
GpuSubtensor, GpuAdvancedSubtensor1, GpuSubtensor, GpuAdvancedSubtensor1,
GpuAdvancedIncSubtensor1, GpuAdvancedIncSubtensor1_dev20, GpuAdvancedIncSubtensor1, GpuAdvancedIncSubtensor1_dev20,
...@@ -1997,6 +1998,19 @@ def local_gpualloc_memset_0(node): ...@@ -1997,6 +1998,19 @@ def local_gpualloc_memset_0(node):
return [new_out] return [new_out]
@register_opt()
@local_optimizer([GpuContiguous])
def local_gpu_contiguous_gpu_contiguous(node):
    """Collapse nested GpuContiguous nodes.

    gpu_contiguous(gpu_contiguous(x)) -> gpu_contiguous(x)

    A GpuContiguous output is already contiguous, so applying the op a
    second time is a no-op and the inner result can be reused directly.
    """
    if not isinstance(node.op, GpuContiguous):
        return
    inner = node.inputs[0]
    # Only rewrite when the input is itself produced by a GpuContiguous.
    if inner.owner is not None and isinstance(inner.owner.op, GpuContiguous):
        return [inner]
@register_opt() @register_opt()
@local_optimizer([gpu_from_host, tensor.Eye]) @local_optimizer([gpu_from_host, tensor.Eye])
def local_gpu_eye(node): def local_gpu_eye(node):
......
...@@ -79,6 +79,18 @@ def test_local_remove_all_assert(): ...@@ -79,6 +79,18 @@ def test_local_remove_all_assert():
assert len(a_op) == 1 assert len(a_op) == 1
def test_local_gpu_contiguous_gpu_contiguous():
    """Nested gpu_contiguous calls must be collapsed to a single node."""
    x = tensor.fmatrix()
    once = basic_ops.gpu_contiguous(x)
    twice = basic_ops.gpu_contiguous(once)
    # Both graphs should compile down to exactly one GpuContiguous node.
    for out in (once, twice):
        fn = theano.function([x], out, mode=mode_with_gpu)
        n_contig = sum(isinstance(apply_node.op, basic_ops.GpuContiguous)
                       for apply_node in fn.maker.fgraph.toposort())
        assert n_contig == 1
def test_int_pow(): def test_int_pow():
a = CudaNdarrayType([False])() a = CudaNdarrayType([False])()
......
...@@ -20,7 +20,7 @@ from theano.tensor.nnet.conv import ConvOp ...@@ -20,7 +20,7 @@ from theano.tensor.nnet.conv import ConvOp
from .type import GpuArrayType, GpuArrayConstant from .type import GpuArrayType, GpuArrayConstant
from .basic_ops import (host_from_gpu, gpu_from_host, from .basic_ops import (host_from_gpu, gpu_from_host,
HostFromGpu, GpuFromHost, HostFromGpu, GpuFromHost,
GpuSplit, GpuSplit, GpuContiguous,
gpu_alloc, GpuAlloc, GpuReshape, gpu_alloc, GpuAlloc, GpuReshape,
GpuEye, gpu_join, GpuJoin) GpuEye, gpu_join, GpuJoin)
from .blas import gpu_dot22, GpuGemv, GpuGemm, GpuGer from .blas import gpu_dot22, GpuGemv, GpuGemm, GpuGer
...@@ -205,6 +205,19 @@ def local_gpualloc_memset_0(node): ...@@ -205,6 +205,19 @@ def local_gpualloc_memset_0(node):
return [new_out] return [new_out]
@register_opt()
@local_optimizer([GpuContiguous])
def local_gpu_contiguous_gpu_contiguous(node):
    """Remove a redundant GpuContiguous applied to a GpuContiguous.

    gpu_contiguous(gpu_contiguous(x)) -> gpu_contiguous(x)

    The inner op already guarantees a contiguous layout, so the outer
    application can be dropped in favor of the inner result.
    """
    if not isinstance(node.op, GpuContiguous):
        return
    producer = node.inputs[0].owner
    if producer and isinstance(producer.op, GpuContiguous):
        return [node.inputs[0]]
@register_opt('fast_compile') @register_opt('fast_compile')
@op_lifter([tensor.Reshape]) @op_lifter([tensor.Reshape])
def local_gpureshape(node): def local_gpureshape(node):
......
...@@ -7,6 +7,7 @@ from theano.tests.unittest_tools import SkipTest ...@@ -7,6 +7,7 @@ from theano.tests.unittest_tools import SkipTest
from theano.tensor.tests import test_basic from theano.tensor.tests import test_basic
import theano.sandbox.gpuarray import theano.sandbox.gpuarray
from theano.sandbox.gpuarray import basic_ops
from ..type import GpuArrayType, gpuarray_shared_constructor from ..type import GpuArrayType, gpuarray_shared_constructor
from ..basic_ops import (GpuAlloc, GpuReshape, gpu_alloc, from ..basic_ops import (GpuAlloc, GpuReshape, gpu_alloc,
gpu_from_host, host_from_gpu) gpu_from_host, host_from_gpu)
...@@ -48,6 +49,18 @@ def test_local_remove_all_assert(): ...@@ -48,6 +49,18 @@ def test_local_remove_all_assert():
assert len(a_op) == 1 assert len(a_op) == 1
def test_local_gpu_contiguous_gpu_contiguous():
    """Check that stacked gpu_contiguous ops are merged into one."""
    inp = tensor.fmatrix()
    single = basic_ops.gpu_contiguous(inp)
    nested = basic_ops.gpu_contiguous(single)

    def count_contiguous(output):
        # Number of GpuContiguous apply nodes in the optimized graph.
        fn = theano.function([inp], output, mode=mode_with_gpu)
        return len([n for n in fn.maker.fgraph.toposort()
                    if isinstance(n.op, basic_ops.GpuContiguous)])

    assert count_contiguous(single) == 1
    assert count_contiguous(nested) == 1
def test_flatten(): def test_flatten():
m = theano.tensor.fmatrix() m = theano.tensor.fmatrix()
f = theano.function([m], m.flatten(), mode=mode_with_gpu) f = theano.function([m], m.flatten(), mode=mode_with_gpu)
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论