提交 a569f569 authored 作者: Frederic Bastien's avatar Frederic Bastien

make the Print op more gpu friendly.

上级 258dcb6c
...@@ -383,6 +383,16 @@ def local_gpu_rebroadcast(node): ...@@ -383,6 +383,16 @@ def local_gpu_rebroadcast(node):
gpu_x = x.owner.inputs[0] gpu_x = x.owner.inputs[0]
return [host_from_gpu(node.op(gpu_x))] return [host_from_gpu(node.op(gpu_x))]
@register_opt()
@local_optimizer([])
def local_print_op(node):
    """Lift a Print op onto the GPU side of a transfer.

    Rewrites ``Print(host_from_gpu(x))`` into
    ``host_from_gpu(Print(x))`` so that a Print in the middle of an
    otherwise-GPU graph no longer forces the surrounding computation
    back onto the host.
    """
    if not isinstance(node.op, tensor.printing.Print):
        return False
    printed, = node.inputs
    producer = printed.owner
    if producer is not None and producer.op == host_from_gpu:
        gpu_value, = producer.inputs
        # Re-apply the same Print instance to the GPU variable, then
        # transfer the (unchanged) value back to the host.
        return [host_from_gpu(node.op(gpu_value))]
    return False
def cast(x, dtype): def cast(x, dtype):
stype = scal.Scalar(dtype) stype = scal.Scalar(dtype)
cast_op = theano.tensor.Elemwise(scal.Identity(scal.specific_out(stype))) cast_op = theano.tensor.Elemwise(scal.Identity(scal.specific_out(stype)))
......
...@@ -21,8 +21,6 @@ else: ...@@ -21,8 +21,6 @@ else:
mode_with_gpu = theano.compile.mode.get_default_mode().including('gpu') mode_with_gpu = theano.compile.mode.get_default_mode().including('gpu')
mode_without_gpu = theano.compile.mode.get_default_mode().excluding('gpu') mode_without_gpu = theano.compile.mode.get_default_mode().excluding('gpu')
import theano.sandbox.cuda as cuda
def test_no_shared_var_graph(): def test_no_shared_var_graph():
"""Test that the InputToGpuOptimizer optimizer make graph that don't have shared variable compiled too. """Test that the InputToGpuOptimizer optimizer make graph that don't have shared variable compiled too.
...@@ -125,6 +123,20 @@ def test_opt_gpujoin_joinvectors_elemwise_then_minusone(): ...@@ -125,6 +123,20 @@ def test_opt_gpujoin_joinvectors_elemwise_then_minusone():
assert numpy.allclose(numpy.asarray(f()), concat) assert numpy.allclose(numpy.asarray(f()), concat)
def test_print_op():
    """Test that Print ops don't block GPU optimization.

    Builds ``Print(b) * 2`` and checks that the optimizer rewrites it so
    the Print sits between GpuFromHost and GpuElemwise, i.e. the whole
    computation stays on the GPU with the print applied to the GPU value.
    """
    b = tensor.fmatrix()
    f = theano.function([b], theano.printing.Print()(b) * 2,
                        mode=mode_with_gpu)
    # Expected graph shape:
    # [GpuFromHost(b), Print(GpuFromHost.0),
    #  GpuElemwise{mul}(CudaNdarray{[[2.]]}, Print.0),
    #  HostFromGpu(GpuElemwise{mul}.0)]
    topo = f.maker.env.toposort()
    assert topo[0].op == cuda.gpu_from_host
    assert isinstance(topo[1].op, theano.printing.Print)
    assert isinstance(topo[2].op, cuda.GpuElemwise)
    assert topo[3].op == cuda.host_from_gpu
    # fmatrix is float32; numpy.random.random returns float64 and Theano
    # refuses the implicit float64 -> float32 input downcast, so cast
    # explicitly before calling the compiled function.
    f(numpy.random.random((5, 5)).astype('float32'))
def test_elemwise_fusion(): def test_elemwise_fusion():
""" Test the the GpuElemwise fusion work correctly""" """ Test the the GpuElemwise fusion work correctly"""
shape = (3,4) shape = (3,4)
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论