Merge pull request #2791 from caglar/gpu_opt_transfer

Assert no CPU op for #2471

Merge pull request #2791 from caglar/gpu_opt_transfer
c9bf24bf · Pascal Lamblin · 740d8fc7 · fbc08c46 · c9bf24bf · c9bf24bf
--- a/theano/configdefaults.py
+++ b/theano/configdefaults.py
@@ -126,6 +126,15 @@ AddConfigVar(
    in_c_key=False)
+# This flag selects what happens when a CPU op is found in the computational
+# graph: ignore it, log a warning, raise an error, or start pdb.
+AddConfigVar(
+    'assert_no_cpu_op',
+    "Raise an error/warning if there is a CPU op in the computational graph.",
+    EnumStr('ignore', 'warn', 'raise', 'pdb', allow_override=True),
+    in_c_key=False)
 # Do not add FAST_RUN_NOGC to this list (nor any other ALL CAPS shortcut).
 # The way to get FAST_RUN_NOGC is with the flag 'linker=c|py_nogc'.
 # The old all capital letter way of working is deprecated as it is not

--- a/theano/sandbox/cuda/opt.py
+++ b/theano/sandbox/cuda/opt.py
@@ -5,6 +5,7 @@ import copy
 import sys
 import time
 import warnings
+import pdb
 import numpy
@@ -445,6 +446,27 @@ def local_gpu_dot_to_dot22(node):
                                                shape_out))]
    return False
+@local_optimizer(None)
+def local_assert_no_cpu_op(node):
+    """Report a non-GPU op that sits between GPU transfers.
+
+    A node is reported when its op is not a ``GpuOp``, every one of its
+    inputs is produced by a ``HostFromGpu`` op, and at least one of its
+    outputs is consumed by a ``GpuFromHost`` op.
+
+    What "report" means is selected by ``config.assert_no_cpu_op``:
+    ``'warn'`` logs a warning, ``'raise'`` raises ``AssertionError`` and
+    ``'pdb'`` starts the debugger.  Any other value does nothing.
+
+    Always returns None: the graph is only inspected, never modified.
+    """
+    if isinstance(node.op, GpuOp):
+        return None
+
+    # Every input must come straight out of a HostFromGpu transfer.
+    if not all(var.owner and isinstance(var.owner.op, HostFromGpu)
+               for var in node.inputs):
+        return None
+
+    # At least one output must be fed to a GpuFromHost transfer.  A client
+    # entry whose first item has no ``op`` attribute (i.e. it is not an
+    # Apply node -- presumably the marker used for graph outputs) is skipped
+    # instead of raising AttributeError.
+    if not any(isinstance(getattr(client[0], 'op', None), GpuFromHost)
+               for var in node.outputs
+               for client in var.clients):
+        return None
+
+    action = config.assert_no_cpu_op
+    if action == "warn":
+        _logger.warning(("CPU op %s is detected in the computational"
+                         " graph") % node)
+    elif action == "raise":
+        raise AssertionError("The op %s is on CPU." % node)
+    elif action == "pdb":
+        pdb.set_trace()
+    return None
+# Wrap local_assert_no_cpu_op with in2out and register the result in optdb
+# under the name 'assert_no_cpu_op' (no tags are passed to register()):
+assert_no_cpu_op = theano.tensor.opt.in2out(local_assert_no_cpu_op,
+                                            name='assert_no_cpu_op')
+# Position 49.2 is after the device specialization & fusion optimizations
+# for last transfers.
+theano.compile.optdb.register('assert_no_cpu_op', assert_no_cpu_op, 49.2)
 @register_opt()
 @local_optimizer([theano.ifelse.IfElse, gpu_from_host])
@@ -1915,6 +1937,7 @@ gpu_inplace_elemwise_optimizer = tensor.opt.inplace_elemwise_optimizer_op(
 optdb.register('gpu_inplace_elemwise_opt', gpu_inplace_elemwise_optimizer, 75,
               'fast_run', 'inplace', 'gpu_inplace')
 register_opt()(tensor.opt.local_remove_useless_assert)
 register_opt()(tensor.opt.local_shape_to_shape_i)

--- a/theano/sandbox/cuda/tests/test_opt.py
+++ b/theano/sandbox/cuda/tests/test_opt.py
@@ -5,6 +5,7 @@ import unittest
 import numpy
 # Skip test if cuda_ndarray is not available.
 from nose.plugins.skip import SkipTest
+from nose.tools import assert_raises
 import theano
 from theano.compile.pfunc import pfunc
@@ -91,6 +92,35 @@ def test_local_gpu_contiguous_gpu_contiguous():
                     if isinstance(node.op, basic_ops.GpuContiguous)])
+def test_local_assert_no_cpu_op():
+    """Check the 'raise' and 'ignore' settings of config.assert_no_cpu_op."""
+    numpy.random.seed(1)
+    values = numpy.random.uniform(-1, 1, (10, 10)).astype("float32")
+    shared_m = cuda.shared_constructor(values, name="m_shared")
+    out = theano.tensor.tanh(shared_m).dot(shared_m.T)
+
+    # Turn the check on and exclude the two GPU elemwise optimizations
+    # (presumably so that tanh is left on the CPU -- confirm).
+    mode_local_assert = mode_with_gpu.including("assert_no_cpu_op")
+    for opt_name in ("local_gpu_elemwise_0", "local_gpu_elemwise_1"):
+        mode_local_assert = mode_local_assert.excluding(opt_name)
+
+    saved_flag = config.assert_no_cpu_op
+    try:
+        # With 'raise', compiling the function must fail.
+        config.assert_no_cpu_op = 'raise'
+        assert_raises(AssertionError, theano.function,
+                      [], out, mode=mode_local_assert)
+        # With 'ignore', the very same compilation must go through.
+        config.assert_no_cpu_op = 'ignore'
+        theano.function([], out, mode=mode_local_assert)
+    finally:
+        # Restore the flag whatever happened above.
+        config.assert_no_cpu_op = saved_flag
 def test_int_pow():
    a = CudaNdarrayType([False])()