提交 c9bf24bf authored 作者: Pascal Lamblin's avatar Pascal Lamblin

Merge pull request #2791 from caglar/gpu_opt_transfer

Assert no CPU op for #2471
......@@ -126,6 +126,15 @@ AddConfigVar(
in_c_key=False)
# This flag selects what happens when a CPU Op is detected in the
# computational graph by the `assert_no_cpu_op` optimization:
#   'ignore' - take no action,
#   'warn'   - log a warning naming the offending node,
#   'raise'  - raise an AssertionError naming the offending node,
#   'pdb'    - stop in the debugger via pdb.set_trace().
# NOTE(review): 'ignore' is listed first, presumably making it the default
# value -- confirm against EnumStr.
AddConfigVar(
'assert_no_cpu_op',
"Raise an error/warning if there is a CPU op in the computational graph.",
EnumStr('ignore', 'warn', 'raise', 'pdb', allow_override=True),
in_c_key=False)
# Do not add FAST_RUN_NOGC to this list (nor any other ALL CAPS shortcut).
# The way to get FAST_RUN_NOGC is with the flag 'linker=c|py_nogc'.
# The old all capital letter way of working is deprecated as it is not
......
......@@ -5,6 +5,7 @@ import copy
import sys
import time
import warnings
import pdb
import numpy
......@@ -445,6 +446,27 @@ def local_gpu_dot_to_dot22(node):
shape_out))]
return False
@local_optimizer(None)
def local_assert_no_cpu_op(node):
    """Report a CPU op that sits between GPU transfers.

    A node is reported when all of the following hold:

    * its op is not a ``GpuOp``;
    * every input is produced by a ``HostFromGpu`` op;
    * at least one output has a client whose op is ``GpuFromHost``.

    The action taken is selected by ``config.assert_no_cpu_op``:
    ``"warn"`` logs a warning, ``"raise"`` raises, ``"pdb"`` calls
    ``pdb.set_trace()``; any other value does nothing.

    :param node: the apply node being visited by the optimizer.
    :returns: always ``None`` -- this check never rewrites the graph.
    :raises AssertionError: if the node is reported and the flag is
        ``"raise"``.
    """
    if isinstance(node.op, GpuOp):
        return None

    inputs_from_gpu = all(
        var.owner and isinstance(var.owner.op, HostFromGpu)
        for var in node.inputs)
    if not inputs_from_gpu:
        return None

    # A client entry is indexed as (client, ...).  The client is not
    # guaranteed to be an apply node: Theano's FunctionGraph marks graph
    # outputs with the string 'output', which has no `.op` attribute.
    # getattr keeps such entries from raising AttributeError.
    feeds_gpu = any(
        isinstance(getattr(client[0], 'op', None), GpuFromHost)
        for var in node.outputs
        for client in var.clients)
    if not feeds_gpu:
        return None

    action = config.assert_no_cpu_op
    if action == "warn":
        _logger.warning(
            "CPU op %s is detected in the computational graph", node)
    elif action == "raise":
        raise AssertionError("The op %s is on CPU." % node)
    elif action == "pdb":
        pdb.set_trace()
    return None
# Register the local_assert_no_cpu_op:
# wrap the local check with tensor.opt.in2out so that it can be stored in
# optdb under the name 'assert_no_cpu_op'.
assert_no_cpu_op = theano.tensor.opt.in2out(local_assert_no_cpu_op,
name='assert_no_cpu_op')
# 49.2 is after device specialization & fusion optimizations for last transfers
# NOTE(review): no tags are passed to register(), so presumably the check only
# runs in modes that explicitly include 'assert_no_cpu_op' -- confirm.
theano.compile.optdb.register('assert_no_cpu_op', assert_no_cpu_op, 49.2)
@register_opt()
@local_optimizer([theano.ifelse.IfElse, gpu_from_host])
......@@ -1915,6 +1937,7 @@ gpu_inplace_elemwise_optimizer = tensor.opt.inplace_elemwise_optimizer_op(
optdb.register('gpu_inplace_elemwise_opt', gpu_inplace_elemwise_optimizer, 75,
'fast_run', 'inplace', 'gpu_inplace')
register_opt()(tensor.opt.local_remove_useless_assert)
register_opt()(tensor.opt.local_shape_to_shape_i)
......
......@@ -5,6 +5,7 @@ import unittest
import numpy
# Skip test if cuda_ndarray is not available.
from nose.plugins.skip import SkipTest
from nose.tools import assert_raises
import theano
from theano.compile.pfunc import pfunc
......@@ -91,6 +92,35 @@ def test_local_gpu_contiguous_gpu_contiguous():
if isinstance(node.op, basic_ops.GpuContiguous)])
def test_local_assert_no_cpu_op():
    """Check the 'raise' and 'ignore' settings of ``assert_no_cpu_op``.

    Builds ``tanh(m).dot(m.T)`` on a GPU shared variable and compiles it in
    a GPU mode that includes the ``assert_no_cpu_op`` optimization and
    excludes the two ``local_gpu_elemwise_*`` optimizations (presumably so
    that ``tanh`` stays on the CPU between transfers).  With the flag set to
    'raise' compilation must raise ``AssertionError``; with 'ignore' it must
    succeed.  The previous flag value is restored after each case.
    """
    numpy.random.seed(1)
    values = numpy.random.uniform(-1, 1, (10, 10)).astype("float32")
    shared_m = cuda.shared_constructor(values, name="m_shared")
    graph_out = theano.tensor.tanh(shared_m).dot(shared_m.T)

    checked_mode = mode_with_gpu.including("assert_no_cpu_op")
    for excluded in ("local_gpu_elemwise_0", "local_gpu_elemwise_1"):
        checked_mode = checked_mode.excluding(excluded)

    saved_flag = config.assert_no_cpu_op

    # Flag set to 'raise': building the function must fail.
    try:
        config.assert_no_cpu_op = 'raise'
        assert_raises(AssertionError, theano.function,
                      [], graph_out, mode=checked_mode)
    finally:
        config.assert_no_cpu_op = saved_flag

    # Flag set to 'ignore': building the function must succeed.
    try:
        config.assert_no_cpu_op = 'ignore'
        theano.function([], graph_out, mode=checked_mode)
    finally:
        config.assert_no_cpu_op = saved_flag
def test_int_pow():
a = CudaNdarrayType([False])()
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论