提交 64c6fb65 authored 作者: Caglar's avatar Caglar

Added the new cpu assert opt.

上级 27060dbb
......@@ -126,6 +126,15 @@ AddConfigVar(
in_c_key=False)
# This flag determines whether or not to raise error/warning message if there is a
# CPU in the computational graph.
AddConfigVar(
    'assert_no_cpu_op',
    "Raise an error/warning if there is a CPU op in the computational graph.",
    # NOTE(review): the first EnumStr value ('ignore') is presumably the
    # default mode -- confirm against AddConfigVar/EnumStr semantics.
    # allow_override=False: the flag cannot be changed after startup.
    EnumStr('ignore', 'warn', 'raise', 'pdb', allow_override=False),
    # Not part of the C compilation cache key: this flag only affects
    # graph-optimization-time diagnostics, not generated code.
    in_c_key=False)
# Do not add FAST_RUN_NOGC to this list (nor any other ALL CAPS shortcut).
# The way to get FAST_RUN_NOGC is with the flag 'linker=c|py_nogc'.
# The old all capital letter way of working is deprecated as it is not
......
......@@ -446,6 +446,24 @@ def local_gpu_dot_to_dot22(node):
return False
@local_optimizer([gpu_from_host, host_from_gpu])
def local_assert_no_cpu_op(node):
    """Detect a CPU op sandwiched between GPU transfers in the graph.

    A node is flagged when its op is not a :class:`GpuOp`, every one of its
    inputs is produced by a :class:`HostFromGpu` transfer, and at least one
    of its outputs is consumed by a :class:`GpuFromHost` transfer -- i.e. the
    computation dropped back to the CPU in the middle of a GPU graph.

    Depending on ``config.assert_no_cpu_op`` this warns, raises a
    ``RuntimeError``, or drops into an interactive debugger.  This optimizer
    never rewrites the graph; it always returns None.
    """
    # BUGFIX: the original checked `var.owner` for var in node.outputs, but
    # an output variable's owner is `node` itself (already known not to be a
    # GpuOp), so the condition could never be True.  The correct check is
    # whether any *client* of an output is a GpuFromHost transfer.
    if (not isinstance(node.op, GpuOp) and
            all(var.owner and isinstance(var.owner.op, HostFromGpu)
                for var in node.inputs) and
            any(client != 'output' and isinstance(client.op, GpuFromHost)
                for var in node.outputs
                for client, _ in var.clients)):
        if config.assert_no_cpu_op == "warn":
            _logger.warning(("CPU op %s is detected in the computational"
                             " graph") % node)
        elif config.assert_no_cpu_op == "raise":
            raise RuntimeError("The op %s is on CPU." % node)
        elif config.assert_no_cpu_op == "pdb":
            import ipdb
            ipdb.set_trace()
    return None
# Only pay the cost of running the detector when the user asked for it.
# The EnumStr flag is always one of 'ignore'/'warn'/'raise'/'pdb' (never
# empty), so the original extra truthiness check was dead code and is removed.
if config.assert_no_cpu_op != "ignore":
    register_opt()(local_assert_no_cpu_op)
@register_opt()
@local_optimizer([theano.ifelse.IfElse, gpu_from_host])
def local_gpu_lazy_ifelse(node):
......@@ -1911,6 +1929,7 @@ gpu_inplace_elemwise_optimizer = tensor.opt.inplace_elemwise_optimizer_op(
# Priority 75: run the in-place elemwise rewrite late, after the graph has
# been moved to the GPU.
optdb.register('gpu_inplace_elemwise_opt', gpu_inplace_elemwise_optimizer, 75,
               'fast_run', 'inplace', 'gpu_inplace')
# Reuse generic tensor-level local optimizations in the GPU optimizer set.
register_opt()(tensor.opt.local_remove_useless_assert)
register_opt()(tensor.opt.local_shape_to_shape_i)
......
......@@ -91,6 +91,16 @@ def test_local_gpu_contiguous_gpu_contiguous():
if isinstance(node.op, basic_ops.GpuContiguous)])
def test_local_assert_no_cpu_op():
    """Multiplying two fscalars under the GPU mode must compile to GPU
    elemwise ops, leaving only the transfer nodes behind."""
    inp_a = theano.tensor.fscalar()
    inp_b = theano.tensor.fscalar()
    fn = theano.function([inp_a, inp_b], inp_a * inp_b, mode=mode_with_gpu)
    graph_nodes = fn.maker.fgraph.toposort()
    non_elemwise = [node for node in graph_nodes
                    if not isinstance(node.op, cuda.GpuElemwise)]
    # Expect exactly the transfer ops (two GpuFromHost + one HostFromGpu).
    assert len(non_elemwise) == 3
def test_int_pow():
a = CudaNdarrayType([False])()
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论