Commit 2f6d63b3 authored by Frederic Bastien

Add the reverse version of local_gpu_contiguous

Parent cbc68829
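
For context: before this commit, local_gpu_contiguous only rewrote cpu_contiguous(host_from_gpu(x)) into host_from_gpu(gpu_contiguous(x)); the reverse version added here also rewrites gpu_from_host(cpu_contiguous(x)) into gpu_contiguous(gpu_from_host(x)), so the contiguous copy happens on the GPU. The snippet below is not part of the commit; it is a minimal sketch of how the new rewrite can be observed, assuming the old theano.sandbox.cuda backend is importable and a GPU is configured:

# Minimal sketch, not part of the commit.  Assumes the old CUDA backend
# (theano.sandbox.cuda) is usable and Theano is configured with a GPU.
import theano
import theano.tensor as tensor
import theano.tensor.extra_ops  # make sure the extra_ops submodule is loaded
from theano.sandbox import cuda
from theano.sandbox.cuda import basic_ops

a = tensor.fmatrix()
# Graph that triggers the new (reverse) rewrite:
#   gpu_from_host(cpu_contiguous(a)) -> gpu_contiguous(gpu_from_host(a))
o = cuda.gpu_from_host(tensor.extra_ops.cpu_contiguous(a))

# Enable the GPU optimizations (the test suite builds a similar mode_with_gpu).
mode = theano.compile.mode.get_default_mode().including('gpu')
f = theano.function([a], o, mode=mode)

# After optimization the compiled graph should contain a GpuContiguous node,
# i.e. the contiguous copy is performed on the GPU rather than on the host.
assert any(isinstance(node.op, basic_ops.GpuContiguous)
           for node in f.maker.fgraph.toposort())
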
@@ -2281,13 +2281,18 @@ def local_gpu_contiguous_gpu_contiguous(node):
 @register_opt('fast_compile')
-@local_optimizer([tensor.extra_ops.CpuContiguous])
+@local_optimizer([GpuFromHost, tensor.extra_ops.CpuContiguous])
 def local_gpu_contiguous(node):
     if isinstance(node.op, tensor.extra_ops.CpuContiguous):
         x, = node.inputs
         if x.owner and isinstance(x.owner.op, HostFromGpu):
             gpu_x, = x.owner.inputs
             return [tensor.as_tensor_variable(gpu_contiguous(gpu_x))]
+    if isinstance(node.op, GpuFromHost):
+        x, = node.inputs
+        if x.owner and isinstance(x.owner.op, tensor.extra_ops.CpuContiguous):
+            gpu_x, = x.owner.inputs
+            return [gpu_contiguous(gpu_x)]
     return False
@@ -111,10 +111,11 @@ def test_local_gpu_contiguous_gpu_contiguous():
 def test_local_gpu_contiguous():
     a = tensor.fmatrix()
     o = tensor.extra_ops.cpu_contiguous(a)
-    f = theano.function([a], o, mode=mode_with_gpu)
-    assert 1 == len([node for node in f.maker.fgraph.toposort()
-                     if isinstance(node.op, basic_ops.GpuContiguous)])
-    f([[2.]])
+    for o in [o, cuda.gpu_from_host(o)]:
+        f = theano.function([a], o, mode=mode_with_gpu)
+        assert 1 == len([node for node in f.maker.fgraph.toposort()
+                         if isinstance(node.op, basic_ops.GpuContiguous)])
+        f([[2.]])


 def test_local_assert_no_cpu_op():