提交 2f6d63b3 authored 作者: Frederic Bastien's avatar Frederic Bastien

Add the reverse version of local_gpu_contiguous

上级 cbc68829
...@@ -2281,13 +2281,18 @@ def local_gpu_contiguous_gpu_contiguous(node): ...@@ -2281,13 +2281,18 @@ def local_gpu_contiguous_gpu_contiguous(node):
@register_opt('fast_compile')
@local_optimizer([GpuFromHost, tensor.extra_ops.CpuContiguous])
def local_gpu_contiguous(node):
    """Move a ``cpu_contiguous`` across the host<->GPU transfer boundary.

    Two mirrored rewrites:

    * ``cpu_contiguous(host_from_gpu(x))`` is replaced by the host view of
      ``gpu_contiguous(x)``, so the contiguity work happens on the GPU.
    * ``gpu_from_host(cpu_contiguous(x))`` is replaced by
      ``gpu_contiguous`` applied to ``x`` directly, dropping the redundant
      CPU-side copy.  NOTE(review): this relies on ``gpu_contiguous``
      accepting the host variable ``x`` and inserting any needed transfer
      itself — confirm against GpuContiguous.make_node.

    Returns a one-element list with the replacement variable, or ``False``
    when neither pattern matches (local-optimizer convention).
    """
    op = node.op
    if isinstance(op, tensor.extra_ops.CpuContiguous):
        inp, = node.inputs
        prev = inp.owner
        if prev is not None and isinstance(prev.op, HostFromGpu):
            dev_var, = prev.inputs
            return [tensor.as_tensor_variable(gpu_contiguous(dev_var))]
    elif isinstance(op, GpuFromHost):
        inp, = node.inputs
        prev = inp.owner
        if prev is not None and isinstance(prev.op, tensor.extra_ops.CpuContiguous):
            inner, = prev.inputs
            return [gpu_contiguous(inner)]
    return False
......
...@@ -111,6 +111,7 @@ def test_local_gpu_contiguous_gpu_contiguous(): ...@@ -111,6 +111,7 @@ def test_local_gpu_contiguous_gpu_contiguous():
def test_local_gpu_contiguous():
    """Check that compiling with the GPU mode yields exactly one
    GpuContiguous node, both for a bare ``cpu_contiguous`` output and for
    ``gpu_from_host(cpu_contiguous(...))`` (the reverse rewrite)."""
    a = tensor.fmatrix()
    cpu_out = tensor.extra_ops.cpu_contiguous(a)
    for out in (cpu_out, cuda.gpu_from_host(cpu_out)):
        f = theano.function([a], out, mode=mode_with_gpu)
        graph_nodes = f.maker.fgraph.toposort()
        n_contig = len([n for n in graph_nodes
                        if isinstance(n.op, basic_ops.GpuContiguous)])
        assert n_contig == 1
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论