提交 9c8561fc · 作者: Frederic Bastien

Add opt that move CpuContiguous to the GPU. fix gh-4400

上级 189b1352
...@@ -2280,6 +2280,17 @@ def local_gpu_contiguous_gpu_contiguous(node): ...@@ -2280,6 +2280,17 @@ def local_gpu_contiguous_gpu_contiguous(node):
return [inp] return [inp]
@register_opt('fast_compile')
@local_optimizer([tensor.extra_ops.CpuContiguous])
def local_gpu_contiguous(node):
    """Lift a host-side ``CpuContiguous`` onto the GPU.

    When ``CpuContiguous`` is applied to the host copy of a GPU variable
    (i.e. the output of a ``HostFromGpu``), replace it with
    ``gpu_contiguous`` applied directly to the GPU variable, wrapped back
    into a host tensor so the replacement has the same variable type as
    the original output.

    Returns the single-element replacement list on success, ``False``
    when the pattern does not match.
    """
    if not isinstance(node.op, tensor.extra_ops.CpuContiguous):
        return False
    inp, = node.inputs
    if inp.owner is None or not isinstance(inp.owner.op, HostFromGpu):
        return False
    gpu_inp, = inp.owner.inputs
    return [tensor.as_tensor_variable(gpu_contiguous(gpu_inp))]
@register_opt() @register_opt()
@local_optimizer([gpu_from_host, tensor.Eye]) @local_optimizer([gpu_from_host, tensor.Eye])
def local_gpu_eye(node): def local_gpu_eye(node):
......
...@@ -108,6 +108,15 @@ def test_local_gpu_contiguous_gpu_contiguous(): ...@@ -108,6 +108,15 @@ def test_local_gpu_contiguous_gpu_contiguous():
if isinstance(node.op, basic_ops.GpuContiguous)]) if isinstance(node.op, basic_ops.GpuContiguous)])
def test_local_gpu_contiguous():
    """cpu_contiguous on a host matrix must be lifted to one GpuContiguous."""
    x = tensor.fmatrix()
    out = tensor.extra_ops.cpu_contiguous(x)
    fn = theano.function([x], out, mode=mode_with_gpu)
    topo = fn.maker.fgraph.toposort()
    n_gpu_contig = sum(isinstance(n.op, basic_ops.GpuContiguous)
                       for n in topo)
    assert 1 == n_gpu_contig
    fn([[2.]])
def test_local_assert_no_cpu_op(): def test_local_assert_no_cpu_op():
numpy.random.seed(1) numpy.random.seed(1)
m = numpy.random.uniform(-1, 1, (10, 10)).astype("float32") m = numpy.random.uniform(-1, 1, (10, 10)).astype("float32")
......
...@@ -28,7 +28,7 @@ from .type import (GpuArrayType, GpuArrayConstant, get_context, ...@@ -28,7 +28,7 @@ from .type import (GpuArrayType, GpuArrayConstant, get_context,
from .basic_ops import (as_gpuarray_variable, infer_context_name, from .basic_ops import (as_gpuarray_variable, infer_context_name,
host_from_gpu, GpuToGpu, host_from_gpu, GpuToGpu,
HostFromGpu, GpuFromHost, HostFromGpu, GpuFromHost,
GpuSplit, GpuContiguous, GpuSplit, GpuContiguous, gpu_contiguous,
GpuAlloc, GpuAllocEmpty, GpuReshape, GpuAlloc, GpuAllocEmpty, GpuReshape,
GpuEye, gpu_join, GpuJoin) GpuEye, gpu_join, GpuJoin)
from .blas import (gpu_dot22, GpuGemv, GpuGemm, GpuGer, from .blas import (gpu_dot22, GpuGemv, GpuGemm, GpuGer,
...@@ -330,6 +330,12 @@ def local_gpu_contiguous_gpu_contiguous(node): ...@@ -330,6 +330,12 @@ def local_gpu_contiguous_gpu_contiguous(node):
return [inp] return [inp]
@register_opt('fast_compile')
@op_lifter([tensor.extra_ops.CpuContiguous])
def local_gpu_contiguous(node, context_name):
    # op_lifter handles the transfer pattern itself: returning the
    # gpu_contiguous Op tells it what to substitute for a CpuContiguous
    # node whose input lives on the GPU.  `node` and `context_name` are
    # unused here because gpu_contiguous needs no per-node configuration.
    return gpu_contiguous
@register_opt('fast_compile') @register_opt('fast_compile')
@op_lifter([tensor.Reshape]) @op_lifter([tensor.Reshape])
def local_gpureshape(node, context_name): def local_gpureshape(node, context_name):
......
...@@ -64,6 +64,15 @@ def test_local_gpu_contiguous_gpu_contiguous(): ...@@ -64,6 +64,15 @@ def test_local_gpu_contiguous_gpu_contiguous():
if isinstance(node.op, basic_ops.GpuContiguous)]) if isinstance(node.op, basic_ops.GpuContiguous)])
def test_local_gpu_contiguous():
    """Check that cpu_contiguous is replaced by a single GpuContiguous."""
    mat = tensor.fmatrix()
    contig = tensor.extra_ops.cpu_contiguous(mat)
    func = theano.function([mat], contig, mode=mode_with_gpu)
    gpu_nodes = [node for node in func.maker.fgraph.toposort()
                 if isinstance(node.op, basic_ops.GpuContiguous)]
    assert len(gpu_nodes) == 1
    func([[2.]])
def test_flatten(): def test_flatten():
m = theano.tensor.fmatrix() m = theano.tensor.fmatrix()
f = theano.function([m], m.flatten(), mode=mode_with_gpu) f = theano.function([m], m.flatten(), mode=mode_with_gpu)
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论