提交 9c8561fc · 作者: Frederic Bastien

Add opt that move CpuContiguous to the GPU. fix gh-4400

上级提交: 189b1352
......@@ -2280,6 +2280,17 @@ def local_gpu_contiguous_gpu_contiguous(node):
return [inp]
@register_opt('fast_compile')
@local_optimizer([tensor.extra_ops.CpuContiguous])
def local_gpu_contiguous(node):
    """Lift a host-side CpuContiguous onto the GPU.

    Rewrites ``CpuContiguous(host_from_gpu(x))`` into
    ``as_tensor_variable(gpu_contiguous(x))`` so that the make-contiguous
    copy happens on the device instead of on a freshly transferred host
    array.  Returns ``False`` when the pattern does not apply.
    """
    # Guard clauses: only fire on CpuContiguous fed by a GPU->host transfer.
    if not isinstance(node.op, tensor.extra_ops.CpuContiguous):
        return False
    inp, = node.inputs
    if inp.owner is None or not isinstance(inp.owner.op, HostFromGpu):
        return False
    device_inp, = inp.owner.inputs
    # as_tensor_variable converts the GPU result back to a host tensor
    # variable, so the replacement output has the same type as the original.
    return [tensor.as_tensor_variable(gpu_contiguous(device_inp))]
@register_opt()
@local_optimizer([gpu_from_host, tensor.Eye])
def local_gpu_eye(node):
......
......@@ -108,6 +108,15 @@ def test_local_gpu_contiguous_gpu_contiguous():
if isinstance(node.op, basic_ops.GpuContiguous)])
def test_local_gpu_contiguous():
    """cpu_contiguous on GPU data must compile to exactly one GpuContiguous."""
    x = tensor.fmatrix()
    out = tensor.extra_ops.cpu_contiguous(x)
    fn = theano.function([x], out, mode=mode_with_gpu)
    topo = fn.maker.fgraph.toposort()
    n_gpu_contig = sum(isinstance(n.op, basic_ops.GpuContiguous)
                       for n in topo)
    assert n_gpu_contig == 1
    # The compiled function must also execute without error.
    fn([[2.]])
def test_local_assert_no_cpu_op():
numpy.random.seed(1)
m = numpy.random.uniform(-1, 1, (10, 10)).astype("float32")
......
......@@ -28,7 +28,7 @@ from .type import (GpuArrayType, GpuArrayConstant, get_context,
from .basic_ops import (as_gpuarray_variable, infer_context_name,
host_from_gpu, GpuToGpu,
HostFromGpu, GpuFromHost,
GpuSplit, GpuContiguous,
GpuSplit, GpuContiguous, gpu_contiguous,
GpuAlloc, GpuAllocEmpty, GpuReshape,
GpuEye, gpu_join, GpuJoin)
from .blas import (gpu_dot22, GpuGemv, GpuGemm, GpuGer,
......@@ -330,6 +330,12 @@ def local_gpu_contiguous_gpu_contiguous(node):
return [inp]
@register_opt('fast_compile')
@op_lifter([tensor.extra_ops.CpuContiguous])
def local_gpu_contiguous(node, context_name):
    """Lift CpuContiguous to its GPU equivalent on the gpuarray backend.

    ``op_lifter`` wraps this optimizer: it matches CpuContiguous nodes whose
    inputs can be moved to the GPU and applies the op this function returns.
    """
    # NOTE(review): op_lifter is presumed to handle the host<->GPU transfers
    # itself, so the opt only names the replacement op -- this mirrors the
    # sibling local_gpureshape opt below; confirm against op_lifter's docs.
    return gpu_contiguous
@register_opt('fast_compile')
@op_lifter([tensor.Reshape])
def local_gpureshape(node, context_name):
......
......@@ -64,6 +64,15 @@ def test_local_gpu_contiguous_gpu_contiguous():
if isinstance(node.op, basic_ops.GpuContiguous)])
def test_local_gpu_contiguous():
    """The gpuarray backend lifts cpu_contiguous to a single GpuContiguous."""
    inp = tensor.fmatrix()
    graph_out = tensor.extra_ops.cpu_contiguous(inp)
    func = theano.function([inp], graph_out, mode=mode_with_gpu)
    gpu_contig_nodes = [node
                        for node in func.maker.fgraph.toposort()
                        if isinstance(node.op, basic_ops.GpuContiguous)]
    assert len(gpu_contig_nodes) == 1
    # Also check the compiled function runs.
    func([[2.]])
def test_flatten():
m = theano.tensor.fmatrix()
f = theano.function([m], m.flatten(), mode=mode_with_gpu)
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论