提交 0ad8e57b · 作者: Frederic

Opt GpuContiguous(GpuContiguous(x))

上级 f1b0fac7
...@@ -20,6 +20,7 @@ from theano.gof.opt import LocalMetaOptimizer ...@@ -20,6 +20,7 @@ from theano.gof.opt import LocalMetaOptimizer
from theano.sandbox.cuda.basic_ops import ( from theano.sandbox.cuda.basic_ops import (
gpu_eye, gpu_contiguous, gpu_eye, gpu_contiguous,
gpu_from_host, host_from_gpu, GpuFromHost, HostFromGpu, gpu_from_host, host_from_gpu, GpuFromHost, HostFromGpu,
GpuContiguous,
GpuElemwise, GpuDimShuffle, GpuReshape, GpuCAReduce, GpuFlatten, GpuElemwise, GpuDimShuffle, GpuReshape, GpuCAReduce, GpuFlatten,
GpuSubtensor, GpuAdvancedSubtensor1, GpuSubtensor, GpuAdvancedSubtensor1,
GpuAdvancedIncSubtensor1, GpuAdvancedIncSubtensor1_dev20, GpuAdvancedIncSubtensor1, GpuAdvancedIncSubtensor1_dev20,
...@@ -1997,6 +1998,19 @@ def local_gpualloc_memset_0(node): ...@@ -1997,6 +1998,19 @@ def local_gpualloc_memset_0(node):
return [new_out] return [new_out]
@register_opt()
@local_optimizer([GpuContiguous])
def local_gpu_contiguous_gpu_contiguous(node):
    """Collapse nested GpuContiguous nodes.

    gpu_contiguous(gpu_contiguous(x)) -> gpu_contiguous(x)

    A GpuContiguous output is already contiguous, so applying the op a
    second time is a no-op and the inner result can be reused directly.
    """
    if not isinstance(node.op, GpuContiguous):
        return
    inner = node.inputs[0]
    # Only rewrite when the input is itself produced by a GpuContiguous.
    if inner.owner is not None and isinstance(inner.owner.op, GpuContiguous):
        return [inner]
@register_opt() @register_opt()
@local_optimizer([gpu_from_host, tensor.Eye]) @local_optimizer([gpu_from_host, tensor.Eye])
def local_gpu_eye(node): def local_gpu_eye(node):
......
...@@ -79,6 +79,18 @@ def test_local_remove_all_assert(): ...@@ -79,6 +79,18 @@ def test_local_remove_all_assert():
assert len(a_op) == 1 assert len(a_op) == 1
def test_local_gpu_contiguous_gpu_contiguous():
    """Nested gpu_contiguous calls must be collapsed to a single node."""
    x = tensor.fmatrix()
    once = basic_ops.gpu_contiguous(x)
    twice = basic_ops.gpu_contiguous(once)
    # Both graphs should compile down to exactly one GpuContiguous node.
    for out in (once, twice):
        fn = theano.function([x], out, mode=mode_with_gpu)
        n_contig = sum(isinstance(apply_node.op, basic_ops.GpuContiguous)
                       for apply_node in fn.maker.fgraph.toposort())
        assert n_contig == 1
def test_int_pow(): def test_int_pow():
a = CudaNdarrayType([False])() a = CudaNdarrayType([False])()
......
...@@ -20,7 +20,7 @@ from theano.tensor.nnet.conv import ConvOp ...@@ -20,7 +20,7 @@ from theano.tensor.nnet.conv import ConvOp
from .type import GpuArrayType, GpuArrayConstant from .type import GpuArrayType, GpuArrayConstant
from .basic_ops import (host_from_gpu, gpu_from_host, from .basic_ops import (host_from_gpu, gpu_from_host,
HostFromGpu, GpuFromHost, HostFromGpu, GpuFromHost,
GpuSplit, GpuSplit, GpuContiguous,
gpu_alloc, GpuAlloc, GpuReshape, gpu_alloc, GpuAlloc, GpuReshape,
GpuEye, gpu_join, GpuJoin) GpuEye, gpu_join, GpuJoin)
from .blas import gpu_dot22, GpuGemv, GpuGemm, GpuGer from .blas import gpu_dot22, GpuGemv, GpuGemm, GpuGer
...@@ -205,6 +205,19 @@ def local_gpualloc_memset_0(node): ...@@ -205,6 +205,19 @@ def local_gpualloc_memset_0(node):
return [new_out] return [new_out]
@register_opt()
@local_optimizer([GpuContiguous])
def local_gpu_contiguous_gpu_contiguous(node):
    """Remove a redundant GpuContiguous applied to a GpuContiguous.

    gpu_contiguous(gpu_contiguous(x)) -> gpu_contiguous(x)

    The inner op already guarantees a contiguous layout, so the outer
    application can be dropped in favor of the inner result.
    """
    if not isinstance(node.op, GpuContiguous):
        return
    producer = node.inputs[0].owner
    if producer and isinstance(producer.op, GpuContiguous):
        return [node.inputs[0]]
@register_opt('fast_compile') @register_opt('fast_compile')
@op_lifter([tensor.Reshape]) @op_lifter([tensor.Reshape])
def local_gpureshape(node): def local_gpureshape(node):
......
...@@ -7,6 +7,7 @@ from theano.tests.unittest_tools import SkipTest ...@@ -7,6 +7,7 @@ from theano.tests.unittest_tools import SkipTest
from theano.tensor.tests import test_basic from theano.tensor.tests import test_basic
import theano.sandbox.gpuarray import theano.sandbox.gpuarray
from theano.sandbox.gpuarray import basic_ops
from ..type import GpuArrayType, gpuarray_shared_constructor from ..type import GpuArrayType, gpuarray_shared_constructor
from ..basic_ops import (GpuAlloc, GpuReshape, gpu_alloc, from ..basic_ops import (GpuAlloc, GpuReshape, gpu_alloc,
gpu_from_host, host_from_gpu) gpu_from_host, host_from_gpu)
...@@ -48,6 +49,18 @@ def test_local_remove_all_assert(): ...@@ -48,6 +49,18 @@ def test_local_remove_all_assert():
assert len(a_op) == 1 assert len(a_op) == 1
def test_local_gpu_contiguous_gpu_contiguous():
    """Check that stacked gpu_contiguous ops are merged into one."""
    inp = tensor.fmatrix()
    single = basic_ops.gpu_contiguous(inp)
    nested = basic_ops.gpu_contiguous(single)

    def count_contiguous(output):
        # Number of GpuContiguous apply nodes in the optimized graph.
        fn = theano.function([inp], output, mode=mode_with_gpu)
        return len([n for n in fn.maker.fgraph.toposort()
                    if isinstance(n.op, basic_ops.GpuContiguous)])

    assert count_contiguous(single) == 1
    assert count_contiguous(nested) == 1
def test_flatten(): def test_flatten():
m = theano.tensor.fmatrix() m = theano.tensor.fmatrix()
f = theano.function([m], m.flatten(), mode=mode_with_gpu) f = theano.function([m], m.flatten(), mode=mode_with_gpu)
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论