提交 0ad8e57b authored 作者: Frederic's avatar Frederic

Opt GpuContiguous(GpuContiguous(x))

上级 f1b0fac7
......@@ -20,6 +20,7 @@ from theano.gof.opt import LocalMetaOptimizer
from theano.sandbox.cuda.basic_ops import (
gpu_eye, gpu_contiguous,
gpu_from_host, host_from_gpu, GpuFromHost, HostFromGpu,
GpuContiguous,
GpuElemwise, GpuDimShuffle, GpuReshape, GpuCAReduce, GpuFlatten,
GpuSubtensor, GpuAdvancedSubtensor1,
GpuAdvancedIncSubtensor1, GpuAdvancedIncSubtensor1_dev20,
......@@ -1997,6 +1998,19 @@ def local_gpualloc_memset_0(node):
return [new_out]
@register_opt()
@local_optimizer([GpuContiguous])
def local_gpu_contiguous_gpu_contiguous(node):
"""
gpu_contiguous(gpu_contiguous(x)) -> gpu_contiguous(x)
"""
if isinstance(node.op, GpuContiguous):
inp = node.inputs[0]
if inp.owner and isinstance(inp.owner.op, GpuContiguous):
return [inp]
@register_opt()
@local_optimizer([gpu_from_host, tensor.Eye])
def local_gpu_eye(node):
......
......@@ -79,6 +79,18 @@ def test_local_remove_all_assert():
assert len(a_op) == 1
def test_local_gpu_contiguous_gpu_contiguous():
a = tensor.fmatrix()
o1 = basic_ops.gpu_contiguous(a)
o2 = basic_ops.gpu_contiguous(o1)
f1 = theano.function([a], o1, mode=mode_with_gpu)
f2 = theano.function([a], o2, mode=mode_with_gpu)
assert 1 == len([node for node in f1.maker.fgraph.toposort()
if isinstance(node.op, basic_ops.GpuContiguous)])
assert 1 == len([node for node in f2.maker.fgraph.toposort()
if isinstance(node.op, basic_ops.GpuContiguous)])
def test_int_pow():
a = CudaNdarrayType([False])()
......
......@@ -20,7 +20,7 @@ from theano.tensor.nnet.conv import ConvOp
from .type import GpuArrayType, GpuArrayConstant
from .basic_ops import (host_from_gpu, gpu_from_host,
HostFromGpu, GpuFromHost,
GpuSplit,
GpuSplit, GpuContiguous,
gpu_alloc, GpuAlloc, GpuReshape,
GpuEye, gpu_join, GpuJoin)
from .blas import gpu_dot22, GpuGemv, GpuGemm, GpuGer
......@@ -205,6 +205,19 @@ def local_gpualloc_memset_0(node):
return [new_out]
@register_opt()
@local_optimizer([GpuContiguous])
def local_gpu_contiguous_gpu_contiguous(node):
"""
gpu_contiguous(gpu_contiguous(x)) -> gpu_contiguous(x)
"""
if isinstance(node.op, GpuContiguous):
inp = node.inputs[0]
if inp.owner and isinstance(inp.owner.op, GpuContiguous):
return [inp]
@register_opt('fast_compile')
@op_lifter([tensor.Reshape])
def local_gpureshape(node):
......
......@@ -7,6 +7,7 @@ from theano.tests.unittest_tools import SkipTest
from theano.tensor.tests import test_basic
import theano.sandbox.gpuarray
from theano.sandbox.gpuarray import basic_ops
from ..type import GpuArrayType, gpuarray_shared_constructor
from ..basic_ops import (GpuAlloc, GpuReshape, gpu_alloc,
gpu_from_host, host_from_gpu)
......@@ -48,6 +49,18 @@ def test_local_remove_all_assert():
assert len(a_op) == 1
def test_local_gpu_contiguous_gpu_contiguous():
a = tensor.fmatrix()
o1 = basic_ops.gpu_contiguous(a)
o2 = basic_ops.gpu_contiguous(o1)
f1 = theano.function([a], o1, mode=mode_with_gpu)
f2 = theano.function([a], o2, mode=mode_with_gpu)
assert 1 == len([node for node in f1.maker.fgraph.toposort()
if isinstance(node.op, basic_ops.GpuContiguous)])
assert 1 == len([node for node in f2.maker.fgraph.toposort()
if isinstance(node.op, basic_ops.GpuContiguous)])
def test_flatten():
m = theano.tensor.fmatrix()
f = theano.function([m], m.flatten(), mode=mode_with_gpu)
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论