Commit be56eff4 authored by Colin Raffel

Adding GpuSplit op and opt

Parent 811c4f88
theano/sandbox/cuda/basic_ops.py
@@ -3229,6 +3229,16 @@ class GpuJoin(tensor.Join, GpuOp):
gpu_join = GpuJoin()

class GpuSplit(tensor.Split, GpuOp):
    def make_node(self, x, axis, splits):
        assert isinstance(x.type, CudaNdarrayType)
        # Delegate validation and graph building to the CPU Split, then
        # retype each output as a CUDA ndarray with the same
        # broadcastable pattern.
        node = tensor.Split.make_node(self, x, axis, splits)
        outs = [CudaNdarrayType(dtype=o.dtype,
                                broadcastable=o.type.broadcastable)()
                for o in node.outputs]
        return Apply(self, [x] + node.inputs[1:], outs)

class GpuAlloc(GpuOp):
"""Implement Alloc on the gpu. """Implement Alloc on the gpu.
...
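
The new op delegates graph construction to the CPU Split and only swaps the output types, so it can also be applied directly to a GPU variable. A minimal sketch of that, assuming a CUDA-enabled install where theano.sandbox.cuda imports cleanly (the variable names here are illustrative, not part of the commit):

# Hedged sketch: apply GpuSplit directly to a GPU variable.
import theano.tensor as T
from theano.sandbox.cuda.type import CudaNdarrayType
from theano.sandbox.cuda.basic_ops import GpuSplit

# 2-D float32 variable living on the GPU.
x = CudaNdarrayType(broadcastable=(False, False))('x')
axis = T.constant(0)
splits = T.as_tensor_variable([3, 5])   # sizes of the two pieces
a, b = GpuSplit(2)(x, axis, splits)     # len_splits=2 -> two outputs
# a and b are CudaNdarrayType variables, built by make_node above.
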
theano/sandbox/cuda/opt.py
@@ -24,7 +24,7 @@ from theano.sandbox.cuda.basic_ops import (
    GpuElemwise, GpuDimShuffle, GpuReshape, GpuCAReduce, GpuFlatten,
    GpuSubtensor, GpuAdvancedSubtensor1,
    GpuAdvancedIncSubtensor1, GpuAdvancedIncSubtensor1_dev20,
-   GpuIncSubtensor, gpu_alloc, GpuAlloc, gpu_shape)
+   GpuIncSubtensor, gpu_alloc, GpuAlloc, gpu_shape, GpuSplit)
from theano.sandbox.cuda.type import CudaNdarrayType
from theano.sandbox.cuda.blas import (gpu_dot22, gpu_dot22scalar,
    gpu_gemm_inplace, gpu_gemm_no_inplace, GpuConv,
@@ -299,6 +299,25 @@ def local_gpu_elemwise_1(node):
    return False

@register_opt()
@local_optimizer([tensor.Split, gpu_from_host])
def local_gpu_split(node):
    # Case 1: Split runs on the host but its input just came off the
    # GPU: split on the GPU instead and move each piece back to the host.
    if isinstance(node.op, tensor.Split):
        input = node.inputs[0]
        if input.owner and isinstance(input.owner.op, HostFromGpu):
            new_op = GpuSplit(node.op.len_splits)
            split_res = new_op(gpu_from_host(input), *node.inputs[1:])
            return [host_from_gpu(o) for o in split_res]
    # Case 2: one output of a host-side Split is being moved to the
    # GPU: split on the GPU directly and select the matching output.
    if isinstance(node.op, GpuFromHost):
        host_input = node.inputs[0]
        if host_input.owner and isinstance(host_input.owner.op, tensor.Split):
            split_node = host_input.owner
            new_op = GpuSplit(split_node.op.len_splits)
            return [new_op(gpu_from_host(split_node.inputs[0]),
                           *split_node.inputs[1:])[host_input.index]]
    return False

@register_opt()
@local_optimizer([tensor.DimShuffle, gpu_from_host])
def local_gpu_dimshuffle_0(node):
...
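
With this local optimizer registered, a graph-level split should move to the GPU automatically when compiled with device=gpu. A hedged end-to-end sketch, assuming floatX=float32 and a working CUDA backend (not part of the commit):

import theano
import theano.tensor as T

x = T.matrix('x')  # float32 when floatX=float32
# Split x into pieces of 3 and 5 rows along axis 0.
parts = T.split(x, [3, 5], n_splits=2, axis=0)
f = theano.function([x], parts)
# The optimized graph should now contain GpuSplit in place of Split.
theano.printing.debugprint(f)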