Commit b740b55f, authored by Arnaud Bergeron

Add GpuJoin and GpuSplit to gpuarray.

Parent: 409552f5
......@@ -6,7 +6,7 @@ import theano
from theano import Op, Apply
from theano import tensor, scalar, config
from theano.scalar import Scalar
from theano.tensor.basic import Alloc
from theano.tensor.basic import Alloc, Join, Split
from theano.gof.python25 import any
from theano.gof.utils import MethodNotDefined
......@@ -725,6 +725,62 @@ class GpuReshape(HideC, tensor.Reshape):
out[0] = x.reshape(tuple(shp))
class GpuJoin(HideC, Join):
    """Join (concatenation along an axis) for the gpuarray backend.

    Shape/axis validation is delegated to the CPU ``Join.make_node``;
    this op only rebuilds the Apply node with GPU-typed inputs/outputs.
    """

    def make_node(self, axis, *tensors):
        """Return an Apply whose tensor inputs live on the GPU.

        The first input stays the (CPU scalar) axis produced by the base
        class; each tensor is transferred with ``as_gpuarray_variable``.
        """
        node = Join.make_node(self, axis, *tensors)
        # Use a list comprehension instead of map(): under Python 3,
        # map() returns an iterator and ``list + iterator`` raises
        # TypeError, so the original ``[...] + map(...)`` breaks there.
        gpu_tensors = [as_gpuarray_variable(t) for t in tensors]
        return Apply(self, [node.inputs[0]] + gpu_tensors,
                     [GpuArrayType(broadcastable=node.outputs[0].broadcastable,
                                   dtype=node.outputs[0].dtype)()])

    def perform(self, node, axis_and_tensors, out_):
        """Concatenate the GPU arrays along ``axis`` into ``out_[0]``."""
        out, = out_
        # First input is the axis (an integer-like scalar, possibly a
        # 0-d ndarray — coerce to a plain int for pygpu); the rest are
        # the arrays to concatenate.
        axis = int(axis_and_tensors[0])
        tensors = axis_and_tensors[1:]
        out[0] = pygpu.concatenate(tensors, axis=axis).astype(
            node.outputs[0].dtype)

    def c_code_cache_version(self):
        return (0,)

    def c_code(self, node, name, inputs, out_, sub):
        """Emit C code calling ``pygpu_concatenate`` on the input list."""
        copy_to_list = []
        restype = pygpu.gpuarray.dtype_to_typecode(node.outputs[0].dtype)
        # inputs[0] is the axis; build one assignment per tensor input
        # to fill the temporary GpuArray* list passed to pygpu.
        for i, inp in enumerate(inputs[1:]):
            copy_to_list.append("als[%s] = &%s->ga;" % (i, inp))
        return """
        GpuArray **als = (GpuArray **)PyMem_Malloc(sizeof(GpuArray *) * %(n)s);
        if (als == NULL) {
            PyErr_NoMemory();
            %(fail)s
        }
        %(copy_inputs_to_list)s
        Py_XDECREF(%(out)s);
        %(out)s = pygpu_concatenate(als, %(n)s, PyInt_AsLong((PyObject *)%(axis)s),
                                    %(restype)s, (PyObject *)&PyGpuArrayType,
                                    pygpu_default_context());
        PyMem_Free(als);
        if (%(out)s == NULL)
            %(fail)s
        """ % dict(n=len(inputs[1:]), fail=sub['fail'], out=out_[0],
                   axis=inputs[0], copy_inputs_to_list='\n'.join(copy_to_list),
                   restype=restype)


gpu_join = GpuJoin()
class GpuSplit(HideC, Split):
    """Split for the gpuarray backend.

    Only ``make_node`` is overridden; the inherited CPU ``Split.perform``
    is reused since it also works on GPU arrays.
    """

    def make_node(self, x, axis, splits):
        """Build the Apply node with ``x`` on the GPU and GPU-typed outputs."""
        node = Split.make_node(self, x, axis, splits)
        gpu_x = as_gpuarray_variable(x)
        gpu_outputs = []
        for cpu_out in node.outputs:
            gpu_outputs.append(
                GpuArrayType(dtype=cpu_out.dtype,
                             broadcastable=cpu_out.broadcastable)())
        # Keep the axis and splits inputs produced by the base class.
        return Apply(self, [gpu_x] + node.inputs[1:], gpu_outputs)

    # perform() is inherited from the CPU Split op, which is suitable here.
class GpuEye(GpuKernelBase, Op):
def __init__(self, dtype=None):
if dtype is None:
......
......@@ -21,7 +21,7 @@ from theano.tensor.nnet.conv import ConvOp
from theano.sandbox.gpuarray.type import GpuArrayType
from theano.sandbox.gpuarray.basic_ops import (
    host_from_gpu, gpu_from_host, HostFromGpu,
    gpu_alloc, GpuAlloc, GpuReshape, GpuEye, gpu_join, GpuSplit,
)
from theano.sandbox.gpuarray.blas import gpu_dot22, GpuGemv, GpuGemm, GpuGer
from theano.sandbox.gpuarray.conv import GpuConv
......@@ -267,6 +267,18 @@ def local_gpua_specifyShape(node):
return tensor.specify_shape
@register_opt()
@op_lifter([tensor.Join])
def local_gpua_join(node):
    """Lift a CPU ``Join`` node to the GPU by substituting ``gpu_join``."""
    return gpu_join
@register_opt()
@op_lifter([tensor.Split])
def local_gpua_split(node):
    """Lift a CPU ``Split`` node to ``GpuSplit``.

    The replacement op is rebuilt with the same number of splits so the
    Apply keeps an identical output count.

    NOTE(review): ``GpuSplit`` must be imported from ``basic_ops``
    alongside ``gpu_join``; without that import this rewrite raises
    NameError the first time it fires.
    """
    return GpuSplit(node.op.len_splits)
@register_opt()
@op_lifter([tensor.Subtensor])
def local_gpua_subtensor(node):
......
......@@ -7,7 +7,9 @@ import theano
import theano.tensor as T
from theano.tensor import TensorType
from theano.tensor.basic import alloc
from theano.tensor.tests.test_basic import rand, safe_make_node, T_reshape
from theano.tensor.tests.test_basic import (
rand, safe_make_node, T_reshape, T_Join_and_Split
)
from theano.tests.unittest_tools import SkipTest
from numpy.testing.noseclasses import KnownFailureTest
......@@ -38,7 +40,7 @@ from theano.sandbox.gpuarray.basic_ops import (
gpu_from_cuda,
cuda_from_gpu, HostFromGpu,
GpuFromHost, GpuReshape,
GpuEye)
GpuJoin, GpuSplit, GpuEye)
from theano.tests import unittest_tools as utt
utt.seed_rng()
......@@ -339,6 +341,20 @@ class G_reshape(T_reshape):
assert self.op == GpuReshape
class G_Join_and_Split(T_Join_and_Split):
    """Run the CPU Join/Split test suite against the gpuarray ops."""

    def setUp(self):
        super(G_Join_and_Split, self).setUp()
        self.mode = mode_with_gpu.excluding('constant_folding')
        self.shared = gpuarray_shared_constructor
        self.join_op = GpuJoin
        self.split_op = GpuSplit
        # The GPU backend has no MakeVector op, so the inherited tests
        # emulate it with a join.
        self.make_vector_op = GpuJoin
        # Stick to float32 so the tests run on devices without float64.
        self.floatX = 'float32'
        self.hide_error = theano.config.mode not in ['DebugMode',
                                                     'DEBUG_MODE']
def test_gpueye():
def check(dtype, N, M_=None):
# Theano does not accept None as a tensor.
......
Markdown formatting is supported.
0%
You are mentioning 0 people in this discussion. Please proceed with caution.
Please finish editing this comment first!
Register or sign in to post a comment.