Commit b740b55f authored by Arnaud Bergeron

Add GpuJoin and GpuSplit to gpuarray.

Parent commit: 409552f5
...@@ -6,7 +6,7 @@ import theano ...@@ -6,7 +6,7 @@ import theano
from theano import Op, Apply from theano import Op, Apply
from theano import tensor, scalar, config from theano import tensor, scalar, config
from theano.scalar import Scalar from theano.scalar import Scalar
from theano.tensor.basic import Alloc from theano.tensor.basic import Alloc, Join, Split
from theano.gof.python25 import any from theano.gof.python25 import any
from theano.gof.utils import MethodNotDefined from theano.gof.utils import MethodNotDefined
...@@ -725,6 +725,62 @@ class GpuReshape(HideC, tensor.Reshape): ...@@ -725,6 +725,62 @@ class GpuReshape(HideC, tensor.Reshape):
out[0] = x.reshape(tuple(shp)) out[0] = x.reshape(tuple(shp))
class GpuJoin(HideC, Join):
    """
    Concatenate tensors along a given axis, on the GPU.

    Inherits the CPU ``Join`` op's interface and validation; only
    ``make_node``, ``perform`` and the C implementation are overridden
    to operate on GpuArray data.
    """
    def make_node(self, axis, *tensors):
        # Let the CPU Join do the axis/shape checking and infer the output
        # dtype and broadcastable pattern, then rebuild the Apply with
        # GPU-typed data inputs and a GPU-typed output.  The axis input
        # (node.inputs[0]) is kept as-is on the CPU.
        node = Join.make_node(self, axis, *tensors)
        return Apply(self, [node.inputs[0]] + map(as_gpuarray_variable,
                                                  tensors),
                     [GpuArrayType(broadcastable=node.outputs[0].broadcastable,
                                   dtype=node.outputs[0].dtype)()])

    def perform(self, node, axis_and_tensors, out_):
        """Python fallback: inputs are ``[axis, tensor1, tensor2, ...]``."""
        out, = out_
        axis = axis_and_tensors[0]
        tensors = axis_and_tensors[1:]
        # astype() forces the result to the dtype make_node inferred
        # (the inputs may have been upcast there).
        out[0] = pygpu.concatenate(tensors, axis=axis).astype(
            node.outputs[0].dtype)

    def c_code_cache_version(self):
        # Bump this whenever the generated C code below changes.
        return (0,)

    def c_code(self, node, name, inputs, out_, sub):
        # inputs[0] is the axis scalar; inputs[1:] are the GpuArray inputs.
        # Build one "als[i] = &<input>->ga;" statement per data input so the
        # C code can hand pygpu_concatenate an array of GpuArray pointers.
        copy_to_list = []
        restype=pygpu.gpuarray.dtype_to_typecode(node.outputs[0].dtype)
        for i, inp in enumerate(inputs[1:]):
            copy_to_list.append("als[%s] = &%s->ga;" % (i, inp))
        return """
        GpuArray **als = (GpuArray **)PyMem_Malloc(sizeof(GpuArray *) * %(n)s);
        if (als == NULL) {
            PyErr_NoMemory();
            %(fail)s
        }
        %(copy_inputs_to_list)s
        Py_XDECREF(%(out)s);
        %(out)s = pygpu_concatenate(als, %(n)s, PyInt_AsLong((PyObject *)%(axis)s),
                                    %(restype)s, (PyObject *)&PyGpuArrayType,
                                    pygpu_default_context());
        PyMem_Free(als);
        if (%(out)s == NULL)
            %(fail)s
        """ % dict(n=len(inputs[1:]), fail=sub['fail'], out=out_[0],
                   axis=inputs[0], copy_inputs_to_list='\n'.join(copy_to_list),
                   restype=restype)

# Singleton instance used by the graph optimizers.
gpu_join = GpuJoin()
class GpuSplit(HideC, Split):
    """
    Split a tensor into several subtensors along one axis, on the GPU.

    Only ``make_node`` is specialized: all validation and output-pattern
    inference is delegated to the CPU ``Split`` op, and its ``perform``
    is inherited unchanged (per the original author's note, it is
    presumed suitable for GpuArray inputs as well).
    """
    def make_node(self, x, axis, splits):
        # Build the CPU node first to reuse Split's checking/inference,
        # then swap in GPU-typed variables for the data input and outputs.
        cpu_node = Split.make_node(self, x, axis, splits)
        gpu_x = as_gpuarray_variable(x)
        gpu_outputs = []
        for cpu_out in cpu_node.outputs:
            gpu_outputs.append(
                GpuArrayType(dtype=cpu_out.dtype,
                             broadcastable=cpu_out.broadcastable)())
        return Apply(self, [gpu_x] + cpu_node.inputs[1:], gpu_outputs)
class GpuEye(GpuKernelBase, Op): class GpuEye(GpuKernelBase, Op):
def __init__(self, dtype=None): def __init__(self, dtype=None):
if dtype is None: if dtype is None:
......
...@@ -21,7 +21,7 @@ from theano.tensor.nnet.conv import ConvOp ...@@ -21,7 +21,7 @@ from theano.tensor.nnet.conv import ConvOp
from theano.sandbox.gpuarray.type import GpuArrayType from theano.sandbox.gpuarray.type import GpuArrayType
from theano.sandbox.gpuarray.basic_ops import ( from theano.sandbox.gpuarray.basic_ops import (
host_from_gpu, gpu_from_host, HostFromGpu, host_from_gpu, gpu_from_host, HostFromGpu,
gpu_alloc, GpuAlloc, GpuReshape, GpuEye gpu_alloc, GpuAlloc, GpuReshape, GpuEye, gpu_join,
) )
from theano.sandbox.gpuarray.blas import gpu_dot22, GpuGemv, GpuGemm, GpuGer from theano.sandbox.gpuarray.blas import gpu_dot22, GpuGemv, GpuGemm, GpuGer
from theano.sandbox.gpuarray.conv import GpuConv from theano.sandbox.gpuarray.conv import GpuConv
...@@ -267,6 +267,18 @@ def local_gpua_specifyShape(node): ...@@ -267,6 +267,18 @@ def local_gpua_specifyShape(node):
return tensor.specify_shape return tensor.specify_shape
@register_opt()
@op_lifter([tensor.Join])
def local_gpua_join(node):
    """Lift a CPU Join node to the GPU by substituting the GpuJoin op.

    Join carries no per-node parameters, so the shared ``gpu_join``
    singleton can be returned for every lifted node.
    """
    return gpu_join
@register_opt()
@op_lifter([tensor.Split])
def local_gpua_split(node):
    """Lift a CPU Split node to the GPU.

    Unlike Join, Split is parameterized (by the number of splits), so a
    fresh GpuSplit matching the replaced node's ``len_splits`` is built.
    """
    return GpuSplit(node.op.len_splits)
@register_opt() @register_opt()
@op_lifter([tensor.Subtensor]) @op_lifter([tensor.Subtensor])
def local_gpua_subtensor(node): def local_gpua_subtensor(node):
......
...@@ -7,7 +7,9 @@ import theano ...@@ -7,7 +7,9 @@ import theano
import theano.tensor as T import theano.tensor as T
from theano.tensor import TensorType from theano.tensor import TensorType
from theano.tensor.basic import alloc from theano.tensor.basic import alloc
from theano.tensor.tests.test_basic import rand, safe_make_node, T_reshape from theano.tensor.tests.test_basic import (
rand, safe_make_node, T_reshape, T_Join_and_Split
)
from theano.tests.unittest_tools import SkipTest from theano.tests.unittest_tools import SkipTest
from numpy.testing.noseclasses import KnownFailureTest from numpy.testing.noseclasses import KnownFailureTest
...@@ -38,7 +40,7 @@ from theano.sandbox.gpuarray.basic_ops import ( ...@@ -38,7 +40,7 @@ from theano.sandbox.gpuarray.basic_ops import (
gpu_from_cuda, gpu_from_cuda,
cuda_from_gpu, HostFromGpu, cuda_from_gpu, HostFromGpu,
GpuFromHost, GpuReshape, GpuFromHost, GpuReshape,
GpuEye) GpuJoin, GpuSplit, GpuEye)
from theano.tests import unittest_tools as utt from theano.tests import unittest_tools as utt
utt.seed_rng() utt.seed_rng()
...@@ -339,6 +341,20 @@ class G_reshape(T_reshape): ...@@ -339,6 +341,20 @@ class G_reshape(T_reshape):
assert self.op == GpuReshape assert self.op == GpuReshape
class G_Join_and_Split(T_Join_and_Split):
    """Run the CPU Join/Split test suite against the gpuarray backend."""

    def setUp(self):
        super(G_Join_and_Split, self).setUp()
        self.shared = gpuarray_shared_constructor
        self.mode = mode_with_gpu.excluding('constant_folding')
        # Stick to float32 to avoid errors on devices with limited
        # (e.g. no float64) support.
        self.floatX = 'float32'
        self.join_op = GpuJoin
        self.split_op = GpuSplit
        # There is no MakeVector op on the GPU, so GpuJoin stands in.
        self.make_vector_op = GpuJoin
        self.hide_error = theano.config.mode not in ['DebugMode',
                                                     'DEBUG_MODE']
def test_gpueye(): def test_gpueye():
def check(dtype, N, M_=None): def check(dtype, N, M_=None):
# Theano does not accept None as a tensor. # Theano does not accept None as a tensor.
......
Markdown is supported
0%
You are about to add 0 people to this discussion. Please proceed with caution.
Please finish editing this comment first!
Register or sign in to comment