提交 1584e29d 作者: Arnaud Bergeron

Add GpuSubtensor.

上级 382d2ed1
......@@ -499,3 +499,99 @@ class GpuAlloc(Op):
return True
gpu_alloc = GpuAlloc()
class GpuSubtensor(tensor.Subtensor):
    """Subtensor op whose input and output variables are ``GpuArrayType``.

    Graph construction is delegated to ``tensor.Subtensor.make_node``; only
    the output type is replaced by a ``GpuArrayType`` with the same dtype and
    broadcastable pattern.
    """

    def make_node(self, x, *inputs):
        """Build the Apply node for indexing ``x`` with ``inputs``.

        ``x`` must already be a variable of type ``GpuArrayType``.  The
        index inputs are taken from the node built by the parent class.
        """
        assert isinstance(x.type, GpuArrayType)
        rval = tensor.Subtensor.make_node(self, x, *inputs)
        base_out_type = rval.outputs[0].type
        otype = GpuArrayType(dtype=base_out_type.dtype,
                             broadcastable=base_out_type.broadcastable)
        return Apply(self, [x] + rval.inputs[1:], [otype()])

    def perform(self, node, inputs, out_):
        """Python implementation: rebuild the index and apply it to ``x``.

        Every ``Type`` placeholder found in ``self.idx_list`` (directly or
        as a slice component) is replaced by the next runtime input, taken
        in first-to-last order.
        """
        out, = out_
        x = inputs[0]

        # Reversed so that ``pop()`` hands back the inputs first-to-last.
        indices = list(reversed(inputs[1:]))

        def convert(entry):
            if isinstance(entry, Type):
                rval = indices.pop()
                if sys.version_info < (2, 5):
                    # Before Python 2.5, PySlice_GetIndicesEx requires
                    # Python int to be passed.
                    rval_ = int(rval)
                    if rval_ != rval:
                        raise IndexError((
                            "Invalid value for indexing: %s. "
                            "That value may be too big.") % rval)
                    return rval_
                return rval
            elif isinstance(entry, slice):
                return slice(convert(entry.start),
                             convert(entry.stop),
                             convert(entry.step))
            else:
                return entry

        cdata = tuple(map(convert, self.idx_list))
        if len(cdata) == 1:
            cdata = cdata[0]
        out[0] = x.__getitem__(cdata)

    def c_code(self, node, name, inputs, outputs, sub):
        """Return the C code that indexes ``inputs[0]`` into ``outputs[0]``.

        The code fills three ``ssize_t`` arrays (starts, stops, steps), one
        entry per item of ``self.idx_list``, and passes them to
        ``GpuArray_index``.  Scalar indices are emitted with a step of 0 and
        no stop value.
        """
        # Work on a copy: entries are consumed as placeholders are resolved.
        indices = list(inputs[1:])

        def resolve(entry):
            # Symbolic entries take the next input name, first-to-last, the
            # same order in which ``perform`` consumes its runtime inputs.
            if isinstance(entry, Type):
                return indices.pop(0)
            return entry

        # Fragments are collected in a list and joined at the end.  Writing
        # to a StringIO that was constructed with initial content starts at
        # position 0 and would overwrite these declarations.
        code = ["""
        ssize_t %(name)s_starts[%(sz)s];
        ssize_t %(name)s_stops[%(sz)s];
        ssize_t %(name)s_steps[%(sz)s];
        """ % dict(name=name, sz=len(self.idx_list))]

        for i, idx in enumerate(self.idx_list):
            if isinstance(idx, slice):
                # Resolve in start, stop, step order so symbolic components
                # consume inputs exactly as ``perform`` does.
                # NOTE(review): a ``None`` component is still emitted as the
                # literal text ``None``, which is not valid C -- open slice
                # bounds need an explicit encoding; confirm what
                # GpuArray_index expects for them.
                start = resolve(idx.start)
                stop = resolve(idx.stop)
                step = resolve(idx.step)
                code.append("""
        %(name)s_starts[%(i)s] = %(start)s;
        %(name)s_stops[%(i)s] = %(stop)s;
        %(name)s_steps[%(i)s] = %(step)s;
        """ % dict(i=i, name=name, start=start, stop=stop, step=step))
            else:
                # Scalar index: either an input (symbolic) or a fixed number.
                code.append("""
        %(name)s_starts[%(i)s] = %(start)s;
        %(name)s_steps[%(i)s] = %(step)s;
        """ % dict(name=name, i=i, start=resolve(idx), step=0))

        # NOTE(review): the arrays are passed as (starts, steps, stops);
        # confirm this order against the GpuArray_index prototype.
        code.append("""
        if (%(out)s) {
            // Try to reuse the python object.
            GpuArray_clear(&%(out)s->ga);
        } else {
            %(out)s = new_GpuArray((PyObject *)&GpuArrayType, GpuArray_default_context);
        }
        if (!%(out)s) { %(fail)s }
        int %(name)s_err;
        %(name)s_err = GpuArray_index(&%(out)s->ga, &%(inp)s->ga,
                                      %(name)s_starts, %(name)s_steps,
                                      %(name)s_stops);
        if (%(name)s_err != GA_NO_ERROR) {
            Py_DECREF(%(out)s); %(out)s = NULL;
            PyErr_SetString(PyExc_RuntimeError, "Error during index");
            %(fail)s
        }
        """ % dict(name=name, fail=sub['fail'], inp=inputs[0],
                   out=outputs[0]))

        return "\n".join(code) + "\n"

    def c_code_cache_version(self):
        """Return an empty version tuple (kept empty for testing)."""
        return ()  # for testing
......@@ -9,6 +9,7 @@ from theano.compile import DeepCopyOp
from theano.tensor.tests.test_basic import safe_make_node
from theano.tests.unittest_tools import SkipTest
from numpy.testing.noseclasses import KnownFailureTest
from theano.tensor.tests.test_subtensor import T_subtensor
import theano.sandbox.gpuarray
......@@ -33,7 +34,8 @@ from theano.sandbox.gpuarray.type import (GpuArrayType,
gpuarray_shared_constructor)
from theano.sandbox.gpuarray.basic_ops import (host_from_gpu, gpu_from_host,
gpu_alloc, gpu_from_cuda,
cuda_from_gpu)
cuda_from_gpu, HostFromGpu,
GpuFromHost, GpuSubtensor)
from theano.tests import unittest_tools as utt
utt.seed_rng()
......@@ -328,3 +330,18 @@ def test_deep_copy():
res = f(a)
assert GpuArrayType.values_eq(res, a)
class G_subtensor(T_subtensor):
    """Re-use the ``T_subtensor`` test class with the GPU array back end.

    NOTE(review): the class attributes below are presumably read by the
    inherited tests to select the op, compilation mode and dtype -- confirm
    against ``T_subtensor``.
    """

    # Op class under test.
    sub = GpuSubtensor
    ops = (GpuSubtensor,)

    # Transfer op classes; presumably skipped when the inherited tests
    # inspect a compiled graph -- TODO confirm.
    ignore_topo = (HostFromGpu, GpuFromHost)

    # Shared-variable constructor, compilation mode and data type.
    shared = staticmethod(gpuarray_shared_constructor)
    mode = mode_with_gpu
    dtype = 'float32'  # avoid errors on gpus which do not support float64
    fast_compile = False

    def __init__(self, name):
        """Forward ``name`` unchanged to ``T_subtensor.__init__``."""
        T_subtensor.__init__(self, name)

    def shortDescription(self):
        """Always return ``None`` as the short description."""
        return None
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论