Add GpuSubtensor.

1584e29d · Arnaud Bergeron · 382d2ed1 · 1584e29d · 1584e29d
--- a/theano/sandbox/gpuarray/basic_ops.py
+++ b/theano/sandbox/gpuarray/basic_ops.py
@@ -499,3 +499,99 @@ class GpuAlloc(Op):
        return True
 gpu_alloc = GpuAlloc()
+class GpuSubtensor(tensor.Subtensor):
+    """Subtensor op whose input and output are GpuArrayType variables."""
+    def make_node(self, x, *inputs):
+        """Build the Apply node for indexing `x` with `inputs`.
+        The node is first built by tensor.Subtensor.make_node; its output
+        dtype and broadcastable pattern are then reused to create a
+        GpuArrayType output. `x` must already be a GpuArrayType variable.
+        """
+        assert isinstance(x.type, GpuArrayType)
+        rval = tensor.Subtensor.make_node(self, x, *inputs)
+        otype = GpuArrayType(dtype=rval.outputs[0].type.dtype,
+                             broadcastable=rval.outputs[0].type.broadcastable)
+        return Apply(self, [x] + rval.inputs[1:], [otype()])
+    def perform(self, node, inputs, out_):
+        """Index inputs[0] according to self.idx_list (Python fallback).
+        inputs[1:] holds the runtime values for every Type placeholder
+        found in self.idx_list, in order. The result of the indexing is
+        stored in out_[0][0].
+        """
+        out, = out_
+        x = inputs[0]
+        # Reversed so that successive pop() calls hand out the runtime
+        # index values in their original order.
+        indices = list(reversed(inputs[1:]))
+        def convert(entry):
+            # Replace a Type placeholder by the next runtime value; recurse
+            # into slices; pass any other (constant) entry through.
+            if isinstance(entry, Type):
+                rval = indices.pop()
+                if sys.version_info < (2, 5):
+                    # Before Python 2.5, PySlice_GetIndicesEx requires
+                    # Python int to be passed.
+                    rval_ = int(rval)
+                    if rval_ != rval:
+                        raise IndexError((
+                            "Invalid value for indexing: %s. "
+                            "That value may be too big.") % rval)
+                    return rval_
+                return rval
+            elif isinstance(entry, slice):
+                return slice(convert(entry.start),
+                             convert(entry.stop),
+                             convert(entry.step))
+            else:
+                return entry
+        # These statements must live at the level of perform(): nested
+        # inside convert() they sit after branches that all return, so
+        # they would never run and out[0] would never be set.
+        cdata = tuple(map(convert, self.idx_list))
+        if len(cdata) == 1:
+            cdata = cdata[0]
+        out[0] = x.__getitem__(cdata)
+    def c_code(self, node, name, inputs, outputs, sub):
+        """Return the C source that indexes inputs[0] into outputs[0].
+        `inputs` and `outputs` are the C variable names of the node's
+        inputs and outputs; `name` prefixes the local C identifiers and
+        sub['fail'] is the code to run on error.
+        """
+        # Private copy: entries are consumed front to back so that the
+        # runtime indices are matched with idx_list in order.
+        indices = list(inputs[1:])
+        # Start from an empty buffer. A StringIO created with initial
+        # contents keeps its position at 0, so the writes below would
+        # overwrite the declarations instead of following them.
+        sio = StringIO.StringIO()
+        print >>sio, """
+        ssize_t %(name)s_starts[%(sz)s];
+        ssize_t %(name)s_stops[%(sz)s];
+        ssize_t %(name)s_steps[%(sz)s];
+        """ % dict(name=name, sz=len(self.idx_list))
+        for i, idx in enumerate(self.idx_list):
+            if isinstance(idx, Type):
+                # Index by an input number
+                print >>sio, """
+                %(name)s_starts[%(i)s] = %(start)s;
+                %(name)s_steps[%(i)s] = %(step)s;
+                """ % dict(name=name, i=i, start=indices.pop(0), step=0)
+            elif isinstance(idx, slice):
+                # index by a fixed slice
+                # NOTE(review): the bounds are interpolated as-is; a None
+                # or symbolic (Type) bound is not translated to C here --
+                # confirm how such slices are meant to be handled.
+                print >>sio, """
+                %(name)s_starts[%(i)s] = %(start)s;
+                %(name)s_stops[%(i)s] = %(stop)s;
+                %(name)s_steps[%(i)s] = %(step)s;
+                """ % dict(i=i, name=name, start=idx.start, stop=idx.stop,
+                           step=idx.step)
+            else:
+                # Index by a fixed number
+                print >>sio, """
+                %(name)s_starts[%(i)s] = %(start)s;
+                %(name)s_steps[%(i)s] = %(step)s;
+                """ % dict(name=name, i=i, start=idx, step=0)
+        # NOTE(review): the arrays are passed as (starts, steps, stops);
+        # confirm this order against the GpuArray_index prototype in use.
+        print >>sio, """
+        if (%(out)s) {
+            // Try to reuse the python object.
+            GpuArray_clear(&%(out)s->ga);
+        } else {
+            %(out)s = new_GpuArray((PyObject *)&GpuArrayType, GpuArray_default_context);
+        }
+        if (!%(out)s) { %(fail)s }
+        int %(name)s_err;
+        %(name)s_err = GpuArray_index(&%(out)s->ga, &%(inp)s->ga,
+                                      %(name)s_starts, %(name)s_steps,
+                                      %(name)s_stops);
+        if (%(name)s_err != GA_NO_ERROR) {
+            Py_DECREF(%(out)s); %(out)s = NULL;
+            PyErr_SetString(PyExc_RuntimeError, "Error during index");
+            %(fail)s
+        }
+""" % dict(name=name, fail=sub['fail'], inp=inputs[0], out=outputs[0])
+        return sio.getvalue()
+    def c_code_cache_version(self):
+        """Return an empty version tuple (kept empty while testing)."""
+        return () # for testing
--- a/theano/sandbox/gpuarray/tests/test_basic_ops.py
+++ b/theano/sandbox/gpuarray/tests/test_basic_ops.py
@@ -9,6 +9,7 @@ from theano.compile import DeepCopyOp
 from theano.tensor.tests.test_basic import safe_make_node
 from theano.tests.unittest_tools import SkipTest
 from numpy.testing.noseclasses import KnownFailureTest
+from theano.tensor.tests.test_subtensor import T_subtensor
 import theano.sandbox.gpuarray
@@ -33,7 +34,8 @@ from theano.sandbox.gpuarray.type import (GpuArrayType,
                                          gpuarray_shared_constructor)
 from theano.sandbox.gpuarray.basic_ops import (host_from_gpu, gpu_from_host,
                                               gpu_alloc, gpu_from_cuda,
-                                               cuda_from_gpu)
+                                               cuda_from_gpu, HostFromGpu,
+                                               GpuFromHost, GpuSubtensor)
 from theano.tests import unittest_tools as utt
 utt.seed_rng()
@@ -328,3 +330,18 @@ def test_deep_copy():
    res = f(a)
    assert GpuArrayType.values_eq(res, a)
+class G_subtensor(T_subtensor):
+    # Re-runs the T_subtensor test case with the gpuarray settings below.
+    # Op under test, given both as the single op and as the ops tuple.
+    sub = GpuSubtensor
+    ops = (GpuSubtensor,)
+    # Compilation settings.
+    mode = mode_with_gpu
+    fast_compile = False
+    # Transfer ops; presumably skipped when T_subtensor inspects the
+    # compiled graph -- confirm in T_subtensor.
+    ignore_topo = (HostFromGpu, GpuFromHost)
+    # Data settings.
+    shared = staticmethod(gpuarray_shared_constructor)
+    dtype = 'float32' # avoid errors on gpus which do not support float64
+    def __init__(self, name):
+        # Nothing extra to set up: defer entirely to the base test case.
+        T_subtensor.__init__(self, name)
+    def shortDescription(self):
+        # Always report no short description for these tests.
+        return None