提交 5e1935ba authored 作者: Arnaud Bergeron's avatar Arnaud Bergeron

Mirror tensor and move the subtensor op in its own file.

上级 d4d7141a
...@@ -499,99 +499,3 @@ class GpuAlloc(Op): ...@@ -499,99 +499,3 @@ class GpuAlloc(Op):
        return True
gpu_alloc = GpuAlloc()
class GpuSubtensor(tensor.Subtensor):
    """Subtensor (indexing/slicing) op for variables of ``GpuArrayType``.

    The index bookkeeping (``self.idx_list``) is inherited from
    ``tensor.Subtensor``; only the output type and the C implementation
    (which calls ``GpuArray_index``) are GPU-specific.
    """

    def make_node(self, x, *inputs):
        assert isinstance(x.type, GpuArrayType)
        rval = tensor.Subtensor.make_node(self, x, *inputs)
        # Same dtype/broadcast pattern as the CPU op's output, but on GPU.
        otype = GpuArrayType(dtype=rval.outputs[0].type.dtype,
                             broadcastable=rval.outputs[0].type.broadcastable)
        return Apply(self, [x] + rval.inputs[1:], [otype()])

    def perform(self, node, inputs, out_):
        out, = out_
        x = inputs[0]
        # Reversed so that convert() can pop() the runtime scalar index
        # inputs in the same front-to-back order as self.idx_list.
        indices = list(reversed(inputs[1:]))

        def convert(entry):
            # Map an idx_list entry to a concrete Python index:
            # a Type placeholder consumes the next runtime input,
            # a slice is converted element-wise, anything else (a
            # compile-time constant) passes through unchanged.
            if isinstance(entry, Type):
                rval = indices.pop()
                if sys.version_info < (2, 5):
                    # Before Python 2.5, PySlice_GetIndicesEx requires
                    # Python int to be passed.
                    rval_ = int(rval)
                    if rval_ != rval:
                        raise IndexError((
                            "Invalid value for indexing: %s. "
                            "That value may be too big.") % rval)
                    return rval_
                return rval
            elif isinstance(entry, slice):
                return slice(convert(entry.start),
                             convert(entry.stop),
                             convert(entry.step))
            else:
                return entry

        cdata = tuple(map(convert, self.idx_list))
        if len(cdata) == 1:
            cdata = cdata[0]

        out[0] = x.__getitem__(cdata)

    def c_code(self, node, name, inputs, outputs, sub):
        # BUG FIX: pop the runtime index inputs in the same order as
        # perform() does (front-to-back w.r.t. self.idx_list); the old
        # code popped them from the wrong end, mispairing indices when
        # there is more than one runtime scalar index.
        indices = list(reversed(inputs[1:]))
        # BUG FIX: start from an empty buffer.  StringIO.StringIO(s)
        # leaves the file position at 0, so the subsequent prints were
        # overwriting the array declarations instead of appending.
        sio = StringIO.StringIO()
        print >>sio, """
        ssize_t %(name)s_starts[%(sz)s];
        ssize_t %(name)s_stops[%(sz)s];
        ssize_t %(name)s_steps[%(sz)s];
        """ % dict(name=name, sz=len(self.idx_list))
        for i, idx in enumerate(self.idx_list):
            if isinstance(idx, Type):
                # Index by a runtime scalar input.  step == 0 appears to
                # mark a scalar index (dimension removed) for
                # GpuArray_index, with stops[i] left unset in that case —
                # TODO confirm against the libgpuarray API.
                print >>sio, """
                %(name)s_starts[%(i)s] = %(start)s;
                %(name)s_steps[%(i)s] = %(step)s;
                """ % dict(name=name, i=i, start=indices.pop(), step=0)
            elif isinstance(idx, slice):
                # Index by a fixed (compile-time) slice.
                print >>sio, """
                %(name)s_starts[%(i)s] = %(start)s;
                %(name)s_stops[%(i)s] = %(stop)s;
                %(name)s_steps[%(i)s] = %(step)s;
                """ % dict(i=i, name=name, start=idx.start, stop=idx.stop,
                           step=idx.step)
            else:
                # Index by a fixed (compile-time) number.
                print >>sio, """
                %(name)s_starts[%(i)s] = %(start)s;
                %(name)s_steps[%(i)s] = %(step)s;
                """ % dict(name=name, i=i, start=idx, step=0)
        print >>sio, """
        if (%(out)s) {
            // Try to reuse the python object.
            GpuArray_clear(&%(out)s->ga);
        } else {
            %(out)s = new_GpuArray((PyObject *)&GpuArrayType, GpuArray_default_context);
        }
        if (!%(out)s) { %(fail)s }

        int %(name)s_err;
        %(name)s_err = GpuArray_index(&%(out)s->ga, &%(inp)s->ga,
                                      %(name)s_starts, %(name)s_steps,
                                      %(name)s_stops);
        if (%(name)s_err != GA_NO_ERROR) {
            Py_DECREF(%(out)s); %(out)s = NULL;
            PyErr_SetString(PyExc_RuntimeError, "Error during index");
            %(fail)s
        }
        """ % dict(name=name, fail=sub['fail'], inp=inputs[0], out=outputs[0])
        return sio.getvalue()

    def c_code_cache_version(self):
        # Bumped: the generated C changed (missing ';' after the
        # GpuArray_index call added, index-input ordering fixed).
        return (1,)
import numpy
import theano
from theano import tensor
from theano.tensor.subtensor import Subtensor, get_idx_list
from theano.gof.python25 import all, any
try:
import pygpu
from pygpu import gpuarray
except ImportError:
pass
from type import GpuArrayType
from basic_ops import as_gpuarray_variable, zeros_like
class GpuSubtensor(Subtensor):
    """Subtensor (indexing/slicing) op for variables of ``GpuArrayType``.

    Index bookkeeping (``self.idx_list``) comes from
    ``theano.tensor.subtensor.Subtensor``; this subclass only changes the
    output type and the C implementation (``GpuArray_index``).
    """

    def make_node(self, x, *inputs):
        # NOTE(review): the parent make_node is called on the raw x and
        # only then is x converted to a gpuarray variable; confirm the
        # parent accepts a GpuArrayType input here.
        rval = tensor.Subtensor.make_node(self, x, *inputs)
        otype = GpuArrayType(dtype=rval.outputs[0].type.dtype,
                             broadcastable=rval.outputs[0].type.broadcastable)
        x = as_gpuarray_variable(x)
        return Apply(self, [x] + rval.inputs[1:], [otype()])

    def perform(self, node, inputs, out_):
        out, = out_
        x = inputs[0]
        # NOTE(review): perform_cache_cdata is never initialized in this
        # class; it is presumably set to None by the parent Subtensor —
        # confirm.
        if self.perform_cache_cdata is not None:
            # All indices were compile-time constants: reuse the cached
            # index tuple.
            out[0] = x.__getitem__(self.perform_cache_cdata)
            return

        cdata = get_idx_list(inputs, self.idx_list)
        if len(cdata) == 1:
            cdata = cdata[0]
        if len(inputs) == 1:
            # No runtime index inputs, so cdata is constant; cache it for
            # subsequent calls.
            self.perform_cache_cdata = cdata

        out[0] = x.__getitem__(cdata)

    def c_code(self, node, name, inputs, outputs, sub):
        # BUG FIX: pop the runtime index inputs in the same front-to-back
        # order (w.r.t. self.idx_list) that get_idx_list uses in
        # perform(); the old code popped them from the wrong end.
        indices = list(reversed(inputs[1:]))
        # BUG FIX: start from an empty buffer.  StringIO.StringIO(s)
        # leaves the file position at 0, so the subsequent prints were
        # overwriting the array declarations instead of appending.
        sio = StringIO.StringIO()
        print >>sio, """
        ssize_t %(name)s_starts[%(sz)s];
        ssize_t %(name)s_stops[%(sz)s];
        ssize_t %(name)s_steps[%(sz)s];
        """ % dict(name=name, sz=len(self.idx_list))
        for i, idx in enumerate(self.idx_list):
            if isinstance(idx, Type):
                # Index by a runtime scalar input.  step == 0 appears to
                # mark a scalar index (dimension removed) for
                # GpuArray_index, with stops[i] left unset in that case —
                # TODO confirm against the libgpuarray API.
                print >>sio, """
                %(name)s_starts[%(i)s] = %(start)s;
                %(name)s_steps[%(i)s] = %(step)s;
                """ % dict(name=name, i=i, start=indices.pop(), step=0)
            elif isinstance(idx, slice):
                # Index by a fixed (compile-time) slice.
                print >>sio, """
                %(name)s_starts[%(i)s] = %(start)s;
                %(name)s_stops[%(i)s] = %(stop)s;
                %(name)s_steps[%(i)s] = %(step)s;
                """ % dict(i=i, name=name, start=idx.start, stop=idx.stop,
                           step=idx.step)
            else:
                # Index by a fixed (compile-time) number.
                print >>sio, """
                %(name)s_starts[%(i)s] = %(start)s;
                %(name)s_steps[%(i)s] = %(step)s;
                """ % dict(name=name, i=i, start=idx, step=0)
        print >>sio, """
        if (%(out)s) {
            // Try to reuse the python object.
            GpuArray_clear(&%(out)s->ga);
        } else {
            %(out)s = new_GpuArray((PyObject *)&GpuArrayType, GpuArray_default_context);
        }
        if (!%(out)s) { %(fail)s }

        int %(name)s_err;
        %(name)s_err = GpuArray_index(&%(out)s->ga, &%(inp)s->ga,
                                      %(name)s_starts, %(name)s_steps,
                                      %(name)s_stops);
        if (%(name)s_err != GA_NO_ERROR) {
            Py_DECREF(%(out)s); %(out)s = NULL;
            PyErr_SetString(PyExc_RuntimeError, "Error during index");
            %(fail)s
        }
        """ % dict(name=name, fail=sub['fail'], inp=inputs[0], out=outputs[0])
        return sio.getvalue()

    def c_code_cache_version(self):
        # Bumped: the generated C changed (missing ';' after the
        # GpuArray_index call added, index-input ordering fixed).
        return (1,)

    def grad(self, inputs, grads):
        gz, = grads
        x = inputs[0]
        rest = inputs[1:]
        output = self(*inputs)
        if output.dtype.find('int') != -1:
            # Integer outputs are not differentiable: return a zero
            # gradient in the configured float dtype.
            first = zeros_like(x, theano.config.floatX)
        else:
            # NOTE(review): GpuIncSubtensor and DisconnectedType are not
            # in this file's visible imports — confirm they are defined
            # or imported elsewhere in the module.
            first = GpuIncSubtensor(self.idx_list)(zeros_like(x), gz, *rest)
        # The index inputs themselves carry no gradient.
        return ([first] + [DisconnectedType()()] * len(rest))
...@@ -9,7 +9,6 @@ from theano.compile import DeepCopyOp ...@@ -9,7 +9,6 @@ from theano.compile import DeepCopyOp
from theano.tensor.tests.test_basic import safe_make_node from theano.tensor.tests.test_basic import safe_make_node
from theano.tests.unittest_tools import SkipTest from theano.tests.unittest_tools import SkipTest
from numpy.testing.noseclasses import KnownFailureTest from numpy.testing.noseclasses import KnownFailureTest
from theano.tensor.tests.test_subtensor import T_subtensor
import theano.sandbox.gpuarray import theano.sandbox.gpuarray
...@@ -35,7 +34,7 @@ from theano.sandbox.gpuarray.type import (GpuArrayType, ...@@ -35,7 +34,7 @@ from theano.sandbox.gpuarray.type import (GpuArrayType,
from theano.sandbox.gpuarray.basic_ops import (host_from_gpu, gpu_from_host, from theano.sandbox.gpuarray.basic_ops import (host_from_gpu, gpu_from_host,
gpu_alloc, gpu_from_cuda, gpu_alloc, gpu_from_cuda,
cuda_from_gpu, HostFromGpu, cuda_from_gpu, HostFromGpu,
GpuFromHost, GpuSubtensor) GpuFromHost)
from theano.tests import unittest_tools as utt from theano.tests import unittest_tools as utt
utt.seed_rng() utt.seed_rng()
...@@ -330,18 +329,3 @@ def test_deep_copy(): ...@@ -330,18 +329,3 @@ def test_deep_copy():
    res = f(a)
    assert GpuArrayType.values_eq(res, a)
class G_subtensor(T_subtensor):
    """Run the generic Subtensor test suite against the gpuarray backend."""

    # Backend-specific knobs consumed by the shared T_subtensor suite.
    shared = staticmethod(gpuarray_shared_constructor)
    sub = GpuSubtensor
    ops = (GpuSubtensor,)
    mode = mode_with_gpu
    # avoid errors on gpus which do not support float64
    dtype = 'float32'
    ignore_topo = (HostFromGpu, GpuFromHost)
    fast_compile = False

    def shortDescription(self):
        # Keep the test runner from using docstrings as test names.
        return None

    def __init__(self, name):
        super(G_subtensor, self).__init__(name)
from theano.tensor.tests.test_subtensor import T_subtensor
from theano.sandbox.gpuarray.basic_ops import (HostFromGpu, GpuFromHost)
from theano.sandbox.gpuarray.subtensor import GpuSubtensor
from theano.sandbox.gpuarray.type import gpuarray_shared_constructor
from theano.sandbox.gpuarray.tests.test_basic_ops import mode_with_gpu
class G_subtensor(T_subtensor):
    """Parametrize the shared Subtensor tests for the gpuarray backend."""

    # Configuration read by the inherited T_subtensor test methods.
    shared = staticmethod(gpuarray_shared_constructor)
    sub = GpuSubtensor
    ops = (GpuSubtensor,)
    mode = mode_with_gpu
    # avoid errors on gpus which do not support float64
    dtype = 'float32'
    ignore_topo = (HostFromGpu, GpuFromHost)
    fast_compile = False

    def shortDescription(self):
        # Suppress docstring-based test descriptions.
        return None

    def __init__(self, name):
        super(G_subtensor, self).__init__(name)
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论