C code for subtensor and inc_subtensor

0c5dc59f · James Bergstra · 7e7998ec · 0c5dc59f · 0c5dc59f · 0c5dc59f
--- a/theano/tensor/basic.py
+++ b/theano/tensor/basic.py
@@ -2970,15 +2970,22 @@ class Subtensor(Op):
            raise exception

        #infer the broadcasting pattern
-        padded = idx_list + [slice(0,sys.maxint,1)] * (x.type.ndim - len(idx_list))
-        broadcastable = [bc for p, bc in zip(padded, x.type.broadcastable) if isinstance(p, slice)]
+        padded = (idx_list
+                + [slice(0,sys.maxint,1)] * (x.type.ndim - len(idx_list)))
+        broadcastable = [bc for p, bc in zip(padded, x.type.broadcastable)
+                if isinstance(p, slice)]

-        input_types = Subtensor.collapse(idx_list, lambda entry: isinstance(entry, gof.Type))
+        input_types = Subtensor.collapse(idx_list,
+                lambda entry: isinstance(entry, gof.Type))
        if len(inputs) != len(input_types):
-            raise IndexError("Not enough inputs to fill in the Subtensor template.", inputs, idx_list)
+            raise IndexError(
+                    "Not enough inputs to fill in the Subtensor template.",
+                    inputs, idx_list)
        for input, expected_type in zip(inputs, input_types):
            if input.type != expected_type:
-                raise TypeError("Wrong type for the Subtensor template. Expected %s, got %s." % (input.type, expected_type))
+                raise TypeError(
+                    "Wrong type for Subtensor template. Expected %s, got %s."%(
+                        input.type, expected_type))

        return gof.Apply(self,
                         (x, ) + inputs,
@@ -3077,6 +3084,242 @@ class Subtensor(Op):
                indices.append(str(entry))
        return "%s{%s}" % (self.__class__.__name__, ", ".join(indices))

+    @staticmethod
+    def helper_c_code(node, name, inputs, outputs, sub, idx_list):
+        #
+        # two arrays are created:
+        # is_slice: len == ndim, 0 means int, 1 means slice
+        # subtensor_spec: len = n_ints + 3 * n_slices
+        #
+        fail = sub['fail']
+        init_cmds = []
+        is_slice = []
+        inplace = 1
+        NONE_CODE = sys.maxint - 1
+
+        pos = [0,1] #annoying version of global variable for init_entry
+        def inc_spec_pos(amt): pos[0] += amt
+        def inc_input_pos(amt): pos[1] += amt
+        def spec_pos(): return pos[0]
+        def input_pos(): return pos[1]
+        def init_entry(entry, depth=0):
+            if isinstance(entry, int):
+                init_cmds.append(
+                        "subtensor_spec[%i] = %i;" %(spec_pos(),
+                            entry))
+                inc_spec_pos(1)
+                if depth==0:
+                    is_slice.append(0)
+            elif isinstance(entry, Type):
+                init_cmds.append(
+                        "subtensor_spec[%i] = %s;" %(spec_pos(),
+                            inputs[input_pos()]))
+                inc_spec_pos(1)
+                inc_input_pos(1)
+                if depth==0:
+                    is_slice.append(0)
+            elif entry is None:
+                init_cmds.append(
+                        "subtensor_spec[%i] = %i;" %(spec_pos(),
+                            NONE_CODE))
+                inc_spec_pos(1)
+                if depth==0:
+                    is_slice.append(0)
+            elif depth==0 and isinstance(entry, slice):
+                init_entry(entry.start, depth+1)
+                init_entry(entry.stop, depth+1)
+                init_entry(entry.step, depth+1)
+                is_slice.append(1)
+            else:
+                assert 0, entry
+
+        for entry in idx_list:
+            init_entry(entry)
+        #make sure we used all inputs
+        assert input_pos() == len(inputs), input_pos()
+        assert len(is_slice) <= node.inputs[0].ndim, node.inputs[0].ndim
+
+        len_is_slice = len(is_slice)
+        view_ndim = node.inputs[0].ndim - (numpy.asarray(is_slice)==0).sum()
+
+        len_subtensor_spec = spec_pos()
+
+        is_slice_init = ",".join([str(s) for s in is_slice])
+        subtensor_init = "\n".join(init_cmds)
+
+        x, = inputs[:1]
+        z, = outputs
+
+        rval = """
+        // The subtensor is created by iterating over the dimensions
+        // and updating stride, shape, and data pointers
+
+        int is_slice[] = {%(is_slice_init)s};
+        npy_intp subtensor_spec[%(len_subtensor_spec)s];
+        %(subtensor_init)s;
+        int spec_pos = 0; //position in subtensor_spec
+        int inner_ii = 0; // the current dimension of zview
+        int outer_ii = 0; // current dimension of z
+
+        //TODO: give this Op a second output so that this view can be cached
+        //TODO: alternatively, fix the memory leak on failure
+        Py_INCREF(%(x)s->descr);
+        PyArrayObject * xview = (PyArrayObject*)PyArray_NewFromDescr(
+                &PyArray_Type,
+                %(x)s->descr,
+                %(view_ndim)s,
+                %(x)s->dimensions,
+                %(x)s->strides,
+                %(x)s->data,
+                %(x)s->flags,
+                NULL);
+        if (!xview)
+        {
+            %(fail)s;
+        }
+        assert (xview->dimensions != %(x)s->dimensions);
+        assert (xview->strides != %(x)s->strides);
+
+        for (; outer_ii < %(len_is_slice)s; ++outer_ii)
+        {
+            if (is_slice[outer_ii])
+            {
+                npy_intp length = %(x)s->dimensions[outer_ii];
+                npy_intp slicelength;
+                npy_intp start = subtensor_spec[spec_pos+0];
+                npy_intp stop  = subtensor_spec[spec_pos+1];
+                npy_intp step  = subtensor_spec[spec_pos+2];
+                if (step == %(NONE_CODE)s) step = 1;
+
+                npy_intp defstart = step < 0 ? length-1 : 0;
+                npy_intp defstop = step < 0 ? -1 : length;
+
+                // logic adapted from
+                // PySlice_GetIndicesEx in python source
+                if (!step)
+                {
+                    Py_DECREF(xview);
+                    PyErr_Format(PyExc_ValueError, "slice step cannot be zero");
+                    %(fail)s;
+                }
+
+                if (start == %(NONE_CODE)s)
+                {
+                    start = defstart;
+                }
+                else
+                {
+                    if (start < 0) start += length;
+                    if (start < 0) start = (step < 0) ? -1 : 0;
+                    if (start >= length)
+                        start = (step < 0) ? length - 1 : length;
+                }
+
+                if (stop == %(NONE_CODE)s)
+                {
+                    stop = defstop;
+                }
+                else
+                {
+                    if (stop < 0) stop += length;
+                    if (stop < 0) stop = (step < 0) ? -1 : 0;
+                    if (stop >= length)
+                        stop = (step < 0) ? length - 1 : length;
+                }
+
+                if ((step < 0 && stop >= start)
+                    || (step > 0 && start >= stop)) {
+                    slicelength = 0;
+                }
+                else if (step < 0) {
+                    slicelength = (stop-start+1)/step+1;
+                }
+                else {
+                    slicelength = (stop-start-1)/step+1;
+                }
+
+                if (0){
+                    fprintf(stdout, "start %%zi\\n", start);
+                    fprintf(stdout, "stop %%zi\\n", stop);
+                    fprintf(stdout, "step %%zi\\n", step);
+                    fprintf(stdout, "length %%zi\\n", length);
+                    fprintf(stdout, "slicelength %%zi\\n", slicelength);
+                }
+
+                assert (slicelength <= length);
+                xview->data += %(x)s->strides[outer_ii] * start;
+                xview->dimensions[inner_ii] = slicelength;
+                xview->strides[inner_ii] = %(x)s->strides[outer_ii] * step;
+
+                inner_ii += 1;
+                spec_pos += 3;
+            }
+            else // tuple coord `outer_ii` is an int
+            {
+                int idx = subtensor_spec[spec_pos];
+                if (idx < 0) idx += %(x)s->dimensions[outer_ii];
+                if (idx >= 0)
+                {
+                    if (idx < %(x)s->dimensions[outer_ii])
+                    {
+                        xview->data += %(x)s->strides[outer_ii] * idx;
+                    }
+                    else
+                    {
+                        PyErr_Format(PyExc_IndexError,"index out of bounds");
+                        %(fail)s;
+                    }
+                }
+                else
+                {
+                    PyErr_Format(PyExc_IndexError,"index out of bounds");
+                    %(fail)s;
+                }
+
+                spec_pos += 1;
+            }
+        }
+        assert (inner_ii <= xview->nd);
+        while (inner_ii < xview->nd)
+        {
+            assert (outer_ii < %(x)s->nd);
+            xview->dimensions[inner_ii] = %(x)s->dimensions[outer_ii];
+            xview->strides[inner_ii] = %(x)s->strides[outer_ii];
+            inner_ii += 1;
+            outer_ii += 1;
+        }
+        PyArray_UpdateFlags(xview, NPY_C_CONTIGUOUS|NPY_F_CONTIGUOUS);
+        """% locals()
+        #print rval
+        return rval
+
+    @staticmethod
+    def helper_c_code_cache_version():
+        return (2,)
+
+    def c_code(self, node, name, inputs, outputs, sub): #DEBUG
+        part0 = self.helper_c_code(node, name, inputs, outputs, sub,
+                self.idx_list)
+
+        x = inputs[0]
+        z, = outputs
+        part1 = """
+        if (%(z)s) Py_DECREF(%(z)s);
+        Py_INCREF(py_%(x)s);
+        xview->base = py_%(x)s;
+        assert(py_%(x)s == (PyObject*)%(x)s);
+        %(z)s = xview;
+        """ %locals()
+
+        return part0 + part1
+
+
+    def c_code_cache_version(self):
+        hv = self.helper_c_code_cache_version()
+        if hv:
+            return (1, hv)
+        else:
+            return ()


 class SubtensorPrinter:
@@ -3134,7 +3377,8 @@ def set_subtensor(x, y, inplace=False,
    :param y: symbolic variable for the rvalue of = operation
    :param tolerate_inplace_aliasing: see inc_subtensor for documentation.
    """
-    return inc_subtensor(x, y, inplace, set_instead_of_inc=True)
+    return inc_subtensor(x, y, inplace, set_instead_of_inc=True,
+            tolerate_inplace_aliasing=tolerate_inplace_aliasing)

 def inc_subtensor(x, y, inplace=False, set_instead_of_inc=False,
        tolerate_inplace_aliasing=False):
@@ -3227,7 +3471,7 @@ class IncSubtensor(Op):
        else:
            msg += 'Set'
        return  "%s%s{%s}" % (msg,
-                self.__class__.__name__, ", ".join(indices))
+                self.__class__.__name__[3:], ", ".join(indices))

    def make_node(self, x, y, *inputs):
        x, y = map(as_tensor_variable, [x, y])
@@ -3243,18 +3487,22 @@ class IncSubtensor(Op):
            raise exception

        #infer the broadcasting pattern
-        padded = idx_list + [slice(0,sys.maxint,1)] * (x.type.ndim - len(idx_list))
-        broadcastable = [bc for p, bc in zip(padded, x.type.broadcastable) if isinstance(p, slice)]
-
-        #if y.type.broadcastable != tuple(broadcastable):
-        #    raise TypeError("Invalid broadcastable pattern for y in IncSubtensor.make_node")
+        padded = (idx_list
+                + [slice(0,sys.maxint,1)] * (x.type.ndim - len(idx_list)))
+        broadcastable = [bc for p, bc in zip(padded, x.type.broadcastable)
+                if isinstance(p, slice)]

-        input_types = Subtensor.collapse(idx_list, lambda entry: isinstance(entry, gof.Type))
+        input_types = Subtensor.collapse( idx_list,
+                lambda entry: isinstance(entry, gof.Type))
        if len(inputs) != len(input_types):
-            raise IndexError("Not enough inputs to fill in the Subtensor template.", inputs, idx_list)
+            raise IndexError(
+                    "Not enough inputs to fill in the Subtensor template.",
+                    inputs, idx_list)
        for input, expected_type in zip(inputs, input_types):
            if input.type != expected_type:
-                raise TypeError("Wrong type for the Subtensor template. Expected %s, got %s." % (input.type, expected_type))
+                raise TypeError(
+                    "Wrong type for Subtensor template. Expected %s, got %s."%(
+                        input.type, expected_type))

        return gof.Apply(self,
                         (x, y) + inputs,
@@ -3296,6 +3544,85 @@ class IncSubtensor(Op):
                x.__setitem__(cdata, y)
        out[0] = x

+    def c_code(self, node, name, inputs, outputs, sub): #DEBUG
+
+        if self.inplace: # convert bool to int
+            inplace = 1
+        else:
+            inplace = 0
+        x = inputs[0]
+        y = inputs[1]
+        z, = outputs
+        if self.set_instead_of_inc: # convert bool to int
+            op_is_set = 1
+        else:
+            op_is_set = 0
+        fail = sub['fail']
+
+        copy_input_if_necessary = """
+        if (%(inplace)s)
+        {
+            if (%(x)s != %(z)s)
+            {
+                if (%(z)s) Py_DECREF(%(z)s);
+                Py_INCREF(%(x)s);
+                %(z)s = %(x)s;
+            }
+        }
+        else
+        {
+            if (%(z)s) Py_DECREF(%(z)s);
+            %(z)s = (PyArrayObject*)PyArray_FromAny(py_%(x)s, NULL, 0, 0, NPY_ENSURECOPY, NULL);
+        }
+        """ % locals()
+
+        # make xview actually a view of %(z)s
+        get_xview = Subtensor.helper_c_code(node, name,
+                outputs[:1]+inputs[2:],
+                outputs, sub, self.idx_list)
+
+
+        make_modification = """
+        if (%(op_is_set)s)
+        {
+            if (PyArray_CopyInto(xview, %(y)s)) // does broadcasting
+            {
+                Py_DECREF(xview);
+                %(fail)s;
+            }
+        }
+        else
+        {
+            PyArrayObject * add_rval = (PyArrayObject*)PyNumber_InPlaceAdd(
+                    (PyObject*)xview, py_%(y)s);
+            if (add_rval)
+            {
+                assert (PyArray_Check((PyObject*)add_rval));
+                assert (add_rval->data == xview->data);
+                Py_DECREF(add_rval);
+            }
+            else
+            {
+                Py_DECREF(xview);
+                %(fail)s;
+            }
+        }
+        """ %locals()
+
+        return (copy_input_if_necessary
+                + get_xview
+                + make_modification
+                + "Py_DECREF(xview);"
+                )
+
+    def c_code_cache_version(self):
+        hv = Subtensor.helper_c_code_cache_version()
+        if hv:
+            return (1, hv)
+        else:
+            return ()
+
+
    def infer_shape(self, node, shapes):
        return [shapes[0]]


--- a/theano/tensor/tests/test_basic.py
+++ b/theano/tensor/tests/test_basic.py
@@ -2057,7 +2057,7 @@ class T_subtensor(unittest.TestCase):
            for stop in [None] + [-8,-5,-1,0,1,5,8]:
                for step in [None]+[-3,-1,2]:
                    outs += [ data[start:stop:step].shape ]
-                    shapes += [data.get_value()[start:stop:step].shape ]
+                    shapes += [data.get_value(borrow=True)[start:stop:step].shape ]
            f = function([], outs, mode = mode_opt)
            t_shapes = f()
            for t_shape, shape in zip(t_shapes,shapes):
@@ -2065,7 +2065,6 @@ class T_subtensor(unittest.TestCase):
            assert theano.tensor.Subtensor not in [ x.op for x in
                                           f.maker.env.toposort() ]

-
    def test_shape_i_scalar(self):
        # Each axis is treated independently by shape_i/shape operators


--- a/theano/tensor/tests/test_inc_subtensor.py
+++ b/theano/tensor/tests/test_inc_subtensor.py
@@ -20,7 +20,7 @@ class Test_inc_subtensor(unittest.TestCase):
    def setUp(self):
        utt.seed_rng()

-    def test_simple_ok(self):
+    def test_simple_2d(self):
        """Increments or sets part of a tensor by a scalar using full slice and
        a partial slice depending on a scalar.
        """
@@ -52,8 +52,41 @@ class Test_inc_subtensor(unittest.TestCase):
                expected_result[:,:val_sl2_end] += val_inc

            self.assertTrue(numpy.array_equal(result, expected_result))
-        return

+    def test_simple_3d(self):
+        """Increments or sets part of a tensor by a scalar using full slice and
+        a partial slice depending on a scalar.
+        """
+        a = T.dtensor3()
+        increment = T.dscalar()
+        sl1 = slice(None)
+        sl2_end = T.lscalar()
+        sl2 = slice(sl2_end)
+        sl3 = 2
+
+        for do_set in [True,False]:
+            print "Set", do_set
+
+            if do_set:
+                resut = T.set_subtensor(a[sl1, sl3, sl2], increment)
+            else:
+                resut = T.inc_subtensor(a[sl1, sl3, sl2], increment)
+
+            f = theano.function([a, increment, sl2_end], resut)
+
+            val_a = numpy.ones((5,3,4))
+            val_inc = 2.3
+            val_sl2_end = 2
+
+            expected_result = numpy.copy(val_a)
+            result = f(val_a, val_inc, val_sl2_end)
+
+            if do_set:
+                expected_result[:,sl3,:val_sl2_end] = val_inc
+            else:
+                expected_result[:,sl3,:val_sl2_end] += val_inc
+
+            self.assertTrue(numpy.array_equal(result, expected_result))
    def test_grad(self):