Commit 173f72bf authored by Pascal Lamblin, committed by GitHub

Merge pull request #6174 from notoraptor/op-params-dimshuffle

Wrap op params for [Gpu]DimShuffle.
#section support_code_apply
/* Perform a GpuDimShuffle of `input` into `*out`.
 *
 * params->transposition gives the axis permutation applied first via
 * pygpu_transpose; params->_new_order then drives a reshape in which each
 * -1 entry (an 'x' in the Python-level new_order) inserts a length-1
 * broadcastable dimension.  When params->inplace is false the result is a
 * fresh copy instead of a view on the input.
 *
 * Returns 0 on success; returns 1 and sets a Python exception on error.
 */
int gpu_dimshuffle(PyGpuArrayObject* input, PyGpuArrayObject** out, PARAMS_TYPE* params) {
PyGpuArrayObject *tmp = NULL;
/* Expected input rank / produced output rank, taken from the sizes of the
 * param vectors. */
npy_intp nd_in = PyArray_SIZE(params->input_broadcastable);
npy_intp nd_out = PyArray_SIZE(params->_new_order);
npy_int64* new_order = NULL;
unsigned int* transposition = NULL;
size_t* sh = NULL;
int e;
if (input->ga.nd != nd_in) {
PyErr_SetString(PyExc_TypeError, "input nd");
return 1;
}
/* The param vectors are read through raw data pointers below, so they
 * must be C-contiguous. */
if (!PyArray_IS_C_CONTIGUOUS(params->_new_order)) {
PyErr_SetString(PyExc_RuntimeError, "DimShuffle: param _new_order must be C-contiguous.");
return 1;
}
if (!PyArray_IS_C_CONTIGUOUS(params->transposition)) {
PyErr_SetString(PyExc_RuntimeError, "GpuDimShuffle: param transposition must be C-contiguous.");
return 1;
}
/* Drop any previous output; *out is rebuilt below. */
Py_XDECREF(*out);
/** Do shuffle. **/
new_order = (npy_int64*) PyArray_DATA(params->_new_order);
/* Type of params->transposition (npy_uint32) should be an alias of unsigned int
 * on platforms supported by Theano. */
transposition = (unsigned int*) PyArray_DATA(params->transposition);
sh = (size_t*) malloc(nd_out * sizeof(size_t));
if (sh == NULL) {
PyErr_NoMemory();
return 1;
}
tmp = pygpu_transpose(input, transposition);
if (!tmp) {
free(sh);
return 1;
}
/* Build the output shape: 1 for inserted broadcastable dims (-1 in
 * new_order), otherwise consume the transposed dims in order. */
e = 0;
for (npy_intp i = 0; i < nd_out; ++i) {
if (new_order[i] == -1) {
sh[i] = 1;
} else {
sh[i] = tmp->ga.dimensions[e];
++e;
}
}
*out = pygpu_reshape(tmp, nd_out, sh, GA_ANY_ORDER, 1, -1);
Py_DECREF(tmp);
free(sh);
if (*out == NULL) {
return 1;
}
/** End shuffle. **/
if (!params->inplace) {
/* Not inplace: return a copy so the output does not alias the input. */
tmp = pygpu_copy(*out, GA_ANY_ORDER);
Py_DECREF(*out);
if (!tmp) {
*out = NULL;
return 1;
}
*out = tmp;
}
return 0;
}
......@@ -407,12 +407,13 @@ class SupportCodeError(Exception):
"""
class GpuDimShuffle(HideC, DimShuffle):
class GpuDimShuffle(DimShuffle):
"""
DimShuffle on the GPU.
"""
_f16_ok = True
c_func_name = 'gpu_dimshuffle'
def make_node(self, input):
ctx_name = infer_context_name(input)
......@@ -430,7 +431,7 @@ class GpuDimShuffle(HideC, DimShuffle):
s = "GpuDimShuffle{%s}"
return s % (','.join(str(x) for x in self.new_order))
def perform(self, node, inp, out):
def perform(self, node, inp, out, params):
input, = inp
storage, = out
......@@ -448,66 +449,6 @@ class GpuDimShuffle(HideC, DimShuffle):
storage[0] = res
def c_support_code_apply(self, node, name):
# Emit a per-apply C helper `<name>_f` that performs the dimshuffle on a
# PyGpuArrayObject: transpose with a static axis array, then reshape to
# insert the broadcastable ('x') dimensions.
def copy_shape(nd_out):
# Build C statements filling sh[0..nd_out-1]: 1 for augmented
# (inserted 'x') dimensions, otherwise the next dimension of the
# transposed temporary, consumed in order.
stmts = []
e = 0
for d in range(nd_out):
if d in self.augment:
stmts.append("sh[%s] = 1;" % (d,))
else:
stmts.append("sh[%s] = tmp->ga.dimensions[%s];" % (d, e))
e += 1
return '\n '.join(stmts)
# NOTE(review): the static axis array is self.shuffle + self.drop, i.e.
# the full transposition applied before the reshape.
return """
static const unsigned int %(name)s_ax[] = {%(shuffle)s};
static PyGpuArrayObject *%(name)s_f(PyGpuArrayObject *a) {
PyGpuArrayObject *res, *tmp;
size_t sh[%(nd_out)s];
tmp = pygpu_transpose(a, %(name)s_ax);
if (!tmp) return NULL;
%(copy_shape)s
res = pygpu_reshape(tmp, %(nd_out)s, sh, GA_ANY_ORDER, 1, -1);
Py_DECREF(tmp);
return res;
}
""" % dict(shuffle=', '.join(str(a) for a in (self.shuffle + self.drop)),
name=name, nd_out=len(self.new_order),
copy_shape=copy_shape(len(self.new_order)))
def c_code(self, node, name, inputs, outputs, sub):
# Generate the apply C code: check the input rank, then call the
# per-apply helper emitted by c_support_code_apply; when the op is not
# inplace, replace the resulting view with a fresh copy.
d = dict(name=name, fail=sub['fail'], inp=inputs[0], out=outputs[0],
nd=len(self.input_broadcastable))
process = """
PyGpuArrayObject *tmp = NULL;
if (%(inp)s->ga.nd != %(nd)s) {
PyErr_SetString(PyExc_TypeError, "input nd");
%(fail)s
}
Py_XDECREF(%(out)s);
%(out)s = %(name)s_f(%(inp)s);
if (%(out)s == NULL) {%(fail)s}
""" % d
if not self.inplace:
# Non-inplace: the output must not alias the input buffer.
process += """
tmp = pygpu_copy(%(out)s, GA_ANY_ORDER);
Py_DECREF(%(out)s);
if (!tmp) {
%(out)s = NULL;
%(fail)s
}
%(out)s = tmp;
""" % d
return process
def c_code_cache_version(self):
# Version tag for the generated C code; bump whenever c_code or
# c_support_code_apply changes so cached compiled modules are rebuilt.
return (5,)
class GpuCAReduceCuda(GpuKernelBase, HideC, CAReduceDtype):
"""
......
#section support_code_apply
/* DimShuffle on the CPU: build a view of `input` (or of a fresh aligned copy
 * when not inplace) whose dimensions and strides realize the axis permutation,
 * the dropped broadcastable axes and the inserted broadcastable axes described
 * by the op params (`params->_new_order`, where -1 marks an inserted 'x' axis).
 *
 * Returns 0 on success.  Returns 1 with a Python exception set on error; in
 * that case no references are leaked.
 */
int cpu_dimshuffle(PyArrayObject* input, PyArrayObject** res, PARAMS_TYPE* params) {
    npy_int64* new_order;
    npy_intp nd_in;
    npy_intp nd_out;
    PyArrayObject* basename;
    npy_intp* dimensions;
    npy_intp* strides;

    /* The param vectors are read through raw data pointers, so they must be
     * C-contiguous. */
    if (!PyArray_IS_C_CONTIGUOUS(params->input_broadcastable)) {
        PyErr_SetString(PyExc_RuntimeError, "DimShuffle: param input_broadcastable must be C-contiguous.");
        return 1;
    }
    if (!PyArray_IS_C_CONTIGUOUS(params->_new_order)) {
        PyErr_SetString(PyExc_RuntimeError, "DimShuffle: param _new_order must be C-contiguous.");
        return 1;
    }
    new_order = (npy_int64*) PyArray_DATA(params->_new_order);
    nd_in = PyArray_SIZE(params->input_broadcastable);
    nd_out = PyArray_SIZE(params->_new_order);

    /* check_input_nd */
    if (PyArray_NDIM(input) != nd_in) {
        PyErr_SetString(PyExc_NotImplementedError, "input nd");
        return 1;
    }

    /* clear_output (Py_XDECREF already handles NULL) */
    Py_XDECREF(*res);
    *res = NULL;

    /* get_base: view the input directly when inplace, otherwise work on an
     * aligned copy so the caller's buffer is never aliased. */
    if (params->inplace) {
        basename = input;
        Py_INCREF((PyObject*)basename);
    } else {
        basename =
            (PyArrayObject*)PyArray_FromAny((PyObject*)input,
                NULL, 0, 0, NPY_ARRAY_ALIGNED|NPY_ARRAY_ENSURECOPY, NULL);
        /* BUGFIX: PyArray_FromAny can return NULL (e.g. out of memory); the
         * original code dereferenced basename without checking. */
        if (basename == NULL) {
            return 1;
        }
    }

    /* shape_statements and strides_statements */
    dimensions = (npy_intp*) malloc(nd_out * sizeof(npy_intp));
    strides = (npy_intp*) malloc(nd_out * sizeof(npy_intp));
    if (dimensions == NULL || strides == NULL) {
        PyErr_NoMemory();
        free(dimensions);
        free(strides);
        Py_DECREF(basename);  /* BUGFIX: original leaked this reference. */
        return 1;
    }
    for (npy_intp i = 0; i < nd_out; ++i) {
        if (new_order[i] != -1) {
            /* Kept input axis: take its length; stride 0 when the length is 1
             * so the axis can broadcast. */
            dimensions[i] = PyArray_DIMS(basename)[new_order[i]];
            strides[i] = PyArray_DIMS(basename)[new_order[i]] == 1 ?
                         0 : PyArray_STRIDES(basename)[new_order[i]];
        } else {
            /* -1 marks an inserted broadcastable ('x') axis. */
            dimensions[i] = 1;
            strides[i] = 0;
        }
    }

    /* set the strides of the broadcasted dimensions.
     * This algorithm is from numpy: PyArray_Newshape() in
     * cvs/numpy/numpy/core/src/multiarraymodule.c */
    if (nd_out > 0) {
        if (strides[nd_out - 1] == 0)
            strides[nd_out - 1] = PyArray_DESCR(basename)->elsize;
        for (npy_intp i = nd_out - 2; i > -1; --i) {
            if (strides[i] == 0)
                strides[i] = strides[i + 1] * dimensions[i + 1];
        }
    }

    /* close_bracket: create the output as a view on basename's data.
     * Only the writable flag is borrowed from the base; NPY_OWNDATA
     * defaults to 0.  PyArray_New copies dimensions/strides, so our
     * buffers can be freed immediately afterwards. */
    *res = (PyArrayObject*)PyArray_New(&PyArray_Type, nd_out, dimensions,
                                       PyArray_TYPE(basename), strides,
                                       PyArray_DATA(basename), PyArray_ITEMSIZE(basename),
                                       (NPY_ARRAY_WRITEABLE * PyArray_ISWRITEABLE(basename)),
                                       NULL);
    free(strides);
    free(dimensions);
    if (*res == NULL) {
        Py_DECREF(basename);  /* BUGFIX: original leaked this reference. */
        return 1;
    }
    /* recalculate flags: CONTIGUOUS, FORTRAN, ALIGNED */
    PyArray_UpdateFlags(*res, NPY_ARRAY_UPDATE_ALL);
    /* We are making a view in both inplace and non-inplace cases.
     * PyArray_SetBaseObject steals our reference to basename (even on
     * failure, per the NumPy C-API docs). */
    if (PyArray_SetBaseObject(*res, (PyObject*)basename) < 0) {
        Py_DECREF(*res);
        *res = NULL;
        return 1;
    }
    return 0;
}
......@@ -9,7 +9,7 @@ import theano
from theano import gof
from theano.compat import izip
from theano.configparser import change_flags
from theano.gof import Apply, Op, OpenMPOp
from theano.gof import Apply, Op, COp, OpenMPOp, ParamsType
from theano import scalar
from theano.scalar import get_scalar_type
from theano.printing import pprint
......@@ -50,7 +50,7 @@ def TensorConstant(*inputs, **kwargs):
# DimShuffle #
##################
class DimShuffle(Op):
class DimShuffle(COp):
"""
Allows to reorder the dimensions of a tensor or insert or remove
broadcastable dimensions.
......@@ -130,12 +130,33 @@ class DimShuffle(Op):
_f16_ok = True
check_input = False
__props__ = ("input_broadcastable", "new_order", "inplace")
c_func_file = 'c_code/dimshuffle.c'
c_func_name = 'cpu_dimshuffle'
@property
def params_type(self):
# We can't directly create `params_type` as class attribute
# because of importation issues related to TensorType.
# Wraps the op attributes consumed by the C code (cpu_dimshuffle in
# c_code/dimshuffle.c): input_broadcastable, _new_order, transposition
# and inplace.
return ParamsType(input_broadcastable=TensorType(dtype='bool', broadcastable=(False,)),
_new_order=theano.tensor.lvector,
transposition=TensorType(dtype='uint32', broadcastable=(False,)),
inplace=theano.scalar.bool)
@property
def _new_order(self):
    """Integer form of ``new_order`` for the C code: 'x' mapped to -1."""
    # 'x' (insert a broadcastable dimension) is not a valid integer entry,
    # so the C side receives -1 in its place.
    order = []
    for axis in self.new_order:
        order.append(-1 if axis == 'x' else axis)
    return order
@property
def transposition(self):
    """Axis order handed to the C code: ``self.shuffle`` followed by ``self.drop``."""
    axes = self.shuffle + self.drop
    return axes
def __init__(self, input_broadcastable, new_order, inplace=True):
input_broadcastable = tuple(input_broadcastable)
self.input_broadcastable = input_broadcastable
new_order = tuple(new_order)
self.new_order = new_order
COp.__init__(self, [self.c_func_file], self.c_func_name)
self.input_broadcastable = tuple(input_broadcastable)
self.new_order = tuple(new_order)
if inplace is True:
self.inplace = inplace
else:
......@@ -185,6 +206,13 @@ class DimShuffle(Op):
if self.inplace:
self.view_map = {0: [0]}
def __setstate__(self, state):
# Restore pickled state, then make sure the COp machinery is initialized:
# pickles created before DimShuffle became a COp lack `func_files`.
self.__dict__.update(state)
if not hasattr(self, 'func_files'):
# Perhaps we are loading an old `Op` version of DimShuffle.
# Let's just build the COp.
COp.__init__(self, [self.c_func_file], self.c_func_name)
def make_node(self, _input):
input = as_tensor_variable(_input)
ib = tuple(input.type.broadcastable)
......@@ -222,7 +250,7 @@ class DimShuffle(Op):
else:
return "DimShuffle{%s}" % ",".join(str(x) for x in self.new_order)
def perform(self, node, inp, out):
def perform(self, node, inp, out, params):
input, = inp
storage, = out
# drop
......@@ -260,104 +288,6 @@ class DimShuffle(Op):
return [None]
return self(*eval_points, **dict(return_list=True))
def c_code(self, node, name, inp, out, sub):
# Build C code implementing the dimshuffle as a view: a new ndarray that
# shares the base's data, with dimensions/strides rearranged according to
# self.new_order ('x' entries become broadcastable, stride-0 axes).
input, = inp
res, = out
basename = input + '__view_or_copy'
def statements(lst):
# Join C statement fragments into a ';'-terminated block.
return ';\n'.join(lst) + ';'
nd_in = len(self.input_broadcastable)
nd_out = len(self.new_order)
check_input_nd = [('if (PyArray_NDIM(%(input)s) != ' + str(nd_in) + ')'
'{PyErr_SetString(PyExc_NotImplementedError, '
'"input nd"); %(fail)s;}')]
clear_output = ['if (%(res)s) {Py_XDECREF(%(res)s);}']
# get the copy / view of the input depending on whether we're doing
# things inplace or not.
if self.inplace:
get_base = ['{ PyArrayObject * %(basename)s = %(input)s',
'Py_INCREF((PyObject*)%(basename)s)']
else:
get_base = [
('{ PyArrayObject * %(basename)s = '
'(PyArrayObject*)PyArray_FromAny((PyObject*)%(input)s,'
' NULL, 0, 0, NPY_ARRAY_ALIGNED|NPY_ARRAY_ENSURECOPY,'
' NULL)')]
# Output dimensions: 1 for 'x', otherwise the selected input dim.
shape_statements = ['npy_intp dimensions[%i]' % nd_out]
for i, o in enumerate(self.new_order):
if o != 'x':
shape_statements += [('dimensions[' + str(
i) + '] = PyArray_DIMS(%(basename)s)[' + str(o) + ']')]
else:
shape_statements += [('dimensions[' + str(i) + '] = 1')]
strides_statements = ['npy_intp strides[%i]' % nd_out]
# set the strides of the non-broadcasted dimensions
for i, o in enumerate(self.new_order):
if o != 'x':
strides_statements += [('strides[' + str(i) +
'] = PyArray_DIMS(%(basename)s)[' +
str(o) +
'] == 1? 0 : '
'PyArray_STRIDES(%(basename)s)[' +
str(o) + ']')]
else:
strides_statements += [('strides[' + str(i) + '] = 0')]
# set the strides of the broadcasted dimensions
# this algorithm is from numpy: PyArray_Newshape() in
# cvs/numpy/numpy/core/src/multiarraymodule.c
if nd_out > 0:
strides_statements.append(
'if (strides[' +
str(nd_out) +
'-1] == 0) strides[' +
str(nd_out) +
'-1] = PyArray_DESCR(%(basename)s)->elsize'
)
for i in xrange(nd_out - 2, -1, -1):
strides_statements.append(
"if (strides[%(i)s] == 0) strides[%(i)s] = strides[%(i)s+1] * "
"dimensions[%(i)s+1]" % dict(i=str(i)))
close_bracket = [
# create a new array,
('%(res)s = (PyArrayObject*)PyArray_New(&PyArray_Type, '
'' + str(nd_out) + ', dimensions, '
'PyArray_TYPE(%(basename)s), strides, '
'PyArray_DATA(%(basename)s), PyArray_ITEMSIZE(%(basename)s), '
# borrow only the writable flag from the base
# the NPY_OWNDATA flag will default to 0.
'(NPY_ARRAY_WRITEABLE*PyArray_ISWRITEABLE(%(basename)s)), '
'NULL)'),
'if (%(res)s == NULL) %(fail)s;',
# recalculate flags: CONTIGUOUS, FORTRAN, ALIGNED
'PyArray_UpdateFlags(%(res)s, NPY_ARRAY_UPDATE_ALL)',
# we are making a view in both inplace and non-inplace cases
"""
PyArray_SetBaseObject(%(res)s, (PyObject*)%(basename)s);
"""
'}']
# Assemble and fill in the %()s placeholders from local names and `sub`.
full_code = statements(check_input_nd +
clear_output +
get_base +
shape_statements +
strides_statements +
close_bracket)
return full_code % dict(locals(), **sub)
def c_code_cache_version(self):
# Version tag for the generated C code; bump when c_code changes so
# previously compiled modules are not reused.
return (3,)
def grad(self, inp, grads):
x, = inp
gz, = grads
......
Markdown format
0%
You are adding 0 people to this discussion. Please proceed with caution.
Please finish editing this comment first!
Register or sign in to comment