提交 b8f31218 authored 作者: notoraptor's avatar notoraptor

Wrap op params for theano.gpuarray.elemwise.GpuDimShuffle.

上级 f98e8f99
#section support_code
#section support_code_apply
/* Shuffle, augment and/or drop dimensions of `input`, storing the result in `*out`.
 *
 * Implements GpuDimShuffle via op params:
 *   - params->input_broadcastable: vector whose length is the expected input nd.
 *   - params->_new_order: int64 vector; output dim i takes the transposed input
 *     dim when _new_order[i] != -1, and becomes a new size-1 dim when it is -1.
 *   - params->transposition: int64 vector (shuffle + drop), the axis permutation
 *     applied to the input before reshaping.
 *   - params->inplace: when false, a fresh copy is returned so the output does
 *     not alias the input.
 *
 * Returns 0 on success, 1 on error with a Python exception set.
 */
int gpu_dimshuffle(PyGpuArrayObject* input, PyGpuArrayObject** out, PARAMS_TYPE* params) {
    PyGpuArrayObject* tmp = NULL;
    npy_intp nd_in = PyArray_SIZE(params->input_broadcastable);
    npy_intp nd_out = PyArray_SIZE(params->_new_order);
    npy_int64* new_order = NULL;
    npy_int64* transposition_data = NULL;
    unsigned int* transposition = NULL;
    size_t* sh = NULL;
    int e;

    if (input->ga.nd != nd_in) {
        PyErr_SetString(PyExc_TypeError, "input nd");
        return 1;
    }
    /* The raw-pointer loops below assume C-contiguous param arrays. */
    if (!PyArray_IS_C_CONTIGUOUS(params->_new_order)) {
        /* Message fixed to name the GPU op, consistent with the other errors here. */
        PyErr_SetString(PyExc_RuntimeError, "GpuDimShuffle: param _new_order must be C-contiguous.");
        return 1;
    }
    if (!PyArray_IS_C_CONTIGUOUS(params->transposition)) {
        PyErr_SetString(PyExc_RuntimeError, "GpuDimShuffle: param transposition must be C-contiguous.");
        return 1;
    }

    /* Release the previous output, and also reset the pointer: if we fail
     * below, *out must not be left dangling on the decref'd object (the
     * caller's fail path may decref it again). */
    Py_XDECREF(*out);
    *out = NULL;

    /** Do shuffle. **/
    new_order = (npy_int64*) PyArray_DATA(params->_new_order);
    transposition = (unsigned int*) malloc(nd_in * sizeof(unsigned int));
    sh = (size_t*) malloc(nd_out * sizeof(size_t));
    if (transposition == NULL || sh == NULL) {
        PyErr_NoMemory();
        free(transposition);
        free(sh);
        return 1;
    }
    /* pygpu_transpose() takes unsigned int axes; the params store int64. */
    transposition_data = (npy_int64*) PyArray_DATA(params->transposition);
    for (npy_intp i = 0; i < nd_in; ++i) {
        transposition[i] = (unsigned int) transposition_data[i];
    }
    tmp = pygpu_transpose(input, transposition);
    free(transposition);  /* consumed; free on every following path */
    if (!tmp) {
        PyErr_SetString(PyExc_RuntimeError, "GpuDimShuffle: unable to transpose input.");
        free(sh);
        return 1;
    }
    /* Output shape: 1 for augmented (broadcastable) dims, otherwise the next
     * dimension of the transposed array. */
    e = 0;
    for (npy_intp i = 0; i < nd_out; ++i) {
        if (new_order[i] == -1) {
            sh[i] = 1;
        } else {
            sh[i] = tmp->ga.dimensions[e];
            ++e;
        }
    }
    *out = pygpu_reshape(tmp, nd_out, sh, GA_ANY_ORDER, 1, -1);
    Py_DECREF(tmp);
    free(sh);
    /** End shuffle. **/
    if (*out == NULL) {
        PyErr_SetString(PyExc_RuntimeError, "GpuDimShuffle: unable to reshape output.");
        return 1;
    }
    if (!params->inplace) {
        /* Not inplace: return a fresh copy so the output does not share
         * storage with the input. */
        tmp = pygpu_copy(*out, GA_ANY_ORDER);
        Py_DECREF(*out);
        if (!tmp) {
            PyErr_SetString(PyExc_RuntimeError, "GpuDimShuffle: unable to copy output.");
            *out = NULL;
            return 1;
        }
        *out = tmp;
    }
    return 0;
}
......@@ -407,12 +407,13 @@ class SupportCodeError(Exception):
"""
class GpuDimShuffle(HideC, DimShuffle):
class GpuDimShuffle(DimShuffle):
"""
DimShuffle on the GPU.
"""
_f16_ok = True
c_func_name = 'gpu_dimshuffle'
def make_node(self, input):
ctx_name = infer_context_name(input)
......@@ -448,66 +449,6 @@ class GpuDimShuffle(HideC, DimShuffle):
storage[0] = res
def c_support_code_apply(self, node, name):
    """Return per-apply C support code: a static helper ``%(name)s_f``.

    The helper transposes its argument with an axis permutation
    (``shuffle + drop``) baked in at code-generation time, then reshapes
    the result to the dimshuffled output shape.
    NOTE(review): leading whitespace inside the C template may have been
    lost in transcription — confirm against the repository file.
    """
    def copy_shape(nd_out):
        # Build one C assignment per output dim: augmented ('x') dims
        # get size 1, the others take the next transposed dimension.
        stmts = []
        e = 0
        for d in range(nd_out):
            if d in self.augment:
                stmts.append("sh[%s] = 1;" % (d,))
            else:
                stmts.append("sh[%s] = tmp->ga.dimensions[%s];" % (d, e))
                e += 1
        return '\n '.join(stmts)
    # %(shuffle)s expands to the comma-separated axis permutation that
    # becomes the static C array %(name)s_ax.
    return """
static const unsigned int %(name)s_ax[] = {%(shuffle)s};
static PyGpuArrayObject *%(name)s_f(PyGpuArrayObject *a) {
PyGpuArrayObject *res, *tmp;
size_t sh[%(nd_out)s];
tmp = pygpu_transpose(a, %(name)s_ax);
if (!tmp) return NULL;
%(copy_shape)s
res = pygpu_reshape(tmp, %(nd_out)s, sh, GA_ANY_ORDER, 1, -1);
Py_DECREF(tmp);
return res;
}
""" % dict(shuffle=', '.join(str(a) for a in (self.shuffle + self.drop)),
           name=name, nd_out=len(self.new_order),
           copy_shape=copy_shape(len(self.new_order)))
def c_code(self, node, name, inputs, outputs, sub):
    """Return the apply-time C code.

    Validates the input's nd, calls the ``%(name)s_f`` helper emitted by
    :meth:`c_support_code_apply`, and, when the op is not inplace,
    appends a copy so the output does not alias the input.
    NOTE(review): leading whitespace inside the C templates may have
    been lost in transcription — confirm against the repository file.
    """
    d = dict(name=name, fail=sub['fail'], inp=inputs[0], out=outputs[0],
             nd=len(self.input_broadcastable))
    process = """
PyGpuArrayObject *tmp = NULL;
if (%(inp)s->ga.nd != %(nd)s) {
PyErr_SetString(PyExc_TypeError, "input nd");
%(fail)s
}
Py_XDECREF(%(out)s);
%(out)s = %(name)s_f(%(inp)s);
if (%(out)s == NULL) {%(fail)s}
""" % d
    if not self.inplace:
        # Append C code that replaces the output with a fresh copy.
        process += """
tmp = pygpu_copy(%(out)s, GA_ANY_ORDER);
Py_DECREF(%(out)s);
if (!tmp) {
%(out)s = NULL;
%(fail)s
}
%(out)s = tmp;
""" % d
    return process
def c_code_cache_version(self):
    """Return the cache-version tuple for the generated C code."""
    cache_version = (5,)
    return cache_version
class GpuCAReduceCuda(GpuKernelBase, HideC, CAReduceDtype):
"""
......@@ -563,7 +504,7 @@ class GpuCAReduceCuda(GpuKernelBase, HideC, CAReduceDtype):
reduce_mask = tuple(reduce_mask)
self.reduce_mask = reduce_mask
# used to make sure that calls to scalar op
# used to make sure that calls to scalar op
# have unique name arguments
self._n_scalar_op_calls = 0
CAReduceDtype.__init__(self, scalar_op, axis=axis,
......
......@@ -139,6 +139,7 @@ class DimShuffle(COp):
# because of importation issues related to TensorType.
return ParamsType(input_broadcastable=TensorType(dtype='bool', broadcastable=(False,)),
_new_order=theano.tensor.lvector,
transposition=theano.tensor.lvector,
inplace=theano.scalar.bool)
@property
......@@ -148,6 +149,10 @@ class DimShuffle(COp):
# We replace it with -1.
return [(-1 if x == 'x' else x) for x in self.new_order]
@property
def transposition(self):
    """Axis permutation handed to the C code: kept axes, then dropped axes."""
    # NOTE(review): assumes self.shuffle and self.drop are sequences of the
    # same concatenable type — confirm in __init__.
    kept_then_dropped = self.shuffle + self.drop
    return kept_then_dropped
def __init__(self, input_broadcastable, new_order, inplace=True):
COp.__init__(self, [self.c_func_file], self.c_func_name)
self.input_broadcastable = tuple(input_broadcastable)
......@@ -206,8 +211,6 @@ class DimShuffle(COp):
if not hasattr(self, 'func_files'):
# Perhaps we are loading an old `Op` version of DimShuffle.
# Let's just build the COp.
self.c_func_file = 'c_code/dimshuffle.c'
self.c_func_name = 'cpu_dimshuffle'
COp.__init__(self, [self.c_func_file], self.c_func_name)
def make_node(self, _input):
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论