提交 800bf55c authored 作者: Arnaud Bergeron's avatar Arnaud Bergeron

Add GpuDimShuffle (both the C and python versions).

上级 68ab8d91
......@@ -3,7 +3,7 @@ from itertools import izip
import numpy
from theano import Op, Apply, scalar
from theano.tensor.elemwise import Elemwise
from theano.tensor.elemwise import Elemwise, DimShuffle
try:
import pygpu
......@@ -173,3 +173,115 @@ class SupportCodeError(Exception):
"""
We do not support certain things (such as the C++ complex struct)
"""
class GpuDimShuffle(DimShuffle):
    """DimShuffle for GpuArray-backed variables.

    Reuses DimShuffle's constructor and pattern validation, but builds
    Apply nodes with GpuArrayType outputs and generates C code against
    the GpuArray API instead of numpy.
    """

    def make_node(self, input):
        # Let the base class validate the shuffle pattern and compute the
        # output dtype/broadcastable, then rebuild the Apply node with a
        # GPU-typed input and output.
        res = DimShuffle.make_node(self, input)
        otype = GpuArrayType(dtype=res.outputs[0].type.dtype,
                             broadcastable=res.outputs[0].type.broadcastable)
        input = as_gpuarray_variable(input)
        return Apply(self, [input], [otype()])

    def __str__(self):
        # Mirrors the naming convention of the CPU DimShuffle op.
        if self.inplace:
            s = "InplaceGpuDimShuffle{%s}"
        else:
            s = "GpuDimShuffle{%s}"
        return s % (','.join(str(x) for x in self.new_order))

    def perform(self, node, inp, out):
        """Python-side implementation: transpose then reshape.

        Moves the kept axes into shuffle order (dropped axes go last so
        the reshape below removes them), then inserts the broadcast
        ('x') axes via reshape.  Copies unless self.inplace.
        """
        input, = inp
        storage, = out
        res = input
        # Transpose over shuffle + drop: dropped (length-1) axes end up
        # at the tail and are discarded by slicing the shape below.
        res = res.transpose(self.shuffle+self.drop)
        shape = list(res.shape[:len(self.shuffle)])
        # Insert the new broadcastable dimensions at their positions.
        for augm in self.augment:
            shape.insert(augm, 1)
        res = res.reshape(shape)
        # Non-inplace semantics require a fresh array, since transpose
        # and reshape may return views of the input.
        if not self.inplace:
            res = res.copy()
        storage[0] = res

    def c_support_code_apply(self, node, name):
        """Emit a per-apply C helper <name>_f(res, a).

        The helper mirrors perform(): GpuArray_transpose over
        (shuffle + drop), then GpuArray_reshape to the output shape
        with the augmented axes set to 1.
        """
        def copy_shape(nd_out):
            # Generate C statements filling sh[]: augmented axes get 1;
            # the others copy the transposed array's dimensions in
            # order (e advances only for non-augmented axes).
            stmts = []
            e = 0
            for d in range(nd_out):
                if d in self.augment:
                    stmts.append("sh[%s] = 1;" % (d,))
                else:
                    stmts.append("sh[%s] = tmp.dimensions[%s];" % (d, e))
                    e += 1
            return '\n '.join(stmts)
        return """
        static const unsigned int %(name)s_ax[] = {%(shuffle)s};

        static int %(name)s_f(GpuArrayObject *res, GpuArrayObject *a) {
            GpuArray tmp;
            size_t sh[%(nd_out)s];
            unsigned int i;
            int err;

            err = GpuArray_transpose(&tmp, &a->ga, %(name)s_ax);
            if (err != GA_NO_ERROR) {
                PyErr_SetString(PyExc_RuntimeError, "error in _transpose call");
                return -1;
            }
            %(copy_shape)s
            err = GpuArray_reshape(&res->ga, &tmp, %(nd_out)s, sh,
                                   GA_ANY_ORDER, 1);
            if (err != GA_NO_ERROR) {
                PyErr_SetString(PyExc_RuntimeError, "error in _reshape call");
                return -1;
            }
            GpuArray_clear(&tmp);
            return 0;
        }
        """ % dict(shuffle=', '.join(str(a) for a in (self.shuffle+self.drop)),
                   name=name, nd_out=len(self.new_order),
                   copy_shape=copy_shape(len(self.new_order)))

    def c_code(self, node, name, inputs, outputs, sub):
        # Substitutions shared by both template fragments below.
        d = dict(name=name, fail=sub['fail'], inp=inputs[0], out=outputs[0],
                 nd=len(self.input_broadcastable))
        process = """
        if (%(inp)s->ga.nd != %(nd)s) {
            PyErr_SetString(PyExc_TypeError, "input nd");
            %(fail)s
        }

        Py_XDECREF(%(out)s);
        %(out)s = new_GpuArray((PyObject *)&GpuArrayType, GpuArray_default_context());
        if (%(out)s == NULL) {%(fail)s}
        if (%(name)s_f(%(out)s, %(inp)s)) {
            %(fail)s
        }
        """ % d
        if not self.inplace:
            # Non-inplace: if the helper produced a view sharing the
            # input's buffer, replace it with a copy so the output owns
            # its data.
            process += """
        if (%(out)s->ga.data == %(inp)s->ga.data) {
            PyObject *%(name)s_tmp;
            %(name)s_tmp = PyObject_CallMethod((PyObject *)%(out)s, "copy", NULL);
            if (%(name)s_tmp == NULL) { %(fail)s }
            if (!PyObject_IsInstance(%(name)s_tmp, (PyObject *)&GpuArrayType)) {
                PyErr_SetString(PyExc_TypeError, "not a GpuArray out of the copy");
                %(fail)s
            }
            Py_DECREF(%(out)s);
            %(out)s = (GpuArrayObject *)%(name)s_tmp;
        }
        """ % d
        return process

    def c_code_cache_version(self):
        # Bump when the generated C code above changes.
        return (0,)
......@@ -4,10 +4,10 @@ from theano import scalar
from theano.gof import FunctionGraph
from theano.gof.python25 import all, any
from theano.tensor.tests.test_elemwise import test_Broadcast
from theano.tensor.tests.test_elemwise import test_Broadcast, test_DimShuffle
from theano.sandbox.gpuarray.tests.test_basic_ops import rand_gpuarray
from theano.sandbox.gpuarray.elemwise import GpuElemwise
from theano.sandbox.gpuarray.elemwise import GpuElemwise, GpuDimShuffle
from theano.sandbox.gpuarray.type import GpuArrayType
from pygpu.array import gpuarray
......@@ -26,3 +26,6 @@ class test_gpu_Broadcast(test_Broadcast):
#def rand_cval(self, shp):
# return rand_gpuarray(*shp, **dict(cls=gpuarray))
# Reuse the full CPU DimShuffle test suite against the GPU op: the base
# class instantiates self.op, so overriding it is sufficient.
class test_GpuDimShuffle(test_DimShuffle):
    op = GpuDimShuffle
......@@ -460,7 +460,7 @@ def dimshuffle_as_view(node):
op = node.op
if not isinstance(op, DimShuffle) or op.inplace:
return False
new_op = DimShuffle(op.input_broadcastable, op.new_order, inplace=True)
new_op = op.__class__(op.input_broadcastable, op.new_order, inplace=True)
return [new_op(*node.inputs)]
#Step 60 is the inplace optimization stage.
......
......@@ -24,6 +24,7 @@ def FunctionGraph(i, o):
class test_DimShuffle(unittest_tools.InferShapeTester):
op = DimShuffle
def with_linker(self, linker):
for xsh, shuffle, zsh in [((2, 3), (1, 'x', 0), (3, 1, 2)),
......@@ -38,12 +39,12 @@ class test_DimShuffle(unittest_tools.InferShapeTester):
((1,), ('x', 'x'), (1, 1))]:
ib = [(entry == 1) for entry in xsh]
x = TensorType('float64', ib)('x')
e = DimShuffle(ib, shuffle)(x)
e = self.op(ib, shuffle)(x)
f = copy(linker).accept(FunctionGraph([x], [e])).make_function()
assert f(numpy.ones(xsh)).shape == zsh
#test that DimShuffle.infer_shape work correctly
x = TensorType('float64', ib)('x')
e = DimShuffle(ib, shuffle)(x)
e = self.op(ib, shuffle)(x)
f = copy(linker).accept(FunctionGraph([x], [e.
shape])).make_function()
assert all(f(numpy.ones(xsh))) == all(zsh)
......@@ -51,12 +52,12 @@ class test_DimShuffle(unittest_tools.InferShapeTester):
# Test when we drop a axis that is not broadcastable
ib = [False, True, False]
x = TensorType('float64', ib)('x')
self.assertRaises(ValueError, DimShuffle, ib, shuffle)
self.assertRaises(ValueError, self.op, ib, shuffle)
# Test when we drop a axis that don't have shape 1
ib = [True, True, False]
x = TensorType('float64', ib)('x')
e = DimShuffle(ib, (1, 2))(x)
e = self.op(ib, (1, 2))(x)
f = copy(linker).accept(FunctionGraph([x], [e.shape])).make_function()
self.assertRaises(TypeError, f, numpy.ones((2, 1, 4)))
......@@ -89,8 +90,8 @@ class test_DimShuffle(unittest_tools.InferShapeTester):
adtens = TensorType('float64', ib)('x')
adtens_val = numpy.ones(xsh)
self._compile_and_check([adtens],
[DimShuffle(ib, shuffle)(adtens)],
[adtens_val], DimShuffle,
[self.op(ib, shuffle)(adtens)],
[adtens_val], self.op,
warn=False)
def test_too_big_rank(self):
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论