Merge pull request #2716 from MarcCote/fix_cumsum_negative_axis

Added support for negative axis in GpuCumsumOp

Merge pull request #2716 from MarcCote/fix_cumsum_negative_axis
e85ae3bd · abergeron · b3bf808f · 68da6cb5 · e85ae3bd · e85ae3bd
--- a/theano/sandbox/cuda/extra_ops.py
+++ b/theano/sandbox/cuda/extra_ops.py
@@ -25,8 +25,8 @@ class GpuCumsum(CumsumOp, GpuOp):
        self.max_grid_size1 = None
        self.max_grid_size2 = None

-# We must reuse the same method, not reimplement and call it.
-# Otherwise DebugMode will print many warnings.
+    # We must reuse the same method, not reimplement and call it.
+    # Otherwise DebugMode will print many warnings.
    perform = Op.perform

    def make_node(self, x):
@@ -37,7 +37,7 @@ class GpuCumsum(CumsumOp, GpuOp):
        if x.ndim > GpuCumsum.SUPPORTED_NDIMS:
            raise NotImplementedError('Only cumsum on 1D, 2D and 3D array are supported right now!')

-        if self.axis >= x.ndim:
+        if self.axis >= x.ndim or self.axis < -x.ndim:
            raise ValueError('axis(={1}) out of bounds'.format(self.axis))

        return theano.Apply(self, [x], [x.type()])
@@ -69,7 +69,7 @@ class GpuCumsum(CumsumOp, GpuOp):
        return "%s{%s}" % (self.__class__.__name__, self.axis)

    def c_code_cache_version(self):
-        return (7,)
+        return (8,)

    def c_support_code_apply(self, node, nodename):
        return """
@@ -352,6 +352,8 @@ class GpuCumsum(CumsumOp, GpuOp):
    def c_code(self, node, nodename, inames, onames, sub):
        x, = inames
        z, = onames
+
+        # We assume array has been already flattened if needed.
        axis = self.axis if self.axis is not None else 0
        fail = sub['fail']

@@ -368,6 +370,12 @@ class GpuCumsum(CumsumOp, GpuOp):
            const int* shape = CudaNdarray_HOST_DIMS(%(x)s);
            bool needAllocation = !%(z)s || CudaNdarray_NDIM(%(x)s) != CudaNdarray_NDIM(%(z)s);

+            int axis = %(axis)s;
+            if (axis < 0) {
+                // Convert negative axis to positive axis.
+                axis += CudaNdarray_NDIM(%(x)s);
+            }
+
            // If output is already allocated, check if its shape matches the input's one.
            if (!needAllocation) {
                for (int i= 0; i < CudaNdarray_NDIM(%(x)s); ++i) {
@@ -387,7 +395,7 @@ class GpuCumsum(CumsumOp, GpuOp):
            }

            { // Namespace for kernel calls //
-                if (cumSum_%(nodename)s(%(x)s, %(z)s, %(axis)s, %(max_threads_dim0)s, %(max_grid_size1)s, %(max_grid_size2)s) == -1){
+                if (cumSum_%(nodename)s(%(x)s, %(z)s, axis, %(max_threads_dim0)s, %(max_grid_size1)s, %(max_grid_size2)s) == -1){
                    %(fail)s;
                }

@@ -408,11 +416,10 @@ class GpuCumsum(CumsumOp, GpuOp):

 def values_eq_approx_high_tol(a, b):
    """This fct is needed to don't have DebugMode raise useless
-    error due to ronding error.
+    error due to rounding error.

    This happen with big input size due to change in the order of
    operation.
-
    """
    rtol = None
    if a.size > 100000:
@@ -443,6 +450,7 @@ def use_gpu_cumsum(node):
        # ``gpu_cumsum`` assume array has been flattened if needed.
        if axis is None:
            axis = 0
+
        ret = host_from_gpu(GpuCumsum(axis)(x))
        ret.values_eq_approx = values_eq_approx_high_tol
        return [ret]
--- a/theano/sandbox/cuda/tests/test_extra_ops.py
+++ b/theano/sandbox/cuda/tests/test_extra_ops.py
@@ -47,7 +47,7 @@ class TestGpuCumsum(theano.tensor.tests.test_extra_ops.TestCumsumOp):
    def test_Strides1D(self):
        x = T.fvector('x')

-        for axis in [0, None]:
+        for axis in [0, None, -1]:
            a = np.random.random((42,)).astype("float32")
            cumsum_function = theano.function([x], cumsum(x, axis=axis),
                                              mode=self.mode)
@@ -70,7 +70,7 @@ class TestGpuCumsum(theano.tensor.tests.test_extra_ops.TestCumsumOp):
    def test_Strides2D(self):
        x = T.fmatrix('x')

-        for axis in [0, 1, None]:
+        for axis in [0, 1, None, -1, -2]:
            a = np.random.random((42, 30)).astype("float32")
            cumsum_function = theano.function([x], cumsum(x, axis=axis),
                                              mode=self.mode)
@@ -93,7 +93,7 @@ class TestGpuCumsum(theano.tensor.tests.test_extra_ops.TestCumsumOp):
    def test_Strides3D(self):
        x = T.ftensor3('x')

-        for axis in [0, 1, 2, None]:
+        for axis in [0, 1, 2, None, -1, -2, -3]:
            a = np.random.random((42, 30, 25)).astype("float32")
            cumsum_function = theano.function([x], cumsum(x, axis=axis),
                                              mode=self.mode)
@@ -139,7 +139,7 @@ class TestGpuCumsum(theano.tensor.tests.test_extra_ops.TestCumsumOp):
        block_max_size = self.max_threads_dim0 * 2

        x = T.fmatrix('x')
-        for shape_axis, axis in zip([0, 1, 0], [0, 1, None]):
+        for shape_axis, axis in zip([0, 1, 0, 1, 0], [0, 1, None, -1, -2]):
            f = theano.function([x], cumsum(x, axis=axis), mode=self.mode)
            assert [n for n in f.maker.fgraph.toposort()
                    if isinstance(n.op, GpuCumsum)]
@@ -178,7 +178,7 @@ class TestGpuCumsum(theano.tensor.tests.test_extra_ops.TestCumsumOp):
        block_max_size = self.max_threads_dim0 * 2

        x = T.ftensor3('x')
-        for shape_axis, axis in zip([0, 1, 2, 0], [0, 1, 2, None]):
+        for shape_axis, axis in zip([0, 1, 2, 0, 2, 1, 0], [0, 1, 2, None, -1, -2, -3]):
            f = theano.function([x], cumsum(x, axis=axis), mode=self.mode)
            assert [n for n in f.maker.fgraph.toposort()
                    if isinstance(n.op, GpuCumsum)]

--- a/theano/tensor/extra_ops.py
+++ b/theano/tensor/extra_ops.py
@@ -29,7 +29,7 @@ class CumsumOp(theano.Op):

        if self.axis is None:
            out_type = theano.tensor.vector(dtype=x.dtype)  # Flatten
-        elif self.axis >= x.ndim:
+        elif self.axis >= x.ndim or self.axis < -x.ndim:
            raise ValueError('axis(={0}) out of bounds'.format(self.axis))

        return theano.Apply(self, [x], [out_type])
@@ -151,7 +151,7 @@ class CumprodOp(theano.Op):

        if self.axis is None:
            out_type = theano.tensor.vector(dtype=x.dtype)  # Flatten
-        elif self.axis >= x.ndim:
+        elif self.axis >= x.ndim or self.axis < -x.ndim:
            raise ValueError('axis(={0}) out of bounds'.format(self.axis))

        return theano.Apply(self, [x], [out_type])

--- a/theano/tensor/tests/test_extra_ops.py
+++ b/theano/tensor/tests/test_extra_ops.py
@@ -32,12 +32,13 @@ class TestCumsumOp(utt.InferShapeTester):
        a = np.random.random((3, 5, 2)).astype(config.floatX)

        # Test axis out of bounds
-        self.assertRaises(ValueError, cumsum, x, axis=4)
+        self.assertRaises(ValueError, cumsum, x, axis=3)
+        self.assertRaises(ValueError, cumsum, x, axis=-4)

        f = theano.function([x], cumsum(x))
        assert np.allclose(np.cumsum(a), f(a))  # Test axis=None

-        for axis in range(len(a.shape)):
+        for axis in range(-len(a.shape), len(a.shape)):
            f = theano.function([x], cumsum(x, axis=axis))
            assert np.allclose(np.cumsum(a, axis=axis), f(a))

@@ -51,7 +52,7 @@ class TestCumsumOp(utt.InferShapeTester):
                                [a],
                                self.op_class)

-        for axis in range(len(a.shape)):
+        for axis in range(-len(a.shape), len(a.shape)):
            self._compile_and_check([x],
                                    [cumsum(x, axis=axis)],
                                    [a],
@@ -62,7 +63,7 @@ class TestCumsumOp(utt.InferShapeTester):

        utt.verify_grad(self.op, [a])  # Test axis=None

-        for axis in range(len(a.shape)):
+        for axis in range(-len(a.shape), len(a.shape)):
            utt.verify_grad(self.op_class(axis=axis), [a], eps=4e-4)


@@ -77,10 +78,14 @@ class TestCumprodOp(utt.InferShapeTester):
        x = T.tensor3('x')
        a = np.random.random((3, 5, 2)).astype(config.floatX)

+        # Test axis out of bounds
+        self.assertRaises(ValueError, cumprod, x, axis=3)
+        self.assertRaises(ValueError, cumprod, x, axis=-4)
+
        f = theano.function([x], cumprod(x))
        assert np.allclose(np.cumprod(a), f(a))  # Test axis=None

-        for axis in range(len(a.shape)):
+        for axis in range(-len(a.shape), len(a.shape)):
            f = theano.function([x], cumprod(x, axis=axis))
            assert np.allclose(np.cumprod(a, axis=axis), f(a))

@@ -94,7 +99,7 @@ class TestCumprodOp(utt.InferShapeTester):
                                [a],
                                self.op_class)

-        for axis in range(len(a.shape)):
+        for axis in range(-len(a.shape), len(a.shape)):
            self._compile_and_check([x],
                                    [cumprod(x, axis=axis)],
                                    [a],
@@ -105,7 +110,7 @@ class TestCumprodOp(utt.InferShapeTester):

        utt.verify_grad(self.op, [a])  # Test axis=None

-        for axis in range(len(a.shape)):
+        for axis in range(-len(a.shape), len(a.shape)):
            utt.verify_grad(self.op_class(axis=axis), [a])