Added support for negative axis in GpuCumsumOp

b586a75f · Marc-Alexandre Cote · e9328fdd · b586a75f · b586a75f · b586a75f
--- a/theano/sandbox/cuda/extra_ops.py
+++ b/theano/sandbox/cuda/extra_ops.py
@@ -25,8 +25,8 @@ class GpuCumsum(CumsumOp, GpuOp):
        self.max_grid_size1 = None
        self.max_grid_size2 = None
-# We must reuse the same method, not reimplement and call it.
+    # We must reuse the same method, not reimplement and call it.
-# Otherwise DebugMode will print many warnings.
+    # Otherwise DebugMode will print many warnings.
    perform = Op.perform
    def make_node(self, x):
@@ -37,8 +37,11 @@ class GpuCumsum(CumsumOp, GpuOp):
        if x.ndim > GpuCumsum.SUPPORTED_NDIMS:
            raise NotImplementedError('Only cumsum on 1D, 2D and 3D array are supported right now!')
-        if self.axis >= x.ndim:
+        if self.axis >= x.ndim or self.axis < -x.ndim:
            raise ValueError('axis(={1}) out of bounds'.format(self.axis))
+        elif self.axis < 0:
+            # Convert negative axis to positive axis.
+            self.axis += x.ndim
        return theano.Apply(self, [x], [x.type()])
@@ -352,7 +355,10 @@ class GpuCumsum(CumsumOp, GpuOp):
    def c_code(self, node, nodename, inames, onames, sub):
        x, = inames
        z, = onames
+        # We assume the array has already been flattened if needed.
        axis = self.axis if self.axis is not None else 0
        fail = sub['fail']
        max_threads_dim0 = self.max_threads_dim0
@@ -408,11 +414,10 @@ class GpuCumsum(CumsumOp, GpuOp):
 def values_eq_approx_high_tol(a, b):
    """This function is needed to keep DebugMode from raising a useless
-    error due to ronding error.
+    error due to rounding error.
    This happens with big input sizes due to changes in the order of
    operations.
    """
    rtol = None
    if a.size > 100000:
@@ -443,6 +448,7 @@ def use_gpu_cumsum(node):
        # ``gpu_cumsum`` assumes the array has been flattened if needed.
        if axis is None:
            axis = 0
        ret = host_from_gpu(GpuCumsum(axis)(x))
        ret.values_eq_approx = values_eq_approx_high_tol
        return [ret]
--- a/theano/sandbox/cuda/tests/test_extra_ops.py
+++ b/theano/sandbox/cuda/tests/test_extra_ops.py
@@ -47,7 +47,7 @@ class TestGpuCumsum(theano.tensor.tests.test_extra_ops.TestCumsumOp):
    def test_Strides1D(self):
        x = T.fvector('x')
-        for axis in [0, None]:
+        for axis in [0, None, -1]:
            a = np.random.random((42,)).astype("float32")
            cumsum_function = theano.function([x], cumsum(x, axis=axis),
                                              mode=self.mode)
@@ -70,7 +70,7 @@ class TestGpuCumsum(theano.tensor.tests.test_extra_ops.TestCumsumOp):
    def test_Strides2D(self):
        x = T.fmatrix('x')
-        for axis in [0, 1, None]:
+        for axis in [0, 1, None, -1, -2]:
            a = np.random.random((42, 30)).astype("float32")
            cumsum_function = theano.function([x], cumsum(x, axis=axis),
                                              mode=self.mode)
@@ -93,7 +93,7 @@ class TestGpuCumsum(theano.tensor.tests.test_extra_ops.TestCumsumOp):
    def test_Strides3D(self):
        x = T.ftensor3('x')
-        for axis in [0, 1, 2, None]:
+        for axis in [0, 1, 2, None, -1, -2, -3]:
            a = np.random.random((42, 30, 25)).astype("float32")
            cumsum_function = theano.function([x], cumsum(x, axis=axis),
                                              mode=self.mode)
@@ -139,7 +139,7 @@ class TestGpuCumsum(theano.tensor.tests.test_extra_ops.TestCumsumOp):
        block_max_size = self.max_threads_dim0 * 2
        x = T.fmatrix('x')
-        for shape_axis, axis in zip([0, 1, 0], [0, 1, None]):
+        for shape_axis, axis in zip([0, 1, 0, 1, 0], [0, 1, None, -1, -2]):
            f = theano.function([x], cumsum(x, axis=axis), mode=self.mode)
            assert [n for n in f.maker.fgraph.toposort()
                    if isinstance(n.op, GpuCumsum)]
@@ -178,7 +178,7 @@ class TestGpuCumsum(theano.tensor.tests.test_extra_ops.TestCumsumOp):
        block_max_size = self.max_threads_dim0 * 2
        x = T.ftensor3('x')
-        for shape_axis, axis in zip([0, 1, 2, 0], [0, 1, 2, None]):
+        for shape_axis, axis in zip([0, 1, 2, 0, 2, 1, 0], [0, 1, 2, None, -1, -2, -3]):
            f = theano.function([x], cumsum(x, axis=axis), mode=self.mode)
            assert [n for n in f.maker.fgraph.toposort()
                    if isinstance(n.op, GpuCumsum)]

--- a/theano/tensor/extra_ops.py
+++ b/theano/tensor/extra_ops.py
@@ -29,8 +29,11 @@ class CumsumOp(theano.Op):
        if self.axis is None:
            out_type = theano.tensor.vector(dtype=x.dtype)  # Flatten
-        elif self.axis >= x.ndim:
+        elif self.axis >= x.ndim or self.axis < -x.ndim:
            raise ValueError('axis(={0}) out of bounds'.format(self.axis))
+        elif self.axis < 0:
+            # Convert negative axis to positive axis.
+            self.axis += x.ndim
        return theano.Apply(self, [x], [out_type])
@@ -151,8 +154,11 @@ class CumprodOp(theano.Op):
        if self.axis is None:
            out_type = theano.tensor.vector(dtype=x.dtype)  # Flatten
-        elif self.axis >= x.ndim:
+        elif self.axis >= x.ndim or self.axis < -x.ndim:
            raise ValueError('axis(={0}) out of bounds'.format(self.axis))
+        elif self.axis < 0:
+            # Convert negative axis to positive axis.
+            self.axis += x.ndim
        return theano.Apply(self, [x], [out_type])

--- a/theano/tensor/tests/test_extra_ops.py
+++ b/theano/tensor/tests/test_extra_ops.py
@@ -32,12 +32,13 @@ class TestCumsumOp(utt.InferShapeTester):
        a = np.random.random((3, 5, 2)).astype(config.floatX)
        # Test axis out of bounds
-        self.assertRaises(ValueError, cumsum, x, axis=4)
+        self.assertRaises(ValueError, cumsum, x, axis=3)
+        self.assertRaises(ValueError, cumsum, x, axis=-4)
        f = theano.function([x], cumsum(x))
        assert np.allclose(np.cumsum(a), f(a))  # Test axis=None
-        for axis in range(len(a.shape)):
+        for axis in range(-len(a.shape), len(a.shape)):
            f = theano.function([x], cumsum(x, axis=axis))
            assert np.allclose(np.cumsum(a, axis=axis), f(a))
@@ -51,7 +52,7 @@ class TestCumsumOp(utt.InferShapeTester):
                                [a],
                                self.op_class)
-        for axis in range(len(a.shape)):
+        for axis in range(-len(a.shape), len(a.shape)):
            self._compile_and_check([x],
                                    [cumsum(x, axis=axis)],
                                    [a],
@@ -62,7 +63,7 @@ class TestCumsumOp(utt.InferShapeTester):
        utt.verify_grad(self.op, [a])  # Test axis=None
-        for axis in range(len(a.shape)):
+        for axis in range(-len(a.shape), len(a.shape)):
            utt.verify_grad(self.op_class(axis=axis), [a], eps=4e-4)
@@ -77,10 +78,14 @@ class TestCumprodOp(utt.InferShapeTester):
        x = T.tensor3('x')
        a = np.random.random((3, 5, 2)).astype(config.floatX)
+        # Test axis out of bounds
+        self.assertRaises(ValueError, cumprod, x, axis=3)
+        self.assertRaises(ValueError, cumprod, x, axis=-4)
        f = theano.function([x], cumprod(x))
        assert np.allclose(np.cumprod(a), f(a))  # Test axis=None
-        for axis in range(len(a.shape)):
+        for axis in range(-len(a.shape), len(a.shape)):
            f = theano.function([x], cumprod(x, axis=axis))
            assert np.allclose(np.cumprod(a, axis=axis), f(a))
@@ -94,7 +99,7 @@ class TestCumprodOp(utt.InferShapeTester):
                                [a],
                                self.op_class)
-        for axis in range(len(a.shape)):
+        for axis in range(-len(a.shape), len(a.shape)):
            self._compile_and_check([x],
                                    [cumprod(x, axis=axis)],
                                    [a],
@@ -105,7 +110,7 @@ class TestCumprodOp(utt.InferShapeTester):
        utt.verify_grad(self.op, [a])  # Test axis=None
-        for axis in range(len(a.shape)):
+        for axis in range(-len(a.shape), len(a.shape)):
            utt.verify_grad(self.op_class(axis=axis), [a])