Commit 90dd93d0 authored by Frédéric Bastien, committed by GitHub

Merge pull request #5317 from khaotik/cumop

Merge CumsumOp/CumprodOp into CumOp
...@@ -5,7 +5,7 @@ from theano import Op ...@@ -5,7 +5,7 @@ from theano import Op
from theano.gof import local_optimizer from theano.gof import local_optimizer
from theano.sandbox.cuda import cuda_available, GpuOp from theano.sandbox.cuda import cuda_available, GpuOp
from theano.sandbox.cuda.basic_ops import gpu_flatten from theano.sandbox.cuda.basic_ops import gpu_flatten
from theano.tensor.extra_ops import CumsumOp from theano.tensor.extra_ops import CumOp
if cuda_available: if cuda_available:
from theano.sandbox.cuda import CudaNdarrayType from theano.sandbox.cuda import CudaNdarrayType
...@@ -13,7 +13,7 @@ if cuda_available: ...@@ -13,7 +13,7 @@ if cuda_available:
from theano.sandbox.cuda import register_opt as register_gpu_opt from theano.sandbox.cuda import register_opt as register_gpu_opt
class GpuCumsum(CumsumOp, GpuOp): class GpuCumsum(CumOp, GpuOp):
""" """
Parameters Parameters
...@@ -438,13 +438,16 @@ def values_eq_approx_high_tol(a, b): ...@@ -438,13 +438,16 @@ def values_eq_approx_high_tol(a, b):
@register_gpu_opt() @register_gpu_opt()
@local_optimizer([CumsumOp]) @local_optimizer([CumOp])
def use_gpu_cumsum(node): def use_gpu_cumsum(node):
if type(node.op) is CumsumOp \ if type(node.op) is CumOp \
and node.inputs[0].dtype == 'float32' \ and node.inputs[0].dtype == 'float32' \
and node.inputs[0].owner \ and node.inputs[0].owner \
and isinstance(node.inputs[0].owner.op, HostFromGpu): and isinstance(node.inputs[0].owner.op, HostFromGpu):
if node.op.mode != 'add':
return None
axis = node.op.axis axis = node.op.axis
x = node.inputs[0] x = node.inputs[0]
......
...@@ -7,7 +7,7 @@ import numpy as np ...@@ -7,7 +7,7 @@ import numpy as np
from six.moves import xrange from six.moves import xrange
from theano import tensor as T from theano import tensor as T
import theano import theano
from theano.tensor.extra_ops import cumsum, CumsumOp from theano.tensor.extra_ops import cumsum, CumOp
from theano.tests import unittest_tools as utt from theano.tests import unittest_tools as utt
import theano.sandbox.cuda as cuda_ndarray import theano.sandbox.cuda as cuda_ndarray
if cuda_ndarray.cuda_available: if cuda_ndarray.cuda_available:
...@@ -22,7 +22,7 @@ else: ...@@ -22,7 +22,7 @@ else:
mode_with_gpu = theano.compile.mode.get_default_mode().including('gpu') mode_with_gpu = theano.compile.mode.get_default_mode().including('gpu')
class TestGpuCumsum(theano.tensor.tests.test_extra_ops.TestCumsumOp): class TestGpuCumsum(theano.tensor.tests.test_extra_ops.TestCumOp):
mode = mode_with_gpu mode = mode_with_gpu
def setUp(self): def setUp(self):
...@@ -232,4 +232,4 @@ class TestGpuCumsum(theano.tensor.tests.test_extra_ops.TestCumsumOp): ...@@ -232,4 +232,4 @@ class TestGpuCumsum(theano.tensor.tests.test_extra_ops.TestCumsumOp):
x = T.ftensor4('x') x = T.ftensor4('x')
f = theano.function([x], cumsum(x, axis=1), mode=self.mode) f = theano.function([x], cumsum(x, axis=1), mode=self.mode)
assert [n for n in f.maker.fgraph.toposort() assert [n for n in f.maker.fgraph.toposort()
if isinstance(n.op, CumsumOp)] if isinstance(n.op, CumOp)]
...@@ -242,13 +242,16 @@ def searchsorted(x, v, side='left', sorter=None): ...@@ -242,13 +242,16 @@ def searchsorted(x, v, side='left', sorter=None):
return SearchsortedOp(side=side)(x, v, sorter) return SearchsortedOp(side=side)(x, v, sorter)
class CumsumOp(theano.Op): class CumOp(theano.Op):
# See function cumsum for docstring # See function cumsum/cumprod for docstring
__props__ = ("axis",) __props__ = ("axis", "mode")
def __init__(self, axis=None): def __init__(self, axis=None, mode='add'):
if mode not in ('add', 'mul'):
raise ValueError('%s: Unknown mode "%s"' % (type(self).__name__, mode))
self.axis = axis self.axis = axis
self.mode = mode
def make_node(self, x): def make_node(self, x):
x = basic.as_tensor_variable(x) x = basic.as_tensor_variable(x)
...@@ -264,20 +267,39 @@ class CumsumOp(theano.Op): ...@@ -264,20 +267,39 @@ class CumsumOp(theano.Op):
def perform(self, node, inputs, output_storage): def perform(self, node, inputs, output_storage):
x = inputs[0] x = inputs[0]
z = output_storage[0] z = output_storage[0]
z[0] = np.cumsum(x, axis=self.axis) z[0] = {'add': np.cumsum, 'mul': np.cumprod}[self.mode](x, axis=self.axis)
def grad(self, inputs, output_gradients): def grad(self, inputs, output_gradients):
[gi] = output_gradients x, = inputs
gi, = output_gradients
if self.axis is None: if self.axis is None:
return [cumsum(gi[::-1])[::-1].reshape(inputs[0].shape)] if self.mode == 'add':
return [cumsum(gi[::-1])[::-1].reshape(x.shape)]
elif self.mode == 'mul':
fx = cumprod(x, axis=self.axis)
return [cumsum(
(fx * gi)[::-1])[::-1].reshape(x.shape) / x]
else:
raise NotImplementedError(
'%s: unknown gradient for mode "%s"' %
(type(self).__name__, self.mode))
# We need to reverse the gradients along ``self.axis``,
# compute cumsum, then reverse again
reverse_slicing = [slice(None, None, None)] * gi.ndim reverse_slicing = [slice(None, None, None)] * gi.ndim
reverse_slicing[self.axis] = slice(None, None, -1) reverse_slicing[self.axis] = slice(None, None, -1)
reverse_slicing = tuple(reverse_slicing) reverse_slicing = tuple(reverse_slicing)
# We need to reverse the gradients along ``self.axis``,
# compute cumsum, then reverse again
if self.mode == 'add':
return [cumsum(gi[reverse_slicing], self.axis)[reverse_slicing]] return [cumsum(gi[reverse_slicing], self.axis)[reverse_slicing]]
elif self.mode == 'mul':
fx = cumprod(x, axis=self.axis)
return [cumsum(
(fx * gi)[reverse_slicing], self.axis)[reverse_slicing] / x]
else:
raise NotImplementedError(
'%s: unknown gradient for mode "%s"' %
(type(self).__name__, self.mode))
def infer_shape(self, node, shapes): def infer_shape(self, node, shapes):
if self.axis is None: if self.axis is None:
...@@ -290,6 +312,7 @@ class CumsumOp(theano.Op): ...@@ -290,6 +312,7 @@ class CumsumOp(theano.Op):
z, = onames z, = onames
axis = self.axis axis = self.axis
fail = sub['fail'] fail = sub['fail']
func = dict(mul='CumProd', add='CumSum')[self.mode]
if self.axis is None or (self.axis == 0 and node.inputs[0].ndim == 1): if self.axis is None or (self.axis == 0 and node.inputs[0].ndim == 1):
code = """ code = """
...@@ -303,13 +326,13 @@ class CumsumOp(theano.Op): ...@@ -303,13 +326,13 @@ class CumsumOp(theano.Op):
if (!%(z)s) if (!%(z)s)
%(fail)s; %(fail)s;
{ {
PyObject * t = PyArray_CumSum( PyObject * t = PyArray_%(func)s(
%(x)s, NPY_MAXDIMS, %(x)s, NPY_MAXDIMS,
PyArray_TYPE((PyArrayObject*) py_%(x)s), %(z)s); PyArray_TYPE((PyArrayObject*) py_%(x)s), %(z)s);
if (!t){ if (!t){
%(fail)s; %(fail)s;
} }
// Because PyArray_CumSum returns a newly created reference on t. // Because PyArray_%(func)s returns a newly created reference on t.
Py_XDECREF(t); Py_XDECREF(t);
} }
""" % locals() """ % locals()
...@@ -325,13 +348,13 @@ class CumsumOp(theano.Op): ...@@ -325,13 +348,13 @@ class CumsumOp(theano.Op):
%(fail)s; %(fail)s;
{ {
PyObject * t = PyArray_CumSum( PyObject * t = PyArray_%(func)s(
%(x)s, %(axis)s, %(x)s, %(axis)s,
PyArray_TYPE((PyArrayObject*) py_%(x)s), %(z)s); PyArray_TYPE((PyArrayObject*) py_%(x)s), %(z)s);
if (!t){ if (!t){
%(fail)s; %(fail)s;
} }
// Because PyArray_CumSum returns a newly created reference on t. // Because PyArray_%(func)s returns a newly created reference on t.
Py_XDECREF(t); Py_XDECREF(t);
} }
""" % locals() """ % locals()
...@@ -339,10 +362,10 @@ class CumsumOp(theano.Op): ...@@ -339,10 +362,10 @@ class CumsumOp(theano.Op):
return code return code
def c_code_cache_version(self): def c_code_cache_version(self):
return (6,) return (7,)
def __str__(self): def __str__(self):
return "%s{%s}" % (self.__class__.__name__, self.axis) return "%s{%s, %s}" % (self.__class__.__name__, self.axis, self.mode)
def cumsum(x, axis=None): def cumsum(x, axis=None):
...@@ -362,112 +385,7 @@ def cumsum(x, axis=None): ...@@ -362,112 +385,7 @@ def cumsum(x, axis=None):
.. versionadded:: 0.7 .. versionadded:: 0.7
""" """
return CumsumOp(axis=axis)(x) return CumOp(axis=axis, mode='add')(x)
class CumprodOp(theano.Op):
    """
    Compute the cumulative product of a tensor along a given axis.

    See the ``cumprod`` helper function for the user-facing docstring.

    Parameters
    ----------
    axis : int or None
        Axis along which the product is accumulated. ``None`` flattens
        the input first, mirroring ``numpy.cumprod``.
    """

    # Op identity (equality/hashing) is determined solely by ``axis``.
    __props__ = ("axis",)

    def __init__(self, axis=None):
        self.axis = axis

    def make_node(self, x):
        """Build the Apply node; the output keeps the input's dtype."""
        x = basic.as_tensor_variable(x)
        out_type = x.type()

        if self.axis is None:
            # axis=None flattens the input, so the output is always 1-D.
            out_type = theano.tensor.vector(dtype=x.dtype)  # Flatten
        elif self.axis >= x.ndim or self.axis < -x.ndim:
            raise ValueError('axis(={0}) out of bounds'.format(self.axis))

        return theano.Apply(self, [x], [out_type])

    def perform(self, node, inputs, output_storage):
        """Python fallback implementation: delegate directly to NumPy."""
        x = inputs[0]
        z = output_storage[0]
        z[0] = np.cumprod(x, axis=self.axis)

    def grad(self, inputs, output_gradients):
        """
        Gradient of cumprod: with ``fx = cumprod(x)``, the vector-Jacobian
        product is ``reversed-cumsum(fx * gi) / x``.

        NOTE(review): the division by ``x`` leaves the gradient undefined
        where ``x`` contains zeros — that is inherent to this standard
        formula, not something introduced here.
        """
        x, = inputs
        gi, = output_gradients
        fx = cumprod(x, axis=self.axis)

        if self.axis is None:
            # Flattened case: reverse, cumsum, reverse, restore input shape.
            return [cumsum((fx * gi)[::-1])[::-1].reshape(inputs[0].shape) / x]

        # We need to reverse the gradients along ``self.axis``,
        # compute cumsum, then reverse again.
        reverse_slicing = [slice(None, None, None)] * gi.ndim
        reverse_slicing[self.axis] = slice(None, None, -1)
        reverse_slicing = tuple(reverse_slicing)
        return [cumsum((fx * gi)[reverse_slicing],
                       self.axis)[reverse_slicing] / x]

    def infer_shape(self, node, shapes):
        # axis=None flattens: output length is the product of input dims.
        if self.axis is None:
            return [(tensor.prod(shapes[0]),)]  # Flatten

        # Otherwise the output shape equals the input shape.
        return shapes

    def c_code(self, node, name, inames, onames, sub):
        """
        Emit C code that calls NumPy's ``PyArray_CumProd``.

        NOTE(review): the ``// Because PyArray_CumSum ...`` comments inside
        the C strings are copy-paste leftovers from CumsumOp — the call is
        correctly ``PyArray_CumProd``. They are kept byte-identical here to
        avoid invalidating the compiled-code cache.
        """
        x, = inames
        z, = onames
        axis = self.axis
        fail = sub['fail']

        if self.axis is None or (self.axis == 0 and node.inputs[0].ndim == 1):
            # Flattened (or trivially 1-D) case: NPY_MAXDIMS means "flatten".
            code = """
                npy_intp shape[1] = { PyArray_SIZE(%(x)s) };
                if(!(%(z)s && PyArray_DIMS(%(z)s)[0] == shape[0]))
                {
                    Py_XDECREF(%(z)s);
                    %(z)s = (PyArrayObject*) PyArray_SimpleNew(1, shape, PyArray_TYPE((PyArrayObject*) py_%(x)s));
                }

                if (!%(z)s)
                    %(fail)s;
                {
                    PyObject * t = PyArray_CumProd(
                        %(x)s, NPY_MAXDIMS,
                        PyArray_TYPE((PyArrayObject*) py_%(x)s), %(z)s);
                    if (!t){
                        %(fail)s;
                    }
                    // Because PyArray_CumSum returns a newly created reference on t.
                    Py_XDECREF(t);
                }
            """ % locals()
        else:
            # General case: accumulate along the requested axis.
            code = """
                if(!(%(z)s && PyArray_CompareLists(PyArray_DIMS(%(z)s), PyArray_DIMS(%(x)s), PyArray_NDIM(%(x)s)) ))
                {
                    Py_XDECREF(%(z)s);
                    %(z)s = (PyArrayObject*) PyArray_SimpleNew(PyArray_NDIM(%(x)s), PyArray_DIMS(%(x)s), PyArray_TYPE((PyArrayObject*) py_%(x)s));
                }

                if (!%(z)s)
                    %(fail)s;
                {
                    PyObject * t = PyArray_CumProd(
                        %(x)s, %(axis)s,
                        PyArray_TYPE((PyArrayObject*) py_%(x)s), %(z)s);
                    if (!t){
                        %(fail)s;
                    }
                    // Because PyArray_CumSum returns a newly created reference on t.
                    Py_XDECREF(t);
                }
            """ % locals()

        return code

    def c_code_cache_version(self):
        # Bump whenever the generated C code changes.
        return (4,)

    def __str__(self):
        return "%s{%s}" % (self.__class__.__name__, self.axis)
def cumprod(x, axis=None): def cumprod(x, axis=None):
...@@ -488,7 +406,27 @@ def cumprod(x, axis=None): ...@@ -488,7 +406,27 @@ def cumprod(x, axis=None):
.. versionadded:: 0.7 .. versionadded:: 0.7
""" """
return CumprodOp(axis=axis)(x) return CumOp(axis=axis, mode='mul')(x)
# CumsumOp and CumprodOp are for compatibility with old version,
# just in case unpickling a theano function with old Ops.
class CumsumOp(theano.Op):
    """
    Backward-compatibility stub for the pre-merge ``CumsumOp``.

    Kept only so that functions pickled before CumsumOp/CumprodOp were
    merged into ``CumOp`` can still be unpickled: constructing this class
    actually yields a ``CumOp`` instance with ``mode='add'`` (the pickle
    machinery then restores the remaining attributes).
    """

    __props__ = ("axis",)

    def __new__(typ, *args, **kwargs):
        inst = object.__new__(CumOp, *args, **kwargs)
        inst.mode = 'add'
        return inst
class CumprodOp(theano.Op):
    """
    Backward-compatibility stub for the pre-merge ``CumprodOp``.

    Exists solely so that old pickled graphs load correctly: constructing
    this class actually yields a ``CumOp`` instance with ``mode='mul'``
    (the pickle machinery then restores the remaining attributes).
    """

    __props__ = ("axis",)

    def __new__(typ, *args, **kwargs):
        inst = object.__new__(CumOp, *args, **kwargs)
        inst.mode = 'mul'
        return inst
class DiffOp(theano.Op): class DiffOp(theano.Op):
......
from __future__ import absolute_import, print_function, division from __future__ import absolute_import, print_function, division
from functools import partial
import numpy as np import numpy as np
import numpy import numpy
...@@ -7,7 +8,7 @@ import theano ...@@ -7,7 +8,7 @@ import theano
from theano.tests import unittest_tools as utt from theano.tests import unittest_tools as utt
from theano.tensor.extra_ops import (SearchsortedOp, searchsorted, from theano.tensor.extra_ops import (SearchsortedOp, searchsorted,
CumsumOp, cumsum, CumprodOp, cumprod, CumOp, cumsum, cumprod,
CpuContiguous, cpu_contiguous, CpuContiguous, cpu_contiguous,
bincount, DiffOp, diff, squeeze, compress, bincount, DiffOp, diff, squeeze, compress,
RepeatOp, repeat, Bartlett, bartlett, RepeatOp, repeat, Bartlett, bartlett,
...@@ -121,74 +122,33 @@ class TestSearchsortedOp(utt.InferShapeTester): ...@@ -121,74 +122,33 @@ class TestSearchsortedOp(utt.InferShapeTester):
utt.verify_grad(self.op, [self.a[self.idx_sorted], self.b]) utt.verify_grad(self.op, [self.a[self.idx_sorted], self.b])
class TestCumsumOp(utt.InferShapeTester): class TestCumOp(utt.InferShapeTester):
def setUp(self): def setUp(self):
super(TestCumsumOp, self).setUp() super(TestCumOp, self).setUp()
self.op_class = CumsumOp self.op_class = CumOp
self.op = CumsumOp() self.op = CumOp()
def test_cumsumOp(self): def test_cum_op(self):
x = T.tensor3('x') x = T.tensor3('x')
a = np.random.random((3, 5, 2)).astype(config.floatX) a = np.random.random((3, 5, 2)).astype(config.floatX)
# Test axis out of bounds # Test axis out of bounds
self.assertRaises(ValueError, cumsum, x, axis=3) self.assertRaises(ValueError, cumsum, x, axis=3)
self.assertRaises(ValueError, cumsum, x, axis=-4) self.assertRaises(ValueError, cumsum, x, axis=-4)
f = theano.function([x], cumsum(x))
assert np.allclose(np.cumsum(a), f(a)) # Test axis=None
for axis in range(-len(a.shape), len(a.shape)):
f = theano.function([x], cumsum(x, axis=axis))
assert np.allclose(np.cumsum(a, axis=axis), f(a))
def test_infer_shape(self):
    """Shape inference must match the actual runtime output shape."""
    x = T.tensor3('x')
    data = np.random.random((3, 5, 2)).astype(config.floatX)

    # Flattened (axis=None) case.
    self._compile_and_check([x],
                            [self.op(x)],
                            [data],
                            self.op_class)

    # Every valid axis, negative indices included.
    for ax in range(-data.ndim, data.ndim):
        self._compile_and_check([x],
                                [cumsum(x, axis=ax)],
                                [data],
                                self.op_class)
def test_grad(self):
    """Numerically verify the symbolic gradient of the op."""
    data = np.random.random((3, 5, 2)).astype(config.floatX)

    # axis=None first.
    utt.verify_grad(self.op, [data])

    # Then every valid axis; eps loosened for float32 stability.
    for ax in range(-data.ndim, data.ndim):
        utt.verify_grad(self.op_class(axis=ax), [data], eps=4e-4)
class TestCumprodOp(utt.InferShapeTester):
def setUp(self):
    # Bind the op class and a default instance used by the shared
    # InferShapeTester helpers and the gradient/shape tests below.
    super(TestCumprodOp, self).setUp()
    self.op_class = CumprodOp
    self.op = CumprodOp()
def test_CumprodOp(self):
x = T.tensor3('x')
a = np.random.random((3, 5, 2)).astype(config.floatX)
# Test axis out of bounds
self.assertRaises(ValueError, cumprod, x, axis=3) self.assertRaises(ValueError, cumprod, x, axis=3)
self.assertRaises(ValueError, cumprod, x, axis=-4) self.assertRaises(ValueError, cumprod, x, axis=-4)
f = theano.function([x], cumprod(x)) f = theano.function([x], [cumsum(x), cumprod(x)])
assert np.allclose(np.cumprod(a), f(a)) # Test axis=None s, p = f(a)
assert np.allclose(np.cumsum(a), s) # Test axis=None
assert np.allclose(np.cumprod(a), p) # Test axis=None
for axis in range(-len(a.shape), len(a.shape)): for axis in range(-len(a.shape), len(a.shape)):
f = theano.function([x], cumprod(x, axis=axis)) f = theano.function([x], [cumsum(x, axis=axis), cumprod(x, axis=axis)])
assert np.allclose(np.cumprod(a, axis=axis), f(a)) s, p = f(a)
assert np.allclose(np.cumsum(a, axis=axis), s)
assert np.allclose(np.cumprod(a, axis=axis), p)
def test_infer_shape(self): def test_infer_shape(self):
x = T.tensor3('x') x = T.tensor3('x')
...@@ -202,17 +162,19 @@ class TestCumprodOp(utt.InferShapeTester): ...@@ -202,17 +162,19 @@ class TestCumprodOp(utt.InferShapeTester):
for axis in range(-len(a.shape), len(a.shape)): for axis in range(-len(a.shape), len(a.shape)):
self._compile_and_check([x], self._compile_and_check([x],
[cumprod(x, axis=axis)], [cumsum(x, axis=axis)],
[a], [a],
self.op_class) self.op_class)
def test_grad(self): def test_grad(self):
a = np.random.random((3, 5, 2)).astype(config.floatX) a = np.random.random((3, 5, 2)).astype(config.floatX)
utt.verify_grad(self.op, [a]) # Test axis=None utt.verify_grad(self.op_class(mode='add'), [a]) # Test axis=None
utt.verify_grad(self.op_class(mode='mul'), [a]) # Test axis=None
for axis in range(-len(a.shape), len(a.shape)): for axis in range(-len(a.shape), len(a.shape)):
utt.verify_grad(self.op_class(axis=axis), [a]) utt.verify_grad(self.op_class(axis=axis, mode='add'), [a], eps=4e-4)
utt.verify_grad(self.op_class(axis=axis, mode='mul'), [a], eps=4e-4)
class TestBinCount(utt.InferShapeTester): class TestBinCount(utt.InferShapeTester):
......
Markdown formatting supported
0%
You are adding 0 people to this discussion. Please proceed with caution.
Finish editing this comment first!
Register or sign in to post a comment