Commit 90dd93d0 authored by Frédéric Bastien, committed by GitHub

Merge pull request #5317 from khaotik/cumop

Merge CumsumOp/CumprodOp into CumOp
...@@ -5,7 +5,7 @@ from theano import Op ...@@ -5,7 +5,7 @@ from theano import Op
from theano.gof import local_optimizer from theano.gof import local_optimizer
from theano.sandbox.cuda import cuda_available, GpuOp from theano.sandbox.cuda import cuda_available, GpuOp
from theano.sandbox.cuda.basic_ops import gpu_flatten from theano.sandbox.cuda.basic_ops import gpu_flatten
from theano.tensor.extra_ops import CumsumOp from theano.tensor.extra_ops import CumOp
if cuda_available: if cuda_available:
from theano.sandbox.cuda import CudaNdarrayType from theano.sandbox.cuda import CudaNdarrayType
...@@ -13,7 +13,7 @@ if cuda_available: ...@@ -13,7 +13,7 @@ if cuda_available:
from theano.sandbox.cuda import register_opt as register_gpu_opt from theano.sandbox.cuda import register_opt as register_gpu_opt
class GpuCumsum(CumsumOp, GpuOp): class GpuCumsum(CumOp, GpuOp):
""" """
Parameters Parameters
...@@ -438,13 +438,16 @@ def values_eq_approx_high_tol(a, b): ...@@ -438,13 +438,16 @@ def values_eq_approx_high_tol(a, b):
@register_gpu_opt() @register_gpu_opt()
@local_optimizer([CumsumOp]) @local_optimizer([CumOp])
def use_gpu_cumsum(node): def use_gpu_cumsum(node):
if type(node.op) is CumsumOp \ if type(node.op) is CumOp \
and node.inputs[0].dtype == 'float32' \ and node.inputs[0].dtype == 'float32' \
and node.inputs[0].owner \ and node.inputs[0].owner \
and isinstance(node.inputs[0].owner.op, HostFromGpu): and isinstance(node.inputs[0].owner.op, HostFromGpu):
if node.op.mode != 'add':
return None
axis = node.op.axis axis = node.op.axis
x = node.inputs[0] x = node.inputs[0]
......
...@@ -7,7 +7,7 @@ import numpy as np ...@@ -7,7 +7,7 @@ import numpy as np
from six.moves import xrange from six.moves import xrange
from theano import tensor as T from theano import tensor as T
import theano import theano
from theano.tensor.extra_ops import cumsum, CumsumOp from theano.tensor.extra_ops import cumsum, CumOp
from theano.tests import unittest_tools as utt from theano.tests import unittest_tools as utt
import theano.sandbox.cuda as cuda_ndarray import theano.sandbox.cuda as cuda_ndarray
if cuda_ndarray.cuda_available: if cuda_ndarray.cuda_available:
...@@ -22,7 +22,7 @@ else: ...@@ -22,7 +22,7 @@ else:
mode_with_gpu = theano.compile.mode.get_default_mode().including('gpu') mode_with_gpu = theano.compile.mode.get_default_mode().including('gpu')
class TestGpuCumsum(theano.tensor.tests.test_extra_ops.TestCumsumOp): class TestGpuCumsum(theano.tensor.tests.test_extra_ops.TestCumOp):
mode = mode_with_gpu mode = mode_with_gpu
def setUp(self): def setUp(self):
...@@ -232,4 +232,4 @@ class TestGpuCumsum(theano.tensor.tests.test_extra_ops.TestCumsumOp): ...@@ -232,4 +232,4 @@ class TestGpuCumsum(theano.tensor.tests.test_extra_ops.TestCumsumOp):
x = T.ftensor4('x') x = T.ftensor4('x')
f = theano.function([x], cumsum(x, axis=1), mode=self.mode) f = theano.function([x], cumsum(x, axis=1), mode=self.mode)
assert [n for n in f.maker.fgraph.toposort() assert [n for n in f.maker.fgraph.toposort()
if isinstance(n.op, CumsumOp)] if isinstance(n.op, CumOp)]
...@@ -242,13 +242,16 @@ def searchsorted(x, v, side='left', sorter=None): ...@@ -242,13 +242,16 @@ def searchsorted(x, v, side='left', sorter=None):
return SearchsortedOp(side=side)(x, v, sorter) return SearchsortedOp(side=side)(x, v, sorter)
class CumsumOp(theano.Op): class CumOp(theano.Op):
# See function cumsum for docstring # See function cumsum/cumprod for docstring
__props__ = ("axis",) __props__ = ("axis", "mode")
def __init__(self, axis=None): def __init__(self, axis=None, mode='add'):
if mode not in ('add', 'mul'):
raise ValueError('%s: Unknown mode "%s"' % (type(self).__name__, mode))
self.axis = axis self.axis = axis
self.mode = mode
def make_node(self, x): def make_node(self, x):
x = basic.as_tensor_variable(x) x = basic.as_tensor_variable(x)
...@@ -264,20 +267,39 @@ class CumsumOp(theano.Op): ...@@ -264,20 +267,39 @@ class CumsumOp(theano.Op):
def perform(self, node, inputs, output_storage): def perform(self, node, inputs, output_storage):
x = inputs[0] x = inputs[0]
z = output_storage[0] z = output_storage[0]
z[0] = np.cumsum(x, axis=self.axis) z[0] = {'add': np.cumsum, 'mul': np.cumprod}[self.mode](x, axis=self.axis)
def grad(self, inputs, output_gradients): def grad(self, inputs, output_gradients):
[gi] = output_gradients x, = inputs
gi, = output_gradients
if self.axis is None: if self.axis is None:
return [cumsum(gi[::-1])[::-1].reshape(inputs[0].shape)] if self.mode == 'add':
return [cumsum(gi[::-1])[::-1].reshape(x.shape)]
elif self.mode == 'mul':
fx = cumprod(x, axis=self.axis)
return [cumsum(
(fx * gi)[::-1])[::-1].reshape(x.shape) / x]
else:
raise NotImplementedError(
'%s: unknown gradient for mode "%s"' %
(type(self).__name__, self.mode))
# We need to reverse the gradients along ``self.axis``,
# compute cumsum, then reverse again
reverse_slicing = [slice(None, None, None)] * gi.ndim reverse_slicing = [slice(None, None, None)] * gi.ndim
reverse_slicing[self.axis] = slice(None, None, -1) reverse_slicing[self.axis] = slice(None, None, -1)
reverse_slicing = tuple(reverse_slicing) reverse_slicing = tuple(reverse_slicing)
# We need to reverse the gradients along ``self.axis``,
# compute cumsum, then reverse again
if self.mode == 'add':
return [cumsum(gi[reverse_slicing], self.axis)[reverse_slicing]] return [cumsum(gi[reverse_slicing], self.axis)[reverse_slicing]]
elif self.mode == 'mul':
fx = cumprod(x, axis=self.axis)
return [cumsum(
(fx * gi)[reverse_slicing], self.axis)[reverse_slicing] / x]
else:
raise NotImplementedError(
'%s: unknown gradient for mode "%s"' %
(type(self).__name__, self.mode))
def infer_shape(self, node, shapes): def infer_shape(self, node, shapes):
if self.axis is None: if self.axis is None:
...@@ -290,6 +312,7 @@ class CumsumOp(theano.Op): ...@@ -290,6 +312,7 @@ class CumsumOp(theano.Op):
z, = onames z, = onames
axis = self.axis axis = self.axis
fail = sub['fail'] fail = sub['fail']
func = dict(mul='CumProd', add='CumSum')[self.mode]
if self.axis is None or (self.axis == 0 and node.inputs[0].ndim == 1): if self.axis is None or (self.axis == 0 and node.inputs[0].ndim == 1):
code = """ code = """
...@@ -303,13 +326,13 @@ class CumsumOp(theano.Op): ...@@ -303,13 +326,13 @@ class CumsumOp(theano.Op):
if (!%(z)s) if (!%(z)s)
%(fail)s; %(fail)s;
{ {
PyObject * t = PyArray_CumSum( PyObject * t = PyArray_%(func)s(
%(x)s, NPY_MAXDIMS, %(x)s, NPY_MAXDIMS,
PyArray_TYPE((PyArrayObject*) py_%(x)s), %(z)s); PyArray_TYPE((PyArrayObject*) py_%(x)s), %(z)s);
if (!t){ if (!t){
%(fail)s; %(fail)s;
} }
// Because PyArray_CumSum returns a newly created reference on t. // Because PyArray_%(func)s returns a newly created reference on t.
Py_XDECREF(t); Py_XDECREF(t);
} }
""" % locals() """ % locals()
...@@ -325,13 +348,13 @@ class CumsumOp(theano.Op): ...@@ -325,13 +348,13 @@ class CumsumOp(theano.Op):
%(fail)s; %(fail)s;
{ {
PyObject * t = PyArray_CumSum( PyObject * t = PyArray_%(func)s(
%(x)s, %(axis)s, %(x)s, %(axis)s,
PyArray_TYPE((PyArrayObject*) py_%(x)s), %(z)s); PyArray_TYPE((PyArrayObject*) py_%(x)s), %(z)s);
if (!t){ if (!t){
%(fail)s; %(fail)s;
} }
// Because PyArray_CumSum returns a newly created reference on t. // Because PyArray_%(func)s returns a newly created reference on t.
Py_XDECREF(t); Py_XDECREF(t);
} }
""" % locals() """ % locals()
...@@ -339,10 +362,10 @@ class CumsumOp(theano.Op): ...@@ -339,10 +362,10 @@ class CumsumOp(theano.Op):
return code return code
def c_code_cache_version(self): def c_code_cache_version(self):
return (6,) return (7,)
def __str__(self): def __str__(self):
return "%s{%s}" % (self.__class__.__name__, self.axis) return "%s{%s, %s}" % (self.__class__.__name__, self.axis, self.mode)
def cumsum(x, axis=None): def cumsum(x, axis=None):
...@@ -362,112 +385,7 @@ def cumsum(x, axis=None): ...@@ -362,112 +385,7 @@ def cumsum(x, axis=None):
.. versionadded:: 0.7 .. versionadded:: 0.7
""" """
return CumsumOp(axis=axis)(x) return CumOp(axis=axis, mode='add')(x)
class CumprodOp(theano.Op):
    """
    Compute the cumulative product of a tensor along a given axis.

    See the ``cumprod`` helper function for the user-facing docstring.

    Parameters
    ----------
    axis : int or None
        Axis along which the product is accumulated. ``None`` flattens
        the input first, mirroring ``numpy.cumprod``.
    """

    # Op identity (equality/hashing) is determined solely by ``axis``.
    __props__ = ("axis",)

    def __init__(self, axis=None):
        self.axis = axis

    def make_node(self, x):
        """Build the Apply node; the output keeps the input's dtype."""
        x = basic.as_tensor_variable(x)
        out_type = x.type()

        if self.axis is None:
            # axis=None flattens the input, so the output is always 1-D.
            out_type = theano.tensor.vector(dtype=x.dtype)  # Flatten
        elif self.axis >= x.ndim or self.axis < -x.ndim:
            raise ValueError('axis(={0}) out of bounds'.format(self.axis))

        return theano.Apply(self, [x], [out_type])

    def perform(self, node, inputs, output_storage):
        """Python fallback implementation: delegate directly to NumPy."""
        x = inputs[0]
        z = output_storage[0]
        z[0] = np.cumprod(x, axis=self.axis)

    def grad(self, inputs, output_gradients):
        """
        Gradient of cumprod: with ``fx = cumprod(x)``, the vector-Jacobian
        product is ``reversed-cumsum(fx * gi) / x``.

        NOTE(review): the division by ``x`` leaves the gradient undefined
        where ``x`` contains zeros — that is inherent to this standard
        formula, not something introduced here.
        """
        x, = inputs
        gi, = output_gradients
        fx = cumprod(x, axis=self.axis)

        if self.axis is None:
            # Flattened case: reverse, cumsum, reverse, restore input shape.
            return [cumsum((fx * gi)[::-1])[::-1].reshape(inputs[0].shape) / x]

        # We need to reverse the gradients along ``self.axis``,
        # compute cumsum, then reverse again.
        reverse_slicing = [slice(None, None, None)] * gi.ndim
        reverse_slicing[self.axis] = slice(None, None, -1)
        reverse_slicing = tuple(reverse_slicing)
        return [cumsum((fx * gi)[reverse_slicing],
                       self.axis)[reverse_slicing] / x]

    def infer_shape(self, node, shapes):
        # axis=None flattens: output length is the product of input dims.
        if self.axis is None:
            return [(tensor.prod(shapes[0]),)]  # Flatten

        # Otherwise the output shape equals the input shape.
        return shapes

    def c_code(self, node, name, inames, onames, sub):
        """
        Emit C code that calls NumPy's ``PyArray_CumProd``.

        NOTE(review): the ``// Because PyArray_CumSum ...`` comments inside
        the C strings are copy-paste leftovers from CumsumOp — the call is
        correctly ``PyArray_CumProd``. They are kept byte-identical here to
        avoid invalidating the compiled-code cache.
        """
        x, = inames
        z, = onames
        axis = self.axis
        fail = sub['fail']

        if self.axis is None or (self.axis == 0 and node.inputs[0].ndim == 1):
            # Flattened (or trivially 1-D) case: NPY_MAXDIMS means "flatten".
            code = """
                npy_intp shape[1] = { PyArray_SIZE(%(x)s) };
                if(!(%(z)s && PyArray_DIMS(%(z)s)[0] == shape[0]))
                {
                    Py_XDECREF(%(z)s);
                    %(z)s = (PyArrayObject*) PyArray_SimpleNew(1, shape, PyArray_TYPE((PyArrayObject*) py_%(x)s));
                }

                if (!%(z)s)
                    %(fail)s;
                {
                    PyObject * t = PyArray_CumProd(
                        %(x)s, NPY_MAXDIMS,
                        PyArray_TYPE((PyArrayObject*) py_%(x)s), %(z)s);
                    if (!t){
                        %(fail)s;
                    }
                    // Because PyArray_CumSum returns a newly created reference on t.
                    Py_XDECREF(t);
                }
            """ % locals()
        else:
            # General case: accumulate along the requested axis.
            code = """
                if(!(%(z)s && PyArray_CompareLists(PyArray_DIMS(%(z)s), PyArray_DIMS(%(x)s), PyArray_NDIM(%(x)s)) ))
                {
                    Py_XDECREF(%(z)s);
                    %(z)s = (PyArrayObject*) PyArray_SimpleNew(PyArray_NDIM(%(x)s), PyArray_DIMS(%(x)s), PyArray_TYPE((PyArrayObject*) py_%(x)s));
                }

                if (!%(z)s)
                    %(fail)s;
                {
                    PyObject * t = PyArray_CumProd(
                        %(x)s, %(axis)s,
                        PyArray_TYPE((PyArrayObject*) py_%(x)s), %(z)s);
                    if (!t){
                        %(fail)s;
                    }
                    // Because PyArray_CumSum returns a newly created reference on t.
                    Py_XDECREF(t);
                }
            """ % locals()

        return code

    def c_code_cache_version(self):
        # Bump whenever the generated C code changes.
        return (4,)

    def __str__(self):
        return "%s{%s}" % (self.__class__.__name__, self.axis)
def cumprod(x, axis=None): def cumprod(x, axis=None):
...@@ -488,7 +406,27 @@ def cumprod(x, axis=None): ...@@ -488,7 +406,27 @@ def cumprod(x, axis=None):
.. versionadded:: 0.7 .. versionadded:: 0.7
""" """
return CumprodOp(axis=axis)(x) return CumOp(axis=axis, mode='mul')(x)
# CumsumOp and CumprodOp are for compatibility with old version,
# just in case unpickling a theano function with old Ops.
class CumsumOp(theano.Op):
    """
    Backward-compatibility stub for the pre-merge ``CumsumOp``.

    Kept only so that functions pickled before CumsumOp/CumprodOp were
    merged into ``CumOp`` can still be unpickled: constructing this class
    actually yields a ``CumOp`` instance with ``mode='add'`` (the pickle
    machinery then restores the remaining attributes).
    """

    __props__ = ("axis",)

    def __new__(typ, *args, **kwargs):
        inst = object.__new__(CumOp, *args, **kwargs)
        inst.mode = 'add'
        return inst
class CumprodOp(theano.Op):
    """
    Backward-compatibility stub for the pre-merge ``CumprodOp``.

    Exists solely so that old pickled graphs load correctly: constructing
    this class actually yields a ``CumOp`` instance with ``mode='mul'``
    (the pickle machinery then restores the remaining attributes).
    """

    __props__ = ("axis",)

    def __new__(typ, *args, **kwargs):
        inst = object.__new__(CumOp, *args, **kwargs)
        inst.mode = 'mul'
        return inst
class DiffOp(theano.Op): class DiffOp(theano.Op):
......
from __future__ import absolute_import, print_function, division from __future__ import absolute_import, print_function, division
from functools import partial
import numpy as np import numpy as np
import numpy import numpy
...@@ -7,7 +8,7 @@ import theano ...@@ -7,7 +8,7 @@ import theano
from theano.tests import unittest_tools as utt from theano.tests import unittest_tools as utt
from theano.tensor.extra_ops import (SearchsortedOp, searchsorted, from theano.tensor.extra_ops import (SearchsortedOp, searchsorted,
CumsumOp, cumsum, CumprodOp, cumprod, CumOp, cumsum, cumprod,
CpuContiguous, cpu_contiguous, CpuContiguous, cpu_contiguous,
bincount, DiffOp, diff, squeeze, compress, bincount, DiffOp, diff, squeeze, compress,
RepeatOp, repeat, Bartlett, bartlett, RepeatOp, repeat, Bartlett, bartlett,
...@@ -121,74 +122,33 @@ class TestSearchsortedOp(utt.InferShapeTester): ...@@ -121,74 +122,33 @@ class TestSearchsortedOp(utt.InferShapeTester):
utt.verify_grad(self.op, [self.a[self.idx_sorted], self.b]) utt.verify_grad(self.op, [self.a[self.idx_sorted], self.b])
class TestCumsumOp(utt.InferShapeTester): class TestCumOp(utt.InferShapeTester):
def setUp(self): def setUp(self):
super(TestCumsumOp, self).setUp() super(TestCumOp, self).setUp()
self.op_class = CumsumOp self.op_class = CumOp
self.op = CumsumOp() self.op = CumOp()
def test_cumsumOp(self): def test_cum_op(self):
x = T.tensor3('x') x = T.tensor3('x')
a = np.random.random((3, 5, 2)).astype(config.floatX) a = np.random.random((3, 5, 2)).astype(config.floatX)
# Test axis out of bounds # Test axis out of bounds
self.assertRaises(ValueError, cumsum, x, axis=3) self.assertRaises(ValueError, cumsum, x, axis=3)
self.assertRaises(ValueError, cumsum, x, axis=-4) self.assertRaises(ValueError, cumsum, x, axis=-4)
f = theano.function([x], cumsum(x))
assert np.allclose(np.cumsum(a), f(a)) # Test axis=None
for axis in range(-len(a.shape), len(a.shape)):
f = theano.function([x], cumsum(x, axis=axis))
assert np.allclose(np.cumsum(a, axis=axis), f(a))
def test_infer_shape(self):
    """Shape inference must match the actual runtime output shape."""
    x = T.tensor3('x')
    data = np.random.random((3, 5, 2)).astype(config.floatX)

    # Flattened (axis=None) case.
    self._compile_and_check([x],
                            [self.op(x)],
                            [data],
                            self.op_class)

    # Every valid axis, negative indices included.
    for ax in range(-data.ndim, data.ndim):
        self._compile_and_check([x],
                                [cumsum(x, axis=ax)],
                                [data],
                                self.op_class)
def test_grad(self):
    """Numerically verify the symbolic gradient of the op."""
    data = np.random.random((3, 5, 2)).astype(config.floatX)

    # axis=None first.
    utt.verify_grad(self.op, [data])

    # Then every valid axis; eps loosened for float32 stability.
    for ax in range(-data.ndim, data.ndim):
        utt.verify_grad(self.op_class(axis=ax), [data], eps=4e-4)
class TestCumprodOp(utt.InferShapeTester):
def setUp(self):
    # Bind the op class and a default instance used by the shared
    # InferShapeTester helpers and the gradient/shape tests below.
    super(TestCumprodOp, self).setUp()
    self.op_class = CumprodOp
    self.op = CumprodOp()
def test_CumprodOp(self):
x = T.tensor3('x')
a = np.random.random((3, 5, 2)).astype(config.floatX)
# Test axis out of bounds
self.assertRaises(ValueError, cumprod, x, axis=3) self.assertRaises(ValueError, cumprod, x, axis=3)
self.assertRaises(ValueError, cumprod, x, axis=-4) self.assertRaises(ValueError, cumprod, x, axis=-4)
f = theano.function([x], cumprod(x)) f = theano.function([x], [cumsum(x), cumprod(x)])
assert np.allclose(np.cumprod(a), f(a)) # Test axis=None s, p = f(a)
assert np.allclose(np.cumsum(a), s) # Test axis=None
assert np.allclose(np.cumprod(a), p) # Test axis=None
for axis in range(-len(a.shape), len(a.shape)): for axis in range(-len(a.shape), len(a.shape)):
f = theano.function([x], cumprod(x, axis=axis)) f = theano.function([x], [cumsum(x, axis=axis), cumprod(x, axis=axis)])
assert np.allclose(np.cumprod(a, axis=axis), f(a)) s, p = f(a)
assert np.allclose(np.cumsum(a, axis=axis), s)
assert np.allclose(np.cumprod(a, axis=axis), p)
def test_infer_shape(self): def test_infer_shape(self):
x = T.tensor3('x') x = T.tensor3('x')
...@@ -202,17 +162,19 @@ class TestCumprodOp(utt.InferShapeTester): ...@@ -202,17 +162,19 @@ class TestCumprodOp(utt.InferShapeTester):
for axis in range(-len(a.shape), len(a.shape)): for axis in range(-len(a.shape), len(a.shape)):
self._compile_and_check([x], self._compile_and_check([x],
[cumprod(x, axis=axis)], [cumsum(x, axis=axis)],
[a], [a],
self.op_class) self.op_class)
def test_grad(self): def test_grad(self):
a = np.random.random((3, 5, 2)).astype(config.floatX) a = np.random.random((3, 5, 2)).astype(config.floatX)
utt.verify_grad(self.op, [a]) # Test axis=None utt.verify_grad(self.op_class(mode='add'), [a]) # Test axis=None
utt.verify_grad(self.op_class(mode='mul'), [a]) # Test axis=None
for axis in range(-len(a.shape), len(a.shape)): for axis in range(-len(a.shape), len(a.shape)):
utt.verify_grad(self.op_class(axis=axis), [a]) utt.verify_grad(self.op_class(axis=axis, mode='add'), [a], eps=4e-4)
utt.verify_grad(self.op_class(axis=axis, mode='mul'), [a], eps=4e-4)
class TestBinCount(utt.InferShapeTester): class TestBinCount(utt.InferShapeTester):
......
Markdown formatting supported
0%
You are adding 0 people to this discussion. Please proceed with caution.
Finish editing this comment first!
Register or sign in to post a comment