提交 02835cce authored 作者: Marc-Alexandre Cote's avatar Marc-Alexandre Cote 提交者: Christos Tsirigotis

Add the searchsorted function similar to the numpy's one.

- Make sure perform outputs an numpy.ndarray. - Add C implementation of searchsorted - Fix PEP8 format - Fix props, make_node, perform of SearchsortedOp - Add searchsorted method (wrapper) on tensor variable - Fix c_code and grad in SearchsortedOp, fix tests - Update documentation
上级 6fcd2cd0
...@@ -147,6 +147,8 @@ def as_tensor_variable(x, name=None, ndim=None): ...@@ -147,6 +147,8 @@ def as_tensor_variable(x, name=None, ndim=None):
If an `Apply` with more than one output is fetched. If an `Apply` with more than one output is fetched.
AsTensorError AsTensorError
If `x` cannot be converted to a TensorType Variable. If `x` cannot be converted to a TensorType Variable.
TypeError
If `x` cannot be made into a Variable with `ndim` dimensions.
""" """
if hasattr(x, '_as_TensorVariable'): if hasattr(x, '_as_TensorVariable'):
......
...@@ -68,6 +68,132 @@ class CpuContiguous(theano.Op): ...@@ -68,6 +68,132 @@ class CpuContiguous(theano.Op):
cpu_contiguous = CpuContiguous() cpu_contiguous = CpuContiguous()
class SearchsortedOp(theano.Op):
"""Wrapper of numpy.searchsorted.
For full documentation, see :func:`searchsorted`.
See Also
--------
searchsorted : numpy-like function to use the SearchsortedOp
"""
__props__ = ("side", )
def __init__(self, side='left'):
self.side = side
def make_node(self, x, v, sorter=None):
x = basic.as_tensor(x, ndim=1)
v = basic.as_tensor(v)
out_type = v.type.clone(dtype='int64')
if sorter is None:
return theano.Apply(self, [x, v], [out_type()])
else:
sorter = basic.as_tensor(sorter, ndim=1)
if sorter.type not in basic.int_vector_types:
raise TypeError('sorter must be an integer vector',
sorter.type)
return theano.Apply(self, [x, v, sorter], [out_type()])
def infer_shape(self, node, shapes):
return [shapes[1]]
def perform(self, node, inputs, output_storage):
x = inputs[0]
v = inputs[1]
if len(node.inputs) == 3:
sorter = inputs[2]
else:
sorter = None
z = output_storage[0]
z[0] = np.searchsorted(x, v, side=self.side, sorter=sorter)
def c_code(self, node, name, inames, onames, sub):
sorter = None
if len(node.inputs) == 3:
x, v, sorter = inames
else:
x, v = inames
if not sorter:
sorter = "NULL"
z, = onames
side = "NPY_SEARCHRIGHT" if self.side == 'right' else "NPY_SEARCHLEFT"
fail = sub['fail']
return """
Py_XDECREF(%(z)s);
%(z)s = (PyArrayObject*) PyArray_SearchSorted(%(x)s, (PyObject*) %(v)s,
%(side)s, (PyObject*) %(sorter)s);
if (!%(z)s)
%(fail)s;
""" % locals()
def c_code_cache_version(self):
return (0, 1, 2)
def grad(self, inputs, output_gradients):
num_ins = len(inputs)
if num_ins == 3:
x, v, sorter = inputs
else:
x, v = inputs
x_grad = x.zeros_like()
if v.ndim == 1:
v_grad = v.zeros_like()
else:
v_grad = theano.gradient.grad_not_implemented(
self, 1, v, "Grad is not implemented for inputs with "
"number of dimension other than 1.")
if num_ins == 3:
sorter_grad = theano.gradient.grad_undefined(
self, 2, sorter,
"searchsorted is not defined for non-integer sorter so "
"searchsorted(x, nb, sorter+eps), for eps > 0, "
"is undefined")
return [x_grad, v_grad, sorter_grad]
else:
return [x_grad, v_grad]
def searchsorted(x, v, side='left', sorter=None):
"""Find indices where elements should be inserted to maintain order.
Wraping of numpy.searchsorted. Find the indices into a sorted array
`x` such that, if the corresponding elements in `v` were inserted
before the indices, the order of `x` would be preserved.
Parameters
----------
x: 1-D tensor (array-like)
Input array. If `sorter` is None, then it must be sorted in
ascending order, otherwise `sorter` must be an array of indices
that sort it.
v: tensor (array-like)
Contains the values to be inserted into `x`.
side: {'left', 'right'}, optional.
If 'left' (default), the index of the first suitable
location found is given. If 'right', return the last such index. If
there is no suitable index, return either 0 or N (where N is the length
of `x`).
sorter: 1-D tensor of integers (array-like), optional
Contains indices that sort array `x` into ascending order.
They are typically the result of argsort.
.. versionadded:: 0.8.2
Returns
-------
indices : tensor of integers (int64)
Array of insertion points with the same shape as `v`.
"""
return SearchsortedOp(side=side)(x, v, sorter)
class CumsumOp(theano.Op): class CumsumOp(theano.Op):
# See function cumsum for docstring # See function cumsum for docstring
......
...@@ -6,7 +6,9 @@ import numpy ...@@ -6,7 +6,9 @@ import numpy
import theano import theano
from theano.tests import unittest_tools as utt from theano.tests import unittest_tools as utt
from theano.tensor.extra_ops import (CumsumOp, cumsum, CumprodOp, cumprod,
from theano.tensor.extra_ops import (SearchsortedOp, searchsorted,
CumsumOp, cumsum, CumprodOp, cumprod,
CpuContiguous, cpu_contiguous, BinCountOp, CpuContiguous, cpu_contiguous, BinCountOp,
bincount, DiffOp, diff, squeeze, compress, bincount, DiffOp, diff, squeeze, compress,
RepeatOp, repeat, Bartlett, bartlett, RepeatOp, repeat, Bartlett, bartlett,
...@@ -37,6 +39,123 @@ def test_cpu_contiguous(): ...@@ -37,6 +39,123 @@ def test_cpu_contiguous():
[numpy.random.rand(5, 7, 2)]) [numpy.random.rand(5, 7, 2)])
class TestSearchsortedOp(utt.InferShapeTester):
def setUp(self):
super(TestSearchsortedOp, self).setUp()
self.op_class = SearchsortedOp
self.op = SearchsortedOp()
self.x = T.vector('x')
self.v = T.tensor3('v')
self.a = np.random.random(100).astype(config.floatX)
self.b = np.random.random((10, 20, 5)).astype(config.floatX)
self.idx_sorted = np.argsort(self.a)
def tearDown(self):
self.x = None
self.v = None
self.a = None
self.b = None
self.idx_sorted = None
def test_searchsortedOp_on_sorted_input(self):
f = theano.function([self.x, self.v], searchsorted(self.x, self.v),
mode="DebugMode")
assert np.allclose(
np.searchsorted(self.a[self.idx_sorted], self.b),
f(self.a[self.idx_sorted], self.b))
def test_searchsortedOp_on_none_sorter(self):
# Current implementation of numpy.searchsorted
# does not raise an error if `x` is not sorted and sorter is None.
sorter = T.vector('sorter', dtype="int64")
f = theano.function([self.x, self.v, sorter],
searchsorted(self.x, self.v, sorter=sorter))
# assert np.allclose(
# np.searchsorted(self.a, self.b, sorter=None),
# f(self.a, self.b, sorter=None))
self.assertRaises(ValueError, f,
self.a[self.idx_sorted], self.b, None)
def test_searchsortedOp_on_float_sorter(self):
sorter = T.vector('sorter', dtype="float32")
self.assertRaises(TypeError, searchsorted,
self.x, self.v, sorter=sorter)
def test_searchsortedOp_on_int_sorter(self):
compatible_types = ('int8', 'int16', 'int32', 'int64',)
# 'uint8', 'uint16', 'uint32', 'uint64')
for dtype in compatible_types:
sorter = T.vector('sorter', dtype=dtype)
f = theano.function([self.x, self.v, sorter],
searchsorted(self.x, self.v, sorter=sorter),
mode="DebugMode", allow_input_downcast=True)
assert np.allclose(
np.searchsorted(self.a, self.b, sorter=self.idx_sorted),
f(self.a, self.b, self.idx_sorted))
def test_searchsortedOp_on_right_side(self):
f = theano.function([self.x, self.v],
searchsorted(self.x, self.v, side='right'),
mode="DebugMode")
assert np.allclose(
np.searchsorted(self.a, self.b, side='right'),
f(self.a, self.b))
def test_use_c_code(self):
f = theano.function([self.x, self.v], searchsorted(self.x, self.v),
mode="FAST_RUN")
assert np.allclose(
np.searchsorted(self.a[self.idx_sorted], self.b),
f(self.a[self.idx_sorted], self.b))
f = theano.function([self.x, self.v], searchsorted(self.x, self.v),
mode=theano.compile.Mode(linker="c",
optimizer='fast_run'))
assert np.allclose(
np.searchsorted(self.a[self.idx_sorted], self.b),
f(self.a[self.idx_sorted], self.b))
def test_infer_shape(self):
# Test using default parameters' value
self._compile_and_check([self.x, self.v],
[searchsorted(self.x, self.v)],
[self.a[self.idx_sorted], self.b],
self.op_class)
# Test parameter ``sorter``
sorter = T.vector('sorter', dtype="int64")
self._compile_and_check([self.x, self.v, sorter],
[searchsorted(self.x, self.v, sorter=sorter)],
[self.a, self.b, self.idx_sorted],
self.op_class)
# Test parameter ``side``
self.a = np.ones(10).astype(config.floatX)
self.b = np.ones(shape=(1, 2, 3)).astype(config.floatX)
self._compile_and_check([self.x, self.v],
[searchsorted(self.x, self.v, side='right')],
[self.a, self.b],
self.op_class)
def test_grad(self):
self.a = np.random.random(100).astype(config.floatX)
self.b = np.random.random((1, 2, 5)).astype(config.floatX)
self.idx_sorted = np.argsort(self.a)
self.assertRaises(theano.gradient.NullTypeGradError,
utt.verify_grad, self.op,
[self.a[self.idx_sorted], self.b])
self.a = np.random.random(100).astype(config.floatX)
self.b = np.random.random(10).astype(config.floatX)
self.idx_sorted = np.argsort(self.a)
utt.verify_grad(self.op, [self.a[self.idx_sorted], self.b])
class TestCumsumOp(utt.InferShapeTester): class TestCumsumOp(utt.InferShapeTester):
def setUp(self): def setUp(self):
......
...@@ -692,6 +692,9 @@ class _tensor_py_operators(object): ...@@ -692,6 +692,9 @@ class _tensor_py_operators(object):
def cumprod(self, axis=None): def cumprod(self, axis=None):
return theano.tensor.extra_ops.cumprod(self, axis) return theano.tensor.extra_ops.cumprod(self, axis)
def searchsorted(self, v, side='left', sorter=None):
return theano.tensor.extra_ops.searchsorted(self, v, side, sorter)
def ptp(self, axis=None): def ptp(self, axis=None):
"""See 'theano.tensor.ptp'.""" """See 'theano.tensor.ptp'."""
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论