提交 adffdc1c authored 作者: Frédéric Bastien's avatar Frédéric Bastien

Merge pull request #4422 from tsirif/develop

Develop SearchsortedOp to wrap numpy's searchsorted function
......@@ -138,13 +138,13 @@ def as_tensor_variable(x, name=None, ndim=None):
If a new `Variable` instance is created, it will be named with this
string.
ndim : None or integer
Return a Variable with this many dimensions. Raise TypeError if it's
not possible.
Return a Variable with this many dimensions.
Raises
------
ValueError
If an `Apply` with more than one output is fetched.
If an `Apply` with more than one output is fetched or
if `x` cannot be made into a Variable with `ndim` dimensions.
AsTensorError
If `x` cannot be converted to a TensorType Variable.
......
......@@ -8,7 +8,9 @@ import theano
from theano.tensor import basic
from theano.tensor import nlinalg # noqa
from theano import gof, scalar
from theano.gradient import DisconnectedType
from theano.gof import Generic
from theano import gradient
from theano.gradient import DisconnectedType, disconnected_type
tensor = basic
......@@ -68,6 +70,168 @@ class CpuContiguous(theano.Op):
cpu_contiguous = CpuContiguous()
class SearchsortedOp(theano.Op):
    """Wrapper of numpy.searchsorted.

    For full documentation, see :func:`searchsorted`.

    Parameters
    ----------
    side : {'left', 'right'}
        Which insertion point to report when several are valid. It is
        forwarded unchanged to ``numpy.searchsorted`` (Python path) and
        selects ``NPY_SEARCHLEFT`` / ``NPY_SEARCHRIGHT`` (C path).

    Raises
    ------
    ValueError
        If `side` is neither ``'left'`` nor ``'right'``.

    See Also
    --------
    searchsorted : numpy-like function to use the SearchsortedOp

    """

    # ``side`` is handed to perform() and to the generated C code as a
    # generic Python object through the params mechanism.
    params_type = Generic()
    __props__ = ("side", )

    def __init__(self, side='left'):
        if side not in ('left', 'right'):
            raise ValueError(
                "'%(side)s' is an invalid value for keyword 'side'"
                % {'side': side})
        self.side = side

    def get_params(self, node):
        """Return the value exposed as ``params``: the `side` string."""
        return self.side

    def make_node(self, x, v, sorter=None):
        """Build the Apply node for ``searchsorted(x, v[, sorter])``.

        `x` (and `sorter`, when given) are converted with ``ndim=1``; `v`
        may have any number of dimensions. The single output has the type
        of `v` with its dtype replaced by ``int64``.

        Raises
        ------
        TypeError
            If `sorter` is given and is not an integer vector.

        """
        x = basic.as_tensor(x, ndim=1)
        v = basic.as_tensor(v)
        out_type = v.type.clone(dtype='int64')

        node_inputs = [x, v]
        if sorter is not None:
            sorter = basic.as_tensor(sorter, ndim=1)
            if sorter.type not in basic.int_vector_types:
                raise TypeError('sorter must be an integer vector',
                                sorter.type)
            node_inputs.append(sorter)
        return theano.Apply(self, node_inputs, [out_type()])

    def infer_shape(self, node, shapes):
        """The output has exactly the shape of `v` (the second input)."""
        return [shapes[1]]

    def perform(self, node, inputs, output_storage, params):
        """Python implementation: delegate to ``numpy.searchsorted``."""
        x, v = inputs[0], inputs[1]
        # The optional third input is the sorter.
        sorter = inputs[2] if len(node.inputs) == 3 else None

        indices = np.searchsorted(x, v, side=params, sorter=sorter)
        # make_node() declares an int64 output, but NumPy reports indices
        # in its native index type, which is not int64 on every platform.
        # Coerce so the stored value always matches the declared type.
        output_storage[0][0] = np.asarray(indices,
                                          dtype=node.outputs[0].dtype)

    def c_support_code_struct(self, node, name):
        """Declare the per-node flag: non-zero when ``side == 'right'``."""
        return """
            int right_%(name)s;
        """ % {'name': name}

    def c_init_code_struct(self, node, name, sub):
        """Compute the ``right_<name>`` flag once, at struct init time."""
        side = sub['params']
        fail = sub['fail']
        # PyUnicode_Compare() returns 0 when both strings are equal, so the
        # comparison against 0 turns it into a proper boolean flag.
        return """
            PyObject* tmp_%(name)s = PyUnicode_FromString("right");
            if (tmp_%(name)s == NULL)
                %(fail)s;
            right_%(name)s = (PyUnicode_Compare(%(side)s, tmp_%(name)s) == 0);
            Py_DECREF(tmp_%(name)s);
        """ % {'name': name, 'side': side, 'fail': fail}

    def c_code(self, node, name, inames, onames, sub):
        """C implementation based on ``PyArray_SearchSorted``."""
        if len(node.inputs) == 3:
            x, v, sorter = inames
        else:
            x, v = inames
            # Without a sorter input, NULL is passed to NumPy instead.
            sorter = "NULL"
        z, = onames
        fail = sub['fail']

        # The result is cast when NumPy's index type is not int64, so that
        # the output agrees with the int64 type declared in make_node().
        return """
            Py_XDECREF(%(z)s);
            %(z)s = (PyArrayObject*) PyArray_SearchSorted(%(x)s, (PyObject*) %(v)s,
                right_%(name)s ? NPY_SEARCHRIGHT : NPY_SEARCHLEFT, (PyObject*) %(sorter)s);
            if (!%(z)s)
                %(fail)s;
            if (PyArray_TYPE(%(z)s) != NPY_INT64) {
                PyObject* cast_%(name)s = PyArray_Cast(%(z)s, NPY_INT64);
                Py_XDECREF(%(z)s);
                %(z)s = (PyArrayObject*) cast_%(name)s;
                if (!%(z)s)
                    %(fail)s;
            }
        """ % {'x': x, 'v': v, 'sorter': sorter, 'z': z,
               'name': name, 'fail': fail}

    def c_code_cache_version(self):
        # Bumped from 1: the generated C code changed (int64 cast and
        # boolean ``right_`` flag).
        return (2,)

    def grad(self, inputs, output_gradients):
        """Return zero gradients for `x` and `v`; `sorter` is disconnected."""
        x, v = inputs[:2]
        grads = [gradient._float_zeros_like(x),
                 gradient._float_zeros_like(v)]
        if len(inputs) == 3:
            grads.append(disconnected_type())
        return grads
def searchsorted(x, v, side='left', sorter=None):
    """Locate the insertion points that keep a sorted vector ordered.

    Thin wrapper around numpy.searchsorted: for every element of `v`,
    return the index in the sorted array `x` before which that element
    could be inserted without breaking the ordering of `x`.

    Parameters
    ----------
    x: 1-D tensor (array-like)
        Input array. When `sorter` is None it must already be sorted in
        ascending order; otherwise `sorter` must be an array of indices
        that sorts it.
    v: tensor (array-like)
        Values to be inserted into `x`.
    side: {'left', 'right'}, optional.
        With 'left' (the default) the first suitable index is returned,
        with 'right' the last one. If there is no suitable index, either
        0 or N is returned (where N is the length of `x`).
    sorter: 1-D tensor of integers (array-like), optional
        Indices that sort `x` into ascending order, typically the result
        of argsort.

    Returns
    -------
    indices : tensor of integers (int64)
        Insertion points, with the same shape as `v`.

    See Also
    --------
    `numpy.searchsorted <https://docs.scipy.org/doc/numpy-1.10.0/reference/generated/numpy.searchsorted.html>`_

    Notes
    -----
    * The insertion points are found with a binary search.
    * This Op is working **only on CPU** currently.

    Examples
    --------
    >>> from theano import tensor
    >>> x = tensor.dvector()
    >>> idx = x.searchsorted(3)
    >>> idx.eval({x: [1,2,3,4,5]})
    array(2)
    >>> tensor.extra_ops.searchsorted([1,2,3,4,5], 3).eval()
    array(2)
    >>> tensor.extra_ops.searchsorted([1,2,3,4,5], 3, side='right').eval()
    array(3)
    >>> tensor.extra_ops.searchsorted([1,2,3,4,5], [-10, 10, 2, 3]).eval()
    array([0, 5, 1, 2])

    .. versionadded:: 0.9

    """
    # `side` is a property of the Op; x, v and sorter are its graph inputs.
    op = SearchsortedOp(side=side)
    return op(x, v, sorter)
class CumsumOp(theano.Op):
# See function cumsum for docstring
......
from __future__ import absolute_import, print_function, division
import unittest
import numpy as np
import numpy
import theano
from theano.tests import unittest_tools as utt
from theano.tensor.extra_ops import (CumsumOp, cumsum, CumprodOp, cumprod,
from theano.tensor.extra_ops import (SearchsortedOp, searchsorted,
CumsumOp, cumsum, CumprodOp, cumprod,
CpuContiguous, cpu_contiguous, BinCountOp,
bincount, DiffOp, diff, squeeze, compress,
RepeatOp, repeat, Bartlett, bartlett,
......@@ -37,6 +38,90 @@ def test_cpu_contiguous():
[numpy.random.rand(5, 7, 2)])
class TestSearchsortedOp(utt.InferShapeTester):
    """Tests for SearchsortedOp and the `searchsorted` helper.

    Results of the compiled Theano functions are compared against
    numpy.searchsorted on random data.
    """

    def setUp(self):
        super(TestSearchsortedOp, self).setUp()
        self.op_class = SearchsortedOp
        self.op = SearchsortedOp()

        self.x = T.vector('x')
        self.v = T.tensor3('v')

        # `a` is deliberately left unsorted; tests either index it with
        # `idx_sorted` or pass `idx_sorted` as the sorter.
        self.a = 30 * np.random.random(50).astype(config.floatX)
        self.b = 30 * np.random.random((8, 10, 5)).astype(config.floatX)
        self.idx_sorted = np.argsort(self.a)

    def test_searchsortedOp_on_sorted_input(self):
        sorted_a = self.a[self.idx_sorted]

        # Default arguments on already sorted input.
        f = theano.function([self.x, self.v], searchsorted(self.x, self.v))
        assert np.allclose(np.searchsorted(sorted_a, self.b),
                           f(sorted_a, self.b))

        # Unsorted input plus an explicit sorter, through the method API.
        sorter = T.vector('sorter', dtype='int64')
        f = theano.function(
            [self.x, self.v, sorter],
            self.x.searchsorted(self.v, sorter=sorter, side='right'))
        expected = self.a.searchsorted(self.b, sorter=self.idx_sorted,
                                       side='right')
        assert np.allclose(expected, f(self.a, self.b, self.idx_sorted))

        # Sorted input with side='right', through the method API.
        f = theano.function([self.x, self.v],
                            self.x.searchsorted(self.v, side='right'))
        assert np.allclose(sorted_a.searchsorted(self.b, side='right'),
                           f(sorted_a, self.b))

    def test_searchsortedOp_wrong_side_kwd(self):
        self.assertRaises(ValueError, searchsorted,
                          self.x, self.v, side='asdfa')

    def test_searchsortedOp_on_no_1d_inp(self):
        no_1d = T.dmatrix('no_1d')
        self.assertRaises(ValueError, searchsorted, no_1d, self.v)
        self.assertRaises(ValueError, searchsorted,
                          self.x, self.v, sorter=no_1d)

    def test_searchsortedOp_on_float_sorter(self):
        sorter = T.vector('sorter', dtype="float32")
        self.assertRaises(TypeError, searchsorted,
                          self.x, self.v, sorter=sorter)

    def test_searchsortedOp_on_int_sorter(self):
        # NOTE(review): unsigned integer sorter dtypes ('uint8', 'uint16',
        # 'uint32', 'uint64') are not exercised here - confirm whether
        # they are meant to be supported.
        compatible_types = ('int8', 'int16', 'int32', 'int64',)
        for dtype in compatible_types:
            sorter = T.vector('sorter', dtype=dtype)
            f = theano.function([self.x, self.v, sorter],
                                searchsorted(self.x, self.v, sorter=sorter),
                                allow_input_downcast=True)
            expected = np.searchsorted(self.a, self.b,
                                       sorter=self.idx_sorted)
            assert np.allclose(expected,
                               f(self.a, self.b, self.idx_sorted))

    def test_searchsortedOp_on_right_side(self):
        # searchsorted requires ascending input when no sorter is given,
        # so the sorted view of `a` is used rather than `a` itself.
        sorted_a = self.a[self.idx_sorted]
        f = theano.function([self.x, self.v],
                            searchsorted(self.x, self.v, side='right'))
        assert np.allclose(np.searchsorted(sorted_a, self.b, side='right'),
                           f(sorted_a, self.b))

    def test_infer_shape(self):
        # Test using default parameters' value
        self._compile_and_check([self.x, self.v],
                                [searchsorted(self.x, self.v)],
                                [self.a[self.idx_sorted], self.b],
                                self.op_class)

        # Test parameter ``sorter``
        sorter = T.vector('sorter', dtype="int64")
        self._compile_and_check([self.x, self.v, sorter],
                                [searchsorted(self.x, self.v, sorter=sorter)],
                                [self.a, self.b, self.idx_sorted],
                                self.op_class)

        # Test parameter ``side``
        la = np.ones(10).astype(config.floatX)
        lb = np.ones(shape=(1, 2, 3)).astype(config.floatX)
        self._compile_and_check([self.x, self.v],
                                [searchsorted(self.x, self.v, side='right')],
                                [la, lb],
                                self.op_class)

    def test_grad(self):
        utt.verify_grad(self.op, [self.a[self.idx_sorted], self.b])
class TestCumsumOp(utt.InferShapeTester):
def setUp(self):
......@@ -139,8 +224,9 @@ class TestBinCountOp(utt.InferShapeTester):
def test_bincountFn(self):
w = T.vector('w')
def ref(data, w=None, minlength=None):
size = data.max() + 1
size = int(data.max() + 1)
if minlength:
size = max(size, minlength)
if w is not None:
......@@ -152,6 +238,7 @@ class TestBinCountOp(utt.InferShapeTester):
for i in range(data.shape[0]):
out[data[i]] += 1
return out
for dtype in ('int8', 'int16', 'int32', 'int64',
'uint8', 'uint16', 'uint32', 'uint64'):
x = T.vector('x', dtype=dtype)
......@@ -225,36 +312,32 @@ class TestBinCountOp(utt.InferShapeTester):
self.assertRaises(TypeError, BinCountOp(), x)
else:
self._compile_and_check(
[x],
[BinCountOp()(x,None)],
[np.random.random_integers(
50, size=(25,)).astype(dtype)],
self.op_class)
self._compile_and_check([x],
[BinCountOp()(x, None)],
[np.random.random_integers(
50, size=(25,)).astype(dtype)],
self.op_class)
weights = np.random.random((25,)).astype(config.floatX)
self._compile_and_check(
[x],
[BinCountOp()(x, weights=weights)],
[np.random.random_integers(
50, size=(25,)).astype(dtype)],
self.op_class)
self._compile_and_check([x],
[BinCountOp()(x, weights=weights)],
[np.random.random_integers(
50, size=(25,)).astype(dtype)],
self.op_class)
if not numpy_16:
continue
self._compile_and_check(
[x],
[BinCountOp(minlength=60)(x, weights=weights)],
[np.random.random_integers(
50, size=(25,)).astype(dtype)],
self.op_class)
self._compile_and_check([x],
[BinCountOp(minlength=60)(x, weights=weights)],
[np.random.random_integers(
50, size=(25,)).astype(dtype)],
self.op_class)
self._compile_and_check(
[x],
[BinCountOp(minlength=5)(x, weights=weights)],
[np.random.random_integers(
50, size=(25,)).astype(dtype)],
self.op_class)
self._compile_and_check([x],
[BinCountOp(minlength=5)(x, weights=weights)],
[np.random.random_integers(
50, size=(25,)).astype(dtype)],
self.op_class)
class TestDiffOp(utt.InferShapeTester):
......@@ -429,9 +512,9 @@ class TestRepeatOp(utt.InferShapeTester):
r_var = T.scalar(dtype=dtype)
r = numpy.asarray(3, dtype=dtype)
if (dtype == 'uint64' or
(dtype in self.numpy_unsupported_dtypes and r_var.ndim == 1)):
self.assertRaises(TypeError,
repeat, x, r_var, axis=axis)
(dtype in self.numpy_unsupported_dtypes and
r_var.ndim == 1)):
self.assertRaises(TypeError, repeat, x, r_var, axis=axis)
else:
f = theano.function([x, r_var],
repeat(x, r_var, axis=axis))
......@@ -441,10 +524,10 @@ class TestRepeatOp(utt.InferShapeTester):
r_var = T.vector(dtype=dtype)
if axis is None:
r = np.random.random_integers(
5, size=a.size).astype(dtype)
5, size=a.size).astype(dtype)
else:
r = np.random.random_integers(
5, size=(10,)).astype(dtype)
5, size=(10,)).astype(dtype)
if dtype in self.numpy_unsupported_dtypes and r_var.ndim == 1:
self.assertRaises(TypeError,
......@@ -455,15 +538,16 @@ class TestRepeatOp(utt.InferShapeTester):
assert np.allclose(np.repeat(a, r, axis=axis),
f(a, r))
#check when r is a list of single integer, e.g. [3].
r = np.random.random_integers(10, size=()).astype(dtype) + 2
# check when r is a list of single integer, e.g. [3].
r = np.random.random_integers(
10, size=()).astype(dtype) + 2
f = theano.function([x],
repeat(x, [r], axis=axis))
assert np.allclose(np.repeat(a, r, axis=axis),
f(a))
assert not np.any([isinstance(n.op, RepeatOp)
assert not np.any([isinstance(n.op, RepeatOp)
for n in f.maker.fgraph.toposort()])
# check when r is theano tensortype that broadcastable is (True,)
r_var = theano.tensor.TensorType(broadcastable=(True,),
dtype=dtype)()
......@@ -472,9 +556,9 @@ class TestRepeatOp(utt.InferShapeTester):
repeat(x, r_var, axis=axis))
assert np.allclose(np.repeat(a, r[0], axis=axis),
f(a, r))
assert not np.any([isinstance(n.op, RepeatOp)
assert not np.any([isinstance(n.op, RepeatOp)
for n in f.maker.fgraph.toposort()])
@attr('slow')
def test_infer_shape(self):
for ndim in range(4):
......@@ -490,28 +574,27 @@ class TestRepeatOp(utt.InferShapeTester):
r_var = T.vector(dtype=dtype)
self.assertRaises(TypeError, repeat, x, r_var)
else:
self._compile_and_check(
[x, r_var],
[RepeatOp(axis=axis)(x, r_var)],
[a, r],
self.op_class)
self._compile_and_check([x, r_var],
[RepeatOp(axis=axis)(x, r_var)],
[a, r],
self.op_class)
r_var = T.vector(dtype=dtype)
if axis is None:
r = np.random.random_integers(
5, size=a.size).astype(dtype)
5, size=a.size).astype(dtype)
elif a.size > 0:
r = np.random.random_integers(
5, size=a.shape[axis]).astype(dtype)
5, size=a.shape[axis]).astype(dtype)
else:
r = np.random.random_integers(
5, size=(10,)).astype(dtype)
5, size=(10,)).astype(dtype)
self._compile_and_check(
[x, r_var],
[RepeatOp(axis=axis)(x, r_var)],
[a, r],
self.op_class)
[x, r_var],
[RepeatOp(axis=axis)(x, r_var)],
[a, r],
self.op_class)
def test_grad(self):
for ndim in range(3):
......@@ -638,26 +721,26 @@ class TestFillDiagonalOffset(utt.InferShapeTester):
# We can't use numpy.fill_diagonal as it is bugged.
assert numpy.allclose(numpy.diag(out, test_offset), val)
if test_offset >= 0:
assert (out == val).sum() == min( min(a.shape),
a.shape[1]-test_offset )
assert (out == val).sum() == min(min(a.shape),
a.shape[1] - test_offset)
else:
assert (out == val).sum() == min( min(a.shape),
a.shape[0]+test_offset )
assert (out == val).sum() == min(min(a.shape),
a.shape[0] + test_offset)
def test_gradient(self):
for test_offset in (-5, -4, -1, 0, 1, 4, 5):
# input 'offset' will not be tested
def fill_diagonal_with_fix_offset( a, val):
return fill_diagonal_offset( a, val, test_offset)
def fill_diagonal_with_fix_offset(a, val):
return fill_diagonal_offset(a, val, test_offset)
utt.verify_grad(fill_diagonal_with_fix_offset,
[numpy.random.rand(5, 8), numpy.random.rand()],
[numpy.random.rand(5, 8), numpy.random.rand()],
n_tests=1, rng=TestFillDiagonalOffset.rng)
utt.verify_grad(fill_diagonal_with_fix_offset,
[numpy.random.rand(8, 5), numpy.random.rand()],
[numpy.random.rand(8, 5), numpy.random.rand()],
n_tests=1, rng=TestFillDiagonalOffset.rng)
utt.verify_grad(fill_diagonal_with_fix_offset,
[numpy.random.rand(5, 5), numpy.random.rand()],
[numpy.random.rand(5, 5), numpy.random.rand()],
n_tests=1, rng=TestFillDiagonalOffset.rng)
def test_infer_shape(self):
......@@ -669,12 +752,12 @@ class TestFillDiagonalOffset(utt.InferShapeTester):
[numpy.random.rand(8, 5),
numpy.random.rand(),
test_offset],
self.op_class )
self.op_class)
self._compile_and_check([x, y, z], [self.op(x, y, z)],
[numpy.random.rand(5, 8),
numpy.random.rand(),
test_offset],
self.op_class )
self.op_class)
def test_to_one_hot():
......@@ -704,47 +787,48 @@ def test_to_one_hot():
[0., 0., 0., 0., 0., 1., 0., 0., 0., 0.],
[0., 0., 0., 0., 0., 0., 1., 0., 0., 0.]])
class test_Unique(utt.InferShapeTester):
def setUp(self):
super(test_Unique, self).setUp()
self.op_class = Unique
self.ops = [Unique(),
Unique(True),
Unique(False, True),
self.ops = [Unique(),
Unique(True),
Unique(False, True),
Unique(True, True)]
if bool(numpy_ver >= [1, 9]) :
if bool(numpy_ver >= [1, 9]):
self.ops.extend([
Unique(False, False, True),
Unique(True, False, True),
Unique(False, True, True),
Unique(True, True, True)])
def test_basic_vector(self):
Unique(False, False, True),
Unique(True, False, True),
Unique(False, True, True),
Unique(True, True, True)])
def test_basic_vector(self):
"""
Basic test for a vector.
Done by using the op and checking that it returns the right answer.
"""
x = theano.tensor.vector()
inp = np.asarray([2,1,3,2], dtype=config.floatX)
list_outs_expected = [[np.unique(inp)],
np.unique(inp, True),
np.unique(inp, False, True),
inp = np.asarray([2, 1, 3, 2], dtype=config.floatX)
list_outs_expected = [[np.unique(inp)],
np.unique(inp, True),
np.unique(inp, False, True),
np.unique(inp, True, True)]
if bool(numpy_ver >= [1, 9]) :
if bool(numpy_ver >= [1, 9]):
list_outs_expected.extend([
np.unique(inp, False, False, True),
np.unique(inp, True, False, True),
np.unique(inp, False, True, True),
np.unique(inp, True, True, True)])
for op, outs_expected in zip(self.ops, list_outs_expected) :
np.unique(inp, False, False, True),
np.unique(inp, True, False, True),
np.unique(inp, False, True, True),
np.unique(inp, True, True, True)])
for op, outs_expected in zip(self.ops, list_outs_expected):
f = theano.function(inputs=[x], outputs=op(x, return_list=True))
outs = f(inp)
# Compare the result computed to the expected value.
for out, out_exp in zip(outs, outs_expected):
utt.assert_allclose(out, out_exp)
def test_basic_matrix(self):
def test_basic_matrix(self):
""" Basic test for a matrix.
Done by using the op and checking that it returns the right answer.
"""
......@@ -754,20 +838,20 @@ class test_Unique(utt.InferShapeTester):
np.unique(inp, True),
np.unique(inp, False, True),
np.unique(inp, True, True)]
if bool(numpy_ver >= [1, 9]) :
if bool(numpy_ver >= [1, 9]):
list_outs_expected.extend([
np.unique(inp, False, False, True),
np.unique(inp, True, False, True),
np.unique(inp, False, True, True),
np.unique(inp, True, True, True)])
np.unique(inp, False, False, True),
np.unique(inp, True, False, True),
np.unique(inp, False, True, True),
np.unique(inp, True, True, True)])
for op, outs_expected in zip(self.ops, list_outs_expected):
f = theano.function(inputs=[x], outputs=op(x, return_list=True))
outs = f(inp)
# Compare the result computed to the expected value.
for out, out_exp in zip(outs, outs_expected):
utt.assert_allclose(out, out_exp)
def test_infer_shape_vector(self):
def test_infer_shape_vector(self):
"""
Testing the infer_shape with a vector.
"""
......@@ -776,32 +860,31 @@ class test_Unique(utt.InferShapeTester):
for op in self.ops:
if not op.return_inverse:
continue
if op.return_index :
if op.return_index:
f = op(x)[2]
else:
f = op(x)[1]
self._compile_and_check([x],
[f],
[np.asarray(np.array([2,1,3,2]),
self._compile_and_check([x],
[f],
[np.asarray(np.array([2, 1, 3, 2]),
dtype=config.floatX)],
self.op_class)
def test_infer_shape_matrix(self):
def test_infer_shape_matrix(self):
"""
Testing the infer_shape with a matrix.
"""
x = theano.tensor.matrix()
for op in self.ops:
if not op.return_inverse:
continue
if op.return_index :
if op.return_index:
f = op(x)[2]
else:
f = op(x)[1]
self._compile_and_check([x],
[f],
[np.asarray(np.array([[2, 1], [3, 2],[2, 3]]),
dtype=config.floatX)],
self.op_class)
self._compile_and_check([x],
[f],
[np.asarray(np.array([[2, 1], [3, 2], [2, 3]]),
dtype=config.floatX)],
self.op_class)
......@@ -692,6 +692,9 @@ class _tensor_py_operators(object):
def cumprod(self, axis=None):
    """See 'theano.tensor.extra_ops.cumprod'."""
    extra_ops = theano.tensor.extra_ops
    return extra_ops.cumprod(self, axis)
def searchsorted(self, v, side='left', sorter=None):
    """See 'theano.tensor.extra_ops.searchsorted'."""
    extra_ops = theano.tensor.extra_ops
    return extra_ops.searchsorted(self, v, side=side, sorter=sorter)
def ptp(self, axis=None):
"""See 'theano.tensor.ptp'."""
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论