Merge pull request #1181 from jlowin/triangle

Add triangle/nonzero functions

Merge pull request #1181 from jlowin/triangle
d7e822c7 · lamblin · 1806b028 · 16d66234 · d7e822c7 · d7e822c7
--- a/theano/tensor/basic.py
+++ b/theano/tensor/basic.py
@@ -1841,6 +1841,14 @@ class _tensor_py_operators:
        """See `theano.tensor.argmax`"""
        return argmax(self, axis, keepdims=keepdims)

+    def nonzero(self, return_matrix=False):
+        """Indices of the non-zero elements; see `theano.tensor.nonzero`."""
+        return nonzero(self, return_matrix=return_matrix)
+
+    def nonzero_values(self):
+        """Non-zero elements; see `theano.tensor.nonzero_values`."""
+        return nonzero_values(self)
+
    def sort(self,  axis=-1, kind='quicksort', order=None):
        """See `theano.tensor.sort`"""
        from theano.tensor.sort import sort
@@ -3218,6 +3226,270 @@ def ones(shape, dtype=None):
    return alloc(numpy.array(1, dtype=dtype), *shape)


+class Nonzero(gof.Op):
+    """
+    Op computing the indices of the non-zero entries of a tensor.
+
+    The output is a single int64 matrix of shape
+    (ndim, number of non-zero elements): entry (i, j) is the index along
+    dimension i of the j-th non-zero element.
+
+    NumPy differs here: `numpy.nonzero` returns a tuple holding one index
+    array per dimension instead of one stacked matrix.
+
+    Parameters
+    ----------
+    a : array_like
+        Input array; it must have at least one dimension.
+
+    Returns
+    -------
+    result : matrix
+        int64 matrix with the indices of the non-zero elements of a.
+
+    See Also
+    --------
+    nonzero_values : Return the non-zero elements of the input array
+    flatnonzero : Return the indices of the non-zero elements of the
+        flattened input array.
+
+    """
+    def make_node(self, a):
+        a = as_tensor_variable(a)
+        if a.ndim == 0:
+            raise ValueError('Nonzero only supports non-scalar arrays.')
+        index_type = TensorType(dtype='int64', broadcastable=(False, False))
+        return gof.Apply(self, [a], [index_type()])
+
+    def perform(self, node, inp, out_):
+        (out,) = out_
+        per_dim = numpy.nonzero(inp[0])
+        # Stack one row per dimension; with no non-zero element, build the
+        # (ndim, 0) result directly instead of stacking empty arrays.
+        if len(per_dim[0]) == 0:
+            indices = numpy.zeros((len(per_dim), 0))
+        else:
+            indices = numpy.vstack(per_dim)
+        out[0] = indices.astype('int64')
+
+    def grad(self, inp, grads):
+        # Integer index output: the gradient is reported as undefined.
+        return [grad_undefined(self, 0, inp[0])]
+
+_nonzero = Nonzero()
+
+def nonzero(a, return_matrix=False):
+    """
+    Return the indices of the non-zero elements of `a`, in one of two forms:
+
+        If return_matrix is False (default, same as NumPy):
+            A tuple of vector arrays such that the ith element of the jth array
+            is the index of the ith non-zero element of the input array in the
+            jth dimension.
+
+        If return_matrix is True (same as Theano Op):
+            Returns a matrix of shape (ndim, number of nonzero elements) such
+            that element (i,j) is the index in the ith dimension of the jth
+            non-zero element.
+
+    Parameters
+    ----------
+    a : array_like
+        Input array. A 0-d (scalar) input raises ValueError.
+
+    return_matrix : bool
+        If True, returns a symbolic matrix. If False, returns a tuple of
+        arrays. Defaults to False.
+
+    Returns
+    -------
+    result : tuple of vectors or matrix
+
+    See Also
+    --------
+    nonzero_values : Return the non-zero elements of the input array
+    flatnonzero : Return the indices of the non-zero elements of the
+        flattened input array.
+
+    """
+    # Convert up front so that any array_like input exposes `.ndim` below.
+    a = as_tensor_variable(a)
+    # Nonzero.make_node raises ValueError for 0-d input, so a.ndim >= 1 from
+    # here on and no special case is needed for scalars.
+    matrix_result = _nonzero(a)
+    if return_matrix:
+        return matrix_result
+    # Row i of the matrix holds the indices along dimension i.
+    return tuple([matrix_result[i] for i in xrange(a.ndim)])
+
+def flatnonzero(a):
+    """
+    Return a vector of indices that are non-zero in the flattened version of a.
+
+    This is equivalent to nonzero(a.flatten(), return_matrix=True)[0]
+
+    Parameters
+    ----------
+    a : tensor
+        Input tensor (array_like values are converted); must not be 0-d.
+
+    Returns
+    -------
+    res : vector
+        Indices of the elements of `a.flatten()` that are non-zero.
+
+    See Also
+    --------
+    nonzero : Return the indices of the non-zero elements of the input array.
+    nonzero_values : Return the non-zero elements of the input array
+    """
+    a = as_tensor_variable(a)
+    if a.ndim == 0:
+        raise ValueError('Nonzero only supports non-scalar arrays.')
+    return nonzero(a.flatten(), return_matrix=True)[0]
+
+def nonzero_values(a):
+    """
+    Return a vector of non-zero elements contained in the input array.
+
+    The following behavior works to extract non-zero elements from an array
+    in NumPy but is *NOT* supported by Theano:
+
+        a[numpy.nonzero(a)]
+
+    Instead, the nonzero_values function or method should be used:
+
+        tensor.nonzero_values(a)
+        a.nonzero_values()
+
+    This is equivalent to the following:
+
+        a.flatten()[tensor.flatnonzero(a)]
+
+    Parameters
+    ----------
+    a : tensor
+        Input tensor; a 0-d input makes `flatnonzero` raise ValueError.
+
+    Returns
+    -------
+    res : vector
+        Output vector, containing the non-zero elements of a.
+
+    See Also
+    --------
+    nonzero : Return the indices of the non-zero elements of the input array.
+    flatnonzero : Return the indices of the non-zero elements of the
+        flattened input array.
+    """
+    return a.flatten()[flatnonzero(a)]
+
+class Tri(gof.Op):
+    """Op building an (N, M) matrix of dtype `dtype` via `numpy.tri`."""
+    def __init__(self, dtype=None):
+        # Fall back on the configured float type when no dtype is given.
+        self.dtype = config.floatX if dtype is None else dtype
+
+    def make_node(self, N, M, k):
+        inputs = [as_tensor_variable(arg) for arg in (N, M, k)]
+        result_type = TensorType(dtype=self.dtype,
+                                 broadcastable=(False, False))
+        return gof.Apply(self, inputs, [result_type()])
+
+    def perform(self, node, inp, out_):
+        (out,) = out_
+        n_rows, n_cols, diag = inp
+        out[0] = numpy.tri(n_rows, n_cols, diag, dtype=self.dtype)
+
+    def infer_shape(self, node, in_shapes):
+        # The output shape is given by the symbolic inputs N and M.
+        return [node.inputs[:2]]
+
+    def grad(self, inp, grads):
+        return [grad_undefined(self, idx, inp[idx]) for idx in xrange(3)]
+
+    def __eq__(self, other):
+        same_type = type(self) == type(other)
+        return same_type and self.dtype == other.dtype
+
+    def __hash__(self):
+        return hash(type(self)) ^ hash(self.dtype)
+
+
+def tri(N, M=None, k=0, dtype=None):
+    """
+    An array with ones at and below the given diagonal and zeros elsewhere.
+
+    Parameters
+    ----------
+    N : int
+        Number of rows in the array.
+    M : int, optional
+        Number of columns in the array.
+        By default, `M` is taken equal to `N`.
+    k : int, optional
+        The sub-diagonal at and below which the array is filled.
+        `k` = 0 is the main diagonal, while `k` < 0 is below it,
+        and `k` > 0 is above.  The default is 0.
+    dtype : dtype, optional
+        Data type of the returned array.  The default is `config.floatX`.
+
+    Returns
+    -------
+    tri : Array of shape (N, M)
+        Array with its lower triangle filled with ones and zero elsewhere;
+        in other words ``T[i,j] == 1`` for ``j <= i + k``, 0 otherwise.
+    """
+    if dtype is None:
+        dtype = config.floatX
+    if M is None:
+        M = N
+    op = Tri(dtype)
+    return op(N, M, k)
+
+
+def tril(m, k=0):
+    """
+    Lower triangle of an array.
+
+    Return a copy of an array with elements above the `k`-th diagonal zeroed.
+
+    Parameters
+    ----------
+    m : array_like, shape (M, N)
+        Input matrix; its `shape` and `dtype` attributes are read.
+    k : int or symbolic integer scalar, optional
+        Diagonal above which to zero elements.  `k = 0` (the default) is the
+        main diagonal, `k < 0` is below it and `k > 0` is above.
+
+    Returns
+    -------
+    tril : array, shape (M, N)
+        Lower triangle of `m`, of same shape and data-type as `m`.
+
+    See Also
+    --------
+    triu : same thing, only for the upper triangle
+    """
+    return m * tri(m.shape[0], m.shape[1], k=k, dtype=m.dtype)
+
+
+def triu(m, k=0):
+    """
+    Upper triangle of an array.
+
+    Return a copy of a matrix with elements below the `k`-th diagonal zeroed.
+
+    Please refer to the documentation for `tril` for further details.
+
+    See Also
+    --------
+    tril : lower triangle of an array
+    """
+    # 1 - tri(..., k - 1) is one exactly on and above the k-th diagonal.
+    return m * (1 - tri(m.shape[0], m.shape[1], k=k-1, dtype=m.dtype))
+
+
 class Eye(gof.Op):
    def __init__(self, dtype=None):
        if dtype is None:

--- a/theano/tensor/tests/test_basic.py
+++ b/theano/tensor/tests/test_basic.py
@@ -32,7 +32,7 @@ from theano.tensor import (_shared, wvector, bvector, autocast_float_as,
        tensor4, permute_row_elements, Flatten, fmatrix, fscalars, grad,
        inplace, iscalar, matrix, minimum, matrices, maximum, mul, neq,
        Reshape, row, scalar, scalars, second, smallest, stack, sub, Tensor,
-        tensor_copy, tensordot, TensorType, unbroadcast,
+        tensor_copy, tensordot, TensorType, Tri, tri, tril, triu, unbroadcast,
        var, Join, shape, MaxAndArgmax, lscalar, zvector, exp,
        get_scalar_constant_value, ivector, reshape, scalar_from_tensor, scal,
        iscalars, arange,  dscalars, fvector, imatrix, numeric_grad,
@@ -40,7 +40,8 @@ from theano.tensor import (_shared, wvector, bvector, autocast_float_as,
        tile, patternbroadcast, Eye, Shape, Default, Dot, PermuteRowElements,
        ScalarFromTensor, TensorFromScalar, dtensor4, Rebroadcast, Alloc,
        dtensor3, SpecifyShape, Mean, IncSubtensor, AdvancedIncSubtensor1,
-        itensor3, Tile, AdvancedIncSubtensor, switch, Diagonal, Diag)
+        itensor3, Tile, AdvancedIncSubtensor, switch, Diagonal, Diag,
+        nonzero, flatnonzero, nonzero_values)
 from theano.tests import unittest_tools as utt
 from theano.printing import debugprint

@@ -1825,6 +1826,166 @@ def test_eye():
        yield check, dtype, 5, 3, -1


+class test_triangle(unittest.TestCase):
+    # Checks are called directly: yield-style tests never run in a TestCase.
+    def test_tri(self):
+        def check(dtype, N, M_=None, k=0):
+            # Theano does not accept None as a tensor.
+            # So we must use a real value.
+            M = M_
+            # Currently DebugMode does not support None as inputs even if this is
+            # allowed.
+            if M is None and theano.config.mode in ['DebugMode', 'DEBUG_MODE']:
+                M = N
+            N_symb = tensor.iscalar()
+            M_symb = tensor.iscalar()
+            k_symb = tensor.iscalar()
+            f = function([N_symb, M_symb, k_symb],
+                        tri(N_symb, M_symb, k_symb, dtype=dtype))
+            result = f(N, M, k)
+            self.assertTrue(
+                numpy.allclose(result, numpy.tri(N, M_, k, dtype=dtype)))
+            self.assertTrue(result.dtype == numpy.dtype(dtype))
+        for dtype in ALL_DTYPES:
+            check(dtype, 3)
+            # M != N, k = 0
+            check(dtype, 3, 5)
+            check(dtype, 5, 3)
+            # N == M, k != 0
+            check(dtype, 3, 3, 1)
+            check(dtype, 3, 3, -1)
+            # N < M, k != 0
+            check(dtype, 3, 5, 1)
+            check(dtype, 3, 5, -1)
+            # N > M, k != 0
+            check(dtype, 5, 3, 1)
+            check(dtype, 5, 3, -1)
+
+    def test_tril_triu(self):
+        def check_l(m, k=0):
+            m_symb = matrix(dtype=m.dtype)
+            k_symb = iscalar()
+            f = function([m_symb, k_symb], tril(m_symb, k_symb))
+            result = f(m, k)
+            self.assertTrue(numpy.allclose(result, numpy.tril(m, k)))
+            self.assertTrue(result.dtype == numpy.dtype(dtype))
+
+        def check_u(m, k=0):
+            m_symb = matrix(dtype=m.dtype)
+            k_symb = iscalar()
+            f = function([m_symb, k_symb], triu(m_symb, k_symb))
+            result = f(m, k)
+            self.assertTrue(numpy.allclose(result, numpy.triu(m, k)))
+            self.assertTrue(result.dtype == numpy.dtype(dtype))
+
+        for dtype in ALL_DTYPES:
+            m = rand_of_dtype((10, 10), dtype)
+            check_l(m, 0)
+            check_l(m, 1)
+            check_l(m, -1)
+
+            check_u(m, 0)
+            check_u(m, 1)
+            check_u(m, -1)
+
+            m = rand_of_dtype((10, 5), dtype)
+            check_l(m, 0)
+            check_l(m, 1)
+            check_l(m, -1)
+
+            check_u(m, 0)
+            check_u(m, 1)
+            check_u(m, -1)
+
+
+class test_nonzero(unittest.TestCase):
+    # Compares nonzero, flatnonzero and nonzero_values against NumPy.
+    def test_nonzero(self):
+        def check(m):
+            m_symb = theano.tensor.tensor(dtype=m.dtype,
+                                        broadcastable = (False,) * m.ndim)
+
+            f_tuple = function([m_symb], nonzero(m_symb, return_matrix=False))
+            f_matrix = function([m_symb], nonzero(m_symb, return_matrix=True))
+
+            self.assertTrue(numpy.allclose(f_matrix(m), numpy.vstack(numpy.nonzero(m))))
+            for i, j in zip(f_tuple(m), numpy.nonzero(m)):
+                self.assertTrue(numpy.allclose(i, j))
+
+        rand0d = numpy.array(rand())
+        self.assertRaises(ValueError, check, rand0d)
+
+        rand1d = rand(8)
+        rand1d[:4] = 0
+        check(rand1d)
+
+        rand2d = rand(8, 9)
+        rand2d[:4] = 0
+        check(rand2d)
+
+        rand3d = rand(8, 9, 10)
+        rand3d[:4] = 0
+        check(rand3d)
+
+        rand4d = rand(8, 9, 10, 11)
+        rand4d[:4] = 0
+        check(rand4d)
+
+    def test_flatnonzero(self):
+        def check(m):
+            m_symb = theano.tensor.tensor(dtype=m.dtype,
+                                        broadcastable = (False,) * m.ndim)
+            f = function([m_symb], flatnonzero(m_symb))
+            result = f(m)
+            self.assertTrue(numpy.allclose(result, numpy.flatnonzero(m)))
+
+        rand0d = numpy.array(rand())
+        self.assertRaises(ValueError, check, rand0d)
+
+        rand1d = rand(8)
+        rand1d[:4] = 0
+        check(rand1d)
+
+        rand2d = rand(8, 9)
+        rand2d[:4] = 0
+        check(rand2d)
+
+        rand3d = rand(8, 9, 10)
+        rand3d[:4] = 0
+        check(rand3d)
+
+        rand4d = rand(8, 9, 10, 11)
+        rand4d[:4] = 0
+        check(rand4d)
+
+    def test_nonzero_values(self):
+        def check(m):
+            m_symb = theano.tensor.tensor(dtype=m.dtype,
+                                        broadcastable = (False,) * m.ndim)
+            f = function([m_symb], nonzero_values(m_symb))
+            result = f(m)
+            self.assertTrue(numpy.allclose(result, m[numpy.nonzero(m)]))
+
+        rand0d = numpy.array(rand())
+        self.assertRaises(ValueError, check, rand0d)
+
+        rand1d = rand(8)
+        rand1d[:4] = 0
+        check(rand1d)
+
+        rand2d = rand(8, 9)
+        rand2d[:4] = 0
+        check(rand2d)
+
+        rand3d = rand(8, 9, 10)
+        rand3d[:4] = 0
+        check(rand3d)
+
+        rand4d = rand(8, 9, 10, 11)
+        rand4d[:4] = 0
+        check(rand4d)
+
+
 def test_identity():
    def check(dtype):
        obj = rand_of_dtype((2,), dtype)
@@ -6472,6 +6633,22 @@ class TestInferShape(utt.InferShapeTester):
                                [Eye()(aiscal, biscal, ciscal)],
                                [3, 5, 0], Eye)

+        # Tri
+        aiscal = iscalar()
+        biscal = iscalar()
+        ciscal = iscalar()
+        self._compile_and_check([aiscal, biscal, ciscal],
+                                [Tri()(aiscal, biscal, ciscal)],
+                                [4, 4, 0], Tri)
+
+        self._compile_and_check([aiscal, biscal, ciscal],
+                                [Tri()(aiscal, biscal, ciscal)],
+                                [4, 5, 0], Tri)
+
+        self._compile_and_check([aiscal, biscal, ciscal],
+                                [Tri()(aiscal, biscal, ciscal)],
+                                [3, 5, 0], Tri)
+
        # Diagonal
        atens3 = tensor3()
        atens3_val = rand(4, 5, 3)