提交 8d7a8d56 authored 作者: nouiz's avatar nouiz

Merge pull request #1088 from abalkin/issue-1080

Issue #1080: Make TensorVariable interface more similar to that of numpy.ndarray
......@@ -25,6 +25,7 @@ from theano.tensor.utils import hash_from_ndarray
from theano.scalar import ComplexError, IntegerDivisionError
import theano.scalar.sharedvar
from theano.gradient import grad_undefined
from theano.gradient import grad_not_implemented
from theano.gradient import DisconnectedType
### set up the external interface
......@@ -1637,6 +1638,9 @@ class _tensor_py_operators:
def ravel(self):
    """Return this tensor flattened to one dimension.

    Delegates to the module-level `flatten`; mirrors `numpy.ndarray.ravel`.
    """
    return flatten(self)
def diagonal(self, offset=0, axis1=0, axis2=1):
    """Return the specified diagonal of this tensor.

    Delegates to the module-level `diagonal`; mirrors
    `numpy.ndarray.diagonal` (same `offset`, `axis1`, `axis2` semantics).
    """
    return diagonal(self, offset, axis1, axis2)
# CASTING
def astype(self, dtype):
    """Return a copy of this tensor cast to `dtype`.

    Delegates to the module-level `cast`; mirrors `numpy.ndarray.astype`.
    """
    return cast(self, dtype)
......@@ -1794,6 +1798,8 @@ class _tensor_py_operators:
"""See `theano.tensor.conj`"""
return conj(self)
conjugate = conj
def repeat(self, repeats, axis=None):
"""See `theano.tensor.repeat`"""
from theano.tensor.extra_ops import repeat
......@@ -7226,3 +7232,96 @@ def all(x, axis=None, keepdims=False):
if keepdims:
out = makeKeepDims(x, out, axis)
return out
class Diagonal(Op):
    """Return specified diagonals.

    Symbolic counterpart of `numpy.ndarray.diagonal`: extracts the
    diagonal with the given `offset` from the 2-d plane spanned by
    `axis1` and `axis2` of the input.

    :param x: A tensor variable with x.ndim >= 2.

    :return: A tensor of rank ``x.ndim - 1`` holding the diagonal
        elements (the extracted diagonal becomes the last axis).
    """

    def __init__(self, offset=0, axis1=0, axis2=1):
        self.offset = offset
        self.axis1 = axis1
        self.axis2 = axis2

    def __eq__(self, other):
        # The parameters are part of the Op's identity: Diagonal(1) and
        # Diagonal(-1) compute different things, so they must not compare
        # equal -- the merge optimizer relies on __eq__/__hash__ to decide
        # which apply nodes can be unified.
        return (type(self) == type(other) and
                self.offset == other.offset and
                self.axis1 == other.axis1 and
                self.axis2 == other.axis2)

    def __hash__(self):
        # Keep consistent with __eq__.
        return hash((type(self), self.offset, self.axis1, self.axis2))

    def make_node(self, x):
        x = as_tensor_variable(x)
        assert x.ndim >= 2
        # Output has one dimension fewer than the input.
        return Apply(self, [x], [tensor(dtype=x.dtype,
                                        broadcastable=[False] * (x.ndim - 1))])

    def perform(self, node, inputs, outputs):
        # Explicit unpacking instead of tuple parameters in the signature,
        # which are Python 2 only (removed by PEP 3113).
        (x,) = inputs
        (z,) = outputs
        z[0] = x.diagonal(self.offset, self.axis1, self.axis2)

    def grad(self, inputs, g_outputs):
        (x,) = inputs
        return [grad_not_implemented(self, 0, x)]

    def infer_shape(self, node, shapes):
        in_shape, = shapes
        dim1 = in_shape[self.axis1]
        dim2 = in_shape[self.axis2]
        out_shape = [d for i, d in enumerate(in_shape)
                     if i not in (self.axis1, self.axis2)]
        # The following logic is inspired by C code of PyArray_Diagonal().
        offset = self.offset
        if offset > 0:
            diag_size = clip(dim2 - offset, 0, dim1)
        elif offset < 0:
            diag_size = clip(dim1 + offset, 0, dim2)
        else:
            diag_size = minimum(dim1, dim2)
        out_shape.append(diag_size)
        return [tuple(out_shape)]

    def __str__(self):
        return self.__class__.__name__
def diagonal(a, offset=0, axis1=0, axis2=1):
    """Return the specified diagonal of `a`; see `numpy.diagonal`.

    The main-diagonal case is routed to the existing, specialized
    `extract_diag` Op; every other combination of parameters uses the
    generic `Diagonal` Op.
    """
    is_main_diagonal = (offset, axis1, axis2) == (0, 0, 1)
    if not is_main_diagonal:
        return Diagonal(offset, axis1, axis2)(a)
    from theano.sandbox.linalg import extract_diag
    return extract_diag(a)
class Diag(Op):
    """Build a square matrix with the given vector on its main diagonal.

    Symbolic counterpart of `numpy.diag` for 1-d input: a length-n vector
    produces an (n, n) matrix that is zero off the main diagonal.
    """

    def __eq__(self, other):
        # Diag has no parameters, so type identity is sufficient.
        return type(self) == type(other)

    def __hash__(self):
        return hash(type(self))

    def make_node(self, diag):
        diag = as_tensor_variable(diag)
        if diag.type.ndim != 1:
            raise TypeError('data argument must be a vector', diag.type)
        return Apply(self, [diag], [matrix(dtype=diag.dtype)])

    def perform(self, node, inputs, outputs):
        # Explicit unpacking instead of tuple parameters in the signature,
        # which are Python 2 only (removed by PEP 3113).
        (z,) = outputs
        z[0] = numpy.diag(inputs[0])

    def grad(self, inputs, g_outputs):
        (gz,) = g_outputs
        # Placing a vector on the diagonal is linear; its gradient is
        # reading the main diagonal back out of the output gradient.
        return [diagonal(gz)]

    def infer_shape(self, nodes, shapes):
        # Output is square: (n, n) for an input vector of length n.
        return [(shapes[0][0],) * 2]

    def __str__(self):
        return self.__class__.__name__
def diag(v, k=0):
    """Symbolic counterpart of `numpy.diag`.

    For a 1-d input, build a matrix with `v` on its main diagonal;
    for a 2-d input, extract the `k`-th diagonal.

    :param v: A 1-d or 2-d tensor variable.
    :param k: Diagonal offset; only ``k == 0`` is supported for 1-d input.

    :raises NotImplementedError: if `v` is 1-d and ``k != 0``.
    :raises ValueError: if `v` is neither 1- nor 2-d.
    """
    if v.ndim == 1:
        # `assert` is stripped under `python -O`, so raise explicitly to
        # make sure the limitation is always reported.
        if k != 0:
            raise NotImplementedError(
                "diagonals other than main are not implemented")
        return Diag()(v)
    elif v.ndim == 2:
        return diagonal(v, k)
    else:
        raise ValueError("Input must be 1- or 2-d.")
......@@ -40,7 +40,7 @@ from theano.tensor import (_shared, wvector, bvector, autocast_float_as,
tile, patternbroadcast, Eye, Shape, Default, Dot, PermuteRowElements,
ScalarFromTensor, TensorFromScalar, dtensor4, Rebroadcast, Alloc,
dtensor3, SpecifyShape, Mean, IncSubtensor, AdvancedIncSubtensor1,
itensor3, Tile, AdvancedIncSubtensor, switch)
itensor3, Tile, AdvancedIncSubtensor, switch, Diagonal, Diag)
from theano.tests import unittest_tools as utt
from theano.printing import debugprint
......@@ -6590,6 +6590,34 @@ class TestInferShape(utt.InferShapeTester):
[Eye()(aiscal, biscal, ciscal)],
[3, 5, 0], Eye)
# Diagonal
atens3 = tensor3()
atens3_val = rand(4, 5, 3)
atens3_diag = Diagonal()(atens3)
self._compile_and_check([atens3], [atens3_diag],
[atens3_val], Diagonal)
atens3_diag = Diagonal(1)(atens3)
self._compile_and_check([atens3], [atens3_diag],
[atens3_val], Diagonal)
atens3_diag = Diagonal(-1)(atens3)
self._compile_and_check([atens3], [atens3_diag],
[atens3_val], Diagonal)
atens3_diag = Diagonal(1,0,2)(atens3)
self._compile_and_check([atens3], [atens3_diag],
[atens3_val], Diagonal)
atens3_diag = Diagonal(1,1,2)(atens3)
self._compile_and_check([atens3], [atens3_diag],
[atens3_val], Diagonal)
atens3_diag = Diagonal(1,2,0)(atens3)
self._compile_and_check([atens3], [atens3_diag],
[atens3_val], Diagonal)
# Diag
advec = dvector()
advec_val = rand(4)
self._compile_and_check([advec], [Diag()(advec)],
[advec_val], Diag)
# Shape
# 'opt.Makevector' precludes optimizer from disentangling
# elements of shape
......@@ -7070,7 +7098,7 @@ class TestTensorInstanceMethods(unittest.TestCase):
assert_array_equal(X.argsort().eval({X: x}), x.argsort())
assert_array_equal(X.argsort(1).eval({X: x}), x.argsort(1))
def test_dot(self):
def test_clip(self):
X, Y = self.vars
x, y = self.vals
Z = X.clip(0.5 - Y, 0.5 + Y)
......@@ -7099,6 +7127,7 @@ class TestTensorInstanceMethods(unittest.TestCase):
Z = X + Y * 1j
z = x + y * 1j
assert_array_equal(Z.conj().eval({Z: z}), z.conj())
assert_array_equal(Z.conjugate().eval({Z: z}), z.conj())
def test_round(self):
X, _ = self.vars
......@@ -7128,6 +7157,16 @@ class TestTensorInstanceMethods(unittest.TestCase):
x, _ = self.vals
assert_array_equal(X.ravel().eval({X: x}), x.ravel())
def test_diagonal(self):
    """TensorVariable.diagonal must agree with numpy.ndarray.diagonal."""
    X, _ = self.vars
    x, _ = self.vals
    # Cover the default call, pure-offset calls, and explicit axis pairs
    # (including swapped axes and a negative offset on the swapped pair).
    for args in [(), (1,), (-1,),
                 (1, 0, 1), (-1, 0, 1), (0, 1, 0), (-2, 1, 0)]:
        assert_array_equal(X.diagonal(*args).eval({X: x}),
                           x.diagonal(*args))
if __name__ == '__main__':
......
Markdown 格式
0%
您将 0 位用户添加到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论