提交 e596e80e authored 作者: Boris Fomitchev's avatar Boris Fomitchev

Merge remote-tracking branch 'upstream/master' into tensor_op

......@@ -2201,6 +2201,67 @@ def sqr(a):
square = sqr  # Public alias: `square` and `sqr` name the same elementwise op.
def cov(m, y=None, rowvar=True, bias=False, ddof=None, fweights=None, aweights=None):
    """Calculate the covariance matrix.

    Covariance indicates the level to which two variables vary together.
    If we examine N-dimensional samples, :math:`m = [x_1, x_2, ... x_N]^T`,
    then the covariance matrix element :math:`C_{ij}` is the covariance of
    :math:`x_i` and :math:`x_j`. The element :math:`C_{ii}` is the variance
    of :math:`x_i`. Code and docstring ported from numpy.

    Parameters
    ----------
    m : array_like
        A 2-D array containing multiple variables and observations.
        Each row of `m` represents a variable, and each column is
        observations of all those variables.
    y : array_like, optional
        An additional set of variables and observations. `y` has the same form
        as that of `m`.
    rowvar : bool, optional
        If `rowvar` is True (default), then each row represents a
        variable, with observations in the columns. Otherwise, the relationship
        is transposed: each column represents a variable, while the rows
        contain observations.
    bias : bool, optional
        Default normalization (False) is by ``(N - 1)``, where ``N`` is the
        number of observations given (unbiased estimate). If `bias` is True,
        then normalization is by ``N``. These values can be overridden by
        using the keyword ``ddof``.
    ddof : int, optional
        If not ``None`` the default value implied by `bias` is overridden.
        The default value is ``None``.
    fweights : optional
        Not implemented; must be ``None``.
    aweights : optional
        Not implemented; must be ``None``.

    Returns
    -------
    out : The covariance matrix of the variables.

    Raises
    ------
    NotImplementedError
        If `fweights` or `aweights` is not ``None``.
    """
    if fweights is not None:
        raise NotImplementedError('fweights are not implemented')
    if aweights is not None:
        raise NotImplementedError('aweights are not implemented')

    # Orient `m` (and `y`, if given) so rows are variables and columns are
    # observations, matching numpy.cov's internal convention.
    if not rowvar and m.shape[0] != 1:
        m = m.T
    if y is not None:
        if not rowvar and y.shape[0] != 1:
            y = y.T
        # Stack the extra variables of `y` below those of `m`.
        m = theano.tensor.concatenate((m, y), axis=0)

    # Normalization factor N - ddof: ddof defaults to 1 (unbiased estimate)
    # unless `bias` asks for plain division by N.
    if ddof is None:
        ddof = 0 if bias else 1
    fact = m.shape[1] - ddof

    # Center each variable, then build the scaled Gram matrix m @ m.T / fact.
    m -= m.mean(axis=1, keepdims=1)
    c = m.dot(m.T)
    c *= theano.tensor.constant(1) / fact
    return c.squeeze()
# NOTE(review): the stub only supplies the name and docstring; the actual
# implementation is produced by the `_scal_elemwise` decorator — presumably
# an elementwise op like the neighboring `sqr`; confirm against the decorator.
@_scal_elemwise
def sqrt(a):
    """square root of a"""
......
......@@ -8226,6 +8226,107 @@ def test_norm():
assert np.allclose(f([1, 1]), np.sqrt(2))
class test_cov(unittest.TestCase):
    """Check theano.tensor.cov against numpy.cov on random data."""

    def test_core(self):
        # Basic cov on a single matrix, over several shapes including the
        # single-variable (1, 2) edge case.
        x = theano.tensor.matrix('x')
        f = theano.function([x], theano.tensor.cov(x))
        for shape in [(3, 5), (5, 3), (10, 10), (2, 2), (1, 2)]:
            data = np.asarray(np.random.rand(*shape), dtype=config.floatX)
            assert np.allclose(f(data), np.cov(data))

    def test_rowvar(self):
        # Both orientations must agree with numpy.
        for rowvar in [True, False]:
            x = theano.tensor.matrix('x')
            f = theano.function([x], theano.tensor.cov(x, rowvar=rowvar))
            for shape in [(3, 5), (5, 3), (10, 10), (2, 2)]:
                data = np.asarray(np.random.rand(*shape), dtype=config.floatX)
                assert np.allclose(f(data), np.cov(data, rowvar=rowvar))
        # Check when variables are along the first axis (single column).
        x = theano.tensor.matrix('x')
        f = theano.function([x], theano.tensor.cov(x, rowvar=False))
        data = np.asarray(np.random.rand(2, 1), dtype=config.floatX)
        assert np.allclose(f(data), np.cov(data, rowvar=False))

    def test_y(self):
        # Extra variable set `y` is stacked onto `m`. Use a distinct name
        # for the numeric data so the symbolic `y` is not shadowed.
        x = theano.tensor.matrix('x')
        y = theano.tensor.matrix('y')
        f = theano.function([x, y], theano.tensor.cov(x, y=y))
        for shape in [(3, 5), (5, 3), (10, 10), (2, 2)]:
            data = np.asarray(np.random.rand(*shape), dtype=config.floatX)
            ydata = np.asarray(np.random.rand(*shape), dtype=config.floatX)
            assert np.allclose(f(data, ydata), np.cov(data, y=ydata))

    def test_ddof(self):
        # Explicit ddof overrides the bias-implied default.
        for ddof in range(0, 5):
            x = theano.tensor.matrix('x')
            f = theano.function([x], theano.tensor.cov(x, ddof=ddof))
            data = np.asarray(np.random.rand(3, 5), dtype=config.floatX)
            assert np.allclose(f(data), np.cov(data, ddof=ddof))

    def test_bias(self):
        # `bias` alone, then combined with every ddof (ddof wins).
        for bias in [True, False]:
            x = theano.tensor.matrix('x')
            f = theano.function([x], theano.tensor.cov(x, bias=bias))
            data = np.asarray(np.random.rand(3, 5), dtype=config.floatX)
            assert np.allclose(f(data), np.cov(data, bias=bias))
        for ddof in range(0, 5):
            for bias in [True, False]:
                x = theano.tensor.matrix('x')
                f = theano.function(
                    [x], theano.tensor.cov(x, ddof=ddof, bias=bias))
                data = np.asarray(np.random.rand(3, 5), dtype=config.floatX)
                assert np.allclose(f(data), np.cov(data, ddof=ddof, bias=bias))
class test_ptp(unittest.TestCase):
def test_scalar(self):
# Should return 0 for all scalar
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论