Commit 66716e23 authored by Frédéric Bastien

Merge pull request #1885 from Tanjay94/svd_op

Svd op
...@@ -5,10 +5,12 @@ import sys ...@@ -5,10 +5,12 @@ import sys
import numpy import numpy
import theano import theano
from theano import gof, Type, Apply from theano import gof, Type, Apply
from theano import tensor, scalar, config from theano import tensor, scalar, config
from theano.compat.six import StringIO from theano.compat.six import StringIO
from theano.scalar import Scalar from theano.scalar import Scalar
scal = scalar # somewhere scalar gets reassigned to be a function scal = scalar # somewhere scalar gets reassigned to be a function
from theano.gof.python25 import all, any from theano.gof.python25 import all, any
...@@ -3534,7 +3536,7 @@ __global__ void kEye(float* a, int n, int m) { ...@@ -3534,7 +3536,7 @@ __global__ void kEye(float* a, int n, int m) {
cudaGetErrorString(sts), cudaGetErrorString(sts),
dims[0], dims[1]); dims[0], dims[1]);
%(fail)s; %(fail)s;
} }
""" % locals() """ % locals()
return s return s
......
...@@ -489,7 +489,8 @@ class MatrixPinv(Op): ...@@ -489,7 +489,8 @@ class MatrixPinv(Op):
:math:`Ax = b`," i.e., if :math:`\\bar{x}` is said solution, then :math:`Ax = b`," i.e., if :math:`\\bar{x}` is said solution, then
:math:`A^+` is that matrix such that :math:`\\bar{x} = A^+b`. :math:`A^+` is that matrix such that :math:`\\bar{x} = A^+b`.
Note that :math:`Ax=AA^+b`, so :math:`AA^+` is close to the identity matrix. Note that :math:`Ax=AA^+b`, so :math:`AA^+` is close to the identity
matrix.
This method is not faster then `matrix_inverse`. Its strength comes from This method is not faster then `matrix_inverse`. Its strength comes from
that it works for non-square matrices. that it works for non-square matrices.
If you have a square matrix though, `matrix_inverse` can be both more If you have a square matrix though, `matrix_inverse` can be both more
...@@ -519,14 +520,10 @@ class MatrixPinv(Op): ...@@ -519,14 +520,10 @@ class MatrixPinv(Op):
return Apply(self, [x], [x.type()]) return Apply(self, [x], [x.type()])
def perform(self, node, (x,), (z, )): def perform(self, node, (x,), (z, )):
try: if imported_scipy:
if imported_scipy: z[0] = scipy.linalg.pinv(x).astype(x.dtype)
z[0] = scipy.linalg.pinv(x).astype(x.dtype) else:
else: z[0] = numpy.linalg.pinv(x).astype(x.dtype)
z[0] = numpy.linalg.pinv(x).astype(x.dtype)
except numpy.linalg.LinAlgError:
logger.debug('Failed to invert %s' % str(node.inputs[0]))
raise
def __str__(self): def __str__(self):
return "MatrixPseudoInverse" return "MatrixPseudoInverse"
...@@ -857,6 +854,7 @@ def spectral_radius_bound(X, log2_exponent): ...@@ -857,6 +854,7 @@ def spectral_radius_bound(X, log2_exponent):
if log2_exponent <= 0: if log2_exponent <= 0:
raise ValueError('spectral_radius_bound requires a strictly positive ' raise ValueError('spectral_radius_bound requires a strictly positive '
'exponent', log2_exponent) 'exponent', log2_exponent)
XX = X XX = X
for i in xrange(log2_exponent): for i in xrange(log2_exponent):
XX = tensor.dot(XX, XX) XX = tensor.dot(XX, XX)
...@@ -865,41 +863,6 @@ def spectral_radius_bound(X, log2_exponent): ...@@ -865,41 +863,6 @@ def spectral_radius_bound(X, log2_exponent):
2 ** (-log2_exponent)) 2 ** (-log2_exponent))
class A_Xinv_b(Op):
    """Product of the form ``a * inv(X) * b``.

    ``X`` is inverted through a Cholesky factorization, so it is assumed
    to be symmetric positive-definite (scipy raises otherwise).
    """
    def make_node(self, a, X, b):
        assert imported_scipy, (
            "Scipy not available. Scipy is needed for the A_Xinv_b op")
        a = as_tensor_variable(a)
        X = as_tensor_variable(X)
        b = as_tensor_variable(b)
        assert a.ndim == 2
        assert X.ndim == 2
        assert b.ndim == 2
        # BUG FIX: was `x.dtype` -- `x` is undefined here (NameError);
        # the output dtype comes from the matrix being inverted.
        o = theano.tensor.matrix(dtype=X.dtype)
        return Apply(self, [a, X, b], [o])

    def perform(self, node, inputs, outstor):
        # BUG FIX: parameter was misspelled `ndoe`; also removed the dead
        # `if 1: ... else: raise NotImplementedError(self.X_structure)`
        # scaffold (the else branch was unreachable).
        a, X, b = inputs
        # Factor X once, then solve X xb = b and X xa = a.T, giving
        # a inv(X) b = (inv(X) a.T).T dot (inv(X) b).
        L_factor = scipy.linalg.cho_factor(X)
        xb = scipy.linalg.cho_solve(L_factor, b)
        xa = scipy.linalg.cho_solve(L_factor, a.T)
        outstor[0][0] = numpy.dot(xa.T, xb)

    def grad(self, inputs, g_outputs):
        gz, = g_outputs
        a, X, b = inputs
        iX = matrix_inverse(X)
        ga = matrix_dot(gz, b.T, iX.T)
        gX = -matrix_dot(iX.T, a, gz, b.T, iX.T)
        # BUG FIX: was `ix.T` -- undefined name (NameError at runtime).
        gb = matrix_dot(iX.T, a.T, gz)
        return [ga, gX, gb]
class Eig(Op): class Eig(Op):
"""Compute the eigenvalues and right eigenvectors of a square array. """Compute the eigenvalues and right eigenvectors of a square array.
...@@ -928,12 +891,7 @@ class Eig(Op): ...@@ -928,12 +891,7 @@ class Eig(Op):
return Apply(self, [x], [w, v]) return Apply(self, [x], [w, v])
def perform(self, node, (x,), (w, v)): def perform(self, node, (x,), (w, v)):
try: w[0], v[0] = [z.astype(x.dtype) for z in self._numop(x)]
w[0], v[0] = [z.astype(x.dtype) for z in self._numop(x)]
except numpy.linalg.LinAlgError:
logger.debug('Failed to find %s of %s' % (self._numop.__name__,
node.inputs[0]))
raise
def infer_shape(self, node, shapes): def infer_shape(self, node, shapes):
n = shapes[0][0] n = shapes[0][0]
...@@ -945,6 +903,201 @@ class Eig(Op): ...@@ -945,6 +903,201 @@ class Eig(Op):
eig = Eig() eig = Eig()
class SVD(Op):
# See doc in the docstring of the function just after this class.
_numop = staticmethod(numpy.linalg.svd)
def __init__(self, full_matrices=True, compute_uv=True):
"""
inputs :
--------
full_matrices : bool, optional
If True (default), u and v have the shapes (M, M) and (N, N),
respectively.
Otherwise, the shapes are (M, K) and (K, N), respectively,
where K = min(M, N).
compute_uv : bool, optional
Whether or not to compute u and v in addition to s.
True by default.
"""
self.full_matrices = full_matrices
self.compute_uv = compute_uv
def __hash__(self):
return hash((type(self), self.props()))
def __eq__(self, other):
return (type(self) == type(other) and self.props() == other.props())
def props(self):
return self.full_matrices, self.compute_uv,
def make_node(self, x):
x = as_tensor_variable(x)
assert x.ndim == 2, "The input of svd function should be a matrix."
w = theano.tensor.matrix(dtype=x.dtype)
u = theano.tensor.matrix(dtype=x.dtype)
v = theano.tensor.matrix(dtype=x.dtype)
return Apply(self, [x], [w, u, v])
def perform(self, node, (x,), (w, u, v)):
assert x.ndim == 2, "The input of svd function should be a matrix."
w[0], u[0], v[0] = self._numop(x,
self.full_matrices,
self.compute_uv)
def __str__(self):
return self._numop.__name__.capitalize()
def svd(a, full_matrices=1, compute_uv=1):
    """
    This function performs the SVD on CPU.

    Parameters :
    ------------
    a : matrix (2-d tensor variable)
        The matrix to be decomposed.
    full_matrices : bool, optional
        If True (default), u and v have the shapes (M, M) and (N, N),
        respectively.
        Otherwise, the shapes are (M, K) and (K, N), respectively,
        where K = min(M, N).
    compute_uv : bool, optional
        Whether or not to compute u and v in addition to s.
        True by default.

    Returns :
    -------
    The three outputs of numpy.linalg.svd, in numpy's order:
    u, the singular values s, and v^H.
    """
    return SVD(full_matrices, compute_uv)(a)
class QRFull(Op):
"""
Full QR Decomposition.
Computes the QR decomposition of a matrix.
Factor the matrix a as qr, where q is orthonormal
and r is upper-triangular.
"""
_numop = staticmethod(numpy.linalg.qr)
def __init__(self, mode):
self.mode = mode
def __hash__(self):
return hash((type(self), self.props()))
def __eq__(self, other):
return (type(self) == type(other) and self.props() == other.props())
def make_node(self, x):
x = as_tensor_variable(x)
assert x.ndim == 2, "The input of qr function should be a matrix."
q = theano.tensor.matrix(dtype=x.dtype)
r = theano.tensor.matrix(dtype=x.dtype)
return Apply(self, [x], [q, r])
def props(self):
return self.mode
def perform(self, node, (x,), (q, r)):
assert x.ndim == 2, "The input of qr function should be a matrix."
q[0], r[0] = self._numop(x,
self.mode)
def __str__(self):
return self._numop.__class__.__name__
class QRIncomplete(Op):
"""
Incomplete QR Decomposition.
Computes the QR decomposition of a matrix.
Factor the matrix a as qr and return a single matrix.
"""
_numop = staticmethod(numpy.linalg.qr)
def __init__(self, mode):
self.mode = mode
def __hash__(self):
return hash((type(self), self.props()))
def __eq__(self, other):
return (type(self) == type(other) and self.props() == other.props())
def props(self):
return self.mode
def make_node(self, x):
x = as_tensor_variable(x)
assert x.ndim == 2, "The input of qr function should be a matrix."
q = theano.tensor.matrix(dtype=x.dtype)
return Apply(self, [x], [q])
def perform(self, node, (x,), (q,)):
assert x.ndim == 2, "The input of qr function should be a matrix."
q[0] = self._numop(x,
self.mode)
def __str__(self):
return self._numop.__class__.__name__
def qr(a, mode="full"):
    """
    Computes the QR decomposition of a matrix.
    Factor the matrix a as qr, where q
    is orthonormal and r is upper-triangular.

    Parameters :
    ------------
    a : array_like, shape (M, N)
        Matrix to be factored.
    mode : {'reduced', 'complete', 'r', 'raw', 'full', 'economic'}, optional
        If K = min(M, N), then
        'reduced' : returns q, r with dimensions (M, K), (K, N) (default)
        'complete' : returns q, r with dimensions (M, M), (M, N)
        'r' : returns r only with dimensions (K, N)
        'raw' : returns h, tau with dimensions (N, M), (K,)
        'full' : alias of 'reduced', deprecated
        'economic' : returns h from 'raw', deprecated. The options 'reduced',
        'complete', and 'raw' are new in numpy 1.8, see the notes for more
        information. The default is 'reduced' and to maintain backward
        compatibility with earlier versions of numpy both it and the old
        default 'full' can be omitted. Note that array h returned in 'raw'
        mode is transposed for calling Fortran. The 'economic' mode is
        deprecated. The modes 'full' and 'economic' may be passed using only
        the first letter for backwards compatibility, but all others
        must be spelled out.
        Default mode is 'full' which is also default for numpy 1.6.1.

        Note: Default mode was left to full as full and reduced are both doing
        the same thing in the new numpy version but only full works on the old
        previous numpy version.

    Returns :
    ---------
    q : matrix of float or complex, optional
        A matrix with orthonormal columns. When mode = 'complete'
        the result is an orthogonal/unitary matrix depending on whether
        or not a is real/complex. The determinant may be either +/- 1 in
        that case.
    r : matrix of float or complex, optional
        The upper-triangular matrix.
    """
    # Probe the installed numpy with a tiny fixed matrix to find out, at
    # graph-construction time, whether this `mode` makes numpy.linalg.qr
    # return a (q, r) tuple or a single array -- the answer depends on
    # the numpy version, so it cannot be hard-coded per mode name.
    x = [[2, 1], [3, 4]]
    if isinstance(numpy.linalg.qr(x,mode), tuple):
        # Two outputs: q and r.
        return QRFull(mode)(a)
    else:
        # Single output (e.g. mode 'r' or 'economic').
        return QRIncomplete(mode)(a)
def _zero_disconnected(outputs, grads): def _zero_disconnected(outputs, grads):
l = [] l = []
for o, g in zip(outputs, grads): for o, g in zip(outputs, grads):
......
...@@ -26,6 +26,8 @@ from theano.sandbox.linalg.ops import (cholesky, ...@@ -26,6 +26,8 @@ from theano.sandbox.linalg.ops import (cholesky,
AllocDiag, AllocDiag,
alloc_diag, alloc_diag,
det, det,
svd,
qr,
#PSD_hint, #PSD_hint,
trace, trace,
matrix_dot, matrix_dot,
...@@ -39,6 +41,7 @@ from theano.sandbox.linalg.ops import (cholesky, ...@@ -39,6 +41,7 @@ from theano.sandbox.linalg.ops import (cholesky,
from theano.sandbox.linalg import eig, eigh, eigvalsh from theano.sandbox.linalg import eig, eigh, eigvalsh
from nose.plugins.skip import SkipTest from nose.plugins.skip import SkipTest
from nose.plugins.attrib import attr from nose.plugins.attrib import attr
from nose.tools import assert_raises
def check_lower_triangular(pd, ch_f): def check_lower_triangular(pd, ch_f):
...@@ -177,6 +180,50 @@ def test_matrix_dot(): ...@@ -177,6 +180,50 @@ def test_matrix_dot():
assert _allclose(numpy_sol, theano_sol) assert _allclose(numpy_sol, theano_sol)
def test_qr_modes():
rng = numpy.random.RandomState(utt.fetch_seed())
A = tensor.matrix("A", dtype=theano.config.floatX)
a = rng.rand(4, 4).astype(theano.config.floatX)
f = function([A], qr(A))
t_qr = f(a)
n_qr = numpy.linalg.qr(a)
assert _allclose(n_qr, t_qr)
for mode in ["reduced", "r", "raw", "full", "economic"]:
f = function([A], qr(A, mode))
t_qr = f(a)
n_qr = numpy.linalg.qr(a, mode)
if isinstance(n_qr, (list, tuple)):
assert _allclose(n_qr[0], t_qr[0])
assert _allclose(n_qr[1], t_qr[1])
else:
assert _allclose(n_qr, t_qr)
try:
n_qr = numpy.linalg.qr(a, "complete")
f = function([A], qr(A, "complete"))
t_qr = f(a)
assert _allclose(n_qr, t_qr)
except TypeError, e:
assert "name 'complete' is not defined" in str(e)
def test_svd():
    """Check theano's svd against numpy.linalg.svd on a random matrix."""
    rng = numpy.random.RandomState(utt.fetch_seed())

    A = tensor.matrix("A", dtype=theano.config.floatX)
    U, V, T = svd(A)
    fn = function([A], [U, V, T])

    a = rng.rand(4, 4).astype(theano.config.floatX)
    # Outputs come back in numpy's order: u, singular values, v^H.
    for n_out, t_out in zip(numpy.linalg.svd(a), fn(a)):
        assert _allclose(n_out, t_out)
def test_inverse_singular(): def test_inverse_singular():
singular = numpy.array([[1, 0, 0]] + [[0, 1, 0]] * 2, singular = numpy.array([[1, 0, 0]] + [[0, 1, 0]] * 2,
dtype=theano.config.floatX) dtype=theano.config.floatX)
......
Markdown is supported
0%
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Register or sign in to comment