Commit 5e77a3c5 authored by Tanjay94

Isolated the Scipy-dependent functions in slinalg.py.

Parent: 921b6c2b
......@@ -14,6 +14,7 @@ from theano.tensor.opt import (register_stabilize,
from theano.gof import local_optimizer
from theano.gof.opt import Optimizer
from theano.gradient import DisconnectedType
from theano.tensor.nlinalg import ( MatrixInverse,
matrix_inverse,
AllocDiag,
......@@ -33,6 +34,18 @@ from theano.tensor.nlinalg import ( MatrixInverse,
_zero_disconnected
)
from theano.tensor.slinalg import ( Cholesky,
cholesky,
CholeskyGrad,
MatrixPinv,
pinv,
Solve,
solve,
Eigvalsh,
EigvalshGrad,
eigvalsh
)
try:
import scipy.linalg
imported_scipy = True
......
import logging
logger = logging.getLogger(__name__)
import numpy
from theano.gof import Op, Apply
from theano.tensor import as_tensor_variable, dot, DimShuffle, Dot
from theano.tensor.blas import Dot22
from theano import tensor
import theano.tensor
from theano.tensor.opt import (register_stabilize,
register_specialize, register_canonicalize)
from theano.gof import local_optimizer
from theano.gof.opt import Optimizer
from theano.gradient import DisconnectedType
try:
import scipy.linalg
imported_scipy = True
except ImportError:
# some ops (e.g. Cholesky, Solve, A_Xinv_b) won't work
imported_scipy = False
# Recognized structural hints for matrix arguments (used e.g. by Solve's
# `A_structure` parameter); ops may use these to select faster algorithms.
MATRIX_STRUCTURES = (
    'general',
    'symmetric',
    'lower_triangular',
    'upper_triangular',
    'hermitian',
    'banded',
    'diagonal',
    'toeplitz',
)
class Cholesky(Op):
    """Triangular (Cholesky) square root of a positive semi-definite matrix.

    ``L = cholesky(X, lower=True)`` implies ``dot(L, L.T) == X``.
    """
    # TODO: inplace
    # TODO: for specific dtypes
    # TODO: LAPACK wrapper with in-place behavior, for solve also

    def __init__(self, lower=True):
        self.lower = lower
        self.destructive = False

    def props(self):
        # Tuple of the attributes that define this op's identity.
        return (self.lower, self.destructive)

    def __hash__(self):
        return hash((type(self), self.props()))

    def __eq__(self, other):
        return type(self) == type(other) and self.props() == other.props()

    def infer_shape(self, node, shapes):
        # The Cholesky factor has the same shape as the input matrix.
        return [shapes[0]]

    def __str__(self):
        lu = 'lower' if self.lower else 'upper'
        destr = 'destructive' if self.destructive else 'non-destructive'
        return 'Cholesky{%s,%s}' % (lu, destr)

    def make_node(self, x):
        assert imported_scipy, (
            "Scipy not available. Scipy is needed for the Cholesky op")
        x = as_tensor_variable(x)
        assert x.ndim == 2
        return Apply(self, [x], [x.type()])

    def perform(self, node, inputs, outputs):
        (x,) = inputs
        (z,) = outputs
        z[0] = scipy.linalg.cholesky(x, lower=self.lower).astype(x.dtype)

    def grad(self, inputs, gradients):
        (x,) = inputs
        (dz,) = gradients
        return [CholeskyGrad(self.lower)(x, self(x), dz)]

cholesky = Cholesky()
class CholeskyGrad(Op):
    """Reverse-mode gradient of the Cholesky factorization.

    Takes the input matrix ``x``, its Cholesky factor ``l`` and the gradient
    ``dz`` flowing into the factor, and produces the gradient with respect
    to ``x``.  See `perform` for the algorithm reference.
    """
    def __init__(self, lower=True):
        self.lower = lower
        self.destructive = False

    def props(self):
        return (self.lower,
                self.destructive)

    def __hash__(self):
        return hash((type(self), self.props()))

    def __eq__(self, other):
        return (type(self) == type(other) and self.props() == other.props())

    def __str__(self):
        if self.lower:
            lu = 'lower'
        else:
            lu = 'upper'
        if self.destructive:
            destr = 'destructive'
        else:
            destr = 'non-destructive'
        return 'CholeskyGrad{%s,%s}' % (lu, destr)

    def make_node(self, x, l, dz):
        x = as_tensor_variable(x)
        l = as_tensor_variable(l)
        dz = as_tensor_variable(dz)
        assert x.ndim == 2
        assert l.ndim == 2
        assert dz.ndim == 2
        # `l` must come from a Cholesky op with the same lower/upper setting,
        # otherwise the triangular recurrences in `perform` would be wrong.
        assert l.owner.op.lower == self.lower, (
            "lower/upper mismatch between Cholesky op and CholeskyGrad op"
        )
        return Apply(self, [x, l, dz], [x.type()])

    def perform(self, node, inputs, outputs):
        """Implements the "reverse-mode" gradient [1]_ for the
        Cholesky factorization of a positive-definite matrix.

        .. [1] S. P. Smith. "Differentiation of the Cholesky Algorithm".
               Journal of Computational and Graphical Statistics,
               Vol. 4, No. 2 (Jun.,1995), pp. 134-147
               http://www.jstor.org/stable/1390762
        """
        x = inputs[0]
        L = inputs[1]
        dz = inputs[2]
        dx = outputs[0]
        N = x.shape[0]
        if self.lower:
            # Work on the lower triangle of the incoming gradient.
            F = numpy.tril(dz)
            for k in xrange(N - 1, -1, -1):
                for j in xrange(k + 1, N):
                    for i in xrange(j, N):
                        F[i, k] -= F[i, j] * L[j, k]
                        F[j, k] -= F[i, j] * L[i, k]
                for j in xrange(k + 1, N):
                    F[j, k] /= L[k, k]
                    F[k, k] -= L[j, k] * F[j, k]
                F[k, k] /= (2 * L[k, k])
        else:
            # Mirror-image recurrence on the upper triangle.
            # (Removed unused local `M = N - 1` that the original defined
            # and never read.)
            F = numpy.triu(dz)
            for k in xrange(N - 1, -1, -1):
                for j in xrange(k + 1, N):
                    for i in xrange(j, N):
                        F[k, i] -= F[j, i] * L[k, j]
                        F[k, j] -= F[j, i] * L[k, i]
                for j in xrange(k + 1, N):
                    F[k, j] /= L[k, k]
                    F[k, k] -= L[k, j] * F[k, j]
                F[k, k] /= (2 * L[k, k])
        dx[0] = F

    def infer_shape(self, node, shapes):
        # The gradient has the shape of the input matrix `x`.
        return [shapes[0]]
class MatrixPinv(Op):
    """Computes the pseudo-inverse of a matrix :math:`A`.

    The pseudo-inverse of a matrix A, denoted :math:`A^+`, is
    defined as: "the matrix that 'solves' [the least-squares problem]
    :math:`Ax = b`," i.e., if :math:`\\bar{x}` is said solution, then
    :math:`A^+` is that matrix such that :math:`\\bar{x} = A^+b`.

    Note that :math:`Ax=AA^+b`, so :math:`AA^+` is close to the identity
    matrix.

    This method is not faster than `matrix_inverse`. Its strength comes from
    that it works for non-square matrices.
    If you have a square matrix though, `matrix_inverse` can be both more
    exact and faster to compute. Also this op does not get optimized into a
    solve op.
    """
    def __init__(self):
        pass

    def props(self):
        """Function exposing different properties of each instance of the
        op.

        For the ``MatrixPinv`` op, there are no properties to be exposed.
        """
        return ()

    def __hash__(self):
        return hash((type(self), self.props()))

    def __eq__(self, other):
        return (type(self) == type(other) and self.props() == other.props())

    def make_node(self, x):
        x = as_tensor_variable(x)
        assert x.ndim == 2
        return Apply(self, [x], [x.type()])

    def perform(self, node, inputs, outputs):
        # Plain sequence parameters: the original used Python-2-only
        # tuple-unpacking parameters (removed by PEP 3113), which was also
        # inconsistent with the other ops' `perform` signatures in this file.
        (x,) = inputs
        (z,) = outputs
        try:
            # Prefer scipy when available; this op can fall back to numpy,
            # unlike the ops that hard-require scipy in `make_node`.
            if imported_scipy:
                z[0] = scipy.linalg.pinv(x).astype(x.dtype)
            else:
                z[0] = numpy.linalg.pinv(x).astype(x.dtype)
        except numpy.linalg.LinAlgError:
            logger.debug('Failed to invert %s' % str(node.inputs[0]))
            raise

    def __str__(self):
        return "MatrixPseudoInverse"

pinv = MatrixPinv()
class Solve(Op):
    """Solve a system of linear equations."""

    def __init__(self,
                 A_structure='general',
                 lower=False,
                 overwrite_A=False,
                 overwrite_b=False):
        if A_structure not in MATRIX_STRUCTURES:
            raise ValueError('Invalid matrix structure argument', A_structure)
        self.A_structure = A_structure
        self.lower = lower
        self.overwrite_A = overwrite_A
        self.overwrite_b = overwrite_b

    def props(self):
        # Identity-defining attributes, used by __hash__/__eq__/__repr__.
        return (self.A_structure, self.lower,
                self.overwrite_A, self.overwrite_b)

    def __hash__(self):
        return hash((type(self), self.props()))

    def __eq__(self, other):
        return type(self) == type(other) and self.props() == other.props()

    def __repr__(self):
        return 'Solve{%s}' % str(self.props())

    def make_node(self, A, b):
        assert imported_scipy, (
            "Scipy not available. Scipy is needed for the Solve op")
        A = as_tensor_variable(A)
        b = as_tensor_variable(b)
        assert A.ndim == 2
        assert b.ndim in [1, 2]
        otype = tensor.tensor(broadcastable=b.broadcastable,
                              dtype=(A * b).dtype)
        return Apply(self, [A, b], [otype])

    def perform(self, node, inputs, output_storage):
        A, b = inputs
        # TODO: use the A_structure to go faster
        output_storage[0][0] = scipy.linalg.solve(A, b)

    def infer_shape(self, node, shapes):
        # Shape of x where x = inv(A) * b: A's column count gives the rows;
        # b's trailing dimension (if any) gives the columns.
        A_shape, b_shape = shapes
        nrows = A_shape[1]
        if len(b_shape) == 1:
            return [(nrows,)]          # b is a vector
        return [(nrows, b_shape[1])]   # b is a matrix

solve = Solve()  # general solve
#TODO : SolveTriangular
#TODO: Optimizations to replace multiplication by matrix inverse
# with solve() Op (still unwritten)
class Eigvalsh(Op):
    """Generalized eigenvalues of a Hermitian positive definite eigensystem.
    """
    def __init__(self, lower=True):
        assert lower in [True, False]
        self.lower = lower

    def props(self):
        return (self.lower,)

    def __hash__(self):
        return hash((type(self), self.props()))

    def __eq__(self, other):
        return (type(self) == type(other) and self.props() == other.props())

    def make_node(self, a, b):
        assert imported_scipy, (
            "Scipy not available. Scipy is needed for the Eigvalsh op")
        a, b = map(as_tensor_variable, (a, b))
        assert a.ndim == 2
        assert b.ndim == 2
        # Output eigenvalue vector upcasts the two input dtypes.
        out_dtype = theano.scalar.upcast(a.dtype, b.dtype)
        w = theano.tensor.vector(dtype=out_dtype)
        return Apply(self, [a, b], [w])

    def perform(self, node, inputs, outputs):
        # Plain sequence parameters: the original used Python-2-only
        # tuple-unpacking parameters (removed by PEP 3113), inconsistent
        # with the other ops' `perform` signatures in this file.
        (a, b) = inputs
        (w,) = outputs
        w[0] = scipy.linalg.eigvalsh(a=a, b=b, lower=self.lower)

    def grad(self, inputs, g_outputs):
        a, b = inputs
        gw, = g_outputs
        return EigvalshGrad(self.lower)(a, b, gw)

    def infer_shape(self, node, shapes):
        # One eigenvalue per row of `a`.
        n = shapes[0][0]
        return [(n,)]
class EigvalshGrad(Op):
    """Gradient of generalized eigenvalues of a Hermitian positive definite
    eigensystem.
    """
    # Note: This Op (EigvalshGrad), should be removed and replaced with a graph
    # of theano ops that is constructed directly in Eigvalsh.grad.
    # But this can only be done once scipy.linalg.eigh is available as an Op
    # (currently the Eigh uses numpy.linalg.eigh, which doesn't let you
    # pass the right-hand-side matrix for a generalized eigenproblem.) See the
    # discussion on github at
    # https://github.com/Theano/Theano/pull/1846#discussion-diff-12486764

    def __init__(self, lower=True):
        assert lower in [True, False]
        self.lower = lower
        # Pre-select the triangle extractors matching the lower/upper choice:
        # tri0 takes the used triangle (diagonal included), tri1 the strictly
        # opposite triangle.
        if lower:
            self.tri0 = numpy.tril
            self.tri1 = lambda a: numpy.triu(a, 1)
        else:
            self.tri0 = numpy.triu
            self.tri1 = lambda a: numpy.tril(a, -1)

    def props(self):
        return (self.lower,)

    def __hash__(self):
        return hash((type(self), self.props()))

    def __eq__(self, other):
        return (type(self) == type(other) and self.props() == other.props())

    def make_node(self, a, b, gw):
        assert imported_scipy, (
            "Scipy not available. Scipy is needed for the GEigvalsh op")
        a, b, gw = map(as_tensor_variable, (a, b, gw))
        assert a.ndim == 2
        assert b.ndim == 2
        assert gw.ndim == 1
        out_dtype = theano.scalar.upcast(a.dtype, b.dtype, gw.dtype)
        out1 = theano.tensor.matrix(dtype=out_dtype)
        out2 = theano.tensor.matrix(dtype=out_dtype)
        return Apply(self, [a, b, gw], [out1, out2])

    def perform(self, node, inputs, outputs):
        # Plain sequence parameters: the original used Python-2-only
        # tuple-unpacking parameters (removed by PEP 3113), inconsistent
        # with the other ops' `perform` signatures in this file.
        (a, b, gw) = inputs
        w, v = scipy.linalg.eigh(a, b, lower=self.lower)
        gA = v.dot(numpy.diag(gw).dot(v.T))
        gB = - v.dot(numpy.diag(gw * w).dot(v.T))
        # See EighGrad comments for an explanation of these lines
        out1 = self.tri0(gA) + self.tri1(gA).T
        out2 = self.tri0(gB) + self.tri1(gB).T
        outputs[0][0] = numpy.asarray(out1, dtype=node.outputs[0].dtype)
        outputs[1][0] = numpy.asarray(out2, dtype=node.outputs[1].dtype)

    def infer_shape(self, node, shapes):
        # Gradients w.r.t. `a` and `b` match their respective shapes.
        return [shapes[0], shapes[1]]
def eigvalsh(a, b, lower=True):
    """Build an `Eigvalsh` op and apply it to `a` and `b`."""
    op = Eigvalsh(lower)
    return op(a, b)
Markdown format supported
0%
You are adding 0 people to this discussion. Please proceed with caution.
Please finish editing this comment first!
Register or sign in to post a comment.