Fixed rebasing error.

63b827c6 · Tanjay94 · 5e77a3c5 · 63b827c6
--- a/theano/sandbox/linalg/ops.py
+++ b/theano/sandbox/linalg/ops.py
@@ -348,272 +348,6 @@ def local_log_pow(node):
            return [exponent * tensor.log(base)]


-MATRIX_STRUCTURES = (
-        'general',
-        'symmetric',
-        'lower_triangular',
-        'upper_triangular',
-        'hermitian',
-        'banded',
-        'diagonal',
-        'toeplitz',
-        )
-
-
-class Cholesky(Op):
-    """
-    Return a triangular matrix square root of positive semi-definite `x`
-
-    L = cholesky(X, lower=True) implies dot(L, L.T) == X
-    """
-    #TODO: inplace
-    #TODO: for specific dtypes
-    #TODO: LAPACK wrapper with in-place behavior, for solve also
-    def __init__(self, lower=True):
-        self.lower = lower
-        self.destructive = False
-
-    def props(self):
-        return (self.lower,
-                self.destructive)
-
-    def __hash__(self):
-        return hash((type(self), self.props()))
-
-    def __eq__(self, other):
-        return (type(self) == type(other) and self.props() == other.props())
-
-    def infer_shape(self, node, shapes):
-        return [shapes[0]]
-
-    def __str__(self):
-        if self.lower:
-            lu = 'lower'
-        else:
-            lu = 'upper'
-        if self.destructive:
-            destr = 'destructive'
-        else:
-            destr = 'non-destructive'
-        return 'Cholesky{%s,%s}' % (lu, destr)
-
-    def make_node(self, x):
-        assert imported_scipy, (
-            "Scipy not available. Scipy is needed for the Cholesky op")
-        x = as_tensor_variable(x)
-        assert x.ndim == 2
-        return Apply(self, [x], [x.type()])
-
-    def perform(self, node, inputs, outputs):
-        x = inputs[0]
-        z = outputs[0]
-        z[0] = scipy.linalg.cholesky(x, lower=self.lower).astype(x.dtype)
-
-    def grad(self, inputs, gradients):
-        return [CholeskyGrad(self.lower)(inputs[0], self(inputs[0]),
-                                         gradients[0])]
-
-cholesky = Cholesky()
-
-
-class CholeskyGrad(Op):
-    """
-    """
-    def __init__(self, lower=True):
-        self.lower = lower
-        self.destructive = False
-
-    def props(self):
-        return (self.lower,
-                self.destructive)
-
-    def __hash__(self):
-        return hash((type(self), self.props()))
-
-    def __eq__(self, other):
-        return (type(self) == type(other) and self.props() == other.props())
-
-    def __str__(self):
-        if self.lower:
-            lu = 'lower'
-        else:
-            lu = 'upper'
-        if self.destructive:
-            destr = 'destructive'
-        else:
-            destr = 'non-destructive'
-        return 'CholeskyGrad{%s,%s}' % (lu, destr)
-
-    def make_node(self, x, l, dz):
-        x = as_tensor_variable(x)
-        l = as_tensor_variable(l)
-        dz = as_tensor_variable(dz)
-        assert x.ndim == 2
-        assert l.ndim == 2
-        assert dz.ndim == 2
-        assert l.owner.op.lower == self.lower, (
-            "lower/upper mismatch between Cholesky op and CholeskyGrad op"
-        )
-        return Apply(self, [x, l, dz], [x.type()])
-
-    def perform(self, node, inputs, outputs):
-        """Implements the "reverse-mode" gradient [1]_ for the
-        Cholesky factorization of a positive-definite matrix.
-
-        .. [1] S. P. Smith. "Differentiation of the Cholesky Algorithm".
-               Journal of Computational and Graphical Statistics,
-               Vol. 4, No. 2 (Jun.,1995), pp. 134-147
-               http://www.jstor.org/stable/1390762
-
-        """
-        x = inputs[0]
-        L = inputs[1]
-        dz = inputs[2]
-        dx = outputs[0]
-        N = x.shape[0]
-        if self.lower:
-            F = numpy.tril(dz)
-            for k in xrange(N - 1, -1, -1):
-                for j in xrange(k + 1, N):
-                    for i in xrange(j, N):
-                        F[i, k] -= F[i, j] * L[j, k]
-                        F[j, k] -= F[i, j] * L[i, k]
-                for j in xrange(k + 1, N):
-                    F[j, k] /= L[k, k]
-                    F[k, k] -= L[j, k] * F[j, k]
-                F[k, k] /= (2 * L[k, k])
-        else:
-            F = numpy.triu(dz)
-            M = N - 1
-            for k in xrange(N - 1, -1, -1):
-                for j in xrange(k + 1, N):
-                    for i in xrange(j, N):
-                        F[k, i] -= F[j, i] * L[k, j]
-                        F[k, j] -= F[j, i] * L[k, i]
-                for j in xrange(k + 1, N):
-                    F[k, j] /= L[k, k]
-                    F[k, k] -= L[k, j] * F[k, j]
-                F[k, k] /= (2 * L[k, k])
-        dx[0] = F
-
-    def infer_shape(self, node, shapes):
-        return [shapes[0]]
-
-
-class MatrixPinv(Op):
-    """Computes the pseudo-inverse of a matrix :math:`A`.
-
-    The pseudo-inverse of a matrix A, denoted :math:`A^+`, is
-    defined as: "the matrix that 'solves' [the least-squares problem]
-    :math:`Ax = b`," i.e., if :math:`\\bar{x}` is said solution, then
-    :math:`A^+` is that matrix such that :math:`\\bar{x} = A^+b`.
-
-    Note that :math:`Ax=AA^+b`, so :math:`AA^+` is close to the identity
-    matrix.
-    This method is not faster then `matrix_inverse`. Its strength comes from
-    that it works for non-square matrices.
-    If you have a square matrix though, `matrix_inverse` can be both more
-    exact and faster to compute. Also this op does not get optimized into a
-    solve op.
-    """
-    def __init__(self):
-        pass
-
-    def props(self):
-        """Function exposing different properties of each instance of the
-        op.
-
-        For the ``MatrixPinv`` op, there are no properties to be exposed.
-        """
-        return ()
-
-    def __hash__(self):
-        return hash((type(self), self.props()))
-
-    def __eq__(self, other):
-        return (type(self) == type(other) and self.props() == other.props())
-
-    def make_node(self, x):
-        x = as_tensor_variable(x)
-        assert x.ndim == 2
-        return Apply(self, [x], [x.type()])
-
-    def perform(self, node, (x,), (z, )):
-        if imported_scipy:
-            z[0] = scipy.linalg.pinv(x).astype(x.dtype)
-        else:
-            z[0] = numpy.linalg.pinv(x).astype(x.dtype)
-
-    def __str__(self):
-        return "MatrixPseudoInverse"
-
-pinv = MatrixPinv()
-
-
-class Solve(Op):
-    """Solve a system of linear equations"""
-    def __init__(self,
-                 A_structure='general',
-                 lower=False,
-                 overwrite_A=False,
-                 overwrite_b=False):
-        if A_structure not in MATRIX_STRUCTURES:
-            raise ValueError('Invalid matrix structure argument', A_structure)
-        self.A_structure = A_structure
-        self.lower = lower
-        self.overwrite_A = overwrite_A
-        self.overwrite_b = overwrite_b
-
-    def props(self):
-        return (self.A_structure,
-                self.lower,
-                self.overwrite_A,
-                self.overwrite_b)
-
-    def __hash__(self):
-        return hash((type(self), self.props()))
-
-    def __eq__(self, other):
-        return type(self) == type(other) and self.props() == other.props()
-
-    def __repr__(self):
-        return 'Solve{%s}' % str(self.props())
-
-    def make_node(self, A, b):
-        assert imported_scipy, (
-            "Scipy not available. Scipy is needed for the Solve op")
-        A = as_tensor_variable(A)
-        b = as_tensor_variable(b)
-        assert A.ndim == 2
-        assert b.ndim in [1, 2]
-        otype = tensor.tensor(
-                broadcastable=b.broadcastable,
-                dtype=(A * b).dtype)
-        return Apply(self, [A, b], [otype])
-
-    def perform(self, node, inputs, output_storage):
-        A, b = inputs
-        #TODO: use the A_structure to go faster
-        output_storage[0][0] = scipy.linalg.solve(A, b)
-
-    # computes shape of x where x = inv(A) * b
-    def infer_shape(self, node, shapes):
-        Ashape, Bshape = shapes
-        rows = Ashape[1]
-        if len(Bshape) == 1:  # b is a Vector
-            return [(rows,)]
-        else:
-            cols = Bshape[1]  # b is a Matrix
-            return [(rows, cols)]
-
-solve = Solve()  # general solve
-
-#TODO : SolveTriangular
-
-#TODO: Optimizations to replace multiplication by matrix inverse
-#      with solve() Op (still unwritten)
-
-
 def spectral_radius_bound(X, log2_exponent):
    """
    Returns upper bound on the largest eigenvalue of square symmetrix matrix X.
@@ -643,46 +377,6 @@ def spectral_radius_bound(X, log2_exponent):
            2 ** (-log2_exponent))


-class Eig(Op):
-    """Compute the eigenvalues and right eigenvectors of a square array.
-
-    """
-    _numop = staticmethod(numpy.linalg.eig)
-
-    def props(self):
-        """Function exposing different properties of each instance of the
-        op.
-
-        For the ``Eig`` op, there are no properties to be exposed.
-        """
-        return ()
-
-    def __hash__(self):
-        return hash((type(self), self.props()))
-
-    def __eq__(self, other):
-        return (type(self) == type(other) and self.props() == other.props())
-
-    def make_node(self, x):
-        x = as_tensor_variable(x)
-        assert x.ndim == 2
-        w = theano.tensor.vector(dtype=x.dtype)
-        v = theano.tensor.matrix(dtype=x.dtype)
-        return Apply(self, [x], [w, v])
-
-    def perform(self, node, (x,), (w, v)):
-        w[0], v[0] = [z.astype(x.dtype) for z in self._numop(x)]
-
-    def infer_shape(self, node, shapes):
-        n = shapes[0][0]
-        return [(n,), (n, n)]
-
-    def __str__(self):
-        return self._numop.__name__.capitalize()
-
-eig = Eig()
-
-
 class SVD(Op):

    # See doc in the docstring of the function just after this class.
@@ -878,276 +572,6 @@ def qr(a, mode="full"):
        return QRIncomplete(mode)(a)


-def _zero_disconnected(outputs, grads):
-    l = []
-    for o, g in zip(outputs, grads):
-        if isinstance(g.type, DisconnectedType):
-            l.append(o.zeros_like())
-        else:
-            l.append(g)
-    return l
-
-
-class Eigh(Eig):
-    """
-    Return the eigenvalues and eigenvectors of a Hermitian or symmetric matrix.
-
-    """
-    _numop = staticmethod(numpy.linalg.eigh)
-
-    def __init__(self, UPLO='L'):
-        assert UPLO in ['L', 'U']
-        self.UPLO = UPLO
-
-    def __str__(self):
-        return 'Eigh{%s}' % self.UPLO
-
-    def props(self):
-        return self.UPLO,
-
-    def make_node(self, x):
-        x = as_tensor_variable(x)
-        assert x.ndim == 2
-        # Numpy's linalg.eigh may return either double or single
-        # presision eigenvalues depending on installed version of
-        # LAPACK.  Rather than trying to reproduce the (rather
-        # involved) logic, we just probe linalg.eigh with a trivial
-        # input.
-        w_dtype = self._numop([[numpy.dtype(x.dtype).type()]])[0].dtype.name
-        w = theano.tensor.vector(dtype=w_dtype)
-        v = theano.tensor.matrix(dtype=x.dtype)
-        return Apply(self, [x], [w, v])
-
-    def perform(self, node, (x,), (w, v)):
-        try:
-            w[0], v[0] = self._numop(x, self.UPLO)
-        except numpy.linalg.LinAlgError:
-            logger.debug('Failed to find %s of %s' % (self._numop.__name__,
-                                                      node.inputs[0]))
-            raise
-
-    def grad(self, inputs, g_outputs):
-        r"""The gradient function should return
-
-           .. math:: \sum_n\left(W_n\frac{\partial\,w_n}
-                           {\partial a_{ij}} +
-                     \sum_k V_{nk}\frac{\partial\,v_{nk}}
-                           {\partial a_{ij}}\right),
-
-        where [:math:`W`, :math:`V`] corresponds to ``g_outputs``,
-        :math:`a` to ``inputs``, and  :math:`(w, v)=\mbox{eig}(a)`.
-
-        Analytic formulae for eigensystem gradients are well-known in
-        perturbation theory:
-
-           .. math:: \frac{\partial\,w_n}
-                          {\partial a_{ij}} = v_{in}\,v_{jn}
-
-
-           .. math:: \frac{\partial\,v_{kn}}
-                          {\partial a_{ij}} =
-                \sum_{m\ne n}\frac{v_{km}v_{jn}}{w_n-w_m}
-        """
-        x, = inputs
-        w, v = self(x)
-        # Replace gradients wrt disconnected variables with
-        # zeros. This is a work-around for issue #1063.
-        gw, gv = _zero_disconnected([w, v], g_outputs)
-        return [EighGrad(self.UPLO)(x, w, v, gw, gv)]
-
-
-def eigh(a, UPLO='L'):
-    return Eigh(UPLO)(a)
-
-
-class EighGrad(Op):
-    """Gradient of an eigensystem of a Hermitian matrix.
-
-    """
-    def __init__(self, UPLO='L'):
-        assert UPLO in ['L', 'U']
-        self.UPLO = UPLO
-        if UPLO == 'L':
-            self.tri0 = numpy.tril
-            self.tri1 = lambda a: numpy.triu(a, 1)
-        else:
-            self.tri0 = numpy.triu
-            self.tri1 = lambda a: numpy.tril(a, -1)
-
-    def props(self):
-        return (self.UPLO,)
-
-    def __hash__(self):
-        return hash((type(self), self.props()))
-
-    def __eq__(self, other):
-        return (type(self) == type(other) and self.props() == other.props())
-
-    def __str__(self):
-        return 'EighGrad{%s}' % self.UPLO
-
-    def make_node(self, x, w, v, gw, gv):
-        x, w, v, gw, gv = map(as_tensor_variable, (x, w, v, gw, gv))
-        assert x.ndim == 2
-        assert w.ndim == 1
-        assert v.ndim == 2
-        assert gw.ndim == 1
-        assert gv.ndim == 2
-        out_dtype = theano.scalar.upcast(x.dtype, w.dtype, v.dtype,
-                                         gw.dtype, gv.dtype)
-        out = theano.tensor.matrix(dtype=out_dtype)
-        return Apply(self, [x, w, v, gw, gv], [out])
-
-    def perform(self, node, inputs, outputs):
-        r"""
-        Implements the "reverse-mode" gradient for the eigensystem of
-        a square matrix.
-        """
-        x, w, v, W, V = inputs
-        N = x.shape[0]
-        outer = numpy.outer
-
-        G = lambda n: sum(v[:, m] * V.T[n].dot(v[:, m]) / (w[n] - w[m])
-                          for m in xrange(N) if m != n)
-        g = sum(outer(v[:, n], v[:, n] * W[n] + G(n))
-                for n in xrange(N))
-
-        # Numpy's eigh(a, 'L') (eigh(a, 'U')) is a function of tril(a)
-        # (triu(a)) only.  This means that partial derivative of
-        # eigh(a, 'L') (eigh(a, 'U')) with respect to a[i,j] is zero
-        # for i < j (i > j).  At the same time, non-zero components of
-        # the gradient must account for the fact that variation of the
-        # opposite triangle contributes to variation of two elements
-        # of Hermitian (symmetric) matrix. The following line
-        # implements the necessary logic.
-        out = self.tri0(g) + self.tri1(g).T
-
-        # The call to self.tri0 in perform upcast from float32 to
-        # float64 or from int* to int64 in numpy 1.6.1 but not in
-        # 1.6.2. We do not want version dependent dtype in Theano.
-        # We think it should be the same as the output.
-        outputs[0][0] = numpy.asarray(out, dtype=node.outputs[0].dtype)
-
-    def infer_shape(self, node, shapes):
-        return [shapes[0]]
-
-
-class Eigvalsh(Op):
-    """Generalized eigenvalues of a Hermetian positive definite eigensystem
-    """
-
-    def __init__(self, lower=True):
-        assert lower in [True, False]
-        self.lower = lower
-
-    def props(self):
-        return (self.lower,)
-
-    def __hash__(self):
-        return hash((type(self), self.props()))
-
-    def __eq__(self, other):
-        return (type(self) == type(other) and self.props() == other.props())
-
-    def make_node(self, a, b):
-        assert imported_scipy, (
-            "Scipy not available. Scipy is needed for the Eigvalsh op")
-        a = as_tensor_variable(a)
-        assert a.ndim == 2
-        if not isinstance(b, (theano.Variable)):
-            if b is None:
-                b = theano.tensor.NoneConst
-                out_dtype = a.dtype
-            else:
-                b = as_tensor_variable(b)
-                out_dtype = theano.scalar.upcast(a.dtype, b.dtype)
-        elif not isinstance(b.type, theano.tensor.NoneTypeT):
-            b = as_tensor_variable(b)
-            out_dtype = theano.scalar.upcast(a.dtype, b.dtype)
-            assert b.ndim == 2
-        else:
-            out_dtype = a.dtype
-
-        w = theano.tensor.vector(dtype=out_dtype)
-        return Apply(self, [a, b], [w])
-
-    def perform(self, node, (a, b), (w,)):
-        w[0] = scipy.linalg.eigvalsh(a=a, b=b, lower=self.lower)
-
-    def grad(self, inputs, g_outputs):
-        a, b = inputs
-        gw, = g_outputs
-        return EigvalshGrad(self.lower)(a, b, gw)
-
-    def infer_shape(self, node, shapes):
-        n = shapes[0][0]
-        return [(n,)]
-
-
-class EigvalshGrad(Op):
-    """Gradient of generalized eigenvalues of a Hermetian positive definite
-    eigensystem
-    """
-
-    # Note: This Op (EigvalshGrad), should be removed and replaced with a graph
-    # of theano ops that is constructed directly in Eigvalsh.grad.
-    # But this can only be done once scipy.linalg.eigh is available as an Op
-    # (currently the Eigh uses numpy.linalg.eigh, which doesn't let you
-    # pass the right-hand-side matrix for a generalized eigenproblem.) See the
-    # discussion on github at
-    # https://github.com/Theano/Theano/pull/1846#discussion-diff-12486764
-
-    def __init__(self, lower=True):
-        assert lower in [True, False]
-        self.lower = lower
-        if lower:
-            self.tri0 = numpy.tril
-            self.tri1 = lambda a: numpy.triu(a, 1)
-        else:
-            self.tri0 = numpy.triu
-            self.tri1 = lambda a: numpy.tril(a, -1)
-
-    def props(self):
-        return (self.lower,)
-
-    def __hash__(self):
-        return hash((type(self), self.props()))
-
-    def __eq__(self, other):
-        return (type(self) == type(other) and self.props() == other.props())
-
-    def make_node(self, a, b, gw):
-        assert imported_scipy, (
-            "Scipy not available. Scipy is needed for the GEigvalsh op")
-        a, b, gw = map(as_tensor_variable, (a, b, gw))
-        assert a.ndim == 2
-        assert b.ndim == 2
-        assert gw.ndim == 1
-
-        out_dtype = theano.scalar.upcast(a.dtype, b.dtype, gw.dtype)
-        out1 = theano.tensor.matrix(dtype=out_dtype)
-        out2 = theano.tensor.matrix(dtype=out_dtype)
-        return Apply(self, [a, b, gw], [out1, out2])
-
-    def perform(self, node, (a, b, gw), outputs):
-        w, v = scipy.linalg.eigh(a, b, lower=self.lower)
-        gA = v.dot(numpy.diag(gw).dot(v.T))
-        gB = - v.dot(numpy.diag(gw*w).dot(v.T))
-
-        # See EighGrad comments for an explanation of these lines
-        out1 = self.tri0(gA) + self.tri1(gA).T
-        out2 = self.tri0(gB) + self.tri1(gB).T
-        outputs[0][0] = numpy.asarray(out1, dtype=node.outputs[0].dtype)
-        outputs[1][0] = numpy.asarray(out2, dtype=node.outputs[1].dtype)
-
-    def infer_shape(self, node, shapes):
-        return [shapes[0], shapes[1]]
-
-
-def eigvalsh(a, b, lower=True):
-    return Eigvalsh(lower)(a, b)
-
-
 def matrix_power(M, n):
    result = 1
    for i in xrange(n):