Commit 5e77a3c5 authored by Tanjay94

Isolated the Scipy-dependent functions in slinalg.py.

Parent: 921b6c2b
......@@ -14,6 +14,7 @@ from theano.tensor.opt import (register_stabilize,
from theano.gof import local_optimizer
from theano.gof.opt import Optimizer
from theano.gradient import DisconnectedType
from theano.tensor.nlinalg import ( MatrixInverse,
matrix_inverse,
AllocDiag,
......@@ -33,6 +34,18 @@ from theano.tensor.nlinalg import ( MatrixInverse,
_zero_disconnected
)
from theano.tensor.slinalg import ( Cholesky,
cholesky,
CholeskyGrad,
MatrixPinv,
pinv,
Solve,
solve,
Eigvalsh,
EigvalshGrad,
eigvalsh
)
try:
import scipy.linalg
imported_scipy = True
......
import logging
logger = logging.getLogger(__name__)
import numpy
from theano.gof import Op, Apply
from theano.tensor import as_tensor_variable, dot, DimShuffle, Dot
from theano.tensor.blas import Dot22
from theano import tensor
import theano.tensor
from theano.tensor.opt import (register_stabilize,
register_specialize, register_canonicalize)
from theano.gof import local_optimizer
from theano.gof.opt import Optimizer
from theano.gradient import DisconnectedType
try:
import scipy.linalg
imported_scipy = True
except ImportError:
# some ops (e.g. Cholesky, Solve, A_Xinv_b) won't work
imported_scipy = False
# Recognized structural hints for matrix arguments (used e.g. by Solve's
# `A_structure` parameter); ops may use these to select faster algorithms.
MATRIX_STRUCTURES = (
    'general',
    'symmetric',
    'lower_triangular',
    'upper_triangular',
    'hermitian',
    'banded',
    'diagonal',
    'toeplitz',
)
class Cholesky(Op):
    """Triangular (Cholesky) square root of a positive semi-definite matrix.

    ``L = cholesky(X, lower=True)`` implies ``dot(L, L.T) == X``.
    """
    # TODO: inplace
    # TODO: for specific dtypes
    # TODO: LAPACK wrapper with in-place behavior, for solve also

    def __init__(self, lower=True):
        self.lower = lower
        self.destructive = False

    def props(self):
        # Tuple of the attributes that define this op's identity.
        return (self.lower, self.destructive)

    def __hash__(self):
        return hash((type(self), self.props()))

    def __eq__(self, other):
        return type(self) == type(other) and self.props() == other.props()

    def infer_shape(self, node, shapes):
        # The Cholesky factor has the same shape as the input matrix.
        return [shapes[0]]

    def __str__(self):
        lu = 'lower' if self.lower else 'upper'
        destr = 'destructive' if self.destructive else 'non-destructive'
        return 'Cholesky{%s,%s}' % (lu, destr)

    def make_node(self, x):
        assert imported_scipy, (
            "Scipy not available. Scipy is needed for the Cholesky op")
        x = as_tensor_variable(x)
        assert x.ndim == 2
        return Apply(self, [x], [x.type()])

    def perform(self, node, inputs, outputs):
        (x,) = inputs
        (z,) = outputs
        z[0] = scipy.linalg.cholesky(x, lower=self.lower).astype(x.dtype)

    def grad(self, inputs, gradients):
        (x,) = inputs
        (dz,) = gradients
        return [CholeskyGrad(self.lower)(x, self(x), dz)]

cholesky = Cholesky()
class CholeskyGrad(Op):
    """Reverse-mode gradient of the Cholesky factorization.

    Takes the input matrix ``x``, its Cholesky factor ``l`` and the gradient
    ``dz`` flowing into the factor, and produces the gradient with respect
    to ``x``.  See `perform` for the algorithm reference.
    """
    def __init__(self, lower=True):
        self.lower = lower
        self.destructive = False

    def props(self):
        return (self.lower,
                self.destructive)

    def __hash__(self):
        return hash((type(self), self.props()))

    def __eq__(self, other):
        return (type(self) == type(other) and self.props() == other.props())

    def __str__(self):
        if self.lower:
            lu = 'lower'
        else:
            lu = 'upper'
        if self.destructive:
            destr = 'destructive'
        else:
            destr = 'non-destructive'
        return 'CholeskyGrad{%s,%s}' % (lu, destr)

    def make_node(self, x, l, dz):
        x = as_tensor_variable(x)
        l = as_tensor_variable(l)
        dz = as_tensor_variable(dz)
        assert x.ndim == 2
        assert l.ndim == 2
        assert dz.ndim == 2
        # `l` must come from a Cholesky op with the same lower/upper setting,
        # otherwise the triangular recurrences in `perform` would be wrong.
        assert l.owner.op.lower == self.lower, (
            "lower/upper mismatch between Cholesky op and CholeskyGrad op"
        )
        return Apply(self, [x, l, dz], [x.type()])

    def perform(self, node, inputs, outputs):
        """Implements the "reverse-mode" gradient [1]_ for the
        Cholesky factorization of a positive-definite matrix.

        .. [1] S. P. Smith. "Differentiation of the Cholesky Algorithm".
               Journal of Computational and Graphical Statistics,
               Vol. 4, No. 2 (Jun.,1995), pp. 134-147
               http://www.jstor.org/stable/1390762
        """
        x = inputs[0]
        L = inputs[1]
        dz = inputs[2]
        dx = outputs[0]
        N = x.shape[0]
        if self.lower:
            # Work on the lower triangle of the incoming gradient.
            F = numpy.tril(dz)
            for k in xrange(N - 1, -1, -1):
                for j in xrange(k + 1, N):
                    for i in xrange(j, N):
                        F[i, k] -= F[i, j] * L[j, k]
                        F[j, k] -= F[i, j] * L[i, k]
                for j in xrange(k + 1, N):
                    F[j, k] /= L[k, k]
                    F[k, k] -= L[j, k] * F[j, k]
                F[k, k] /= (2 * L[k, k])
        else:
            # Mirror-image recurrence on the upper triangle.
            # (Removed unused local `M = N - 1` that the original defined
            # and never read.)
            F = numpy.triu(dz)
            for k in xrange(N - 1, -1, -1):
                for j in xrange(k + 1, N):
                    for i in xrange(j, N):
                        F[k, i] -= F[j, i] * L[k, j]
                        F[k, j] -= F[j, i] * L[k, i]
                for j in xrange(k + 1, N):
                    F[k, j] /= L[k, k]
                    F[k, k] -= L[k, j] * F[k, j]
                F[k, k] /= (2 * L[k, k])
        dx[0] = F

    def infer_shape(self, node, shapes):
        # The gradient has the shape of the input matrix `x`.
        return [shapes[0]]
class MatrixPinv(Op):
    """Computes the pseudo-inverse of a matrix :math:`A`.

    The pseudo-inverse of a matrix A, denoted :math:`A^+`, is
    defined as: "the matrix that 'solves' [the least-squares problem]
    :math:`Ax = b`," i.e., if :math:`\\bar{x}` is said solution, then
    :math:`A^+` is that matrix such that :math:`\\bar{x} = A^+b`.

    Note that :math:`Ax=AA^+b`, so :math:`AA^+` is close to the identity
    matrix.

    This method is not faster than `matrix_inverse`. Its strength comes from
    that it works for non-square matrices.
    If you have a square matrix though, `matrix_inverse` can be both more
    exact and faster to compute. Also this op does not get optimized into a
    solve op.
    """
    def __init__(self):
        pass

    def props(self):
        """Function exposing different properties of each instance of the
        op.

        For the ``MatrixPinv`` op, there are no properties to be exposed.
        """
        return ()

    def __hash__(self):
        return hash((type(self), self.props()))

    def __eq__(self, other):
        return (type(self) == type(other) and self.props() == other.props())

    def make_node(self, x):
        x = as_tensor_variable(x)
        assert x.ndim == 2
        return Apply(self, [x], [x.type()])

    def perform(self, node, inputs, outputs):
        # Plain sequence parameters: the original used Python-2-only
        # tuple-unpacking parameters (removed by PEP 3113), which was also
        # inconsistent with the other ops' `perform` signatures in this file.
        (x,) = inputs
        (z,) = outputs
        try:
            # Prefer scipy when available; this op can fall back to numpy,
            # unlike the ops that hard-require scipy in `make_node`.
            if imported_scipy:
                z[0] = scipy.linalg.pinv(x).astype(x.dtype)
            else:
                z[0] = numpy.linalg.pinv(x).astype(x.dtype)
        except numpy.linalg.LinAlgError:
            logger.debug('Failed to invert %s' % str(node.inputs[0]))
            raise

    def __str__(self):
        return "MatrixPseudoInverse"

pinv = MatrixPinv()
class Solve(Op):
    """Solve a system of linear equations."""

    def __init__(self,
                 A_structure='general',
                 lower=False,
                 overwrite_A=False,
                 overwrite_b=False):
        if A_structure not in MATRIX_STRUCTURES:
            raise ValueError('Invalid matrix structure argument', A_structure)
        self.A_structure = A_structure
        self.lower = lower
        self.overwrite_A = overwrite_A
        self.overwrite_b = overwrite_b

    def props(self):
        # Identity-defining attributes, used by __hash__/__eq__/__repr__.
        return (self.A_structure, self.lower,
                self.overwrite_A, self.overwrite_b)

    def __hash__(self):
        return hash((type(self), self.props()))

    def __eq__(self, other):
        return type(self) == type(other) and self.props() == other.props()

    def __repr__(self):
        return 'Solve{%s}' % str(self.props())

    def make_node(self, A, b):
        assert imported_scipy, (
            "Scipy not available. Scipy is needed for the Solve op")
        A = as_tensor_variable(A)
        b = as_tensor_variable(b)
        assert A.ndim == 2
        assert b.ndim in [1, 2]
        otype = tensor.tensor(broadcastable=b.broadcastable,
                              dtype=(A * b).dtype)
        return Apply(self, [A, b], [otype])

    def perform(self, node, inputs, output_storage):
        A, b = inputs
        # TODO: use the A_structure to go faster
        output_storage[0][0] = scipy.linalg.solve(A, b)

    def infer_shape(self, node, shapes):
        # Shape of x where x = inv(A) * b: A's column count gives the rows;
        # b's trailing dimension (if any) gives the columns.
        A_shape, b_shape = shapes
        nrows = A_shape[1]
        if len(b_shape) == 1:
            return [(nrows,)]          # b is a vector
        return [(nrows, b_shape[1])]   # b is a matrix

solve = Solve()  # general solve
#TODO : SolveTriangular
#TODO: Optimizations to replace multiplication by matrix inverse
# with solve() Op (still unwritten)
class Eigvalsh(Op):
    """Generalized eigenvalues of a Hermitian positive definite eigensystem.
    """
    def __init__(self, lower=True):
        assert lower in [True, False]
        self.lower = lower

    def props(self):
        return (self.lower,)

    def __hash__(self):
        return hash((type(self), self.props()))

    def __eq__(self, other):
        return (type(self) == type(other) and self.props() == other.props())

    def make_node(self, a, b):
        assert imported_scipy, (
            "Scipy not available. Scipy is needed for the Eigvalsh op")
        a, b = map(as_tensor_variable, (a, b))
        assert a.ndim == 2
        assert b.ndim == 2
        # Output eigenvalue vector upcasts the two input dtypes.
        out_dtype = theano.scalar.upcast(a.dtype, b.dtype)
        w = theano.tensor.vector(dtype=out_dtype)
        return Apply(self, [a, b], [w])

    def perform(self, node, inputs, outputs):
        # Plain sequence parameters: the original used Python-2-only
        # tuple-unpacking parameters (removed by PEP 3113), inconsistent
        # with the other ops' `perform` signatures in this file.
        (a, b) = inputs
        (w,) = outputs
        w[0] = scipy.linalg.eigvalsh(a=a, b=b, lower=self.lower)

    def grad(self, inputs, g_outputs):
        a, b = inputs
        gw, = g_outputs
        return EigvalshGrad(self.lower)(a, b, gw)

    def infer_shape(self, node, shapes):
        # One eigenvalue per row of `a`.
        n = shapes[0][0]
        return [(n,)]
class EigvalshGrad(Op):
    """Gradient of generalized eigenvalues of a Hermitian positive definite
    eigensystem.
    """
    # Note: This Op (EigvalshGrad), should be removed and replaced with a graph
    # of theano ops that is constructed directly in Eigvalsh.grad.
    # But this can only be done once scipy.linalg.eigh is available as an Op
    # (currently the Eigh uses numpy.linalg.eigh, which doesn't let you
    # pass the right-hand-side matrix for a generalized eigenproblem.) See the
    # discussion on github at
    # https://github.com/Theano/Theano/pull/1846#discussion-diff-12486764

    def __init__(self, lower=True):
        assert lower in [True, False]
        self.lower = lower
        # Pre-select the triangle extractors matching the lower/upper choice:
        # tri0 takes the used triangle (diagonal included), tri1 the strictly
        # opposite triangle.
        if lower:
            self.tri0 = numpy.tril
            self.tri1 = lambda a: numpy.triu(a, 1)
        else:
            self.tri0 = numpy.triu
            self.tri1 = lambda a: numpy.tril(a, -1)

    def props(self):
        return (self.lower,)

    def __hash__(self):
        return hash((type(self), self.props()))

    def __eq__(self, other):
        return (type(self) == type(other) and self.props() == other.props())

    def make_node(self, a, b, gw):
        assert imported_scipy, (
            "Scipy not available. Scipy is needed for the GEigvalsh op")
        a, b, gw = map(as_tensor_variable, (a, b, gw))
        assert a.ndim == 2
        assert b.ndim == 2
        assert gw.ndim == 1
        out_dtype = theano.scalar.upcast(a.dtype, b.dtype, gw.dtype)
        out1 = theano.tensor.matrix(dtype=out_dtype)
        out2 = theano.tensor.matrix(dtype=out_dtype)
        return Apply(self, [a, b, gw], [out1, out2])

    def perform(self, node, inputs, outputs):
        # Plain sequence parameters: the original used Python-2-only
        # tuple-unpacking parameters (removed by PEP 3113), inconsistent
        # with the other ops' `perform` signatures in this file.
        (a, b, gw) = inputs
        w, v = scipy.linalg.eigh(a, b, lower=self.lower)
        gA = v.dot(numpy.diag(gw).dot(v.T))
        gB = - v.dot(numpy.diag(gw * w).dot(v.T))
        # See EighGrad comments for an explanation of these lines
        out1 = self.tri0(gA) + self.tri1(gA).T
        out2 = self.tri0(gB) + self.tri1(gB).T
        outputs[0][0] = numpy.asarray(out1, dtype=node.outputs[0].dtype)
        outputs[1][0] = numpy.asarray(out2, dtype=node.outputs[1].dtype)

    def infer_shape(self, node, shapes):
        # Gradients w.r.t. `a` and `b` match their respective shapes.
        return [shapes[0], shapes[1]]
def eigvalsh(a, b, lower=True):
    """Build an `Eigvalsh` op and apply it to `a` and `b`."""
    op = Eigvalsh(lower)
    return op(a, b)
Markdown format supported
0%
You are adding 0 people to this discussion. Please proceed with caution.
Please finish editing this comment first!
Register or sign in to post a comment.