提交 63b827c6 authored 作者: Tanjay94's avatar Tanjay94

Fixed rebasing error.

上级 5e77a3c5
......@@ -348,272 +348,6 @@ def local_log_pow(node):
return [exponent * tensor.log(base)]
MATRIX_STRUCTURES = (
'general',
'symmetric',
'lower_triangular',
'upper_triangular',
'hermitian',
'banded',
'diagonal',
'toeplitz',
)
class Cholesky(Op):
"""
Return a triangular matrix square root of positive semi-definite `x`
L = cholesky(X, lower=True) implies dot(L, L.T) == X
"""
#TODO: inplace
#TODO: for specific dtypes
#TODO: LAPACK wrapper with in-place behavior, for solve also
def __init__(self, lower=True):
self.lower = lower
self.destructive = False
def props(self):
return (self.lower,
self.destructive)
def __hash__(self):
return hash((type(self), self.props()))
def __eq__(self, other):
return (type(self) == type(other) and self.props() == other.props())
def infer_shape(self, node, shapes):
return [shapes[0]]
def __str__(self):
if self.lower:
lu = 'lower'
else:
lu = 'upper'
if self.destructive:
destr = 'destructive'
else:
destr = 'non-destructive'
return 'Cholesky{%s,%s}' % (lu, destr)
def make_node(self, x):
assert imported_scipy, (
"Scipy not available. Scipy is needed for the Cholesky op")
x = as_tensor_variable(x)
assert x.ndim == 2
return Apply(self, [x], [x.type()])
def perform(self, node, inputs, outputs):
x = inputs[0]
z = outputs[0]
z[0] = scipy.linalg.cholesky(x, lower=self.lower).astype(x.dtype)
def grad(self, inputs, gradients):
return [CholeskyGrad(self.lower)(inputs[0], self(inputs[0]),
gradients[0])]
cholesky = Cholesky()
class CholeskyGrad(Op):
"""
"""
def __init__(self, lower=True):
self.lower = lower
self.destructive = False
def props(self):
return (self.lower,
self.destructive)
def __hash__(self):
return hash((type(self), self.props()))
def __eq__(self, other):
return (type(self) == type(other) and self.props() == other.props())
def __str__(self):
if self.lower:
lu = 'lower'
else:
lu = 'upper'
if self.destructive:
destr = 'destructive'
else:
destr = 'non-destructive'
return 'CholeskyGrad{%s,%s}' % (lu, destr)
def make_node(self, x, l, dz):
x = as_tensor_variable(x)
l = as_tensor_variable(l)
dz = as_tensor_variable(dz)
assert x.ndim == 2
assert l.ndim == 2
assert dz.ndim == 2
assert l.owner.op.lower == self.lower, (
"lower/upper mismatch between Cholesky op and CholeskyGrad op"
)
return Apply(self, [x, l, dz], [x.type()])
def perform(self, node, inputs, outputs):
"""Implements the "reverse-mode" gradient [1]_ for the
Cholesky factorization of a positive-definite matrix.
.. [1] S. P. Smith. "Differentiation of the Cholesky Algorithm".
Journal of Computational and Graphical Statistics,
Vol. 4, No. 2 (Jun.,1995), pp. 134-147
http://www.jstor.org/stable/1390762
"""
x = inputs[0]
L = inputs[1]
dz = inputs[2]
dx = outputs[0]
N = x.shape[0]
if self.lower:
F = numpy.tril(dz)
for k in xrange(N - 1, -1, -1):
for j in xrange(k + 1, N):
for i in xrange(j, N):
F[i, k] -= F[i, j] * L[j, k]
F[j, k] -= F[i, j] * L[i, k]
for j in xrange(k + 1, N):
F[j, k] /= L[k, k]
F[k, k] -= L[j, k] * F[j, k]
F[k, k] /= (2 * L[k, k])
else:
F = numpy.triu(dz)
M = N - 1
for k in xrange(N - 1, -1, -1):
for j in xrange(k + 1, N):
for i in xrange(j, N):
F[k, i] -= F[j, i] * L[k, j]
F[k, j] -= F[j, i] * L[k, i]
for j in xrange(k + 1, N):
F[k, j] /= L[k, k]
F[k, k] -= L[k, j] * F[k, j]
F[k, k] /= (2 * L[k, k])
dx[0] = F
def infer_shape(self, node, shapes):
return [shapes[0]]
class MatrixPinv(Op):
"""Computes the pseudo-inverse of a matrix :math:`A`.
The pseudo-inverse of a matrix A, denoted :math:`A^+`, is
defined as: "the matrix that 'solves' [the least-squares problem]
:math:`Ax = b`," i.e., if :math:`\\bar{x}` is said solution, then
:math:`A^+` is that matrix such that :math:`\\bar{x} = A^+b`.
Note that :math:`Ax=AA^+b`, so :math:`AA^+` is close to the identity
matrix.
This method is not faster then `matrix_inverse`. Its strength comes from
that it works for non-square matrices.
If you have a square matrix though, `matrix_inverse` can be both more
exact and faster to compute. Also this op does not get optimized into a
solve op.
"""
def __init__(self):
pass
def props(self):
"""Function exposing different properties of each instance of the
op.
For the ``MatrixPinv`` op, there are no properties to be exposed.
"""
return ()
def __hash__(self):
return hash((type(self), self.props()))
def __eq__(self, other):
return (type(self) == type(other) and self.props() == other.props())
def make_node(self, x):
x = as_tensor_variable(x)
assert x.ndim == 2
return Apply(self, [x], [x.type()])
def perform(self, node, (x,), (z, )):
if imported_scipy:
z[0] = scipy.linalg.pinv(x).astype(x.dtype)
else:
z[0] = numpy.linalg.pinv(x).astype(x.dtype)
def __str__(self):
return "MatrixPseudoInverse"
pinv = MatrixPinv()
class Solve(Op):
"""Solve a system of linear equations"""
def __init__(self,
A_structure='general',
lower=False,
overwrite_A=False,
overwrite_b=False):
if A_structure not in MATRIX_STRUCTURES:
raise ValueError('Invalid matrix structure argument', A_structure)
self.A_structure = A_structure
self.lower = lower
self.overwrite_A = overwrite_A
self.overwrite_b = overwrite_b
def props(self):
return (self.A_structure,
self.lower,
self.overwrite_A,
self.overwrite_b)
def __hash__(self):
return hash((type(self), self.props()))
def __eq__(self, other):
return type(self) == type(other) and self.props() == other.props()
def __repr__(self):
return 'Solve{%s}' % str(self.props())
def make_node(self, A, b):
assert imported_scipy, (
"Scipy not available. Scipy is needed for the Solve op")
A = as_tensor_variable(A)
b = as_tensor_variable(b)
assert A.ndim == 2
assert b.ndim in [1, 2]
otype = tensor.tensor(
broadcastable=b.broadcastable,
dtype=(A * b).dtype)
return Apply(self, [A, b], [otype])
def perform(self, node, inputs, output_storage):
A, b = inputs
#TODO: use the A_structure to go faster
output_storage[0][0] = scipy.linalg.solve(A, b)
# computes shape of x where x = inv(A) * b
def infer_shape(self, node, shapes):
Ashape, Bshape = shapes
rows = Ashape[1]
if len(Bshape) == 1: # b is a Vector
return [(rows,)]
else:
cols = Bshape[1] # b is a Matrix
return [(rows, cols)]
solve = Solve() # general solve
#TODO : SolveTriangular
#TODO: Optimizations to replace multiplication by matrix inverse
# with solve() Op (still unwritten)
def spectral_radius_bound(X, log2_exponent):
"""
Returns upper bound on the largest eigenvalue of square symmetrix matrix X.
......@@ -643,46 +377,6 @@ def spectral_radius_bound(X, log2_exponent):
2 ** (-log2_exponent))
class Eig(Op):
"""Compute the eigenvalues and right eigenvectors of a square array.
"""
_numop = staticmethod(numpy.linalg.eig)
def props(self):
"""Function exposing different properties of each instance of the
op.
For the ``Eig`` op, there are no properties to be exposed.
"""
return ()
def __hash__(self):
return hash((type(self), self.props()))
def __eq__(self, other):
return (type(self) == type(other) and self.props() == other.props())
def make_node(self, x):
x = as_tensor_variable(x)
assert x.ndim == 2
w = theano.tensor.vector(dtype=x.dtype)
v = theano.tensor.matrix(dtype=x.dtype)
return Apply(self, [x], [w, v])
def perform(self, node, (x,), (w, v)):
w[0], v[0] = [z.astype(x.dtype) for z in self._numop(x)]
def infer_shape(self, node, shapes):
n = shapes[0][0]
return [(n,), (n, n)]
def __str__(self):
return self._numop.__name__.capitalize()
eig = Eig()
class SVD(Op):
# See doc in the docstring of the function just after this class.
......@@ -878,276 +572,6 @@ def qr(a, mode="full"):
return QRIncomplete(mode)(a)
def _zero_disconnected(outputs, grads):
l = []
for o, g in zip(outputs, grads):
if isinstance(g.type, DisconnectedType):
l.append(o.zeros_like())
else:
l.append(g)
return l
class Eigh(Eig):
"""
Return the eigenvalues and eigenvectors of a Hermitian or symmetric matrix.
"""
_numop = staticmethod(numpy.linalg.eigh)
def __init__(self, UPLO='L'):
assert UPLO in ['L', 'U']
self.UPLO = UPLO
def __str__(self):
return 'Eigh{%s}' % self.UPLO
def props(self):
return self.UPLO,
def make_node(self, x):
x = as_tensor_variable(x)
assert x.ndim == 2
# Numpy's linalg.eigh may return either double or single
# presision eigenvalues depending on installed version of
# LAPACK. Rather than trying to reproduce the (rather
# involved) logic, we just probe linalg.eigh with a trivial
# input.
w_dtype = self._numop([[numpy.dtype(x.dtype).type()]])[0].dtype.name
w = theano.tensor.vector(dtype=w_dtype)
v = theano.tensor.matrix(dtype=x.dtype)
return Apply(self, [x], [w, v])
def perform(self, node, (x,), (w, v)):
try:
w[0], v[0] = self._numop(x, self.UPLO)
except numpy.linalg.LinAlgError:
logger.debug('Failed to find %s of %s' % (self._numop.__name__,
node.inputs[0]))
raise
def grad(self, inputs, g_outputs):
r"""The gradient function should return
.. math:: \sum_n\left(W_n\frac{\partial\,w_n}
{\partial a_{ij}} +
\sum_k V_{nk}\frac{\partial\,v_{nk}}
{\partial a_{ij}}\right),
where [:math:`W`, :math:`V`] corresponds to ``g_outputs``,
:math:`a` to ``inputs``, and :math:`(w, v)=\mbox{eig}(a)`.
Analytic formulae for eigensystem gradients are well-known in
perturbation theory:
.. math:: \frac{\partial\,w_n}
{\partial a_{ij}} = v_{in}\,v_{jn}
.. math:: \frac{\partial\,v_{kn}}
{\partial a_{ij}} =
\sum_{m\ne n}\frac{v_{km}v_{jn}}{w_n-w_m}
"""
x, = inputs
w, v = self(x)
# Replace gradients wrt disconnected variables with
# zeros. This is a work-around for issue #1063.
gw, gv = _zero_disconnected([w, v], g_outputs)
return [EighGrad(self.UPLO)(x, w, v, gw, gv)]
def eigh(a, UPLO='L'):
return Eigh(UPLO)(a)
class EighGrad(Op):
"""Gradient of an eigensystem of a Hermitian matrix.
"""
def __init__(self, UPLO='L'):
assert UPLO in ['L', 'U']
self.UPLO = UPLO
if UPLO == 'L':
self.tri0 = numpy.tril
self.tri1 = lambda a: numpy.triu(a, 1)
else:
self.tri0 = numpy.triu
self.tri1 = lambda a: numpy.tril(a, -1)
def props(self):
return (self.UPLO,)
def __hash__(self):
return hash((type(self), self.props()))
def __eq__(self, other):
return (type(self) == type(other) and self.props() == other.props())
def __str__(self):
return 'EighGrad{%s}' % self.UPLO
def make_node(self, x, w, v, gw, gv):
x, w, v, gw, gv = map(as_tensor_variable, (x, w, v, gw, gv))
assert x.ndim == 2
assert w.ndim == 1
assert v.ndim == 2
assert gw.ndim == 1
assert gv.ndim == 2
out_dtype = theano.scalar.upcast(x.dtype, w.dtype, v.dtype,
gw.dtype, gv.dtype)
out = theano.tensor.matrix(dtype=out_dtype)
return Apply(self, [x, w, v, gw, gv], [out])
def perform(self, node, inputs, outputs):
r"""
Implements the "reverse-mode" gradient for the eigensystem of
a square matrix.
"""
x, w, v, W, V = inputs
N = x.shape[0]
outer = numpy.outer
G = lambda n: sum(v[:, m] * V.T[n].dot(v[:, m]) / (w[n] - w[m])
for m in xrange(N) if m != n)
g = sum(outer(v[:, n], v[:, n] * W[n] + G(n))
for n in xrange(N))
# Numpy's eigh(a, 'L') (eigh(a, 'U')) is a function of tril(a)
# (triu(a)) only. This means that partial derivative of
# eigh(a, 'L') (eigh(a, 'U')) with respect to a[i,j] is zero
# for i < j (i > j). At the same time, non-zero components of
# the gradient must account for the fact that variation of the
# opposite triangle contributes to variation of two elements
# of Hermitian (symmetric) matrix. The following line
# implements the necessary logic.
out = self.tri0(g) + self.tri1(g).T
# The call to self.tri0 in perform upcast from float32 to
# float64 or from int* to int64 in numpy 1.6.1 but not in
# 1.6.2. We do not want version dependent dtype in Theano.
# We think it should be the same as the output.
outputs[0][0] = numpy.asarray(out, dtype=node.outputs[0].dtype)
def infer_shape(self, node, shapes):
return [shapes[0]]
class Eigvalsh(Op):
"""Generalized eigenvalues of a Hermetian positive definite eigensystem
"""
def __init__(self, lower=True):
assert lower in [True, False]
self.lower = lower
def props(self):
return (self.lower,)
def __hash__(self):
return hash((type(self), self.props()))
def __eq__(self, other):
return (type(self) == type(other) and self.props() == other.props())
def make_node(self, a, b):
assert imported_scipy, (
"Scipy not available. Scipy is needed for the Eigvalsh op")
a = as_tensor_variable(a)
assert a.ndim == 2
if not isinstance(b, (theano.Variable)):
if b is None:
b = theano.tensor.NoneConst
out_dtype = a.dtype
else:
b = as_tensor_variable(b)
out_dtype = theano.scalar.upcast(a.dtype, b.dtype)
elif not isinstance(b.type, theano.tensor.NoneTypeT):
b = as_tensor_variable(b)
out_dtype = theano.scalar.upcast(a.dtype, b.dtype)
assert b.ndim == 2
else:
out_dtype = a.dtype
w = theano.tensor.vector(dtype=out_dtype)
return Apply(self, [a, b], [w])
def perform(self, node, (a, b), (w,)):
w[0] = scipy.linalg.eigvalsh(a=a, b=b, lower=self.lower)
def grad(self, inputs, g_outputs):
a, b = inputs
gw, = g_outputs
return EigvalshGrad(self.lower)(a, b, gw)
def infer_shape(self, node, shapes):
n = shapes[0][0]
return [(n,)]
class EigvalshGrad(Op):
"""Gradient of generalized eigenvalues of a Hermetian positive definite
eigensystem
"""
# Note: This Op (EigvalshGrad), should be removed and replaced with a graph
# of theano ops that is constructed directly in Eigvalsh.grad.
# But this can only be done once scipy.linalg.eigh is available as an Op
# (currently the Eigh uses numpy.linalg.eigh, which doesn't let you
# pass the right-hand-side matrix for a generalized eigenproblem.) See the
# discussion on github at
# https://github.com/Theano/Theano/pull/1846#discussion-diff-12486764
def __init__(self, lower=True):
assert lower in [True, False]
self.lower = lower
if lower:
self.tri0 = numpy.tril
self.tri1 = lambda a: numpy.triu(a, 1)
else:
self.tri0 = numpy.triu
self.tri1 = lambda a: numpy.tril(a, -1)
def props(self):
return (self.lower,)
def __hash__(self):
return hash((type(self), self.props()))
def __eq__(self, other):
return (type(self) == type(other) and self.props() == other.props())
def make_node(self, a, b, gw):
assert imported_scipy, (
"Scipy not available. Scipy is needed for the GEigvalsh op")
a, b, gw = map(as_tensor_variable, (a, b, gw))
assert a.ndim == 2
assert b.ndim == 2
assert gw.ndim == 1
out_dtype = theano.scalar.upcast(a.dtype, b.dtype, gw.dtype)
out1 = theano.tensor.matrix(dtype=out_dtype)
out2 = theano.tensor.matrix(dtype=out_dtype)
return Apply(self, [a, b, gw], [out1, out2])
def perform(self, node, (a, b, gw), outputs):
w, v = scipy.linalg.eigh(a, b, lower=self.lower)
gA = v.dot(numpy.diag(gw).dot(v.T))
gB = - v.dot(numpy.diag(gw*w).dot(v.T))
# See EighGrad comments for an explanation of these lines
out1 = self.tri0(gA) + self.tri1(gA).T
out2 = self.tri0(gB) + self.tri1(gB).T
outputs[0][0] = numpy.asarray(out1, dtype=node.outputs[0].dtype)
outputs[1][0] = numpy.asarray(out2, dtype=node.outputs[1].dtype)
def infer_shape(self, node, shapes):
return [shapes[0], shapes[1]]
def eigvalsh(a, b, lower=True):
return Eigvalsh(lower)(a, b)
def matrix_power(M, n):
result = 1
for i in xrange(n):
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论