提交 13bfa7d2 authored 作者: nouiz's avatar nouiz

Merge pull request #208 from dwf/cholesky_review

Cholesky Op review/tests
......@@ -303,25 +303,34 @@ MATRIX_STRUCTURES = (
'toeplitz',
)
class Cholesky(Op):
"""
Return a triangular matrix square root of positive semi-definite `x`
L = cholesky(X, lower=True) implies dot(L.T,L)==X
L = cholesky(X, lower=True) implies dot(L, L.T) == X
"""
#TODO: inplace
#TODO: for specific dtypes
#TODO: LAPACK wrapper with in-place behavior, for solve also
def __init__(self, lower=True):
self.lower = lower
self.destructive = False
def props(self):
return (self.lower,
self.destructive)
def __hash__(self):
return hash((type(self), self.props()))
def __eq__(self, other):
return (type(self)==type(other) and self.props() == other.props())
def __repr__(self):
return (type(self) == type(other) and self.props() == other.props())
def infer_shape(self, node, shapes):
return [shapes[0]]
def __str__(self):
if self.lower:
lu = 'lower'
else:
......@@ -331,15 +340,104 @@ class Cholesky(Op):
else:
destr = 'non-destructive'
return 'Cholesky{%s,%s}' % (lu, destr)
def make_node(self, x):
x = as_tensor_variable(x)
return Apply(self, [x], [x.type()])
def perform(self, node, (x,), (z,)):
def perform(self, node, inputs, outputs):
x = inputs[0]
z = outputs[0]
z[0] = scipy.linalg.cholesky(x, lower=self.lower).astype(x.dtype)
#def grad(self, (x, y), (gz,)):
#return dot(gz, y), dot(x, gz) #no transposing necessary
def grad(self, inputs, gradients):
return [CholeskyGrad(self.lower)(inputs[0], self(inputs[0]),
gradients[0])]
cholesky = Cholesky()
class CholeskyGrad(Op):
def __init__(self, lower=True):
self.lower = lower
self.destructive = False
def props(self):
return (self.lower,
self.destructive)
def __hash__(self):
return hash((type(self), self.props()))
def __eq__(self, other):
return (type(self) == type(other) and self.props() == other.props())
def __str__(self):
if self.lower:
lu = 'lower'
else:
lu = 'upper'
if self.destructive:
destr = 'destructive'
else:
destr = 'non-destructive'
return 'CholeskyGrad{%s,%s}' % (lu, destr)
def make_node(self, x, l, dz):
x = as_tensor_variable(x)
l = as_tensor_variable(l)
dz = as_tensor_variable(dz)
assert l.owner.op.lower == self.lower, (
"lower/upper mismatch between Cholesky op and CholeskyGrad op"
)
return Apply(self, [x, l, dz], [x.type()])
def perform(self, node, inputs, outputs):
"""
Implements the "reverse-mode" gradient for the Cholesky factorization
of a positive-definite matrix.
References
----------
.. [1] S. P. Smith. "Differentiation of the Cholesky Algorithm".
Journal of Computational and Graphical Statistics,
Vol. 4, No. 2 (Jun.,1995), pp. 134-147
http://www.jstor.org/stable/1390762
"""
x = inputs[0]
L = inputs[1]
dz = inputs[2]
dx = outputs[0]
N = x.shape[0]
if self.lower:
F = numpy.tril(dz)
for k in xrange(N - 1, -1, -1):
for j in xrange(k + 1, N):
for i in xrange(j, N):
F[i, k] -= F[i, j] * L[j, k]
F[j, k] -= F[i, j] * L[i, k]
for j in xrange(k + 1, N):
F[j, k] /= L[k, k]
F[k, k] -= L[j, k] * F[j, k]
F[k, k] /= (2 * L[k, k])
else:
F = numpy.triu(dz)
M = N - 1
for k in xrange(N - 1, -1, -1):
for j in xrange(k + 1, N):
for i in xrange(j, N):
F[k, i] -= F[j, i] * L[k, j]
F[k, j] -= F[j, i] * L[k, i]
for j in xrange(k + 1, N):
F[k, j] /= L[k, k]
F[k, k] -= L[k, j] * F[k, j]
F[k, k] /= (2 * L[k, k])
dx[0] = F
def infer_shape(self, node, shapes):
return [shapes[0]]
class MatrixInverse(Op):
"""Computes the inverse of a matrix :math:`A`.
......
......@@ -11,6 +11,8 @@ from theano import config
# The one in comment are not tested...
from theano.sandbox.linalg.ops import (cholesky,
Cholesky, # op class
CholeskyGrad,
matrix_inverse,
#solve,
#diag,
......@@ -27,29 +29,71 @@ from theano.sandbox.linalg.ops import (cholesky,
from nose.plugins.skip import SkipTest
if 0:
def test_cholesky():
#TODO: test upper and lower triangular
#todo: unittest randomseed
rng = numpy.random.RandomState(utt.fetch_seed())
def check_lower_triangular(pd, ch_f):
ch = ch_f(pd)
assert ch[0, pd.shape[1] - 1] == 0
assert ch[pd.shape[0] - 1, 0] != 0
assert numpy.allclose(numpy.dot(ch, ch.T), pd)
assert not numpy.allclose(numpy.dot(ch.T, ch), pd)
r = rng.randn(5,5)
pd = numpy.dot(r,r.T)
def check_upper_triangular(pd, ch_f):
ch = ch_f(pd)
assert ch[4, 0] == 0
assert ch[0, 4] != 0
assert numpy.allclose(numpy.dot(ch.T, ch), pd)
assert not numpy.allclose(numpy.dot(ch, ch.T), pd)
x = tensor.matrix()
chol = cholesky(x)
f = function([x], tensor.dot(chol, chol.T)) # an optimization could remove this
ch_f = function([x], chol)
def test_cholesky():
rng = numpy.random.RandomState(utt.fetch_seed())
r = rng.randn(5, 5)
pd = numpy.dot(r, r.T)
x = tensor.matrix()
chol = cholesky(x)
# Check the default.
ch_f = function([x], chol)
yield check_lower_triangular, pd, ch_f
# Explicit lower-triangular.
chol = Cholesky(lower=True)(x)
ch_f = function([x], chol)
yield check_lower_triangular, pd, ch_f
# Explicit upper-triangular.
chol = Cholesky(lower=False)(x)
ch_f = function([x], chol)
yield check_upper_triangular, pd, ch_f
def test_cholesky_grad():
rng = numpy.random.RandomState(utt.fetch_seed())
r = rng.randn(5, 5)
pd = numpy.dot(r, r.T)
# Check the default.
yield utt.verify_grad, cholesky, [pd], 3, rng
# Explicit lower-triangular.
yield utt.verify_grad, Cholesky(lower=True), [pd], 3, rng
# Explicit upper-triangular.
yield utt.verify_grad, Cholesky(lower=False), [pd], 3, rng
# quick check that chol is upper-triangular
ch = ch_f(pd)
print ch
assert ch[0,4] != 0
assert ch[4,0] == 0
assert numpy.allclose(numpy.dot(ch.T,ch),pd)
assert not numpy.allclose(numpy.dot(ch,ch.T),pd)
def test_cholesky_and_cholesky_grad_shape():
rng = numpy.random.RandomState(utt.fetch_seed())
x = tensor.matrix()
for l in (cholesky(x), Cholesky(lower=True)(x), Cholesky(lower=False)(x)):
f_chol = theano.function([x], l.shape)
g = tensor.grad(l.sum(), x)
f_cholgrad = theano.function([x], g.shape)
topo_chol = f_chol.maker.env.toposort()
topo_cholgrad = f_cholgrad.maker.env.toposort()
if config.mode != 'FAST_COMPILE':
assert sum([node.op.__class__ == Cholesky
for node in topo_chol]) == 0
assert sum([node.op.__class__ == CholeskyGrad
for node in topo_cholgrad]) == 0
for shp in [2, 3, 5]:
m = numpy.cov(rng.randn(shp, shp + 10)).astype(config.floatX)
yield numpy.testing.assert_equal, f_chol(m), (shp, shp)
yield numpy.testing.assert_equal, f_cholgrad(m), (shp, shp)
def test_inverse_correctness():
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论