提交 d8b54e41 authored 作者: Adrian Seyboldt's avatar Adrian Seyboldt

Add on_error option for Cholesky

上级 3df1e8a3
...@@ -36,16 +36,28 @@ class Cholesky(Op): ...@@ -36,16 +36,28 @@ class Cholesky(Op):
L = cholesky(X, lower=True) implies dot(L, L.T) == X. L = cholesky(X, lower=True) implies dot(L, L.T) == X.
Parameters
----------
lower : bool, default=True
Whether to return the lower or upper cholesky factor
on_error : ['raise', 'nan']
If on_error is set to 'raise', this Op will raise a
`scipy.linalg.LinAlgError` if the matrix is not positive definite.
If on_error is set to 'nan', it will return a matrix containing
nans instead.
""" """
# TODO: inplace # TODO: inplace
# TODO: for specific dtypes # TODO: for specific dtypes
# TODO: LAPACK wrapper with in-place behavior, for solve also # TODO: LAPACK wrapper with in-place behavior, for solve also
__props__ = ('lower', 'destructive') __props__ = ('lower', 'destructive', 'on_error')
def __init__(self, lower=True): def __init__(self, lower=True, on_error='raise'):
self.lower = lower self.lower = lower
self.destructive = False self.destructive = False
if on_error not in ['raise', 'nan']:
raise ValueError('on_error must be one of "raise" or ""nan"')
self.on_error = on_error
def infer_shape(self, node, shapes): def infer_shape(self, node, shapes):
return [shapes[0]] return [shapes[0]]
...@@ -60,7 +72,13 @@ class Cholesky(Op): ...@@ -60,7 +72,13 @@ class Cholesky(Op):
def perform(self, node, inputs, outputs): def perform(self, node, inputs, outputs):
x = inputs[0] x = inputs[0]
z = outputs[0] z = outputs[0]
z[0] = scipy.linalg.cholesky(x, lower=self.lower).astype(x.dtype) try:
z[0] = scipy.linalg.cholesky(x, lower=self.lower).astype(x.dtype)
except scipy.linalg.LinAlgError:
if self.on_error == 'raise':
raise
else:
z[0] = (np.zeros(x.shape) * np.nan).astype(x.dtype)
def grad(self, inputs, gradients): def grad(self, inputs, gradients):
""" """
...@@ -79,6 +97,13 @@ class Cholesky(Op): ...@@ -79,6 +97,13 @@ class Cholesky(Op):
dz = gradients[0] dz = gradients[0]
chol_x = self(x) chol_x = self(x)
# Replace the cholesky decomposition with 1 if there are nans
# or solve_upper_triangular will throw a ValueError.
if self.on_error == 'nan':
ok = ~tensor.any(tensor.isnan(chol_x))
chol_x = tensor.switch(ok, chol_x, 1)
dz = tensor.switch(ok, dz, 1)
# deal with upper triangular by converting to lower triangular # deal with upper triangular by converting to lower triangular
if not self.lower: if not self.lower:
chol_x = chol_x.T chol_x = chol_x.T
...@@ -97,9 +122,14 @@ class Cholesky(Op): ...@@ -97,9 +122,14 @@ class Cholesky(Op):
chol_x, tril_and_halve_diagonal(chol_x.T.dot(dz))) chol_x, tril_and_halve_diagonal(chol_x.T.dot(dz)))
if self.lower: if self.lower:
return [tensor.tril(s + s.T) - tensor.diag(tensor.diagonal(s))] grad = tensor.tril(s + s.T) - tensor.diag(tensor.diagonal(s))
else:
grad = tensor.triu(s + s.T) - tensor.diag(tensor.diagonal(s))
if self.on_error == 'nan':
return [tensor.switch(ok, grad, np.nan)]
else: else:
return [tensor.triu(s + s.T) - tensor.diag(tensor.diagonal(s))] return [grad]
cholesky = Cholesky() cholesky = Cholesky()
......
...@@ -10,7 +10,7 @@ from numpy import inf ...@@ -10,7 +10,7 @@ from numpy import inf
import itertools import itertools
import theano import theano
from theano import tensor, function from theano import tensor, function, grad
from theano.tensor.basic import _allclose from theano.tensor.basic import _allclose
from theano.tests.test_rop import break_op from theano.tests.test_rop import break_op
from theano.tests import unittest_tools as utt from theano.tests import unittest_tools as utt
...@@ -67,6 +67,23 @@ def test_cholesky(): ...@@ -67,6 +67,23 @@ def test_cholesky():
chol = Cholesky(lower=False)(x) chol = Cholesky(lower=False)(x)
ch_f = function([x], chol) ch_f = function([x], chol)
yield check_upper_triangular, pd, ch_f yield check_upper_triangular, pd, ch_f
chol = Cholesky(lower=False, on_error='nan')(x)
ch_f = function([x], chol)
yield check_upper_triangular, pd, ch_f
def test_cholesky_indef():
if not imported_scipy:
raise SkipTest("Scipy needed for the Cholesky op.")
x = tensor.matrix()
matrix = np.array([[1, 0.2], [0.2, -2]]).astype(config.floatX)
cholesky = Cholesky(lower=True, on_error='raise')
chol_f = function([x], cholesky(x))
with assert_raises(scipy.linalg.LinAlgError):
chol_f(matrix)
cholesky = Cholesky(lower=True, on_error='nan')
chol_f = function([x], cholesky(x))
assert np.all(np.isnan(chol_f(matrix)))
def test_cholesky_grad(): def test_cholesky_grad():
...@@ -88,6 +105,20 @@ def test_cholesky_grad(): ...@@ -88,6 +105,20 @@ def test_cholesky_grad():
[r], 3, rng)) [r], 3, rng))
def test_cholesky_grad_indef():
if not imported_scipy:
raise SkipTest("Scipy needed for the Cholesky op.")
x = tensor.matrix()
matrix = np.array([[1, 0.2], [0.2, -2]]).astype(config.floatX)
cholesky = Cholesky(lower=True, on_error='raise')
chol_f = function([x], grad(cholesky(x).sum(), [x]))
with assert_raises(scipy.linalg.LinAlgError):
chol_f(matrix)
cholesky = Cholesky(lower=True, on_error='nan')
chol_f = function([x], grad(cholesky(x).sum(), [x]))
assert np.all(np.isnan(chol_f(matrix)))
@attr('slow') @attr('slow')
def test_cholesky_and_cholesky_grad_shape(): def test_cholesky_and_cholesky_grad_shape():
if not imported_scipy: if not imported_scipy:
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论