Add on_error option for Cholesky

d8b54e41 · Adrian Seyboldt · 3df1e8a3 · d8b54e41 · d8b54e41
--- a/theano/tensor/slinalg.py
+++ b/theano/tensor/slinalg.py
@@ -36,16 +36,28 @@ class Cholesky(Op):
    L = cholesky(X, lower=True) implies dot(L, L.T) == X.
+    Parameters
+    ----------
+    lower : bool, default=True
+        Whether to return the lower or upper cholesky factor
+    on_error : ['raise', 'nan']
+        If on_error is set to 'raise', this Op will raise a
+        `scipy.linalg.LinAlgError` if the matrix is not positive definite.
+        If on_error is set to 'nan', it will return a matrix containing
+        nans instead.
    """
    # TODO: inplace
    # TODO: for specific dtypes
    # TODO: LAPACK wrapper with in-place behavior, for solve also
-    __props__ = ('lower', 'destructive')
+    __props__ = ('lower', 'destructive', 'on_error')
-    def __init__(self, lower=True):
+    def __init__(self, lower=True, on_error='raise'):
        self.lower = lower
        self.destructive = False
+        if on_error not in ['raise', 'nan']:
+            raise ValueError('on_error must be one of "raise" or ""nan"')
+        self.on_error = on_error
    def infer_shape(self, node, shapes):
        return [shapes[0]]
@@ -60,7 +72,13 @@ class Cholesky(Op):
    def perform(self, node, inputs, outputs):
        x = inputs[0]
        z = outputs[0]
-        z[0] = scipy.linalg.cholesky(x, lower=self.lower).astype(x.dtype)
+        try:
+            z[0] = scipy.linalg.cholesky(x, lower=self.lower).astype(x.dtype)
+        except scipy.linalg.LinAlgError:
+            if self.on_error == 'raise':
+                raise
+            else:
+                z[0] = (np.zeros(x.shape) * np.nan).astype(x.dtype)
    def grad(self, inputs, gradients):
        """
@@ -79,6 +97,13 @@ class Cholesky(Op):
        dz = gradients[0]
        chol_x = self(x)
+        # Replace the cholesky decomposition with 1 if there are nans
+        # or solve_upper_triangular will throw a ValueError.
+        if self.on_error == 'nan':
+            ok = ~tensor.any(tensor.isnan(chol_x))
+            chol_x = tensor.switch(ok, chol_x, 1)
+            dz = tensor.switch(ok, dz, 1)
        # deal with upper triangular by converting to lower triangular
        if not self.lower:
            chol_x = chol_x.T
@@ -97,9 +122,14 @@ class Cholesky(Op):
            chol_x, tril_and_halve_diagonal(chol_x.T.dot(dz)))
        if self.lower:
-            return [tensor.tril(s + s.T) - tensor.diag(tensor.diagonal(s))]
+            grad = tensor.tril(s + s.T) - tensor.diag(tensor.diagonal(s))
+        else:
+            grad = tensor.triu(s + s.T) - tensor.diag(tensor.diagonal(s))
+        if self.on_error == 'nan':
+            return [tensor.switch(ok, grad, np.nan)]
        else:
-            return [tensor.triu(s + s.T) - tensor.diag(tensor.diagonal(s))]
+            return [grad]
 cholesky = Cholesky()

--- a/theano/tensor/tests/test_slinalg.py
+++ b/theano/tensor/tests/test_slinalg.py
@@ -10,7 +10,7 @@ from numpy import inf
 import itertools
 import theano
-from theano import tensor, function
+from theano import tensor, function, grad
 from theano.tensor.basic import _allclose
 from theano.tests.test_rop import break_op
 from theano.tests import unittest_tools as utt
@@ -67,6 +67,23 @@ def test_cholesky():
    chol = Cholesky(lower=False)(x)
    ch_f = function([x], chol)
    yield check_upper_triangular, pd, ch_f
+    chol = Cholesky(lower=False, on_error='nan')(x)
+    ch_f = function([x], chol)
+    yield check_upper_triangular, pd, ch_f
+def test_cholesky_indef():
+    if not imported_scipy:
+        raise SkipTest("Scipy needed for the Cholesky op.")
+    x = tensor.matrix()
+    matrix = np.array([[1, 0.2], [0.2, -2]]).astype(config.floatX)
+    cholesky = Cholesky(lower=True, on_error='raise')
+    chol_f = function([x], cholesky(x))
+    with assert_raises(scipy.linalg.LinAlgError):
+        chol_f(matrix)
+    cholesky = Cholesky(lower=True, on_error='nan')
+    chol_f = function([x], cholesky(x))
+    assert np.all(np.isnan(chol_f(matrix)))
 def test_cholesky_grad():
@@ -88,6 +105,20 @@ def test_cholesky_grad():
                                   [r], 3, rng))
+def test_cholesky_grad_indef():
+    if not imported_scipy:
+        raise SkipTest("Scipy needed for the Cholesky op.")
+    x = tensor.matrix()
+    matrix = np.array([[1, 0.2], [0.2, -2]]).astype(config.floatX)
+    cholesky = Cholesky(lower=True, on_error='raise')
+    chol_f = function([x], grad(cholesky(x).sum(), [x]))
+    with assert_raises(scipy.linalg.LinAlgError):
+        chol_f(matrix)
+    cholesky = Cholesky(lower=True, on_error='nan')
+    chol_f = function([x], grad(cholesky(x).sum(), [x]))
+    assert np.all(np.isnan(chol_f(matrix)))
 @attr('slow')
 def test_cholesky_and_cholesky_grad_shape():
    if not imported_scipy: