Merge pull request #208 from dwf/cholesky_review

Cholesky Op review/tests

Merge pull request #208 from dwf/cholesky_review
13bfa7d2 · nouiz · 840176c8 · 83f39106 · 13bfa7d2 · 13bfa7d2
--- a/theano/sandbox/linalg/ops.py
+++ b/theano/sandbox/linalg/ops.py
@@ -303,25 +303,34 @@ MATRIX_STRUCTURES = (
        'toeplitz',
        )

+
 class Cholesky(Op):
    """
    Return a triangular matrix square root of positive semi-definite `x`

-    L = cholesky(X, lower=True) implies dot(L.T,L)==X
+    L = cholesky(X, lower=True) implies dot(L, L.T) == X
    """
    #TODO: inplace
    #TODO: for specific dtypes
+    #TODO: LAPACK wrapper with in-place behavior, for solve also
    def __init__(self, lower=True):
        self.lower = lower
        self.destructive = False
+
    def props(self):
        return (self.lower,
                self.destructive)
+
    def __hash__(self):
        return hash((type(self), self.props()))
+
    def __eq__(self, other):
-        return (type(self)==type(other) and self.props() == other.props())
-    def __repr__(self):
+        return (type(self) == type(other) and self.props() == other.props())
+
+    def infer_shape(self, node, shapes):
+        return [shapes[0]]
+
+    def __str__(self):
        if self.lower:
            lu = 'lower'
        else:
@@ -331,15 +340,104 @@ class Cholesky(Op):
        else:
            destr = 'non-destructive'
        return 'Cholesky{%s,%s}' % (lu, destr)
+
    def make_node(self, x):
        x = as_tensor_variable(x)
        return Apply(self, [x], [x.type()])
-    def perform(self, node, (x,), (z,)):
+
+    def perform(self, node, inputs, outputs):
+        x = inputs[0]
+        z = outputs[0]
        z[0] = scipy.linalg.cholesky(x, lower=self.lower).astype(x.dtype)
-    #def grad(self, (x, y), (gz,)):
-        #return dot(gz, y), dot(x, gz) #no transposing necessary
+
+    def grad(self, inputs, gradients):
+        return [CholeskyGrad(self.lower)(inputs[0], self(inputs[0]),
+                                         gradients[0])]
+
 cholesky = Cholesky()

+
+class CholeskyGrad(Op):
+    def __init__(self, lower=True):
+        self.lower = lower
+        self.destructive = False
+
+    def props(self):
+        return (self.lower,
+                self.destructive)
+
+    def __hash__(self):
+        return hash((type(self), self.props()))
+
+    def __eq__(self, other):
+        return (type(self) == type(other) and self.props() == other.props())
+
+    def __str__(self):
+        if self.lower:
+            lu = 'lower'
+        else:
+            lu = 'upper'
+        if self.destructive:
+            destr = 'destructive'
+        else:
+            destr = 'non-destructive'
+        return 'CholeskyGrad{%s,%s}' % (lu, destr)
+
+    def make_node(self, x, l, dz):
+        x = as_tensor_variable(x)
+        l = as_tensor_variable(l)
+        dz = as_tensor_variable(dz)
+        assert l.owner.op.lower == self.lower, (
+            "lower/upper mismatch between Cholesky op and CholeskyGrad op"
+        )
+        return Apply(self, [x, l, dz], [x.type()])
+
+    def perform(self, node, inputs, outputs):
+        """
+        Implements the "reverse-mode" gradient for the Cholesky factorization
+        of a positive-definite matrix.
+
+        References
+        ----------
+        .. [1] S. P. Smith. "Differentiation of the Cholesky Algorithm".
+               Journal of Computational and Graphical Statistics,
+               Vol. 4, No. 2 (Jun.,1995), pp. 134-147
+               http://www.jstor.org/stable/1390762
+        """
+        x = inputs[0]
+        L = inputs[1]
+        dz = inputs[2]
+        dx = outputs[0]
+        N = x.shape[0]
+        if self.lower:
+            F = numpy.tril(dz)
+            for k in xrange(N - 1, -1, -1):
+                for j in xrange(k + 1, N):
+                    for i in xrange(j, N):
+                        F[i, k] -= F[i, j] * L[j, k]
+                        F[j, k] -= F[i, j] * L[i, k]
+                for j in xrange(k + 1, N):
+                    F[j, k] /= L[k, k]
+                    F[k, k] -= L[j, k] * F[j, k]
+                F[k, k] /= (2 * L[k, k])
+        else:
+            F = numpy.triu(dz)
+            M = N - 1
+            for k in xrange(N - 1, -1, -1):
+                for j in xrange(k + 1, N):
+                    for i in xrange(j, N):
+                        F[k, i] -= F[j, i] * L[k, j]
+                        F[k, j] -= F[j, i] * L[k, i]
+                for j in xrange(k + 1, N):
+                    F[k, j] /= L[k, k]
+                    F[k, k] -= L[k, j] * F[k, j]
+                F[k, k] /= (2 * L[k, k])
+        dx[0] = F
+
+    def infer_shape(self, node, shapes):
+        return [shapes[0]]
+
+
 class MatrixInverse(Op):
    """Computes the inverse of a matrix :math:`A`.


--- a/theano/sandbox/linalg/tests/test_linalg.py
+++ b/theano/sandbox/linalg/tests/test_linalg.py
@@ -11,6 +11,8 @@ from theano import config

 # The one in comment are not tested...
 from theano.sandbox.linalg.ops import (cholesky,
+                                       Cholesky, # op class
+                                       CholeskyGrad,
                                       matrix_inverse,
                                       #solve,
                                       #diag,
@@ -27,29 +29,71 @@ from theano.sandbox.linalg.ops import (cholesky,
 from nose.plugins.skip import SkipTest


-if 0:
-    def test_cholesky():
-        #TODO: test upper and lower triangular
-        #todo: unittest randomseed
-        rng = numpy.random.RandomState(utt.fetch_seed())
+def check_lower_triangular(pd, ch_f):
+    ch = ch_f(pd)
+    assert ch[0, pd.shape[1] - 1] == 0
+    assert ch[pd.shape[0] - 1, 0] != 0
+    assert numpy.allclose(numpy.dot(ch, ch.T), pd)
+    assert not numpy.allclose(numpy.dot(ch.T, ch), pd)

-        r = rng.randn(5,5)

-        pd = numpy.dot(r,r.T)
+def check_upper_triangular(pd, ch_f):
+    ch = ch_f(pd)
+    assert ch[4, 0] == 0
+    assert ch[0, 4] != 0
+    assert numpy.allclose(numpy.dot(ch.T, ch), pd)
+    assert not numpy.allclose(numpy.dot(ch, ch.T), pd)

-        x = tensor.matrix()
-        chol = cholesky(x)
-        f = function([x], tensor.dot(chol, chol.T)) # an optimization could remove this

-        ch_f = function([x], chol)
+def test_cholesky():
+    rng = numpy.random.RandomState(utt.fetch_seed())
+    r = rng.randn(5, 5)
+    pd = numpy.dot(r, r.T)
+    x = tensor.matrix()
+    chol = cholesky(x)
+    # Check the default.
+    ch_f = function([x], chol)
+    yield check_lower_triangular, pd, ch_f
+    # Explicit lower-triangular.
+    chol = Cholesky(lower=True)(x)
+    ch_f = function([x], chol)
+    yield check_lower_triangular, pd, ch_f
+    # Explicit upper-triangular.
+    chol = Cholesky(lower=False)(x)
+    ch_f = function([x], chol)
+    yield check_upper_triangular, pd, ch_f
+
+
+def test_cholesky_grad():
+    rng = numpy.random.RandomState(utt.fetch_seed())
+    r = rng.randn(5, 5)
+    pd = numpy.dot(r, r.T)
+    # Check the default.
+    yield utt.verify_grad, cholesky, [pd], 3, rng
+    # Explicit lower-triangular.
+    yield utt.verify_grad, Cholesky(lower=True), [pd], 3, rng
+    # Explicit upper-triangular.
+    yield utt.verify_grad, Cholesky(lower=False), [pd], 3, rng

-        # quick check that chol is upper-triangular
-        ch = ch_f(pd)
-        print ch
-        assert ch[0,4] != 0
-        assert ch[4,0] == 0
-        assert numpy.allclose(numpy.dot(ch.T,ch),pd)
-        assert not numpy.allclose(numpy.dot(ch,ch.T),pd)
+
+def test_cholesky_and_cholesky_grad_shape():
+    rng = numpy.random.RandomState(utt.fetch_seed())
+    x = tensor.matrix()
+    for l in (cholesky(x), Cholesky(lower=True)(x), Cholesky(lower=False)(x)):
+        f_chol = theano.function([x], l.shape)
+        g = tensor.grad(l.sum(), x)
+        f_cholgrad = theano.function([x], g.shape)
+        topo_chol = f_chol.maker.env.toposort()
+        topo_cholgrad = f_cholgrad.maker.env.toposort()
+        if config.mode != 'FAST_COMPILE':
+            assert sum([node.op.__class__ == Cholesky
+                        for node in topo_chol]) == 0
+            assert sum([node.op.__class__ == CholeskyGrad
+                        for node in topo_cholgrad]) == 0
+        for shp in [2, 3, 5]:
+            m = numpy.cov(rng.randn(shp, shp + 10)).astype(config.floatX)
+            yield numpy.testing.assert_equal, f_chol(m), (shp, shp)
+            yield numpy.testing.assert_equal, f_cholgrad(m), (shp, shp)


 def test_inverse_correctness():