提交 117e3938 authored 作者: Olivier Delalleau's avatar Olivier Delalleau

Merge pull request #339 from pascanur/sandbox_pinv

Sandbox pseudo-inverse
...@@ -20,32 +20,42 @@ except ImportError: ...@@ -20,32 +20,42 @@ except ImportError:
# some ops (e.g. Cholesky, Solve, A_Xinv_b) won't work # some ops (e.g. Cholesky, Solve, A_Xinv_b) won't work
imported_scipy = False imported_scipy = False
class Hint(Op): class Hint(Op):
""" """
Provide arbitrary information to the optimizer Provide arbitrary information to the optimizer
These ops are removed from the graph during canonicalization These ops are removed from the graph during canonicalization
in order to not interfere with other optimizations. in order to not interfere with other optimizations.
The idea is that prior to canonicalization, one or more Features of the env should The idea is that prior to canonicalization, one or more Features of the
register the information contained in any Hint node, and transfer that information out of env should register the information contained in any Hint node, and
the graph. transfer that information out of the graph.
""" """
def __init__(self, **kwargs): def __init__(self, **kwargs):
self.hints = tuple(kwargs.items()) self.hints = tuple(kwargs.items())
self.view_map = {0:[0]} self.view_map = {0: [0]}
def __eq__(self, other): def __eq__(self, other):
return type(self) == type(other) and self.hints == other.hints return type(self) == type(other) and self.hints == other.hints
def __hash__(self): def __hash__(self):
return hash((type(self), self.hints)) return hash((type(self), self.hints))
def make_node(self, x): def make_node(self, x):
return Apply(self, [x], [x.type()]) return Apply(self, [x], [x.type()])
def perform(self, node, inputs, outstor): def perform(self, node, inputs, outstor):
outstor[0][0] = inputs[0] outstor[0][0] = inputs[0]
def grad(self, inputs, g_out): def grad(self, inputs, g_out):
return g_out return g_out
def is_hint_node(node): def is_hint_node(node):
return isinstance(node.op, Hint) return isinstance(node.op, Hint)
def hints(variable): def hints(variable):
if hasattr(variable, 'env'): if hasattr(variable, 'env'):
try: try:
...@@ -58,13 +68,14 @@ def hints(variable): ...@@ -58,13 +68,14 @@ def hints(variable):
else: else:
return {} return {}
@register_canonicalize @register_canonicalize
@local_optimizer([]) @local_optimizer([])
def remove_hint_nodes(node): def remove_hint_nodes(node):
if is_hint_node(node): if is_hint_node(node):
# transfer hints from graph to Feature # transfer hints from graph to Feature
try: try:
for k,v in node.op.hints: for k, v in node.op.hints:
node.env.hints_feature.add_hint(node.inputs[0], k, v) node.env.hints_feature.add_hint(node.inputs[0], k, v)
except AttributeError: except AttributeError:
pass pass
...@@ -75,29 +86,34 @@ class HintsFeature(object): ...@@ -75,29 +86,34 @@ class HintsFeature(object):
""" """
Env Feature to track matrix properties Env Feature to track matrix properties
This is a similar feature to variable 'tags'. In fact, tags are one way to provide hints. This is a similar feature to variable 'tags'. In fact, tags are one way
to provide hints.
This class exists because tags were not documented well, and the semantics of how tag This class exists because tags were not documented well, and the
information should be moved around during optimizations was never clearly spelled out. semantics of how tag information should be moved around during
optimizations was never clearly spelled out.
Hints are assumptions about mathematical properties of variables. Hints are assumptions about mathematical properties of variables.
If one variable is substituted for another by an optimization, If one variable is substituted for another by an optimization,
then it means that the assumptions should be transferred to the new variable. then it means that the assumptions should be transferred to the
new variable.
Hints are attached to 'positions in a graph' rather than to variables in particular, Hints are attached to 'positions in a graph' rather than to variables
although Hints are originally attached to a particular positition in a graph *via* a in particular, although Hints are originally attached to a particular
variable in that original graph. position in a graph *via* a variable in that original graph.
Examples of hints are: Examples of hints are:
- shape information - shape information
- matrix properties (e.g. symmetry, psd, banded, diagonal) - matrix properties (e.g. symmetry, psd, banded, diagonal)
Hint information is propagated through the graph similarly to graph optimizations, Hint information is propagated through the graph similarly to graph
except that adding a hint does not change the graph. Adding a hint is not something that optimizations, except that adding a hint does not change the graph.
debugmode will check. Adding a hint is not something that debugmode will check.
#TODO: should a Hint be an object that can actually evaluate its truthfulness? #TODO: should a Hint be an object that can actually evaluate its
# Should the PSD property be an object that can check the PSD-ness of a variable? # truthfulness?
# Should the PSD property be an object that can check the
# PSD-ness of a variable?
""" """
def add_hint(self, r, k, v): def add_hint(self, r, k, v):
...@@ -107,6 +123,7 @@ class HintsFeature(object): ...@@ -107,6 +123,7 @@ class HintsFeature(object):
def ensure_init_r(self, r): def ensure_init_r(self, r):
if r not in self.hints: if r not in self.hints:
self.hints[r] = {} self.hints[r] = {}
# #
# #
# Feature interface # Feature interface
...@@ -115,7 +132,8 @@ class HintsFeature(object): ...@@ -115,7 +132,8 @@ class HintsFeature(object):
def on_attach(self, env): def on_attach(self, env):
assert not hasattr(env, 'hints_feature') assert not hasattr(env, 'hints_feature')
env.hints_feature = self env.hints_feature = self
self.hints = {} # Variable -> tuple(scalars) or None (All tensor vars map to tuple) # Variable -> tuple(scalars) or None (All tensor vars map to tuple)
self.hints = {}
for node in env.toposort(): for node in env.toposort():
self.on_import(env, node) self.on_import(env, node)
...@@ -133,7 +151,7 @@ class HintsFeature(object): ...@@ -133,7 +151,7 @@ class HintsFeature(object):
def update_second_from_first(self, r0, r1): def update_second_from_first(self, r0, r1):
old_hints = self.hints[r0] old_hints = self.hints[r0]
new_hints = self.hints[r1] new_hints = self.hints[r1]
for k,v in old_hints.items(): for k, v in old_hints.items():
if k in new_hints and new_hints[k] is not v: if k in new_hints and new_hints[k] is not v:
raise NotImplementedError() raise NotImplementedError()
if k not in new_hints: if k not in new_hints:
...@@ -151,6 +169,7 @@ class HintsFeature(object): ...@@ -151,6 +169,7 @@ class HintsFeature(object):
# 1) we are trying to get rid of r, or # 1) we are trying to get rid of r, or
# 2) we are putting things back after a failed transaction. # 2) we are putting things back after a failed transaction.
class HintsOptimizer(Optimizer): class HintsOptimizer(Optimizer):
"""Optimizer that serves to add HintsFeature as an env feature. """Optimizer that serves to add HintsFeature as an env feature.
""" """
...@@ -163,7 +182,11 @@ class HintsOptimizer(Optimizer): ...@@ -163,7 +182,11 @@ class HintsOptimizer(Optimizer):
def apply(self, env): def apply(self, env):
pass pass
# -1 should make it run right before the first merge # -1 should make it run right before the first merge
theano.compile.mode.optdb.register('HintsOpt', HintsOptimizer(), -1, 'fast_run', 'fast_compile') theano.compile.mode.optdb.register('HintsOpt',
HintsOptimizer(),
-1,
'fast_run',
'fast_compile')
def psd(v): def psd(v):
...@@ -176,8 +199,12 @@ def psd(v): ...@@ -176,8 +199,12 @@ def psd(v):
def is_psd(v): def is_psd(v):
return hints(v).get('psd', False) return hints(v).get('psd', False)
def is_symmetric(v): def is_symmetric(v):
return hints(v).get('symmetric', False) return hints(v).get('symmetric', False)
def is_positive(v): def is_positive(v):
if hints(v).get('positive', False): if hints(v).get('positive', False):
return True return True
...@@ -200,7 +227,7 @@ def inv_as_solve(node): ...@@ -200,7 +227,7 @@ def inv_as_solve(node):
if not imported_scipy: if not imported_scipy:
return False return False
if node.op == dot: if node.op == dot:
l,r = node.inputs l, r = node.inputs
if l.owner and l.owner.op == matrix_inverse: if l.owner and l.owner.op == matrix_inverse:
return [solve(l.owner.inputs[0], r)] return [solve(l.owner.inputs[0], r)]
if r.owner and r.owner.op == matrix_inverse: if r.owner and r.owner.op == matrix_inverse:
...@@ -209,6 +236,7 @@ def inv_as_solve(node): ...@@ -209,6 +236,7 @@ def inv_as_solve(node):
else: else:
return [solve(r.owner.inputs[0].T, l.T).T] return [solve(r.owner.inputs[0].T, l.T).T]
@register_canonicalize @register_canonicalize
@register_stabilize @register_stabilize
@register_specialize @register_specialize
...@@ -216,16 +244,17 @@ def inv_as_solve(node): ...@@ -216,16 +244,17 @@ def inv_as_solve(node):
def no_transpose_symmetric(node): def no_transpose_symmetric(node):
if isinstance(node.op, DimShuffle): if isinstance(node.op, DimShuffle):
x = node.inputs[0] x = node.inputs[0]
if x.type.ndim==2 and is_symmetric(x): if x.type.ndim == 2 and is_symmetric(x):
#print 'UNDOING TRANSPOSE', is_symmetric(x), x.ndim #print 'UNDOING TRANSPOSE', is_symmetric(x), x.ndim
if node.op.new_order == [1,0]: if node.op.new_order == [1, 0]:
return [x] return [x]
@register_stabilize @register_stabilize
@local_optimizer([]) @local_optimizer([])
def psd_solve_with_chol(node): def psd_solve_with_chol(node):
if node.op == solve: if node.op == solve:
A, b = node.inputs #result is solution Ax=b A, b = node.inputs # result is solution Ax=b
if is_psd(A): if is_psd(A):
L = cholesky(A) L = cholesky(A)
#N.B. this can be further reduced to a yet-unwritten cho_solve Op #N.B. this can be further reduced to a yet-unwritten cho_solve Op
...@@ -235,6 +264,7 @@ def psd_solve_with_chol(node): ...@@ -235,6 +264,7 @@ def psd_solve_with_chol(node):
x = Solve('upper_triangular')(L.T, Li_b) x = Solve('upper_triangular')(L.T, Li_b)
return [x] return [x]
@register_stabilize @register_stabilize
@register_specialize @register_specialize
@local_optimizer([]) @local_optimizer([])
...@@ -246,10 +276,10 @@ def local_det_chol(node): ...@@ -246,10 +276,10 @@ def local_det_chol(node):
""" """
if node.op == det: if node.op == det:
x, = node.inputs x, = node.inputs
for (cl,xpos) in x.clients: for (cl, xpos) in x.clients:
if isinstance(cl.op, Cholesky): if isinstance(cl.op, Cholesky):
L = cl.outputs[0] L = cl.outputs[0]
return [tensor.prod(extract_diag(L)**2)] return [tensor.prod(extract_diag(L) ** 2)]
@register_canonicalize @register_canonicalize
...@@ -268,8 +298,9 @@ def local_log_prod_sqr(node): ...@@ -268,8 +298,9 @@ def local_log_prod_sqr(node):
if is_positive(p): if is_positive(p):
return [tensor.log(p).sum(axis=x.owner.op.axis)] return [tensor.log(p).sum(axis=x.owner.op.axis)]
#TODO: have a reduction like prod and sum that simply returns the sign #TODO: have a reduction like prod and sum that simply
# of the prod multiplication. # returns the sign of the prod multiplication.
@register_canonicalize @register_canonicalize
@register_stabilize @register_stabilize
...@@ -444,6 +475,44 @@ class CholeskyGrad(Op): ...@@ -444,6 +475,44 @@ class CholeskyGrad(Op):
return [shapes[0]] return [shapes[0]]
class MatrixPinv(Op):
    """Computes the Moore-Penrose pseudo-inverse of a matrix.

    Uses ``scipy.linalg.pinv`` when scipy is available, otherwise falls
    back to ``numpy.linalg.pinv``.
    """

    def __init__(self):
        pass

    def props(self):
        """Function exposing different properties of each instance of the
        op.

        For the ``MatrixPinv`` op, there are no properties to be exposed.
        """
        return ()

    def __hash__(self):
        return hash((type(self), self.props()))

    def __eq__(self, other):
        return (type(self) == type(other) and self.props() == other.props())

    def make_node(self, x):
        # Output has the same type as the input (matrix of same dtype).
        x = as_tensor_variable(x)
        return Apply(self, [x], [x.type()])

    def perform(self, node, inputs, outputs):
        # NOTE: the original signature used Python-2-only tuple parameter
        # unpacking (``def perform(self, node, (x,), (z,))``), which is a
        # syntax error on Python 3. Unpack manually instead; the calling
        # convention (positional inputs/outputs lists) is unchanged.
        (x,) = inputs
        (z,) = outputs
        try:
            if imported_scipy:
                z[0] = scipy.linalg.pinv(x).astype(x.dtype)
            else:
                z[0] = numpy.linalg.pinv(x).astype(x.dtype)
        except numpy.linalg.LinAlgError:
            # Lazy %-args: the message is only formatted if debug is enabled.
            logger.debug('Failed to invert %s', node.inputs[0])
            raise

    def __str__(self):
        return "MatrixPseudoInverse"

pinv = MatrixPinv()
class MatrixInverse(Op): class MatrixInverse(Op):
"""Computes the inverse of a matrix :math:`A`. """Computes the inverse of a matrix :math:`A`.
...@@ -470,7 +539,7 @@ class MatrixInverse(Op): ...@@ -470,7 +539,7 @@ class MatrixInverse(Op):
return hash((type(self), self.props())) return hash((type(self), self.props()))
def __eq__(self, other): def __eq__(self, other):
return (type(self)==type(other) and self.props() == other.props()) return (type(self) == type(other) and self.props() == other.props())
def make_node(self, x): def make_node(self, x):
x = as_tensor_variable(x) x = as_tensor_variable(x)
...@@ -500,7 +569,7 @@ class MatrixInverse(Op): ...@@ -500,7 +569,7 @@ class MatrixInverse(Op):
xi = self(x) xi = self(x)
gz, = g_outputs gz, = g_outputs
#TT.dot(gz.T,xi) #TT.dot(gz.T,xi)
return [-matrix_dot(xi,gz.T,xi).T] return [-matrix_dot(xi, gz.T, xi).T]
def R_op(self, inputs, eval_points): def R_op(self, inputs, eval_points):
"""The gradient function should return: """The gradient function should return:
...@@ -520,35 +589,43 @@ class MatrixInverse(Op): ...@@ -520,35 +589,43 @@ class MatrixInverse(Op):
ev, = eval_points ev, = eval_points
if ev is None: if ev is None:
return [None] return [None]
#TT.dot(gz.T,xi) return [-matrix_dot(xi, ev, xi)]
return [-matrix_dot(xi,ev,xi)]
def __str__(self): def __str__(self):
return "MatrixInverse" return "MatrixInverse"
matrix_inverse = MatrixInverse() matrix_inverse = MatrixInverse()
class Solve(Op): class Solve(Op):
"""Solve a system of linear equations""" """Solve a system of linear equations"""
def __init__(self, A_structure='general', lower=False, overwrite_A=False, overwrite_b=False): def __init__(self,
A_structure='general',
lower=False,
overwrite_A=False,
overwrite_b=False):
if A_structure not in MATRIX_STRUCTURES: if A_structure not in MATRIX_STRUCTURES:
raise ValueError('Invalid matrix structure argument', A_structure) raise ValueError('Invalid matrix structure argument', A_structure)
self.A_structure = A_structure self.A_structure = A_structure
self.lower=lower self.lower = lower
self.overwrite_A=overwrite_A self.overwrite_A = overwrite_A
self.overwrite_b=overwrite_b self.overwrite_b = overwrite_b
def props(self): def props(self):
return (self.A_structure, return (self.A_structure,
self.lower, self.lower,
self.overwrite_A, self.overwrite_A,
self.overwrite_b) self.overwrite_b)
def __hash__(self): def __hash__(self):
return hash((type(self),self.props())) return hash((type(self), self.props()))
def __eq__(self, other): def __eq__(self, other):
return type(self) == type(other) and self.props() == other.props() return type(self) == type(other) and self.props() == other.props()
def __repr__(self): def __repr__(self):
return 'Solve{%s}'%str(self.props()) return 'Solve{%s}' % str(self.props())
def make_node(self, A, b): def make_node(self, A, b):
assert imported_scipy, ( assert imported_scipy, (
"Scipy not available. Scipy is needed for the Solve op") "Scipy not available. Scipy is needed for the Solve op")
...@@ -556,30 +633,34 @@ class Solve(Op): ...@@ -556,30 +633,34 @@ class Solve(Op):
b = as_tensor_variable(b) b = as_tensor_variable(b)
otype = tensor.tensor( otype = tensor.tensor(
broadcastable=b.broadcastable, broadcastable=b.broadcastable,
dtype = (A*b).dtype) dtype=(A * b).dtype)
return Apply(self, [A,b], [otype]) return Apply(self, [A, b], [otype])
def perform(self, node, inputs, output_storage): def perform(self, node, inputs, output_storage):
A, b = inputs A, b = inputs
#TODO: use the A_structure to go faster #TODO: use the A_structure to go faster
output_storage[0][0] = scipy.linalg.solve(A,b) output_storage[0][0] = scipy.linalg.solve(A, b)
solve = Solve() # general solve
solve = Solve() # general solve
#TODO : SolveTriangular #TODO : SolveTriangular
#TODO: Optimizations to replace multiplication by matrix inverse with solve() Op (still unwritten) #TODO: Optimizations to replace multiplication by matrix inverse
# with solve() Op (still unwritten)
class ExtractDiag(Op): class ExtractDiag(Op):
""" Return the diagonal of a matrix. """ """ Return the diagonal of a matrix. """
def __init__(self, view=False): def __init__(self, view=False):
self.view = view self.view = view
if self.view: if self.view:
self.view_map = {0:[0]} self.view_map = {0: [0]}
def __eq__(self, other): def __eq__(self, other):
return type(self) == type(other) and self.view == other.view return type(self) == type(other) and self.view == other.view
def __hash__(self): def __hash__(self):
return hash(type(self))^hash(self.view) return hash(type(self)) ^ hash(self.view)
def make_node(self, _x): def make_node(self, _x):
x = as_tensor_variable(_x) x = as_tensor_variable(_x)
...@@ -588,7 +669,8 @@ class ExtractDiag(Op): ...@@ -588,7 +669,8 @@ class ExtractDiag(Op):
return Apply(self, [x], [tensor.vector(dtype=x.type.dtype)]) return Apply(self, [x], [tensor.vector(dtype=x.type.dtype)])
def perform(self, node, ins, outs): def perform(self, node, ins, outs):
""" For some reason numpy.diag(x) is really slow, so we implemented our own. """ """ For some reason numpy.diag(x) is really slow, so we
implemented our own. """
x, = ins x, = ins
z, = outs z, = outs
...@@ -597,24 +679,26 @@ class ExtractDiag(Op): ...@@ -597,24 +679,26 @@ class ExtractDiag(Op):
z[0] = numpy.zeros(0) z[0] = numpy.zeros(0)
return return
if x.shape[0] < x.shape [1]: if x.shape[0] < x.shape[1]:
rval = x[:,0] rval = x[:, 0]
else: else:
rval = x[0] rval = x[0]
rval.strides = (x.strides[0]+x.strides[1],) rval.strides = (x.strides[0] + x.strides[1],)
if self.view: if self.view:
z[0] = rval z[0] = rval
else: else:
z[0] = rval.copy() z[0] = rval.copy()
def __str__(self): def __str__(self):
return 'ExtractDiag{view=%s}'%self.view return 'ExtractDiag{view=%s}' % self.view
def grad(self, inputs, g_outputs): def grad(self, inputs, g_outputs):
x = tensor.zeros_like(inputs[0]) x = tensor.zeros_like(inputs[0])
xdiag = alloc_diag(g_outputs[0]) xdiag = alloc_diag(g_outputs[0])
return [tensor.set_subtensor(x[:xdiag.shape[0], :xdiag.shape[1]], xdiag)] return [tensor.set_subtensor(
x[:xdiag.shape[0], :xdiag.shape[1]],
xdiag)]
def infer_shape(self, node, shapes): def infer_shape(self, node, shapes):
x_s, = shapes x_s, = shapes
...@@ -651,7 +735,7 @@ class AllocDiag(Op): ...@@ -651,7 +735,7 @@ class AllocDiag(Op):
def infer_shape(self, node, shapes): def infer_shape(self, node, shapes):
x_s, = shapes x_s, = shapes
return [(x_s[0],x_s[0])] return [(x_s[0], x_s[0])]
alloc_diag = AllocDiag() alloc_diag = AllocDiag()
...@@ -667,7 +751,7 @@ def diag(x): ...@@ -667,7 +751,7 @@ def diag(x):
xx = as_tensor_variable(x) xx = as_tensor_variable(x)
if xx.type.ndim == 1: if xx.type.ndim == 1:
return alloc_diag(xx) return alloc_diag(xx)
elif xx.type.ndim ==2: elif xx.type.ndim == 2:
return extract_diag(xx) return extract_diag(xx)
else: else:
raise TypeError('diag requires vector or matrix argument', x) raise TypeError('diag requires vector or matrix argument', x)
...@@ -681,22 +765,22 @@ class Det(Op): ...@@ -681,22 +765,22 @@ class Det(Op):
x = as_tensor_variable(x) x = as_tensor_variable(x)
o = theano.tensor.scalar(dtype=x.dtype) o = theano.tensor.scalar(dtype=x.dtype)
return Apply(self, [x], [o]) return Apply(self, [x], [o])
def perform(self, node, (x,), (z, )): def perform(self, node, (x,), (z, )):
try: try:
z[0] = numpy.asarray(numpy.linalg.det(x), dtype=x.dtype) z[0] = numpy.asarray(numpy.linalg.det(x), dtype=x.dtype)
except Exception: except Exception:
print 'Failed to compute determinant', x print 'Failed to compute determinant', x
raise raise
def grad(self, inputs, g_outputs): def grad(self, inputs, g_outputs):
gz, = g_outputs gz, = g_outputs
x, = inputs x, = inputs
return [gz * self(x) * matrix_inverse(x).T] return [gz * self(x) * matrix_inverse(x).T]
def infer_shape(self, node, shapes): def infer_shape(self, node, shapes):
return [()] return [()]
def __str__(self): def __str__(self):
return "Det" return "Det"
det = Det() det = Det()
...@@ -734,7 +818,8 @@ def spectral_radius_bound(X, log2_exponent): ...@@ -734,7 +818,8 @@ def spectral_radius_bound(X, log2_exponent):
XX = tensor.dot(XX, XX) XX = tensor.dot(XX, XX)
return tensor.pow( return tensor.pow(
trace(XX), trace(XX),
2**(-log2_exponent)) 2 ** (-log2_exponent))
class A_Xinv_b(Op): class A_Xinv_b(Op):
"""Product of form a inv(X) b""" """Product of form a inv(X) b"""
...@@ -745,9 +830,10 @@ class A_Xinv_b(Op): ...@@ -745,9 +830,10 @@ class A_Xinv_b(Op):
b = as_tensor_variable(b) b = as_tensor_variable(b)
X = as_tensor_variable(X) X = as_tensor_variable(X)
o = theano.tensor.matrix(dtype=x.dtype) o = theano.tensor.matrix(dtype=x.dtype)
return Apply(self, [a,X,b], [o]) return Apply(self, [a, X, b], [o])
def perform(self, ndoe, inputs, outstor): def perform(self, ndoe, inputs, outstor):
a,X,b = inputs a, X, b = inputs
if 1: if 1:
L_factor = scipy.linalg.cho_factor(X) L_factor = scipy.linalg.cho_factor(X)
xb = scipy.linalg.cho_solve(L_factor, b) xb = scipy.linalg.cho_solve(L_factor, b)
...@@ -755,10 +841,11 @@ class A_Xinv_b(Op): ...@@ -755,10 +841,11 @@ class A_Xinv_b(Op):
z = numpy.dot(xa.T, xb) z = numpy.dot(xa.T, xb)
else: else:
raise NotImplementedError(self.X_structure) raise NotImplementedError(self.X_structure)
outstor[0][0]=z outstor[0][0] = z
def grad(self, inputs, g_outputs): def grad(self, inputs, g_outputs):
gz, = g_outputs gz, = g_outputs
a,X,b = inputs a, X, b = inputs
iX = matrix_inverse(X) iX = matrix_inverse(X)
ga = matrix_dot(gz, b.T, iX.T) ga = matrix_dot(gz, b.T, iX.T)
gX = -matrix_dot(iX.T, a, gz, b.T, iX.T) gX = -matrix_dot(iX.T, a, gz, b.T, iX.T)
......
from pkg_resources import parse_version as V from pkg_resources import parse_version as V
import numpy import numpy
import numpy.linalg
import theano import theano
from theano import tensor, function from theano import tensor, function
...@@ -11,9 +12,10 @@ from theano import config ...@@ -11,9 +12,10 @@ from theano import config
# The one in comment are not tested... # The one in comment are not tested...
from theano.sandbox.linalg.ops import (cholesky, from theano.sandbox.linalg.ops import (cholesky,
Cholesky, # op class Cholesky, # op class
CholeskyGrad, CholeskyGrad,
matrix_inverse, matrix_inverse,
pinv,
#solve, #solve,
diag, diag,
ExtractDiag, ExtractDiag,
...@@ -109,7 +111,7 @@ def test_cholesky_and_cholesky_grad_shape(): ...@@ -109,7 +111,7 @@ def test_cholesky_and_cholesky_grad_shape():
def test_inverse_correctness(): def test_inverse_correctness():
rng = numpy.random.RandomState(utt.fetch_seed()) rng = numpy.random.RandomState(utt.fetch_seed())
r = rng.randn(4,4).astype(theano.config.floatX) r = rng.randn(4, 4).astype(theano.config.floatX)
x = tensor.matrix() x = tensor.matrix()
xi = matrix_inverse(x) xi = matrix_inverse(x)
...@@ -118,13 +120,31 @@ def test_inverse_correctness(): ...@@ -118,13 +120,31 @@ def test_inverse_correctness():
assert ri.shape == r.shape assert ri.shape == r.shape
assert ri.dtype == r.dtype assert ri.dtype == r.dtype
rir = numpy.dot(ri,r) rir = numpy.dot(ri, r)
rri = numpy.dot(r,ri) rri = numpy.dot(r, ri)
assert _allclose(numpy.identity(4), rir), rir assert _allclose(numpy.identity(4), rir), rir
assert _allclose(numpy.identity(4), rri), rri assert _allclose(numpy.identity(4), rri), rri
def test_pseudoinverse_correctness():
    """Check the `pinv` op against numpy.linalg.pinv on a random
    rectangular matrix."""
    rng = numpy.random.RandomState(utt.fetch_seed())
    # Random rectangular shape; each dimension lies in [2, 5].
    n_rows = rng.randint(4) + 2
    n_cols = rng.randint(4) + 2
    r = rng.randn(n_rows, n_cols).astype(theano.config.floatX)

    x = tensor.matrix()
    ri = function([x], pinv(x))(r)

    # The pseudo-inverse has the transposed shape of its input.
    assert ri.shape == (r.shape[1], r.shape[0])
    assert ri.dtype == r.dtype
    # Note that pseudoinverse can be quite unprecise so I prefer to compare
    # the result with what numpy.linalg returns
    assert _allclose(ri, numpy.linalg.pinv(r))
def test_matrix_dot(): def test_matrix_dot():
rng = numpy.random.RandomState(utt.fetch_seed()) rng = numpy.random.RandomState(utt.fetch_seed())
n = rng.randint(4) + 2 n = rng.randint(4) + 2
...@@ -162,37 +182,39 @@ def test_inverse_grad(): ...@@ -162,37 +182,39 @@ def test_inverse_grad():
rng = numpy.random.RandomState(utt.fetch_seed()) rng = numpy.random.RandomState(utt.fetch_seed())
r = rng.randn(4,4) r = rng.randn(4, 4)
tensor.verify_grad(matrix_inverse, [r], rng=numpy.random) tensor.verify_grad(matrix_inverse, [r], rng=numpy.random)
def test_rop_lop(): def test_rop_lop():
mx = tensor.matrix('mx') mx = tensor.matrix('mx')
mv = tensor.matrix('mv') mv = tensor.matrix('mv')
v = tensor.vector('v') v = tensor.vector('v')
y = matrix_inverse(mx).sum(axis=0) y = matrix_inverse(mx).sum(axis=0)
yv = tensor.Rop(y, mx, mv) yv = tensor.Rop(y, mx, mv)
rop_f = function([mx, mv], yv) rop_f = function([mx, mv], yv)
sy, _ = theano.scan( lambda i,y,x,v: (tensor.grad(y[i],x)*v).sum(), sy, _ = theano.scan(lambda i, y, x, v: (tensor.grad(y[i], x) * v).sum(),
sequences = tensor.arange(y.shape[0]), sequences=tensor.arange(y.shape[0]),
non_sequences = [y,mx,mv]) non_sequences=[y, mx, mv])
scan_f = function([mx,mv], sy) scan_f = function([mx, mv], sy)
rng = numpy.random.RandomState(utt.fetch_seed()) rng = numpy.random.RandomState(utt.fetch_seed())
vx = numpy.asarray(rng.randn(4,4), theano.config.floatX) vx = numpy.asarray(rng.randn(4, 4), theano.config.floatX)
vv = numpy.asarray(rng.randn(4,4), theano.config.floatX) vv = numpy.asarray(rng.randn(4, 4), theano.config.floatX)
v1 = rop_f(vx,vv) v1 = rop_f(vx, vv)
v2 = scan_f(vx,vv) v2 = scan_f(vx, vv)
assert _allclose(v1, v2), ('ROP mismatch: %s %s' % (v1, v2)) assert _allclose(v1, v2), ('ROP mismatch: %s %s' % (v1, v2))
raised = False raised = False
try: try:
tmp = tensor.Rop(theano.clone(y, tmp = tensor.Rop(
replace={mx:break_op(mx)}), mx, mv) theano.clone(y, replace={mx: break_op(mx)}),
mx,
mv)
except ValueError: except ValueError:
raised = True raised = True
if not raised: if not raised:
...@@ -204,7 +226,7 @@ def test_rop_lop(): ...@@ -204,7 +226,7 @@ def test_rop_lop():
yv = tensor.Lop(y, mx, v) yv = tensor.Lop(y, mx, v)
lop_f = function([mx, v], yv) lop_f = function([mx, v], yv)
sy = tensor.grad((v*y).sum(), mx) sy = tensor.grad((v * y).sum(), mx)
scan_f = function([mx, v], sy) scan_f = function([mx, v], sy)
v1 = lop_f(vx, vv) v1 = lop_f(vx, vv)
...@@ -280,13 +302,15 @@ def test_alloc_diag_grad(): ...@@ -280,13 +302,15 @@ def test_alloc_diag_grad():
def test_diag(): def test_diag():
""" """
Test that linalg.diag has the same behavior as numpy.diag. Test that linalg.diag has the same behavior as numpy.diag.
numpy.diag has two behaviors: numpy.diag has two behaviors:
(1) when given a vector, it returns a matrix with that vector as the diagonal. (1) when given a vector, it returns a matrix with that vector as the
(2) when given a matrix, returns a vector which is the diagonal of the matrix. diagonal.
(2) when given a matrix, returns a vector which is the diagonal of the
(1) and (2) are tested by test_alloc_diag and test_extract_diag respectively. matrix.
This test makes sure that linalg.diag instantiates the right op based on the dimension of
the input. (1) and (2) are tested by test_alloc_diag and test_extract_diag
respectively. This test makes sure that linalg.diag instantiates
the right op based on the dimension of the input.
""" """
# test that it builds a matrix with given diagonal when using vector inputs # test that it builds a matrix with given diagonal when using vector inputs
...@@ -298,7 +322,7 @@ def test_diag(): ...@@ -298,7 +322,7 @@ def test_diag():
x = theano.tensor.matrix() x = theano.tensor.matrix()
y = extract_diag(x) y = extract_diag(x)
assert y.owner.op.__class__ == ExtractDiag assert y.owner.op.__class__ == ExtractDiag
# other types should raise error # other types should raise error
x = theano.tensor.tensor3() x = theano.tensor.tensor3()
ok = False ok = False
...@@ -315,7 +339,7 @@ def test_extract_diag(): ...@@ -315,7 +339,7 @@ def test_extract_diag():
g = extract_diag(x) g = extract_diag(x)
f = theano.function([x], g) f = theano.function([x], g)
for shp in [(2, 3), (3, 2), (3, 3), (1,1), (0,0)]: for shp in [(2, 3), (3, 2), (3, 3), (1, 1), (0, 0)]:
m = rng.rand(*shp).astype(config.floatX) m = rng.rand(*shp).astype(config.floatX)
v = numpy.diag(m) v = numpy.diag(m)
r = f(m) r = f(m)
...@@ -343,7 +367,7 @@ def test_extract_diag(): ...@@ -343,7 +367,7 @@ def test_extract_diag():
def test_extract_diag_grad(): def test_extract_diag_grad():
rng = numpy.random.RandomState(utt.fetch_seed()) rng = numpy.random.RandomState(utt.fetch_seed())
x = rng.rand(5,4) x = rng.rand(5, 4)
tensor.verify_grad(extract_diag, [x], rng=rng) tensor.verify_grad(extract_diag, [x], rng=rng)
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论