Improvements to sparse handling

Parent commit: 13a8641c
......@@ -2,6 +2,7 @@ from sparse import *
import unittest
import compile
import gradient
class T_transpose(unittest.TestCase):
def setUp(self):
......@@ -35,12 +36,13 @@ class T_Add(unittest.TestCase):
def test0(self):
sp_a = sparse.csc_matrix(sparse.speye(5,3))
a = assparse(sp_a)
self.failUnless(a.data is sp_a)
sp_b = sparse.csc_matrix(sparse.speye(5,3))
b = assparse(sp_b)
self.failUnless(b.data is sp_b)
self.failUnless(a.data is sp_a)
apb = add_s_s(a, b)
apb = add(a, b)
self.failUnless(apb.dtype == a.dtype, apb.dtype)
self.failUnless(apb.format == a.format, apb.format)
......@@ -55,7 +57,7 @@ class T_conversion(unittest.TestCase):
def test0(self):
a = tensor.astensor(numpy.random.rand(5))
s = sparse_from_dense(a,'csc')
s = sparse_from_dense(a, 'csc')
val = compile.eval_outputs([s])
self.failUnless(str(val.dtype)=='float64')
self.failUnless(val.format == 'csc')
......@@ -79,6 +81,7 @@ class T_conversion(unittest.TestCase):
class _testCase_dot(unittest.TestCase):
""" Types of sparse matrices to use for testing """
mtypes = [sparse.csc_matrix, sparse.csr_matrix]
mtypes_str = ["csc", "csr"]
#mtypes = [sparse.csc_matrix, sparse.csr_matrix, sparse.dok_matrix, sparse.lil_matrix, sparse.coo_matrix]
def setUp(self):
......@@ -126,7 +129,6 @@ class _testCase_dot(unittest.TestCase):
zop = transpose(dot(y, x))
z = compile.eval_outputs([zop])
self.failUnless(z.shape == (500,2))
print mtype, type(z)
# self.failUnless(type(z) is mtype)
w = mtype((500,2))
......@@ -147,61 +149,41 @@ class _testCase_dot(unittest.TestCase):
w = w.todense()
self.failUnless((z == w).all() == True)
# def test_basic1(self):
# """dot: sparse left"""
# a = numpy.asarray([[1, 0, 3, 0, 5], [0, 0, -2, 0, 0]],
# dtype='float64')
# b = numpy.random.rand(5, 3)
# for mtype in [sparse.csr_matrix, sparse.csc_matrix, sparse.dok_matrix,
# sparse.lil_matrix]:#, sparse.coo_matrix]:
# #print type(a), mtype
# m = mtype(a)
# ab = m.dot(b)
# try:
# z = dot(assparse(m), gof.Result(data=b))
# self.failUnless(z.data.shape == ab.shape)
# self.failUnless(type(z.data) == type(ab))
# except Exception, e:
# print 'cccc', mtype, e, str(e)
# raise
def test_missing(self):
raise NotImplementedError('tests commented out')
# def test_basic2(self):
# """dot: sparse right"""
# a = numpy.random.rand(2, 5)
# b = numpy.asarray([[1, 0, 3, 0, 5], [0, 0, -2, 0, 0]],
# dtype='float64').transpose()
#
# for mtype in [sparse.csr_matrix, sparse.csc_matrix, sparse.dok_matrix,
# sparse.lil_matrix]:#, sparse.coo_matrix]:
# m = mtype(b)
# ab = m.transpose().dot(a.transpose()).transpose()
# z = dot(gof.Result(data=a),assparse(mtype(b)))
# self.failUnless(z.data.shape == ab.shape)
# self.failUnless(type(z.data) == type(ab))
#
# def test_graph_bprop0(self):
def test_graph_bprop0(self):
# x = tensor.astensor(numpy.random.rand(10,2))
# w = assparse(sparse.csr_matrix(
# numpy.asarray([[1, 0, 3, 0, 5], [0, 0, -2, 0,0]],dtype='float64')
# ))
#
# for epoch in xrange(50):
# xw = dense_from_sparse(dot(x, w))
# y = dense_from_sparse(dot(xw, transpose(w)))
# loss = core.sum(core.sqr(x-y))
# gy = y-x
# g = grad.Grad({y:gy})
# g.bprop()
# lr = 0.002
# g(w).data[1,0] = 0
# g(w).data[1,4] = 0
# w.data = -lr * g(w).data + w.data
#
# self.failUnless('3.08560636025' == str(loss.data))
#
for mtype in self.mtypes_str:
# x = tensor.astensor([[1., 2], [3, 4], [2, 1]])
# w = assparse(mtype((500,3)))
# w.data[(10, 1)] = 1
# w.data[(20, 2)] = 2
x = tensor.Tensor('float64', broadcastable=[False,False], name='x')
w = SparseResult('float64', mtype)
xw = dense_from_sparse(dot(x, w))
y = dense_from_sparse(dot(xw, w.T))
diff = x-y
loss = tensor.sum(tensor.sqr(diff))
gw = gradient.grad(loss, w)
trainfn = compile.Function([x, w], [y, loss, gw])
# for epoch in xrange(50):
# gy = y-x
# g = grad.Grad({y:gy})
# g.bprop()
# lr = 0.002
# g(w).data[1,0] = 0
# g(w).data[1,4] = 0
# w.data = -lr * g(w).data + w.data
# print loss.data
self.failUnless('3.08560636025' == str(loss.data))
# def test_graph_bprop1(self):
# x = tensor.astensor(numpy.random.rand(10,2))
# w = assparse(sparse.csr_matrix(
......
......@@ -108,7 +108,7 @@ class BaseTensor(Result):
#
def desc(self):
"""
Returns a hashable description of this BaseTensor.
Returns a hashable description of this L{BaseTensor}.
"""
if self.data is not None:
return (BaseTensor, self.dtype, self.broadcastable, self.data.data[:])
......
......@@ -120,6 +120,10 @@ def grad_sources_inputs(sources, graph_inputs):
def grad(cost, param, g_cost=1.0):
"""
@type cost: L{Result}
@type param: L{Result} or list of L{Result}s.
@rtype: L{Result} or list of L{Result}s (depending upon I{param})
@return: symbolic expression of gradient of I{cost} wrt I{param}.
If I{param} is a list, then return a list containing the gradient of I{cost} wrt
each element of the list.
......
......@@ -11,7 +11,46 @@ import numpy
from scipy import sparse
import gof.op, gof.result
import tensor
import tensor, base_tensor
## Type checking
def _is_sparse_result(x):
    """
    @rtype: boolean
    @return: True iff x is a L{SparseResult} (and not a L{base_tensor.BaseTensor})
    """
    # Only the two symbolic-result wrappers are meaningful here; any other
    # argument is a programming error, so fail loudly instead of guessing.
    if not isinstance(x, (SparseResult, base_tensor.BaseTensor)):
        raise NotImplementedError("_is_sparse should only be called on sparse.SparseResult or base_tensor.BaseTensor, not,", x)
    return isinstance(x, SparseResult)
def _is_dense_result(x):
    """
    @rtype: boolean
    @return: True unless x is a L{SparseResult} (and not a L{base_tensor.BaseTensor})
    """
    # Guard: this predicate is only defined over the two symbolic-result
    # wrappers; anything else is a programming error.
    if not isinstance(x, SparseResult) and not isinstance(x, base_tensor.BaseTensor):
        raise NotImplementedError("_is_dense_result should only be called on sparse.SparseResult or base_tensor.BaseTensor, not,", x)
    # Bug fix: this previously returned isinstance(x, SparseResult), making it
    # identical to _is_sparse_result and inverting every caller's dense check
    # (e.g. Dot.grad's `if _is_dense_result(y):`, which replaced the old
    # `if not isinstance(self.inputs[1], SparseResult):`).  Return the negation,
    # as the docstring states.
    return not isinstance(x, SparseResult)
def _is_sparse(x):
"""
@rtype: boolean
@return: True iff x is a L{scipy.sparse.spmatrix} (and not a L{numpy.ndarray})
"""
if not isinstance(x, sparse.spmatrix) and not isinstance(x, numpy.ndarray):
raise NotImplementedError("_is_sparse should only be called on sparse.scipy.sparse.spmatrix or numpy.ndarray, not,", x)
return isinstance(x, sparse.spmatrix)
def _is_dense(x):
"""
@rtype: boolean
@return: True unless x is a L{scipy.sparse.spmatrix} (and not a L{numpy.ndarray})
"""
if not isinstance(x, sparse.spmatrix) and not isinstance(x, numpy.ndarray):
raise NotImplementedError("_is_sparse should only be called on sparse.scipy.sparse.spmatrix or numpy.ndarray, not,", x)
return isinstance(x, numpy.ndarray)
# Wrapper type
......@@ -26,13 +65,14 @@ def assparse(sp, **kwargs):
@todo Verify that sp is sufficiently sparse, and raise a warning if it is not
"""
if isinstance(sp, SparseResult):
return sp
rval = sp
else:
# @todo Verify that sp is sufficiently sparse, and raise a
# warning if it is not
rval = SparseResult(str(sp.dtype), sp.format, **kwargs)
rval.data = sp
return rval
assert _is_sparse_result(rval)
return rval
class SparseResult(gof.result.Result):
"""
......@@ -44,8 +84,7 @@ class SparseResult(gof.result.Result):
Methods:
Notes:
@note As far as I can tell, L{scipy.sparse} objects must be matrices, i.e. have dimension 2.
"""
format_cls = {
'csr' : sparse.csr_matrix,
......@@ -101,7 +140,6 @@ class SparseResult(gof.result.Result):
def __add__(left, right): return add(left, right)
def __radd__(right, left): return add(left, right)
#
# Conversion
#
......@@ -113,9 +151,11 @@ class DenseFromSparse(gof.op.Op):
self.inputs = [assparse(x)]
self.outputs = [tensor.Tensor(x.dtype,[0,0])]
def impl(self, x):
assert _is_sparse(x)
return numpy.asarray(x.todense())
def grad(self, x, gz):
return sparse_from_dense(gz, x.format)
def grad(self, (x,), (gz,)):
assert _is_sparse_result(x) and _is_dense_result(gz)
return sparse_from_dense(gz, x.format),
dense_from_sparse = gof.op.constructor(DenseFromSparse)
class SparseFromDense(gof.op.Op):
......@@ -130,9 +170,11 @@ class SparseFromDense(gof.op.Op):
def impl(self, x, fmt):
# this would actually happen anyway when we try to assign to
# self.outputs[0].data, but that seems hackish -JB
assert _is_dense(x)
return SparseResult.format_cls[fmt](x)
def grad(self, (x, fmt), gz):
return dense_from_sparse(gz)
def grad(self, (x, fmt), (gz,)):
assert _is_dense_result(x) and _is_sparse_result(gz)
return dense_from_sparse(gz), None
sparse_from_dense = gof.op.constructor(SparseFromDense)
# Linear Algebra
......@@ -147,12 +189,15 @@ class Transpose(gof.op.Op):
self.inputs = [x]
self.outputs = [SparseResult(x.dtype, Transpose.format_map[x.format])]
def impl(self, x):
assert _is_sparse(x)
return x.transpose()
def grad(self, x, gz):
return transpose(gz)
def grad(self, (x,), (gz,)):
assert _is_sparse_result(x) and _is_sparse_result(gz)
return transpose(gz),
transpose = gof.op.constructor(Transpose)
class AddSS(gof.op.Op): #add two sparse matrices
class AddSS(gof.op.Op):
''' Add two sparse matrices '''
def __init__(self, x, y, **kwargs):
gof.op.Op.__init__(self, **kwargs)
x, y = [assparse(x), assparse(y)]
......@@ -163,10 +208,49 @@ class AddSS(gof.op.Op): #add two sparse matrices
raise NotImplementedError()
self.outputs = [SparseResult(x.dtype, x.format)]
def impl(self, x,y):
assert _is_sparse(x) and _is_sparse(y)
return x + y
def grad(self, (x, y), gz):
def grad(self, (x, y), (gz,)):
assert _is_sparse_result(x) and _is_sparse_result(y)
assert _is_sparse_result(gz)
return gz, gz
add_s_s = gof.op.constructor(AddSS)
class AddSD(gof.op.Op):
''' Add a sparse and a dense matrix '''
def __init__(self, x, y, **kwargs):
gof.op.Op.__init__(self, **kwargs)
x, y = [assparse(x), tensor.astensor(y)]
self.inputs = [x, y]
if x.dtype != y.dtype:
raise NotImplementedError()
# The magic number two here arises because L{scipy.sparse}
# objects must be matrices (have dimension 2)
assert len(y.broadcastable) == 2
self.outputs = [tensor.Tensor(y.dtype, y.broadcastable)]
def impl(self, x,y):
assert _is_sparse(x) and _is_dense(y)
return x + y
def grad(self, (x, y), (gz,)):
assert _is_sparse_result(x) and _is_dense_result(y)
assert _is_dense_result(gz)
return SparseFromDense(gz), gz
add_s_d = gof.op.constructor(AddSD)
def add(x, y):
    """
    Add two matrices, at least one of which is sparse.

    Raw scipy.sparse inputs (anything exposing getnnz) are first wrapped as
    SparseResults.  Addition is commutative, so a dense+sparse call is
    forwarded to add_s_d with the operands swapped.
    """
    # Promote raw scipy.sparse matrices to symbolic SparseResults.
    if hasattr(x, 'getnnz'):
        x = assparse(x)
    if hasattr(y, 'getnnz'):
        y = assparse(y)
    sparse_x = _is_sparse_result(x)
    sparse_y = _is_sparse_result(y)
    assert sparse_x or sparse_y
    if sparse_x and sparse_y:
        return add_s_s(x, y)
    if sparse_x:
        return add_s_d(x, y)
    if sparse_y:
        return add_s_d(y, x)
    raise NotImplementedError()
class Dot(gof.op.Op):
"""
......@@ -178,6 +262,8 @@ class Dot(gof.op.Op):
when L{Dot} is in the middle of a larger graph, because the types
of gy will match that of y. This conversion might be inefficient if
the gradients are graph outputs though, hence this mask.
@todo: Simplify code by splitting into DotSS and DotSD.
"""
def __init__(self, x, y, grad_preserves_dense=True):
"""
......@@ -186,6 +272,7 @@ class Dot(gof.op.Op):
if x.dtype != y.dtype:
raise NotImplementedError()
assert _is_sparse_result(x)
# These are the conversions performed by scipy.sparse.dot
if x.format == "csc" or x.format == "coo":
myformat = "csc"
......@@ -204,9 +291,10 @@ class Dot(gof.op.Op):
"""
self.outputs[0].data = self.inputs[0].data.dot(self.inputs[1].data)
def grad(self, (x, y), (gz,)):
assert _is_sparse_result(gz)
rval = [dot(gz, y.T), dot(x.T, gz)]
assert isinstance(self.inputs[0], SparseResult)
if not isinstance(self.inputs[1], SparseResult):
assert _is_sparse_result(x)
if _is_dense_result(y):
if self.grad_preserves_dense:
rval[1] = dense_from_sparse(rval[1])
return rval
......@@ -222,12 +310,12 @@ def dot(x, y, grad_preserves_dense=True):
if hasattr(x, 'getnnz'): x = assparse(x)
if hasattr(y, 'getnnz'): y = assparse(y)
x_is_sparse = isinstance(x, SparseResult)
y_is_sparse = isinstance(y, SparseResult)
if not x_is_sparse and not y_is_sparse:
x_is_sparse_result = _is_sparse_result(x)
y_is_sparse_result = _is_sparse_result(y)
if not x_is_sparse_result and not y_is_sparse_result:
raise TypeError()
if x_is_sparse:
if x_is_sparse_result:
return Dot(x,y,grad_preserves_dense).outputs[0]
else:
assert y_is_sparse
assert y_is_sparse_result
return transpose(Dot(y.T, x.T, grad_preserves_dense).outputs[0])
......@@ -84,7 +84,7 @@ def astensor(data, broadcastable=None, name=None):
raise ValueError("Cannot rename an existing Tensor.")
return data
elif isinstance(data, Result):
raise TypeError("Cannot make a Tensor out of a non-Tensor result.")
raise TypeError("Cannot make a Tensor out of a non-Tensor result:", data)
if data is None and broadcastable is None:
raise TypeError("Cannot make a Tensor out of None.")
......
Markdown format
0%
You are adding 0 people to this discussion. Please proceed with caution.
Please finish editing this comment first!
Register or sign in to post a comment