bringing back sparse

f5801429 · james@mackie · a7e53331 · f5801429 · f5801429 · f5801429
--- a/_test_sparse.py
+++ b/_test_sparse.py
 from sparse import *
 import unittest
+import compile
-class _testCase_transpose(unittest.TestCase):
+class T_transpose(unittest.TestCase):
+    # Checks that transpose() flips the sparse format (csc <-> csr), keeps the
+    # dtype, and evaluates to a matrix whose shape is swapped.
    def setUp(self):
-        core.build_eval_mode()
+        # Seed numpy's global random generator before every test.
        numpy.random.seed(44)
-    def tearDown(self):
+    # A 5x3 csc input is expected to transpose to a 3x5 csr result.
+    def test_transpose_csc(self):
-        core.pop_mode()
+        sp = sparse.csc_matrix(sparse.speye(5,3))
-    def test_transpose(self):
+        a = assparse(sp)
-        a = SparseR(sparse.csr_matrix(sparse.speye(5,3)))
+        # assparse is expected to hold the very same matrix object, not a copy.
+        self.failUnless(a.data is sp)
        self.failUnless(a.data.shape == (5,3))
+        self.failUnless(a.dtype == 'float64')
+        self.failUnless(a.format == 'csc', a.format)
        ta = transpose(a)
-        self.failUnless(ta.data.shape == (3,5))
+        # dtype and format are checked on ta itself; the shape is checked on
+        # the value returned by compile.eval_outputs.
+        self.failUnless(ta.dtype == 'float64', ta.dtype)
+        self.failUnless(ta.format == 'csr', ta.format)
+        vta = compile.eval_outputs([ta])
+        self.failUnless(vta.shape == (3,5))
+    # Mirror of the csc case: a 5x3 csr input is expected to give a 3x5 csc.
+    def test_transpose_csr(self):
+        a = assparse(sparse.csr_matrix(sparse.speye(5,3)))
+        self.failUnless(a.data.shape == (5,3))
+        self.failUnless(a.dtype == 'float64')
+        self.failUnless(a.format == 'csr')
+        ta = transpose(a)
+        self.failUnless(ta.dtype == 'float64', ta.dtype)
+        self.failUnless(ta.format == 'csc', ta.format)
+        vta = compile.eval_outputs([ta])
+        self.failUnless(vta.shape == (3,5))
+class T_Add(unittest.TestCase):
+    """Check add_s_s on two sparse operands of identical format and dtype."""
+    def test0(self):
+        # Two independent 5x3 csc matrices built from sparse.speye.
+        lhs_mat = sparse.csc_matrix(sparse.speye(5,3))
+        rhs_mat = sparse.csc_matrix(sparse.speye(5,3))
+        lhs = assparse(lhs_mat)
+        rhs = assparse(rhs_mat)
+        # assparse is expected to hold the matrix itself, not a copy.
+        self.failUnless(lhs.data is lhs_mat)
+        total = add_s_s(lhs, rhs)
+        # The sum is expected to report the dtype and format of its operands.
+        self.failUnless(total.dtype == lhs.dtype, total.dtype)
+        self.failUnless(total.format == lhs.format, total.format)
+        result = compile.eval_outputs([total])
+        self.failUnless(result.shape == (5,3))
+        # Compare element-wise against scipy's own sparse addition.
+        expected = (lhs_mat + rhs_mat).todense()
+        self.failUnless(numpy.all(result.todense() == expected))
+class T_conversion(unittest.TestCase):
+    """Checks for the sparse_from_dense and dense_from_sparse conversions."""
+    def setUp(self):
+        # Fixed seed so the random dense inputs are reproducible.
+        numpy.random.seed(44)
+    def _check_sparse_from_dense(self, fmt):
+        # Shared body of test0/test1: convert a random length-5 tensor to the
+        # sparse format `fmt` and check the dtype and format of the value.
+        dense = tensor.astensor(numpy.random.rand(5))
+        converted = sparse_from_dense(dense, fmt)
+        value = compile.eval_outputs([converted])
+        self.failUnless(str(value.dtype)=='float64')
+        self.failUnless(value.format == fmt)
+    def test0(self):
+        self._check_sparse_from_dense('csc')
+    def test1(self):
+        self._check_sparse_from_dense('csr')
+    def test2(self):
+        mat = sparse.csr_matrix((2,5))
+        densified = dense_from_sparse(mat)
+        # The matrix is modified after the op is built but before it is
+        # evaluated; the last assertion expects the new entry to be visible.
+        mat[0,0] = 1.0
+        value = compile.eval_outputs([densified])
+        self.failUnless(str(value.dtype)=='float64')
+        self.failUnless(numpy.all(value[0] == [1,0,0,0,0]))
 class _testCase_dot(unittest.TestCase):
    def setUp(self):
-        core.build_eval_mode()
        numpy.random.seed(44)
-    def tearDown(self):
-        core.pop_mode()
+    def test(self):
+        """Bring back the tests for sparse dot"""
+        raise NotImplementedError()
+    if 0:
        def test_basic0(self):
            for mtype in [sparse.csc_matrix, sparse.csr_matrix]:
-            x = SparseR(mtype(sparse.speye(5,3)))
+                x = assparse(mtype(sparse.speye(5,3)))
-            y = core.wrap(numpy.random.rand(3, 2))
+                y = astensor(numpy.random.rand(3, 2))
                z = dot(x,y)
                self.failUnless(z.data.shape == (5,2))
                self.failUnless(type(z.data) is mtype)
        def test_basic1(self):
            """dot: sparse left"""
            a = numpy.asarray([[1, 0, 3, 0, 5], [0, 0, -2, 0, 0]],
@@ -45,6 +110,7 @@ class _testCase_dot(unittest.TestCase):
                except Exception, e:
                    print 'cccc', mtype, e, str(e)
                    raise
        def test_basic2(self):
            """dot: sparse right"""
            a = numpy.random.rand(2, 5)
@@ -58,6 +124,7 @@ class _testCase_dot(unittest.TestCase):
                z = dot(core.ResultBase(data=a),SparseR(mtype(b)))
                self.failUnless(z.data.shape == ab.shape)
                self.failUnless(type(z.data) == type(ab))
        def test_graph_bprop0(self):
            x = core.wrap(numpy.random.rand(10,2))
            w = SparseR(sparse.csr_matrix(numpy.asarray([[1, 0, 3, 0, 5], [0, 0, -2, 0,

--- a/gof/op.py
+++ b/gof/op.py
@@ -144,7 +144,7 @@ class Op(object):
        TODO: consider moving this function to the python linker.
        """
        res = self.impl(*[input.data for input in self.inputs])
-        if self.nout == 1:
+        if len(self.outputs) == 1:
            self.outputs[0].data = res
        else:
            assert len(res) == len(self.outputs)

--- a/sparse.py
+++ b/sparse.py
+import copy #for __copy__
 import numpy
 from scipy import sparse
-import gof
+import gof.op, gof.result
+import tensor
 # Wrapper type
-class SparseR(gof.ResultBase):
+def assparse(sp, **kwargs):
+    """Return sp as a SparseR.
+
+    An existing SparseR is returned unchanged (kwargs are not applied to it).
+    Anything else is wrapped in a new SparseR built from str(sp.dtype) and
+    sp.format, with kwargs forwarded to the SparseR constructor and sp
+    assigned as its data.
+    """
+    if isinstance(sp, SparseR):
+        return sp
+    wrapped = SparseR(str(sp.dtype), sp.format, **kwargs)
+    wrapped.data = sp
+    return wrapped
+class SparseR(gof.result.ResultBase):
    """
    Attribute:
-    format - a subclass of sparse.spmatrix indicating self.data.__class__
+    format - a string identifying the type of sparsity
    Properties:
    T - read-only: return a transpose of self
@@ -19,98 +30,123 @@ class SparseR(gof.ResultBase):
    Notes:
    """
-    def __init__(self, data=None, role=None, constant = False, 
+    format_cls = {
-            format = sparse.csr_matrix):
+            'csr' : sparse.csr_matrix,
-        core.ResultBase.__init__(self, role, data, constant)
+            'csc' : sparse.csc_matrix
-        if isinstance(data, sparse.spmatrix):
+            }
-            self.format = data.__class__
+    dtype_set = set(['int', 'int32', 'int64', 'float32', 'float64'])
-        else:
-            self.format = format
+    def __init__(self, dtype, format, **kwargs):
+        """Create a sparse result of the given dtype and format.
+
+        dtype  - a string that must be a member of SparseR.dtype_set
+        format - a string that must be a key of SparseR.format_cls
+        kwargs - forwarded to the ResultBase initialiser
+
+        Raises NotImplementedError for an unsupported dtype or format.
+        """
-        self._dtype = None
+        # Same base class as named in the class statement.
+        gof.result.ResultBase.__init__(self, **kwargs)
-        self._shape = None
+        # Fail here rather than leave _dtype unset, which would only surface
+        # later as an AttributeError on the first access to .dtype.
+        if dtype not in SparseR.dtype_set:
+            raise NotImplementedError('unsupported dtype "%s" not in list %s' % (dtype, sorted(SparseR.dtype_set)))
+        self._dtype = dtype
+        assert isinstance(format, str)
-    def data_filter(self, value):
-        if isinstance(value, sparse.spmatrix): return value
-        return sparse.csr_matrix(value)
+        if format not in SparseR.format_cls:
+            raise NotImplementedError('unsupported format "%s" not in list %s' % (format, SparseR.format_cls.keys()))
+        self._format = format
+    def filter(self, value):
+        """Return value as a sparse matrix of this result's format and dtype.
+
+        A value that is already an instance of the class registered for
+        self.format, and whose dtype equals self.dtype, is returned as-is.
+        Anything else is passed to that class's constructor.
+
+        Raises NotImplementedError if the converted matrix does not have the
+        expected dtype or format; no cast is attempted.
+        """
+        format_cls = SparseR.format_cls[self.format]
+        if isinstance(value, format_cls) and value.dtype == self.dtype:
+            return value
+        sp = format_cls(value)
+        if str(sp.dtype) != self.dtype:
+            raise NotImplementedError('cannot store dtype "%s" in a SparseR of dtype "%s"' % (sp.dtype, self.dtype))
+        if sp.format != self.format:
+            raise NotImplementedError('cannot store format "%s" in a SparseR of format "%s"' % (sp.format, self.format))
+        return sp
+    def __copy__(self):
+        """Return a new SparseR with the same dtype and format, holding a
+        copy.copy of this result's data; the name is carried over when set."""
+        init_kwargs = {}
+        if self.name is not None:
+            init_kwargs['name'] = self.name
+        duplicate = SparseR(self._dtype, self._format, **init_kwargs)
+        duplicate.data = copy.copy(self.data)
+        return duplicate
-    def __add__(left, right): return add(left, right)
-    def __radd__(right, left): return add(left, right)
+    dtype = property(lambda self: self._dtype)
+    format = property(lambda self: self._format)
    T = property(lambda self: transpose(self), doc = "Return aliased transpose")
-    # self._dtype is used when self._data hasn't been set yet
-    def __dtype_get(self):
-        if self._data is None:
-            return self._dtype
-        else:
-            return self._data.dtype
-    def __dtype_set(self, dtype):
-        if self._data is None:
-            self._dtype = dtype
-        else:
-            raise StateError('cannot set dtype after data has been set')
-    dtype = property(__dtype_get, __dtype_set)
-    # self._shape is used when self._data hasn't been set yet
+    def __add__(left, right):
+        """Return the sum left + right, built with add_s_s."""
+        # The generic `add` op class is removed from this module; add_s_s
+        # (sparse + sparse) is the op that takes its place.
+        return add_s_s(left, right)
-    def __shape_get(self):
+    def __radd__(right, left):
+        """Return the sum left + right when self is the right-hand operand."""
+        # The generic `add` op class is removed from this module; add_s_s
+        # (sparse + sparse) is the op that takes its place.
+        return add_s_s(left, right)
-        if self._data is None:
-            return self._shape
-        else:
-            return self._data.shape
-    def __shape_set(self, shape):
-        if self._data is None:
-            self._shape = shape
-        else:
-            raise StateError('cannot set shape after data has been set')
-    shape = property(__shape_get, __shape_set)
-# convenience base class
-class op(gof.PythonOp, grad.update_gradient_via_grad):
-    """unite PythonOp with update_gradient_via_grad"""
 #
 # Conversion
 #
 # convert a sparse matrix to an ndarray
-class sparse2dense(op):
+class DenseFromSparse(gof.op.Op):
+    """Op that converts a sparse matrix input into a dense ndarray."""
-    def gen_outputs(self): return [core.Numpy2()]
+    def __init__(self, x, **kwargs):
+        """Wrap x with assparse and declare a single Tensor output.
+
+        kwargs are forwarded to gof.op.Op.__init__.
+        """
-    def impl(x): return numpy.asarray(x.todense())
+        gof.op.Op.__init__(self, **kwargs)
+        self.inputs = [assparse(x)]
+        # Read the dtype from the wrapped input rather than from the raw
+        # argument: x may be a scipy matrix whose .dtype is a numpy dtype
+        # object, whereas assparse builds its wrapper from str(sp.dtype).
+        # NOTE(review): [0,0] is presumably the broadcastable pattern of a
+        # 2-d output -- confirm against tensor.Tensor.
+        self.outputs = [tensor.Tensor(self.inputs[0].dtype,[0,0])]
+    def impl(self, x):
+        # Densify the sparse matrix and return it as a plain ndarray.
+        return numpy.asarray(x.todense())
    def grad(self, x, gz): 
-        if x.format is sparse.coo_matrix: return dense2coo(gz)
+        # Convert gz back to a sparse result in the input's format.
+        # NOTE(review): other ops in this file take their inputs as one tuple
+        # parameter, e.g. grad(self, (x, y), gz) -- confirm which calling
+        # convention applies to single-input ops.
+        return sparse_from_dense(gz, x.format)
-        if x.format is sparse.csc_matrix: return dense2csc(gz)
+dense_from_sparse = gof.op.constructor(DenseFromSparse)
-        if x.format is sparse.csr_matrix: return dense2csr(gz)
-        if x.format is sparse.dok_matrix: return dense2dok(gz)
+class SparseFromDense(gof.op.Op):
+    """Op that converts a dense tensor input into a sparse matrix."""
-        if x.format is sparse.lil_matrix: return dense2lil(gz)
+    def __init__(self, x, format, **kwargs):
+        """Declare the inputs (x as a tensor, plus the format) and one
+        SparseR output.
+
+        format may be a ResultBase, which is used as the second input as-is,
+        or any other value, which is stored as the data of a new PythonResult.
+        kwargs are forwarded to gof.op.Op.__init__.
+        """
+        gof.op.Op.__init__(self, **kwargs)
-# convert an ndarray to various sorts of sparse matrices.
+        if isinstance(format, gof.result.ResultBase):
-class _dense2sparse(op):
+            self.inputs = [tensor.astensor(x), format]
-    def gen_outputs(self): return [SparseR()]
+        else:
-    def grad(self, x, gz): return sparse2dense(gz)
+            self.inputs =  [tensor.astensor(x), gof.result.PythonResult()]
-class dense2coo(_dense2sparse):
+            self.inputs[1].data = format
-    def impl(x): return sparse.coo_matrix(x)
+        # Read the dtype from the tensor input rather than from the raw
+        # argument: x may be a plain ndarray, whose .dtype is a numpy dtype
+        # object and is not a member of SparseR.dtype_set.
+        self.outputs = [SparseR(self.inputs[0].dtype, self.inputs[1].data)]
-class dense2csc(_dense2sparse):
+    def impl(self, x, fmt):
-    def impl(x): return sparse.csc_matrix(x)
+        # this would actually happen anyway when we try to assign to
-class dense2csr(_dense2sparse):
+        # self.outputs[0].data, but that seems hackish -JB
-    def impl(x): return sparse.csr_matrix(x)
+        return SparseR.format_cls[fmt](x)
-class dense2dok(_dense2sparse):
+    def grad(self, (x, fmt), gz):
-    def impl(x): return sparse.dok_matrix(x)
+        # The gradient with respect to x is gz converted back to dense.
+        return dense_from_sparse(gz)
-class dense2lil(_dense2sparse):
+sparse_from_dense = gof.op.constructor(SparseFromDense)
-    def impl(x): return sparse.lil_matrix(x)
 # Linear Algebra
-class add(op):
+class Transpose(gof.op.Op):
+    # Op that transposes a sparse matrix. The declared output format is the
+    # input format looked up in format_map (csr <-> csc); the dtype is kept.
-    def gen_outputs(self): return [SparseR()]
+    format_map = {
-    def impl(csr,y): return csr + y
+            'csr' : 'csc',
+            'csc' : 'csr'}
-class transpose(op):
+    def __init__(self, x, **kwargs):
-    def gen_outputs(self): return [SparseR()]
+        gof.op.Op.__init__(self, **kwargs)
-    def impl(x): return x.transpose() 
+        # Wrap first so that .dtype and .format below are SparseR's strings.
+        x = assparse(x)
-    def grad(self, x, gz): return transpose(gz)
+        self.inputs = [x]
+        self.outputs = [SparseR(x.dtype, Transpose.format_map[x.format])]
-class dot(op):
+    def impl(self, x):
+        return x.transpose() 
+    # NOTE(review): AddSS.grad and SparseFromDense.grad take their inputs as
+    # one tuple parameter; this signature takes x directly -- confirm which
+    # calling convention applies to single-input ops.
+    def grad(self, x, gz): 
+        return transpose(gz)
+transpose = gof.op.constructor(Transpose)
+class AddSS(gof.op.Op): #add two sparse matrices
+    """Op computing x + y for two sparse operands of equal dtype and format."""
+    def __init__(self, x, y, **kwargs):
+        """Wrap both operands with assparse and declare one SparseR output
+        with the operands' dtype and format.
+
+        kwargs are forwarded to gof.op.Op.__init__.
+        Raises NotImplementedError if the dtypes or the formats differ.
+        """
+        gof.op.Op.__init__(self, **kwargs)
+        x, y = assparse(x), assparse(y)
+        self.inputs = [x, y]
+        if x.dtype != y.dtype:
+            raise NotImplementedError('AddSS requires operands of the same dtype, got "%s" and "%s"' % (x.dtype, y.dtype))
+        if x.format != y.format:
+            raise NotImplementedError('AddSS requires operands of the same format, got "%s" and "%s"' % (x.format, y.format))
+        self.outputs = [SparseR(x.dtype, x.format)]
+    def impl(self, x,y): 
+        return x + y
+    def grad(self, (x, y), gz):
+        # gz is returned unchanged as the gradient for both operands.
+        return gz, gz
+add_s_s = gof.op.constructor(AddSS)
+if 0:
+    class dot(gof.op.Op):
        """
        Attributes:
        grad_preserves_dense - an array of boolean flags (described below)
@@ -124,7 +160,7 @@ class dot(op):
        hence this mask.
        """
        def __init__(self, *args, **kwargs):
-        op.__init__(self, *args, **kwargs)
+            gof.op.Op.__init__(self, **kwargs)
            self.grad_preserves_dense = [True, True]
        def gen_outputs(self): return [SparseR()]
        def impl(x,y):
@@ -139,6 +175,7 @@ class dot(op):
                if not isinstance(self.inputs[i], SparseR):
                    #assume it is a dense matrix
                    if self.grad_preserves_dense[i]:
-                    rval[i] = sparse2dense(rval[i])
+                        rval[i] = dense_from_sparse(rval[i])
            return rval
--- a/tensor.py
+++ b/tensor.py
@@ -70,6 +70,8 @@ class Tensor(BaseTensor):
 # alternate Tensor constructor
 def astensor(data, broadcastable=None, role=None, name=None):
    """Return a Tensor containing given data"""
+    if isinstance(data, Tensor) and broadcastable is None and role is None and name is None:
+        return data
    data = numpy.asarray(data)
    if broadcastable is None:
        broadcastable = [s==1 for s in data.shape]
@@ -116,11 +118,9 @@ def _assert_tensor_scalar(x, a):
    if numpy.product(a.shape) != 1:
        raise ValueError("The second argument must be a scalar.")
-def _as_tensor(obj):
+# _as_tensor is kept as a separate name because it is the function that ops
-    if isinstance(obj, Tensor):
+# use to upcast their arguments. Being internal-use, it is a better place to
+# put debugging code than the global astensor.
-        return obj
+_as_tensor = astensor
-    else:
-        return astensor(obj)
 class _Op(BaseTensorOp):
    """A convenient base for the ops in this file"""