提交 f5801429 authored 作者: james@mackie's avatar james@mackie

bringing back sparse

上级 a7e53331
from sparse import * from sparse import *
import unittest import unittest
import compile
class _testCase_transpose(unittest.TestCase): class T_transpose(unittest.TestCase):
def setUp(self): def setUp(self):
core.build_eval_mode()
numpy.random.seed(44) numpy.random.seed(44)
def tearDown(self): def test_transpose_csc(self):
core.pop_mode() sp = sparse.csc_matrix(sparse.speye(5,3))
def test_transpose(self): a = assparse(sp)
a = SparseR(sparse.csr_matrix(sparse.speye(5,3))) self.failUnless(a.data is sp)
self.failUnless(a.data.shape == (5,3)) self.failUnless(a.data.shape == (5,3))
self.failUnless(a.dtype == 'float64')
self.failUnless(a.format == 'csc', a.format)
ta = transpose(a) ta = transpose(a)
self.failUnless(ta.data.shape == (3,5)) self.failUnless(ta.dtype == 'float64', ta.dtype)
self.failUnless(ta.format == 'csr', ta.format)
vta = compile.eval_outputs([ta])
self.failUnless(vta.shape == (3,5))
def test_transpose_csr(self):
    """transpose: a CSR input gives a CSC result with the axes swapped"""
    a = assparse(sparse.csr_matrix(sparse.speye(5, 3)))
    # assertEqual reports both operands on failure, so no hand-written
    # message argument is needed.
    self.assertEqual(a.data.shape, (5, 3))
    self.assertEqual(a.dtype, 'float64')
    self.assertEqual(a.format, 'csr')

    # Properties of the symbolic result, checked before evaluation.
    ta = transpose(a)
    self.assertEqual(ta.dtype, 'float64')
    self.assertEqual(ta.format, 'csc')

    # Evaluate the graph and check the shape of the concrete value.
    vta = compile.eval_outputs([ta])
    self.assertEqual(vta.shape, (3, 5))
class T_Add(unittest.TestCase):
    """Tests for sparse + sparse addition through ``add_s_s``."""

    def test0(self):
        """add_s_s: two CSC operands keep dtype/format and match scipy's sum"""
        sp_a = sparse.csc_matrix(sparse.speye(5, 3))
        a = assparse(sp_a)

        sp_b = sparse.csc_matrix(sparse.speye(5, 3))
        b = assparse(sp_b)

        # The wrapper is expected to hold the very same matrix object.
        self.assertTrue(a.data is sp_a)

        # Properties of the symbolic result, checked before evaluation.
        apb = add_s_s(a, b)
        self.assertEqual(apb.dtype, a.dtype)
        self.assertEqual(apb.format, a.format)

        # The evaluated value must agree with adding the scipy matrices
        # directly.
        val = compile.eval_outputs([apb])
        self.assertEqual(val.shape, (5, 3))
        self.assertTrue(numpy.all(val.todense() == (sp_a + sp_b).todense()))
class T_conversion(unittest.TestCase):
    """Tests for ``sparse_from_dense`` and ``dense_from_sparse``."""

    def setUp(self):
        # Fixed seed so the random dense inputs are reproducible.
        numpy.random.seed(44)

    def test0(self):
        """sparse_from_dense: requesting 'csc' gives a float64 CSC value"""
        a = tensor.astensor(numpy.random.rand(5))
        s = sparse_from_dense(a, 'csc')
        val = compile.eval_outputs([s])
        self.assertEqual(str(val.dtype), 'float64')
        self.assertEqual(val.format, 'csc')

    def test1(self):
        """sparse_from_dense: requesting 'csr' gives a float64 CSR value"""
        a = tensor.astensor(numpy.random.rand(5))
        s = sparse_from_dense(a, 'csr')
        val = compile.eval_outputs([s])
        self.assertEqual(str(val.dtype), 'float64')
        self.assertEqual(val.format, 'csr')

    def test2(self):
        """dense_from_sparse: the dense value reflects the matrix contents"""
        csr = sparse.csr_matrix((2, 5))
        d = dense_from_sparse(csr)
        # Written after the op is built but before evaluation; the
        # assertions below expect the dense output to reflect this write.
        csr[0, 0] = 1.0
        val = compile.eval_outputs([d])
        self.assertEqual(str(val.dtype), 'float64')
        self.assertTrue(numpy.all(val[0] == [1, 0, 0, 0, 0]))
class _testCase_dot(unittest.TestCase): class _testCase_dot(unittest.TestCase):
def setUp(self): def setUp(self):
core.build_eval_mode()
numpy.random.seed(44) numpy.random.seed(44)
def tearDown(self):
core.pop_mode() def test(self):
"""Bring back the tests for sparse dot"""
raise NotImplementedError()
if 0:
def test_basic0(self): def test_basic0(self):
for mtype in [sparse.csc_matrix, sparse.csr_matrix]: for mtype in [sparse.csc_matrix, sparse.csr_matrix]:
x = SparseR(mtype(sparse.speye(5,3))) x = assparse(mtype(sparse.speye(5,3)))
y = core.wrap(numpy.random.rand(3, 2)) y = astensor(numpy.random.rand(3, 2))
z = dot(x,y) z = dot(x,y)
self.failUnless(z.data.shape == (5,2)) self.failUnless(z.data.shape == (5,2))
self.failUnless(type(z.data) is mtype) self.failUnless(type(z.data) is mtype)
def test_basic1(self): def test_basic1(self):
"""dot: sparse left""" """dot: sparse left"""
a = numpy.asarray([[1, 0, 3, 0, 5], [0, 0, -2, 0, 0]], a = numpy.asarray([[1, 0, 3, 0, 5], [0, 0, -2, 0, 0]],
...@@ -45,6 +110,7 @@ class _testCase_dot(unittest.TestCase): ...@@ -45,6 +110,7 @@ class _testCase_dot(unittest.TestCase):
except Exception, e: except Exception, e:
print 'cccc', mtype, e, str(e) print 'cccc', mtype, e, str(e)
raise raise
def test_basic2(self): def test_basic2(self):
"""dot: sparse right""" """dot: sparse right"""
a = numpy.random.rand(2, 5) a = numpy.random.rand(2, 5)
...@@ -58,6 +124,7 @@ class _testCase_dot(unittest.TestCase): ...@@ -58,6 +124,7 @@ class _testCase_dot(unittest.TestCase):
z = dot(core.ResultBase(data=a),SparseR(mtype(b))) z = dot(core.ResultBase(data=a),SparseR(mtype(b)))
self.failUnless(z.data.shape == ab.shape) self.failUnless(z.data.shape == ab.shape)
self.failUnless(type(z.data) == type(ab)) self.failUnless(type(z.data) == type(ab))
def test_graph_bprop0(self): def test_graph_bprop0(self):
x = core.wrap(numpy.random.rand(10,2)) x = core.wrap(numpy.random.rand(10,2))
w = SparseR(sparse.csr_matrix(numpy.asarray([[1, 0, 3, 0, 5], [0, 0, -2, 0, w = SparseR(sparse.csr_matrix(numpy.asarray([[1, 0, 3, 0, 5], [0, 0, -2, 0,
......
...@@ -144,7 +144,7 @@ class Op(object): ...@@ -144,7 +144,7 @@ class Op(object):
TODO: consider moving this function to the python linker. TODO: consider moving this function to the python linker.
""" """
res = self.impl(*[input.data for input in self.inputs]) res = self.impl(*[input.data for input in self.inputs])
if self.nout == 1: if len(self.outputs) == 1:
self.outputs[0].data = res self.outputs[0].data = res
else: else:
assert len(res) == len(self.outputs) assert len(res) == len(self.outputs)
......
import copy #for __copy__
import numpy import numpy
from scipy import sparse from scipy import sparse
import gof import gof.op, gof.result
import tensor
# Wrapper type # Wrapper type
class SparseR(gof.ResultBase): def assparse(sp, **kwargs):
"""Return SparseR version of sp"""
if isinstance(sp, SparseR):
return sp
else:
rval = SparseR(str(sp.dtype), sp.format, **kwargs)
rval.data = sp
return rval
class SparseR(gof.result.ResultBase):
""" """
Attribute: Attribute:
format - a subclass of sparse.spmatrix indicating self.data.__class__ format - a string identifying the type of sparsity
Properties: Properties:
T - read-only: return a transpose of self T - read-only: return a transpose of self
...@@ -19,98 +30,123 @@ class SparseR(gof.ResultBase): ...@@ -19,98 +30,123 @@ class SparseR(gof.ResultBase):
Notes: Notes:
""" """
def __init__(self, data=None, role=None, constant = False, format_cls = {
format = sparse.csr_matrix): 'csr' : sparse.csr_matrix,
core.ResultBase.__init__(self, role, data, constant) 'csc' : sparse.csc_matrix
if isinstance(data, sparse.spmatrix): }
self.format = data.__class__ dtype_set = set(['int', 'int32', 'int64', 'float32', 'float64'])
else:
self.format = format def __init__(self, dtype, format, **kwargs):
self._dtype = None gof.ResultBase.__init__(self, **kwargs)
self._shape = None if dtype in SparseR.dtype_set:
self._dtype = dtype
assert isinstance(format, str)
def data_filter(self, value): #print format, type(format), SparseR.format_cls.keys(), format in SparseR.format_cls
if isinstance(value, sparse.spmatrix): return value if format in SparseR.format_cls:
return sparse.csr_matrix(value) self._format = format
else:
raise NotImplementedError('unsupported format "%s" not in list' % format, SparseR.format_cls.keys())
def filter(self, value):
    """Return `value` as a sparse matrix matching this result's format/dtype.

    A value that is already an instance of the class registered for
    ``self.format`` and whose dtype equals ``self.dtype`` is returned
    unchanged.  Anything else is handed to that class's constructor.

    Raises NotImplementedError if the constructed matrix does not have the
    declared dtype or format; no casting is attempted.
    """
    matrix_cls = SparseR.format_cls[self.format]
    if isinstance(value, matrix_cls) and value.dtype == self.dtype:
        return value

    sp = matrix_cls(value)
    if str(sp.dtype) != self.dtype:
        raise NotImplementedError(
            'dtype mismatch: got "%s", expected "%s"' % (sp.dtype, self.dtype))
    if sp.format != self.format:
        raise NotImplementedError(
            'format mismatch: got "%s", expected "%s"'
            % (sp.format, self.format))
    return sp
def __copy__(self):
    """Return a new SparseR with the same dtype and format.

    The name is carried over only when one is set, and the new object's
    data is ``copy.copy`` of this object's data.
    """
    extra = {}
    if self.name is not None:
        extra['name'] = self.name
    duplicate = SparseR(self._dtype, self._format, **extra)
    duplicate.data = copy.copy(self.data)
    return duplicate
# Binary `+` with this object as the left operand; delegates to a
# module-level `add`.
# NOTE(review): no `add` is defined in the visible part of this file (sparse
# addition is exposed as `add_s_s`) -- confirm that this name resolves.
def __add__(left, right): return add(left, right)
# Reflected `+` (this object is the right operand).  The parameter names are
# swapped so the operands reach `add` in the order they were written.
# NOTE(review): `add` is not defined in the visible part of this file --
# confirm that this name resolves.
def __radd__(right, left): return add(left, right)
dtype = property(lambda self: self._dtype)
format = property(lambda self: self._format)
T = property(lambda self: transpose(self), doc = "Return aliased transpose") T = property(lambda self: transpose(self), doc = "Return aliased transpose")
# self._dtype is used when self._data hasn't been set yet
def __dtype_get(self):
if self._data is None:
return self._dtype
else:
return self._data.dtype
def __dtype_set(self, dtype):
if self._data is None:
self._dtype = dtype
else:
raise StateError('cannot set dtype after data has been set')
dtype = property(__dtype_get, __dtype_set)
# self._shape is used when self._data hasn't been set yet def __add__(left, right): return add(left, right)
def __shape_get(self): def __radd__(right, left): return add(left, right)
if self._data is None:
return self._shape
else:
return self._data.shape
def __shape_set(self, shape):
if self._data is None:
self._shape = shape
else:
raise StateError('cannot set shape after data has been set')
shape = property(__shape_get, __shape_set)
# convenience base class
class op(gof.PythonOp, grad.update_gradient_via_grad):
"""unite PythonOp with update_gradient_via_grad"""
# #
# Conversion # Conversion
# #
# convert a sparse matrix to an ndarray # convert a sparse matrix to an ndarray
class sparse2dense(op): class DenseFromSparse(gof.op.Op):
def gen_outputs(self): return [core.Numpy2()] def __init__(self, x, **kwargs):
def impl(x): return numpy.asarray(x.todense()) gof.op.Op.__init__(self, **kwargs)
self.inputs = [assparse(x)]
self.outputs = [tensor.Tensor(x.dtype,[0,0])]
def impl(self, x):
return numpy.asarray(x.todense())
def grad(self, x, gz): def grad(self, x, gz):
if x.format is sparse.coo_matrix: return dense2coo(gz) return sparse_from_dense(gz, x.format)
if x.format is sparse.csc_matrix: return dense2csc(gz) dense_from_sparse = gof.op.constructor(DenseFromSparse)
if x.format is sparse.csr_matrix: return dense2csr(gz)
if x.format is sparse.dok_matrix: return dense2dok(gz) class SparseFromDense(gof.op.Op):
if x.format is sparse.lil_matrix: return dense2lil(gz) def __init__(self, x, format, **kwargs):
gof.op.Op.__init__(self, **kwargs)
# convert an ndarray to various sorts of sparse matrices. if isinstance(format, gof.result.ResultBase):
class _dense2sparse(op): self.inputs = [tensor.astensor(x), format]
def gen_outputs(self): return [SparseR()] else:
def grad(self, x, gz): return sparse2dense(gz) self.inputs = [tensor.astensor(x), gof.result.PythonResult()]
class dense2coo(_dense2sparse): self.inputs[1].data = format
def impl(x): return sparse.coo_matrix(x) self.outputs = [SparseR(x.dtype, self.inputs[1].data)]
class dense2csc(_dense2sparse): def impl(self, x, fmt):
def impl(x): return sparse.csc_matrix(x) # this would actually happen anyway when we try to assign to
class dense2csr(_dense2sparse): # self.outputs[0].data, but that seems hackish -JB
def impl(x): return sparse.csr_matrix(x) return SparseR.format_cls[fmt](x)
class dense2dok(_dense2sparse): def grad(self, (x, fmt), gz):
def impl(x): return sparse.dok_matrix(x) return dense_from_sparse(gz)
class dense2lil(_dense2sparse): sparse_from_dense = gof.op.constructor(SparseFromDense)
def impl(x): return sparse.lil_matrix(x)
# Linear Algebra # Linear Algebra
class add(op): class Transpose(gof.op.Op):
def gen_outputs(self): return [SparseR()] format_map = {
def impl(csr,y): return csr + y 'csr' : 'csc',
'csc' : 'csr'}
class transpose(op): def __init__(self, x, **kwargs):
def gen_outputs(self): return [SparseR()] gof.op.Op.__init__(self, **kwargs)
def impl(x): return x.transpose() x = assparse(x)
def grad(self, x, gz): return transpose(gz) self.inputs = [x]
self.outputs = [SparseR(x.dtype, Transpose.format_map[x.format])]
class dot(op): def impl(self, x):
return x.transpose()
def grad(self, x, gz):
return transpose(gz)
transpose = gof.op.constructor(Transpose)
class AddSS(gof.op.Op):
    """Op that adds two sparse matrices with the same dtype and format."""

    def __init__(self, x, y, **kwargs):
        """Build the op for ``x + y``.

        Both operands are passed through ``assparse``.  The single output
        is declared with the dtype and format of ``x``.

        Raises NotImplementedError if the operands differ in dtype or in
        storage format.
        """
        gof.op.Op.__init__(self, **kwargs)
        x, y = assparse(x), assparse(y)
        self.inputs = [x, y]
        if x.dtype != y.dtype:
            raise NotImplementedError(
                'cannot add sparse matrices of different dtypes: "%s" and "%s"'
                % (x.dtype, y.dtype))
        if x.format != y.format:
            raise NotImplementedError(
                'cannot add sparse matrices of different formats: "%s" and "%s"'
                % (x.format, y.format))
        self.outputs = [SparseR(x.dtype, x.format)]

    def impl(self, x, y):
        """Return the sum of the two input values."""
        return x + y

    def grad(self, inputs, gz):
        """Return the output gradient `gz` once for each of the two inputs."""
        # Explicit unpacking replaces the tuple parameter `(x, y)`; it still
        # fails if `inputs` does not hold exactly two items.
        x, y = inputs
        return gz, gz
add_s_s = gof.op.constructor(AddSS)
if 0:
class dot(gof.op.Op):
""" """
Attributes: Attributes:
grad_preserves_dense - an array of boolean flags (described below) grad_preserves_dense - an array of boolean flags (described below)
...@@ -124,7 +160,7 @@ class dot(op): ...@@ -124,7 +160,7 @@ class dot(op):
hence this mask. hence this mask.
""" """
def __init__(self, *args, **kwargs): def __init__(self, *args, **kwargs):
op.__init__(self, *args, **kwargs) gof.op.Op.__init__(self, **kwargs)
self.grad_preserves_dense = [True, True] self.grad_preserves_dense = [True, True]
def gen_outputs(self): return [SparseR()] def gen_outputs(self): return [SparseR()]
def impl(x,y): def impl(x,y):
...@@ -139,6 +175,7 @@ class dot(op): ...@@ -139,6 +175,7 @@ class dot(op):
if not isinstance(self.inputs[i], SparseR): if not isinstance(self.inputs[i], SparseR):
#assume it is a dense matrix #assume it is a dense matrix
if self.grad_preserves_dense[i]: if self.grad_preserves_dense[i]:
rval[i] = sparse2dense(rval[i]) rval[i] = dense_from_sparse(rval[i])
return rval return rval
...@@ -70,6 +70,8 @@ class Tensor(BaseTensor): ...@@ -70,6 +70,8 @@ class Tensor(BaseTensor):
# alternate Tensor constructor # alternate Tensor constructor
def astensor(data, broadcastable=None, role=None, name=None): def astensor(data, broadcastable=None, role=None, name=None):
"""Return a Tensor containing given data""" """Return a Tensor containing given data"""
if isinstance(data, Tensor) and broadcastable is None and role is None and name is None:
return data
data = numpy.asarray(data) data = numpy.asarray(data)
if broadcastable is None: if broadcastable is None:
broadcastable = [s==1 for s in data.shape] broadcastable = [s==1 for s in data.shape]
...@@ -116,11 +118,9 @@ def _assert_tensor_scalar(x, a): ...@@ -116,11 +118,9 @@ def _assert_tensor_scalar(x, a):
if numpy.product(a.shape) != 1: if numpy.product(a.shape) != 1:
raise ValueError("The second argument must be a scalar.") raise ValueError("The second argument must be a scalar.")
def _as_tensor(obj): # this has a different name, because _as_tensor is the function which ops use
if isinstance(obj, Tensor): # to upcast their arguments... this internal-use function is a good place to put debugging stuff, better than the global astensor.
return obj _as_tensor = astensor
else:
return astensor(obj)
class _Op(BaseTensorOp): class _Op(BaseTensorOp):
"""A convenient base for the ops in this file""" """A convenient base for the ops in this file"""
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论