提交 59f05f20 authored 作者: Joseph Turian's avatar Joseph Turian

merge

.. _sparse:
===============
Sparse matrices
===============
scipy.sparse
------------
Note that you want scipy >= 0.7.0. 0.6 has a very buggy and inconsistent
implementation of sparse matrices.
We describe the details of the compressed sparse matrix types.
``scipy.sparse.csc_matrix``
should be used if the columns are sparse.
``scipy.sparse.csr_matrix``
should be used if the rows are sparse.
``scipy.sparse.lil_matrix``
is faster if we are modifying the array. After initial inserts,
we can then convert to the appropriate sparse matrix format.
There are four member variables that comprise a compressed matrix ``sp``:
``sp.shape``
gives the shape of the matrix.
``sp.data``
gives the values of the non-zero entries. For CSC, these should
be in order from (I think, not sure) reading down in columns,
starting at the leftmost column until we reach the rightmost
column.
``sp.indices``
gives the location of the non-zero entry. For CSC, this is the
row location.
``sp.indptr``
gives, for each column (in CSC format), the range of positions in
``sp.data`` that belong to that column. ``indptr`` has one more value
than the matrix has columns.
``sp.indptr[k] = x`` and ``sp.indptr[k+1] = y`` means that column
k contains ``sp.data[x:y]``, i.e. the x-th through the (y-1)-th non-zero values.
See the example below for details.
.. code-block:: python
>>> import scipy.sparse
>>> sp = scipy.sparse.csc_matrix((5, 10))
>>> sp[4, 0] = 20
/u/lisa/local/byhost/test_maggie46.iro.umontreal.ca/lib64/python2.5/site-packages/scipy/sparse/compressed.py:494: SparseEfficiencyWarning: changing the sparsity structure of a csc_matrix is expensive. lil_matrix is more efficient.
SparseEfficiencyWarning)
>>> sp[0, 0] = 10
>>> sp[2, 3] = 30
>>> sp.todense()
matrix([[ 10., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
[ 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
[ 0., 0., 0., 30., 0., 0., 0., 0., 0., 0.],
[ 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
[ 20., 0., 0., 0., 0., 0., 0., 0., 0., 0.]])
>>> print sp
(0, 0) 10.0
(4, 0) 20.0
(2, 3) 30.0
>>> sp.shape
(5, 10)
>>> sp.data
array([ 10., 20., 30.])
>>> sp.indices
array([0, 4, 2], dtype=int32)
>>> sp.indptr
array([0, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3], dtype=int32)
Several things should be learned from the above example:
* We actually use the wrong sparse matrix type. In fact, it is the
*rows* that are sparse, not the columns. So, it would have been
better to use ``sp = scipy.sparse.csr_matrix((5, 10))``.
* We should have actually created the matrix as a ``lil_matrix``,
which is more efficient for inserts. Afterwards, we should convert
to the appropriate compressed format.
* `sp.indptr[0] = 0` and `sp.indptr[1] = 2`, which means that
column 0 contains sp.data[0:2], i.e. the first two non-zero values.
* `sp.indptr[3] = 2` and `sp.indptr[4] = 3`, which means that column
3 contains sp.data[2:3], i.e. the third non-zero value.
TODO: Rewrite this documentation to do things in a smarter way.
...@@ -26,6 +26,9 @@ _mtypes = [sparse.csc_matrix, sparse.csr_matrix] ...@@ -26,6 +26,9 @@ _mtypes = [sparse.csc_matrix, sparse.csr_matrix]
#* new class ``bsr_matrix`` : the Block CSR format #* new class ``bsr_matrix`` : the Block CSR format
_mtype_to_str = {sparse.csc_matrix: "csc", sparse.csr_matrix: "csr"} _mtype_to_str = {sparse.csc_matrix: "csc", sparse.csr_matrix: "csr"}
import scipy
if scipy.__version__ != '0.7.0':
sys.stderr.write("WARNING: scipy version = %s. We prefer version >=0.7.0 because it has bugs fixed in the sparse matrix code.\n" % scipy.__version__)
def _is_sparse_result(x): def _is_sparse_result(x):
""" """
...@@ -33,7 +36,7 @@ def _is_sparse_result(x): ...@@ -33,7 +36,7 @@ def _is_sparse_result(x):
@return: True iff x is a L{SparseResult} (and not a L{tensor.Tensor}) @return: True iff x is a L{SparseResult} (and not a L{tensor.Tensor})
""" """
if not isinstance(x.type, Sparse) and not isinstance(x.type, tensor.Tensor): if not isinstance(x.type, Sparse) and not isinstance(x.type, tensor.Tensor):
raise NotImplementedError("this function should only be called on results of type sparse.Sparse or tensor.Tensor, not,", x) raise NotImplementedError("this function should only be called on *results* (of type sparse.Sparse or tensor.Tensor), not,", x)
return isinstance(x.type, Sparse) return isinstance(x.type, Sparse)
def _is_dense_result(x): def _is_dense_result(x):
""" """
...@@ -41,7 +44,7 @@ def _is_dense_result(x): ...@@ -41,7 +44,7 @@ def _is_dense_result(x):
@return: True unless x is a L{SparseResult} (and not a L{tensor.Tensor}) @return: True unless x is a L{SparseResult} (and not a L{tensor.Tensor})
""" """
if not isinstance(x.type, Sparse) and not isinstance(x.type, tensor.Tensor): if not isinstance(x.type, Sparse) and not isinstance(x.type, tensor.Tensor):
raise NotImplementedError("this function should only be called on results of type sparse.Sparse or tensor.Tensor, not,", x) raise NotImplementedError("this function should only be called on *results* (of type sparse.Sparse or tensor.Tensor), not,", x)
return isinstance(x.type, tensor.Tensor) return isinstance(x.type, tensor.Tensor)
def _is_sparse(x): def _is_sparse(x):
...@@ -371,7 +374,12 @@ class DenseFromSparse(gof.op.Op): ...@@ -371,7 +374,12 @@ class DenseFromSparse(gof.op.Op):
[tensor.Tensor(dtype = x.type.dtype, [tensor.Tensor(dtype = x.type.dtype,
broadcastable = (False, False)).make_result()]) broadcastable = (False, False)).make_result()])
def perform(self, node, (x, ), (out, )): def perform(self, node, (x, ), (out, )):
out[0] = x.toarray() if _is_dense(x):
print >> sys.stderr, "WARNING: You just called DenseFromSparse on a dense matrix:", x
out[0] = x
else:
out[0] = x.toarray()
assert _is_dense(out[0])
def grad(self, (x, ), (gz, )): def grad(self, (x, ), (gz, )):
if self.sparse_grad: if self.sparse_grad:
return [sp_ones_like(x) * gz] return [sp_ones_like(x) * gz]
...@@ -686,10 +694,11 @@ class StructuredDot(gof.Op): ...@@ -686,10 +694,11 @@ class StructuredDot(gof.Op):
result = a.dot(b) result = a.dot(b)
# sparse dot generates sparse matrix, unless output has single dimension # scipy 0.7.0 automatically casts to dense, so the following is not necessary:
if sparse.issparse(result): # # sparse dot generates sparse matrix, unless output has single dimension
result = result.toarray() # if sparse.issparse(result):
assert isinstance(result, numpy.ndarray) # result = result.toarray()
assert _is_dense(result)
# dot of an NxM sparse matrix, with a Mx1 dense matrix, returns vector not matrix # dot of an NxM sparse matrix, with a Mx1 dense matrix, returns vector not matrix
if result.ndim == 1: if result.ndim == 1:
...@@ -701,7 +710,7 @@ class StructuredDot(gof.Op): ...@@ -701,7 +710,7 @@ class StructuredDot(gof.Op):
if result.shape != (a.shape[0], b.shape[1]): if result.shape != (a.shape[0], b.shape[1]):
if b.shape[0] == 1: if b.shape[0] == 1:
raise Exception("a.shape=%s, b.shape=%s, result.shape=%s ??? This is probably because scipy.csc_matrix dot has a bug with singleton dimensions (i.e. b.shape[0]=1), for scipy 0.6. Use scipy 0.7" % (a.shape, b.shape, result.shape)) raise Exception("a.shape=%s, b.shape=%s, result.shape=%s ??? This is probably because scipy.csc_matrix dot has a bug with singleton dimensions (i.e. b.shape[0]=1), for scipy 0.6. Use scipy 0.7. NB you have scipy version %s" % (a.shape, b.shape, result.shape, scipy.__version__))
else: else:
raise Exception("a.shape=%s, b.shape=%s, result.shape=%s ??? I have no idea why") raise Exception("a.shape=%s, b.shape=%s, result.shape=%s ??? I have no idea why")
...@@ -747,7 +756,10 @@ class StructuredDotCSC(gof.Op): ...@@ -747,7 +756,10 @@ class StructuredDotCSC(gof.Op):
a = sparse.csc_matrix((a_val, a_ind, a_ptr), a = sparse.csc_matrix((a_val, a_ind, a_ptr),
(a_nrows, b.shape[0]), (a_nrows, b.shape[0]),
copy = False) copy = False)
out[0] = numpy.asarray(a.dot(b).todense()) # TODO: todense() is automatic in 0.7.0, just remove the following line:
# out[0] = numpy.asarray(a.dot(b).todense())
out[0] = a.dot(b)
assert _is_dense(out[0])
def c_code(self, node, name, (a_val, a_ind, a_ptr, a_nrows, b), (z,), sub): def c_code(self, node, name, (a_val, a_ind, a_ptr, a_nrows, b), (z,), sub):
return """ return """
......
...@@ -340,7 +340,15 @@ class test_structureddot(unittest.TestCase): ...@@ -340,7 +340,15 @@ class test_structureddot(unittest.TestCase):
kernvals = spmat.data[:spmat.size] kernvals = spmat.data[:spmat.size]
imvals = 1.0 * numpy.arange(bsize*spmat.shape[1]).reshape(bsize,spmat.shape[1]) imvals = 1.0 * numpy.arange(bsize*spmat.shape[1]).reshape(bsize,spmat.shape[1])
outvals = f(kernvals,imvals) outvals = f(kernvals,imvals)
assert numpy.all(outvals == spmat.dot(imvals.T).todense()) print type(spmat.dot(imvals.T))
print spmat.dot(imvals.T)
print dir(spmat.dot(imvals.T))
# scipy 0.7.0 should already make the output dense
# assert numpy.all(outvals == spmat.dot(imvals.T).todense())
c = spmat.dot(imvals.T)
assert _is_dense(c)
assert numpy.all(outvals == c)
tensor.verify_grad(None, buildgraphCSC, [kernvals,imvals], mode=mode) tensor.verify_grad(None, buildgraphCSC, [kernvals,imvals], mode=mode)
...@@ -355,7 +363,12 @@ class test_structureddot(unittest.TestCase): ...@@ -355,7 +363,12 @@ class test_structureddot(unittest.TestCase):
kernvals = spmat.data[:spmat.size] kernvals = spmat.data[:spmat.size]
imvals = 1.0 * numpy.arange(bsize*spmat.shape[1]).reshape(bsize,spmat.shape[1]) imvals = 1.0 * numpy.arange(bsize*spmat.shape[1]).reshape(bsize,spmat.shape[1])
outvals = f(kernvals,imvals) outvals = f(kernvals,imvals)
assert numpy.all(outvals == spmat.dot(imvals.T).todense())
# scipy 0.7.0 should already make the output dense
# assert numpy.all(outvals == spmat.dot(imvals.T).todense())
c = spmat.dot(imvals.T)
assert _is_dense(c)
assert numpy.all(outvals == c)
tensor.verify_grad(None, buildgraphCSR, [kernvals,imvals], mode=mode) tensor.verify_grad(None, buildgraphCSR, [kernvals,imvals], mode=mode)
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论