提交 e44d2518 authored 作者: lamblin's avatar lamblin

Merge pull request #535 from nouiz/sparse

Sparse
......@@ -30,6 +30,9 @@ New Features
* If you use Enthought Python Distribution (EPD) now we use its blas
implementation by default.
Sparse Sandbox graduate
* Remove0 op: it removes stored elements with value 0.
Sparse Sandbox Addition (Not reviewed/documented/tested, but used by some people)
* They are all in the theano.sparse.sandbox.sp2 module
* Op class: Cast, Poisson, Multinomial, EliminateZeros, Sum, Binomial
......
......@@ -16,6 +16,6 @@ except ImportError:
if enable_sparse:
from basic import *
import opt
import sharedvar
from sharedvar import sparse_constructor as shared
......@@ -731,23 +731,6 @@ class CSMGrad(gof.op.Op):
csm_grad = CSMGrad
@gof.local_optimizer([csm_properties])
def skip_pack_csc01(node):
    """Replace ``csm_properties(CSM(*args))`` by ``args``.

    When the single input of a ``csm_properties`` node was produced by the
    ``CSC`` or ``CSR`` op, the inputs of that op are used in place of the
    outputs of ``csm_properties``.

    Returns the list of replacement variables, or False when the pattern
    does not match.
    """
    if not (node.op == csm_properties):
        return False
    csm, = node.inputs
    builder = csm.owner
    if not builder or not (builder.op == CSC or builder.op == CSR):
        return False
    # The inputs of the CSC/CSR op could be broadcastable; give every
    # replacement the broadcastable pattern of the output it stands in for.
    replacements = []
    for inp, out in izip(builder.inputs, node.outputs):
        replacements.append(tensor.patternbroadcast(inp, out.broadcastable))
    return replacements
register_specialize(skip_pack_csc01)
#
# Conversion
#
......@@ -1316,6 +1299,48 @@ def mul(x, y):
raise NotImplementedError()
class Remove0(gof.Op):
    """
    Remove explicit zeros from a sparse matrix.

    The work is delegated to the ``eliminate_zeros()`` method of the input
    value.

    :param inplace: if True, ``eliminate_zeros()`` is called directly on the
                    input value and the op declares it through
                    ``destroy_map``; otherwise a copy of the input is made
                    first.

    NOTE(review): an earlier version of this docstring also said the indices
    are re-sorted. Nothing in this class sorts indices explicitly; confirm
    against the behaviour of scipy's ``eliminate_zeros()``.
    """
    def __init__(self, inplace=False, *args, **kwargs):
        gof.Op.__init__(self, *args, **kwargs)
        self.inplace = inplace
        if self.inplace:
            # Output 0 is computed by overwriting input 0.
            self.destroy_map = {0: [0]}

    def __eq__(self, other):
        """Two instances are equal when class and `inplace` flag match."""
        return type(self) == type(other) and self.inplace == other.inplace

    def __hash__(self):
        """Hash consistent with `__eq__` (class and `inplace` flag)."""
        return 64153 ^ hash(type(self)) ^ hash(self.inplace)

    def __str__(self):
        """Return e.g. ``Remove0{}`` or ``Remove0{inplace}``."""
        l = []
        if self.inplace:
            l.append('inplace')
        return self.__class__.__name__ + '{%s}' % ', '.join(l)

    def make_node(self, x):
        """Build the Apply node; the output has the same type as `x`."""
        return gof.Apply(self, [x], [x.type()])

    def perform(self, node, inputs, outputs):
        """
        Eliminate the explicit zeros of the input value.

        :param node: the Apply node being executed (unused here).
        :param inputs: one-element sequence holding the input value.
        :param outputs: one-element sequence holding the output storage
                        cell; the result is written to its index 0.
        """
        # Unpacked in the body instead of in the signature: tuple parameters
        # were removed from the language by PEP 3113.
        (x,) = inputs
        (z,) = outputs
        if self.inplace:
            c = x
        else:
            c = x.copy()
        c.eliminate_zeros()
        z[0] = c

    def grad(self, inputs, output_grads):
        """
        Return the output gradient unchanged as the gradient of the input.

        :param inputs: one-element sequence holding the input variable.
        :param output_grads: one-element sequence holding the gradient with
                             respect to the output.
        """
        (x,) = inputs
        (gz,) = output_grads
        return [gz]

    def infer_shape(self, node, i0_shapes):
        """Return the input shapes unchanged as the output shapes."""
        return i0_shapes
remove0 = Remove0()
###############
#
# StructuredDot
......
from itertools import izip
import theano
from theano import gof
from theano.sparse import (CSC, CSR, csm_properties, Remove0,
register_specialize)
@gof.local_optimizer([None])
def local_inplace_remove0(node):
    """
    Swap a non-inplace Remove0 for its inplace counterpart.

    Returns a one-element list holding the result of calling the inplace op
    on the inputs of `node`, or False when `node` is not a non-inplace
    Remove0.
    """
    op = node.op
    if not isinstance(op, Remove0) or op.inplace:
        return False
    # Same op class, rebuilt with the inplace flag switched on.
    inplace_op = op.__class__(inplace=True)
    return [inplace_op(*node.inputs)]
theano.compile.optdb.register('local_inplace_remove0',
gof.TopoOptimizer(local_inplace_remove0,
failure_callback=gof.TopoOptimizer.warn_inplace),
60, 'fast_run', 'inplace')
@gof.local_optimizer([csm_properties])
def local_csm_properties_csm(node):
    """Replace ``csm_properties(CSM(*args))`` by ``args``.

    When the single input of a ``csm_properties`` node was produced by the
    ``CSC`` or ``CSR`` op, the inputs of that op are used in place of the
    outputs of ``csm_properties``.

    Returns the list of replacement variables, or False when the pattern
    does not match.
    """
    if not (node.op == csm_properties):
        return False
    csm, = node.inputs
    builder = csm.owner
    if not builder or not (builder.op == CSC or builder.op == CSR):
        return False
    # The inputs of the CSC/CSR op could be broadcastable; give every
    # replacement the broadcastable pattern of the output it stands in for.
    replacements = []
    for inp, out in izip(builder.inputs, node.outputs):
        replacements.append(
            theano.tensor.patternbroadcast(inp, out.broadcastable))
    return replacements
register_specialize(local_csm_properties_csm)
......@@ -15,7 +15,7 @@ import theano
import theano.sparse
from theano import sparse, gof, Op, tensor
from theano.gof.python25 import all, any
from theano.sparse.basic import Remove0, remove0
def register_specialize(lopt, *tags, **kwargs):
theano.compile.optdb['specialize'].register(
......@@ -279,46 +279,6 @@ def row_scale(x, s):
return col_scale(x.T, s).T
class Remove0(Op):
    """
    Remove explicit zeros from a sparse matrix.

    The work is delegated to the ``eliminate_zeros()`` method of the input
    value.

    :param inplace: if True, ``eliminate_zeros()`` is called directly on the
                    input value and the op declares it through
                    ``destroy_map``; otherwise a copy of the input is made
                    first.

    NOTE(review): an earlier version of this docstring also said the indices
    are re-sorted. Nothing in this class sorts indices explicitly; confirm
    against the behaviour of scipy's ``eliminate_zeros()``.
    """
    def __init__(self, inplace=False, *args, **kwargs):
        Op.__init__(self, *args, **kwargs)
        self.inplace = inplace
        if self.inplace:
            # Output 0 is computed by overwriting input 0.
            self.destroy_map = {0: [0]}

    def __eq__(self, other):
        """Two instances are equal when class and `inplace` flag match."""
        return type(self) == type(other) and self.inplace == other.inplace

    def __hash__(self):
        """Hash consistent with `__eq__` (class and `inplace` flag)."""
        return 64153 ^ hash(type(self)) ^ hash(self.inplace)

    def __str__(self):
        """Return e.g. ``Remove0{}`` or ``Remove0{inplace}``."""
        l = []
        if self.inplace:
            l.append('inplace')
        return self.__class__.__name__ + '{%s}' % ', '.join(l)

    def make_node(self, x):
        """Build the Apply node; the output has the same type as `x`."""
        return gof.Apply(self, [x], [x.type()])

    def perform(self, node, inputs, outputs):
        """
        Eliminate the explicit zeros of the input value.

        :param node: the Apply node being executed (unused here).
        :param inputs: one-element sequence holding the input value.
        :param outputs: one-element sequence holding the output storage
                        cell; the result is written to its index 0.
        """
        # Unpacked in the body instead of in the signature: tuple parameters
        # were removed from the language by PEP 3113.
        (x,) = inputs
        (z,) = outputs
        if self.inplace:
            c = x
        else:
            c = x.copy()
        c.eliminate_zeros()
        z[0] = c

    def grad(self, inputs, output_grads):
        """
        Return the output gradient unchanged as the gradient of the input.

        :param inputs: one-element sequence holding the input variable.
        :param output_grads: one-element sequence holding the gradient with
                             respect to the output.
        """
        (x,) = inputs
        (gz,) = output_grads
        return [gz]
remove0 = Remove0()
class EnsureSortedIndices(Op):
"""
Remove explicit zeros from a sparse matrix, and resort indices
......@@ -790,10 +750,10 @@ def max_pool(images, imgshp, maxpoolshp):
convolution_indices.conv_eval(imgshp, maxpoolshp,
maxpoolshp, mode='valid')
print 'XXXXXXXXXXXXXXXX MAX POOLING LAYER XXXXXXXXXXXXXXXXXXXX'
print 'imgshp = ', imgshp
print 'maxpoolshp = ', maxpoolshp
print 'outshp = ', outshp
# print 'XXXXXXXXXXXXXXXX MAX POOLING LAYER XXXXXXXXXXXXXXXXXXXX'
# print 'imgshp = ', imgshp
# print 'maxpoolshp = ', maxpoolshp
# print 'outshp = ', outshp
# build sparse matrix, then generate stack of image patches
csc = theano.sparse.CSM(sptype)(N.ones(indices.size), indices,
......
......@@ -21,9 +21,9 @@ from theano.tests import unittest_tools as utt
class TestSP(unittest.TestCase):
def test_convolution(self):
print '\n\n*************************************************'
print ' TEST CONVOLUTION'
print '*************************************************'
# print '\n\n*************************************************'
# print ' TEST CONVOLUTION'
# print '*************************************************'
# fixed parameters
bsize = 10 # batch size
......@@ -118,18 +118,18 @@ class TestSP(unittest.TestCase):
#assert numpy.all(visref==visval)
print '**** Convolution Profiling Results (',mode,') ****'
print 'Numpy processing time: ', ntot
print 'Theano processing time: ', ttot
# print '**** Convolution Profiling Results (',mode,') ****'
# print 'Numpy processing time: ', ntot
# print 'Theano processing time: ', ttot
#profmode.print_summary()
def test_sparse(self):
print '\n\n*************************************************'
print ' TEST SPARSE'
print '*************************************************'
# print '\n\n*************************************************'
# print ' TEST SPARSE'
# print '*************************************************'
# fixed parameters
bsize = 10 # batch size
......@@ -209,9 +209,9 @@ class TestSP(unittest.TestCase):
visref = numpy.dot(out1,spmat.todense())
assert numpy.all(visref==visval), (visref, visval)
print '**** Sparse Profiling Results (',mode,') ****'
print 'Numpy processing time: ', ntot
print 'Theano processing time: ', ttot
# print '**** Sparse Profiling Results (',mode,') ****'
# print 'Numpy processing time: ', ntot
# print 'Theano processing time: ', ttot
#profmode.print_summary()
......@@ -409,7 +409,7 @@ class TestSP(unittest.TestCase):
# Sparse gradient on Sum on all axis
# unfinished, and suspended until verify_grad get fixed
if False:
print 'grad on sum on all axis...'
# print 'grad on sum on all axis...'
def fun(x):
## verify_grad does not handle sparse data, so here's some casting as a workaround.
# x is a dense matrix: make it sparse
......@@ -421,48 +421,12 @@ class TestSP(unittest.TestCase):
dense_sum = theano.sparse.DenseFromSparse()(sparse_sum)
return dense_sum
x_val = x_data.copy()
print type(x_val)
# print type(x_val)
import pdb;pdb.set_trace()
tensor.verify_grad(fun, [x_val], rng=rng)
#utt.verify_grad(SpSum(axis=None), [x_val])
print 'ok'
def test_remove0():
    # For each sparse format (csc, csr): build a 3x3 matrix holding three
    # explicit zeros, run sp.Remove0 on it through a compiled function, and
    # compare the size of the result with the size of the matrix after
    # scipy's own eliminate_zeros().
    print
    print 'test_remove0()'
    configs=[
        # structure type, numpy matching class
        ('csc',scipy.sparse.csc_matrix),
        ('csr',scipy.sparse.csr_matrix),
        ]
    for format,matrix_class in configs:
        # NOTE: the message below is formatted from locals(), so the names
        # `format` and `matrix_class` must not be renamed.
        print 'config: format=\'%(format)s\', matrix_class=%(matrix_class)s'%locals()
        # real (numeric) value: entries 1..9, then three of them set to 0
        origin = (numpy.arange(9) + 1).reshape((3, 3)).astype(theano.config.floatX)
        mat = matrix_class(origin).astype(theano.config.floatX)
        mat[0,1] = mat[1,0] = mat[2,2] = 0
        # The zeros written above are expected to still be stored explicitly.
        assert mat.size == 9
        # symbolic input of the matching sparse type
        x = theano.sparse.SparseType(format=format, dtype=theano.config.floatX)()
        # The input is wrapped in In(..., mutable=True) because Theano, as a
        # rule, does not apply optimizations that would overwrite its inputs.
        f = theano.function([theano.In(x, borrow=True, mutable=True)], sp.Remove0()(x))
        # assert optimization is applied in modes with optimization
        if theano.config.mode not in ['FAST_COMPILE']:
            # list of apply nodes in the optimized graph.
            nodes = f.maker.env.toposort()
            # One entry per inplace Remove0 node found in the graph.
            v = [True for node in nodes if isinstance(node.op, sp.Remove0) and node.op.inplace]
            assert len(v), 'Inplacing optimization should have been applied.'
        # checking
        # NOTE: `target` is only another name for `mat` (no copy is made), so
        # the eliminate_zeros() call below changes `target` as well.
        target = mat
        result = f(mat)
        mat.eliminate_zeros()
        assert result.size == target.size, 'Matrices sizes differ. Have zeros been removed ?'
# print 'ok'
def test_diag():
m = theano.sparse.csc_matrix()
......@@ -549,8 +513,8 @@ def test_row_scale():
f = theano.function([x, s], sp.row_scale(x, s))
print 'A', f(x_val, s_val).toarray()
print 'B', (x_val_dense.T * s_val).T
# print 'A', f(x_val, s_val).toarray()
# print 'B', (x_val_dense.T * s_val).T
assert numpy.all(f(x_val, s_val).toarray() == (x_val_dense.T * s_val).T)
......@@ -580,8 +544,8 @@ def test_col_scale():
f = theano.function([x, s], sp.col_scale(x, s))
print 'A', f(x_val, s_val).toarray()
print 'B', (x_val_dense * s_val)
# print 'A', f(x_val, s_val).toarray()
# print 'B', (x_val_dense * s_val)
assert numpy.all(f(x_val, s_val).toarray() == (x_val_dense * s_val))
......
......@@ -24,7 +24,7 @@ from theano.sparse import as_sparse_variable, CSC, CSR, CSM, CSMProperties
from theano.sparse import SparseType, CSMGrad
from theano.sparse import StructuredDot, StructuredDotCSC
from theano.sparse import StructuredDotGradCSC, StructuredDotGradCSR
from theano.sparse import AddSS, AddSD, MulSS, MulSD, Transpose, Neg
from theano.sparse import AddSS, AddSD, MulSS, MulSD, Transpose, Neg, Remove0
from theano.sparse import add, mul, structured_dot, transpose
from theano.sparse import (csc_from_dense, csr_from_dense, dense_from_sparse,
SparseFromDense)
......@@ -229,6 +229,14 @@ class SparseInferShapeTester(utt.InferShapeTester):
numpy.random.randn(10, 40).astype(config.floatX)],
MulSD)
def test_remove0(self):
    # Run the shared _compile_and_check helper for Remove0 on a csr
    # input whose value is built from random_lil((10, 40), ...).
    x = SparseType('csr', dtype=config.floatX)()
    value = sp.csr_matrix(random_lil((10, 40), config.floatX, 3))
    self._compile_and_check([x], [Remove0()(x)], [value], Remove0)
def test_dot(self):
x = SparseType('csc', dtype=config.floatX)()
y = SparseType('csc', dtype=config.floatX)()
......@@ -616,11 +624,11 @@ class test_structureddot(unittest.TestCase):
spmat.dtype = numpy.dtype(sparse_dtype)
mat = numpy.asarray(numpy.random.randn(N, K) * 9,
dtype=dense_dtype)
print 'DTYPES', sparse_dtype, dense_dtype
print 'sym types', a.type, b.type
print 'dtype strings', spmat.dtype, mat.dtype
print 'numpy dtype num', mat.dtype.num
print 'scipy dtype num', spmat.data.dtype.num
#print 'DTYPES', sparse_dtype, dense_dtype
#print 'sym types', a.type, b.type
#print 'dtype strings', spmat.dtype, mat.dtype
#print 'numpy dtype num', mat.dtype.num
#print 'scipy dtype num', spmat.data.dtype.num
theano_result = f(spmat, mat)
scipy_result = spmat * mat
assert theano_result.shape == scipy_result.shape
......@@ -657,7 +665,7 @@ class test_structureddot(unittest.TestCase):
sdcscpresent = False
for node in f.maker.env.toposort():
print node.op
#print node.op
assert not isinstance(node.op, CSM)
assert not isinstance(node.op, CSMProperties)
if isinstance(f.maker.env.toposort()[1].op, StructuredDotCSC):
......@@ -672,7 +680,7 @@ class test_structureddot(unittest.TestCase):
imvals = 1.0 * numpy.array(numpy.arange(bsize * spmat.shape[1]).\
reshape(bsize, spmat.shape[1]), dtype='float32')
outvals = f(kernvals, imvals)
print outvals
#print outvals
def test_dot_sparse_sparse(self):
#test dot for 2 input sparse matrix
......@@ -730,10 +738,10 @@ class test_structureddot(unittest.TestCase):
scipy_time = numpy.min(scipy_times)
speedup = scipy_time / theano_time
print scipy_times
print theano_times
print ('M=%(M)s N=%(N)s K=%(K)s nnz=%(nnz)s theano_time'
'=%(theano_time)s speedup=%(speedup)s') % locals()
#print scipy_times
#print theano_times
#print ('M=%(M)s N=%(N)s K=%(K)s nnz=%(nnz)s theano_time'
# '=%(theano_time)s speedup=%(speedup)s') % locals()
# fail if Theano is slower than scipy by more than a certain amount
overhead_tol = 0.003 # seconds overall
......@@ -770,10 +778,8 @@ class test_structureddot(unittest.TestCase):
theano_time = t1 - t0
scipy_time = t2 - t1
#print theano_result
#print scipy_result
print 'theano took', theano_time,
print 'scipy took', scipy_time
#print 'theano took', theano_time,
#print 'scipy took', scipy_time
overhead_tol = 0.002 # seconds
overhead_rtol = 1.1 # times as long
self.assertTrue(numpy.allclose(theano_result, scipy_result))
......@@ -1160,6 +1166,46 @@ def test_size():
check()
def test_remove0():
    # For each sparse format: build a 3x3 matrix holding three explicit
    # zeros, run Remove0 on it through a compiled function, and compare the
    # size of the result with the size of the matrix after scipy's own
    # eliminate_zeros().
    cases = (
        # structure type, scipy matching class
        ('csc', scipy.sparse.csc_matrix),
        ('csr', scipy.sparse.csr_matrix),
    )
    for fmt, make_matrix in cases:
        # Numeric value: entries 1..9, then three of them set to 0.
        dense = (numpy.arange(9) + 1).reshape((3, 3)).astype(config.floatX)
        mat = make_matrix(dense).astype(theano.config.floatX)
        mat[0, 1] = mat[1, 0] = mat[2, 2] = 0
        assert mat.size == 9

        # Symbolic input.  It is wrapped in In(..., mutable=True) because
        # Theano, as a rule, does not optimize inputs.
        x = theano.sparse.SparseType(format=fmt, dtype=config.floatX)()
        mutable_x = theano.In(x, borrow=True, mutable=True)
        f = theano.function([mutable_x], Remove0()(x))

        # In modes with optimization, local_inplace_remove0 must have
        # replaced the op by its inplace version.
        if theano.config.mode not in ('FAST_COMPILE',):
            inplace_nodes = [node for node in f.maker.env.toposort()
                             if isinstance(node.op, Remove0)
                             and node.op.inplace]
            assert len(inplace_nodes), \
                'Inplacing optimization should have been applied.'

        # Compare against scipy applied to the very same matrix object.
        result = f(mat)
        mat.eliminate_zeros()
        msg = 'Matrices sizes differ. Have zeros been removed ?'
        assert result.size == mat.size, msg
class Test_getitem(unittest.TestCase):
def setUp(self):
self.rng = numpy.random.RandomState(utt.fetch_seed())
......
import numpy
try:
import scipy.sparse as sp
import scipy.sparse
except ImportError:
pass # The variable enable_sparse will be used to disable the test file.
import theano
from theano import config, tensor
from theano.sparse import (enable_sparse, CSM, CSMProperties, csm_properties,
CSC, CSR)
from theano.sparse.tests.test_basic import random_lil
#from theano.gof.python25 import all, any, product
# Skip the whole test module when sparse support is disabled.
if enable_sparse == False:
    # SkipTest was raised here without ever being imported in this file,
    # which would turn the intended skip into a NameError.  Import it
    # locally from the standard library.
    from unittest import SkipTest
    raise SkipTest('Optional package sparse disabled')
def test_local_csm_properties_csm():
    # Compile csm_properties(CSC(...)) and csm_properties(CSR(...)) and
    # check that neither a CSM nor a CSMProperties node is left in the
    # compiled graph, then run the function once on real values.
    data = tensor.vector()
    indices = tensor.ivector()
    indptr = tensor.ivector()
    shape = tensor.ivector()
    for build_csm, to_scipy in ((CSC, sp.csc_matrix), (CSR, sp.csr_matrix)):
        packed = build_csm(data, indices, indptr, shape)
        f = theano.function([data, indices, indptr, shape],
                            csm_properties(packed))
        for node in f.maker.env.toposort():
            assert not isinstance(node.op, (CSM, CSMProperties))
        mat = to_scipy(random_lil((10, 40), config.floatX, 3))
        f(mat.data, mat.indices, mat.indptr, mat.shape)
......@@ -33,9 +33,6 @@ from theano.gof.opt import (Optimizer, pre_constant_merge,
from theano.gof import toolbox, DestroyHandler
from basic import get_constant_value, ShapeError
# Remove0 is lazily imported to avoid circular imports.
Remove0 = None
theano.configparser.AddConfigVar('on_shape_error',
"warn: print a warning and use the default"
......@@ -1974,24 +1971,6 @@ compile.optdb.register('local_inplace_incsubtensor1',
failure_callback=TopoOptimizer.warn_inplace),
60, 'fast_run', 'inplace') # DEBUG
@gof.local_optimizer([None])
def local_inplace_remove0(node):
    """
    Optimization to insert inplace versions of Remove0.

    Returns a one-element list holding the result of calling the inplace op
    on the inputs of `node`, or False when `node` is not a non-inplace
    Remove0.
    """
    # Remove0 is a module-level name, initially None, that is bound by the
    # import below on the first call.  The import is deferred to avoid a
    # circular import at module load time.
    global Remove0
    if Remove0 is None:
        from theano.sparse.sandbox.sp import Remove0
    if isinstance(node.op, Remove0) and not node.op.inplace:
        # Same op class, rebuilt with the inplace flag switched on.
        new_op = node.op.__class__(inplace=True)
        new_node = new_op(*node.inputs)
        return [new_node]
    return False
compile.optdb.register('local_inplace_remove0',
TopoOptimizer(local_inplace_remove0,
failure_callback=TopoOptimizer.warn_inplace), 60,
'fast_run', 'inplace')
@register_canonicalize
@register_stabilize
......
......@@ -164,7 +164,7 @@ class InferShapeTester(unittest.TestCase):
outputs_function = theano.function(inputs, outputs, mode=self.mode)
shapes_function = theano.function(inputs, [o.shape for o in outputs],
mode=self.mode)
theano.printing.debugprint(shapes_function)
#theano.printing.debugprint(shapes_function)
# Check that the Op is removed from the compiled function.
topo_shape = shapes_function.maker.env.toposort()
assert not any(isinstance(t.op, cls) for t in topo_shape)
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论