提交 2125a099 authored 作者: James Bergstra's avatar James Bergstra

Many revisions to sparse tests.

上级 880078dd
......@@ -5,7 +5,7 @@ from nose.plugins.skip import SkipTest
if enable_sparse == False:
raise SkipTest('Optional package sparse disabled')
import random
import random, time
import unittest
import theano
......@@ -21,14 +21,25 @@ from theano.tests import unittest_tools as utt
def eval_outputs(outputs):
    """Compile a zero-input function over `outputs`, call it, and return its first result."""
    # NOTE(review): `compile` is presumably theano's compile module imported
    # elsewhere in the file, not the Python builtin -- confirm.
    fn = compile.function([], outputs)
    results = fn()
    return results[0]
def random_lil(shape, dtype, nnz):
    """Build a scipy LIL sparse matrix with randomly placed random values.

    :param shape: dimensions of the matrix, e.g. ``(rows, cols)``.
    :param dtype: dtype passed to ``sp.lil_matrix``.
    :param nnz: number of random assignments to perform.  Locations are drawn
        independently, so repeated locations can leave fewer than ``nnz``
        stored entries.
    :returns: the populated ``sp.lil_matrix``.

    Each assigned value comes from ``numpy.random.rand()``; any conversion to
    ``dtype`` is left to the matrix itself.
    """
    rval = sp.lil_matrix(shape, dtype=dtype)
    for _ in range(nnz):
        # Draw one coordinate per dimension and index with a tuple of plain
        # ints, so the assignment always addresses a single element rather
        # than relying on how an ndarray index is interpreted.
        idx = tuple(int(numpy.random.randint(dim)) for dim in shape)
        rval[idx] = numpy.random.rand()
    return rval
class T_transpose(unittest.TestCase):
def setUp(self):
    """Seed the random number generator through `utt` before each test."""
    utt.seed_rng()
def test_transpose_csc(self):
sp = sparse.csc_matrix(sparse.eye(5,3))
sp = scipy.sparse.csc_matrix(scipy.sparse.eye(5,3))
a = as_sparse_variable(sp)
self.failUnless(a.data is sp)
self.failIf(a.data is sp)
self.failUnless(a.data.shape == (5,3))
self.failUnless(a.type.dtype == 'float64', a.type.dtype)
self.failUnless(a.type.format == 'csc', a.type.format)
......@@ -39,7 +50,7 @@ class T_transpose(unittest.TestCase):
vta = eval_outputs([ta])
self.failUnless(vta.shape == (3,5))
def test_transpose_csr(self):
a = as_sparse_variable(sparse.csr_matrix(sparse.eye(5,3)))
a = as_sparse_variable(scipy.sparse.csr_matrix(scipy.sparse.eye(5,3)))
self.failUnless(a.data.shape == (5,3))
self.failUnless(a.type.dtype == 'float64')
self.failUnless(a.type.format == 'csr')
......@@ -55,13 +66,13 @@ class T_Add(unittest.TestCase):
for mtype in _mtypes:
a = mtype(numpy.array([[1., 0], [3, 0], [0, 6]]))
aR = as_sparse_variable(a)
self.failUnless(aR.data is a)
self.failIf(aR.data is a)
self.failUnless(_is_sparse(a))
self.failUnless(_is_sparse_variable(aR))
b = mtype(numpy.asarray([[0, 2.], [0, 4], [5, 0]]))
bR = as_sparse_variable(b)
self.failUnless(bR.data is b)
self.failIf(bR.data is b)
self.failUnless(_is_sparse(b))
self.failUnless(_is_sparse_variable(bR))
......@@ -82,13 +93,13 @@ class T_Add(unittest.TestCase):
for mtype in _mtypes:
a = numpy.array([[1., 0], [3, 0], [0, 6]])
aR = tensor.as_tensor_variable(a)
self.failUnless(aR.data is a)
self.failIf(aR.data is a) #constants are copied
self.failUnless(_is_dense(a))
self.failUnless(_is_dense_variable(aR))
b = mtype(numpy.asarray([[0, 2.], [0, 4], [5, 0]]))
bR = as_sparse_variable(b)
self.failUnless(bR.data is b)
self.failIf(bR.data is b) #constants are copied
self.failUnless(_is_sparse(b))
self.failUnless(_is_sparse_variable(bR))
......@@ -107,13 +118,13 @@ class T_Add(unittest.TestCase):
for mtype in _mtypes:
a = mtype(numpy.array([[1., 0], [3, 0], [0, 6]]))
aR = as_sparse_variable(a)
self.failUnless(aR.data is a)
self.failIf(aR.data is a)
self.failUnless(_is_sparse(a))
self.failUnless(_is_sparse_variable(aR))
b = numpy.asarray([[0, 2.], [0, 4], [5, 0]])
bR = tensor.as_tensor_variable(b)
self.failUnless(bR.data is b)
self.failIf(bR.data is b)
self.failUnless(_is_dense(b))
self.failUnless(_is_dense_variable(bR))
......@@ -132,136 +143,117 @@ class T_conversion(unittest.TestCase):
def setUp(self):
    """Seed the random number generator through `utt` before each test."""
    utt.seed_rng()
def test0(self):
a = tensor.as_tensor_variable(numpy.random.rand(5))
s = csc_from_dense(a)
val = eval_outputs([s])
self.failUnless(str(val.dtype)=='float64')
self.failUnless(val.format == 'csc')
def test1(self):
a = tensor.as_tensor_variable(numpy.random.rand(5))
s = csr_from_dense(a)
val = eval_outputs([s])
self.failUnless(str(val.dtype)=='float64')
self.failUnless(val.format == 'csr')
def test2(self):
#call dense_from_sparse
for t in _mtypes:
s = t((2,5))
s = t(scipy.sparse.identity(5))
d = dense_from_sparse(s)
s[0,0] = 1.0
val = eval_outputs([d])
if 0:
def test0(self):
a = tensor.as_tensor_variable(numpy.random.rand(5))
s = csc_from_dense(a)
val = eval_outputs([s])
self.failUnless(str(val.dtype)=='float64')
self.failUnless(numpy.all(val[0] == [1,0,0,0,0]))
self.failUnless(val.format == 'csc')
if 0:
def test1(self):
a = tensor.as_tensor_variable(numpy.random.rand(5))
s = csr_from_dense(a)
val = eval_outputs([s])
self.failUnless(str(val.dtype)=='float64')
self.failUnless(val.format == 'csr')
if 1:
def test2(self):
#call dense_from_sparse
for t in _mtypes:
s = t(scipy.sparse.identity(5))
d = dense_from_sparse(s)
# s should be copied into the graph as a constant
s[0,0] = 3.0 # changes s, but not the copy
val = eval_outputs([d])
return
self.failUnless(str(val.dtype)==s.dtype)
self.failUnless(numpy.all(val[0] == [1,0,0,0,0]))
import scipy.sparse as sp
class test_structureddot(unittest.TestCase):
def setUp(self):
    """Seed the random number generator through `utt` before each test."""
    utt.seed_rng()
def test_structureddot_csc_grad(self):
    """Gradient-check `structured_dot` with a float32 CSC sparse operand.

    Shortcut: CSC is tested in float32 here, CSR is tested in float64.
    """
    dtype = 'float32'

    # Random 4x3 sparse operand (3 random assignments) and a dense 3x2 operand.
    sparse_mat = sp.csc_matrix(random_lil((4, 3), dtype, 3))
    dense_mat = numpy.asarray(numpy.random.randn(3, 2), dtype)

    def buildgraphCSC(spdata, sym_mat):
        # Rebuild the sparse variable from its data vector; the index
        # structure and shape are taken from `sparse_mat`.
        csc = CSC(spdata,
                  sparse_mat.indices[:sparse_mat.size],
                  sparse_mat.indptr,
                  sparse_mat.shape)
        assert csc.type.dtype == dtype
        product = structured_dot(csc, sym_mat)
        assert product.type.dtype == dtype
        return product

    utt.verify_grad(buildgraphCSC, [sparse_mat.data, dense_mat])
def test_structureddot_csr_grad(self):
    """Gradient-check `structured_dot` with a float64 CSR sparse operand.

    Shortcut: CSC is tested in float32, CSR is tested in float64 here.
    """
    dtype = 'float64'

    # Random 4x3 sparse operand (3 random assignments) and a dense 3x2 operand.
    sparse_mat = sp.csr_matrix(random_lil((4, 3), dtype, 3))
    dense_mat = numpy.asarray(numpy.random.randn(3, 2), dtype)

    def buildgraph(spdata, sym_mat):
        # Rebuild the sparse variable from its data vector; the index
        # structure and shape are taken from `sparse_mat`.
        csr = CSR(spdata,
                  sparse_mat.indices[:sparse_mat.size],
                  sparse_mat.indptr,
                  sparse_mat.shape)
        assert csr.type.dtype == dtype
        product = structured_dot(csr, sym_mat)
        assert product.type.dtype == dtype
        return product

    utt.verify_grad(buildgraph, [sparse_mat.data, dense_mat])
def test_upcast(self):
def test_structuredot(self):
bsize = 2
typenames = 'float32', 'int64', 'int8', 'int32', 'int16', 'float64', 'complex64', 'complex128'
for dense_dtype in typenames:
for sparse_dtype in typenames:
#print >> sys.stderr, dense_dtype, sparse_dtype
# iterate for a few different random graph patterns
for i in range(10):
spmat = sp.csc_matrix((4,6), dtype=sparse_dtype)
for k in range(5):
# set non-zeros in random locations (row x, col y)
x = numpy.floor(numpy.random.rand()*spmat.shape[0])
y = numpy.floor(numpy.random.rand()*spmat.shape[1])
spmat[x,y] = numpy.random.rand()*10
spmat = sp.csc_matrix(spmat)
kerns = tensor.Tensor(broadcastable=[False],
dtype=sparse_dtype)('kerns')
images = tensor.Tensor(broadcastable=[False, False],
dtype=dense_dtype)('images')
output_dtype = theano.scalar.upcast(sparse_dtype, dense_dtype)
##
# Test compressed-sparse column matrices ###
##
# build symbolic theano graph
def buildgraphCSC(kerns,images):
csc = CSC(kerns, spmat.indices[:spmat.size],
spmat.indptr, spmat.shape)
assert csc.type.dtype == sparse_dtype
rval = structured_dot(csc, images.T)
assert rval.type.dtype == output_dtype
return rval
out = buildgraphCSC(kerns,images)
f = theano.function([kerns,images], out)
# compute theano outputs
kernvals = spmat.data[:spmat.size]
imvals = 1.0 + 1.0 * numpy.array(
numpy.arange(bsize*spmat.shape[1]).\
reshape(bsize,spmat.shape[1]), dtype=dense_dtype)
#print('dense_dtype=%s' % dense_dtype)
#print('sparse_dtype=%s' % sparse_dtype)
#print('i=%s' % i)
print 'kerntype', str(kernvals.dtype), kernvals.dtype.num
outvals = f(kernvals,imvals)
print 'YAY'
print spmat.todense()
print imvals.T
print "OUT1", outvals
# compare to scipy
c = spmat * (imvals.T)
assert _is_dense(c)
assert str(outvals.dtype) == output_dtype
assert numpy.all(numpy.abs(outvals -
numpy.array(c, dtype=output_dtype)) < 1e-4)
if (sparse_dtype.startswith('float') and
dense_dtype.startswith('float')):
utt.verify_grad(buildgraphCSC,
[kernvals, imvals])
print 'BBB'
##
# Test compressed-sparse row matrices ###
##
spmat = spmat.tocsr()
# build theano graph
def buildgraphCSR(kerns,images):
csr = CSR(kerns, spmat.indices[:spmat.size], spmat.indptr, spmat.shape)
return structured_dot(csr, images.T)
out = buildgraphCSR(kerns,images)
f = theano.function([kerns,images], out)
# compute theano output
kernvals[:] = spmat.data[:spmat.size]
#kernvals = numpy.empty(spmat.size, dtype=dense_dtype)
imvals = 1.0 * numpy.arange(bsize*spmat.shape[1]).reshape(bsize,spmat.shape[1])
print 'kerntype2', str(kernvals.dtype), kernvals.dtype.num
outvals = f(kernvals,imvals)
print 'YAYAGI'
# compare to scipy
c = spmat * (imvals.T)
assert _is_dense(c)
assert str(outvals.dtype) == output_dtype
assert numpy.all(numpy.abs(outvals -
numpy.array(c, dtype=output_dtype)) < 1e-4)
# we could test more, but hopefully this suffices?
if sparse_dtype.startswith('float') and dense_dtype.startswith('float'):
utt.verify_grad( buildgraphCSR, [kernvals,imvals])
correct_dtype = theano.scalar.upcast(sparse_dtype, dense_dtype)
a = SparseType('csc', dtype=sparse_dtype)()
b = tensor.matrix(dtype=dense_dtype)
d = structured_dot(a,b)
assert d.type.dtype == correct_dtype
# compile and run a function
f = theano.function([a,b],d)
M,N,K,nnz = (4,3,5,3)
spmat = sp.csc_matrix(random_lil((M,N), sparse_dtype, nnz))
# the following madness is necessary to workaround
# an intc vs. int32 bug.
# The lil makes an intc on my computer when sparse_dtype
# is int32.
spmat.dtype = numpy.dtype(sparse_dtype)
mat = numpy.asarray(numpy.random.randn(N,K)*9, dtype=dense_dtype)
print 'DTYPES', sparse_dtype,dense_dtype
print 'sym types', a.type, b.type
print 'dtype strings', spmat.dtype, mat.dtype
print 'numpy dtype num', mat.dtype.num
print 'scipy dtype num', spmat.data.dtype.num
theano_result = f(spmat, mat)
scipy_result = spmat * mat
assert theano_result.shape == scipy_result.shape
assert theano_result.dtype == scipy_result.dtype
assert numpy.allclose(theano_result, scipy_result)
def test_opt_unpack(self):
kerns = tensor.Tensor(dtype='int64', broadcastable=[False])('kerns')
spmat = sp.csc_matrix((4,6), dtype='int64')
spmat = sp.lil_matrix((4,6), dtype='int64')
for i in range(5):
# set non-zeros in random locations (row x, col y)
x = numpy.floor(numpy.random.rand()*spmat.shape[0])
......@@ -292,5 +284,94 @@ class test_structureddot(unittest.TestCase):
outvals = f(kernvals,imvals)
print outvals
def test_csc_correct_output_faster_than_scipy(self):
    """Check `theano.dot` on a float64 CSC matrix against scipy for value and speed.

    For several problem sizes the compiled function's result must be close to
    scipy's product, and its wall-clock time must not exceed scipy's by more
    than a relative factor plus a fixed allowance.
    """
    sparse_dtype = 'float64'
    dense_dtype = 'float64'

    a = SparseType('csc', dtype=sparse_dtype)()
    b = tensor.matrix(dtype=dense_dtype)
    d = theano.dot(a, b)
    f = theano.function([a, b], d, mode='FAST_RUN')

    # Technically we could use DEBUG_MODE to verify internal problems; if
    # this test fails for correctness, DEBUG_MODE is the tool for finding
    # where things go wrong.  Comparing FAST_RUN with scipy is a quick
    # end-to-end check that also confirms the speed optimizations work.
    print(f.maker.mode)
    #print f.maker.env.toposort()

    # Fail if Theano is slower than scipy by more than a certain amount.
    overhead_tol = 0.003  # seconds overall
    overhead_rtol = 1.2  # times as long

    problem_sizes = [
        (4, 3, 2, 3),
        (40, 30, 20, 3),
        (40, 30, 20, 30),
        (400, 3000, 200, 6000),
    ]
    for M, N, K, nnz in problem_sizes:
        spmat = sp.csc_matrix(random_lil((M, N), sparse_dtype, nnz))
        mat = numpy.asarray(numpy.random.randn(N, K), dense_dtype)

        t0 = time.time()
        theano_result = f(spmat, mat)
        t1 = time.time()
        scipy_result = spmat * mat
        t2 = time.time()

        theano_time = t1 - t0
        scipy_time = t2 - t1

        # One line of output: both timings separated by single spaces.
        print(' '.join(['theano took', str(theano_time),
                        'scipy took', str(scipy_time)]))

        self.failUnless(numpy.allclose(theano_result, scipy_result))
        time_budget = overhead_rtol * scipy_time + overhead_tol
        self.failIf(theano_time > time_budget)
def test_csr_correct_output_faster_than_scipy(self):
    """Check `theano.dot` on a float32 CSR matrix against scipy for value and speed.

    In contrast with the gradient tests, CSR is put in float32 here and CSC
    in float64.  For several problem sizes the compiled function's result
    must be close to scipy's product, and its wall-clock time must not exceed
    scipy's by more than a relative factor plus a fixed allowance.
    """
    sparse_dtype = 'float32'
    dense_dtype = 'float32'

    a = SparseType('csr', dtype=sparse_dtype)()
    b = tensor.matrix(dtype=dense_dtype)
    d = theano.dot(a, b)
    f = theano.function([a, b], d, mode='FAST_RUN')

    # Technically we could use DEBUG_MODE to verify internal problems; if
    # this test fails for correctness, DEBUG_MODE is the tool for finding
    # where things go wrong.  Comparing FAST_RUN with scipy is a quick
    # end-to-end check that also confirms the speed optimizations work.
    print(f.maker.env.toposort())

    overhead_tol = 0.002  # seconds
    overhead_rtol = 1.1  # times as long

    problem_sizes = [
        (4, 3, 2, 3),
        (40, 30, 20, 3),
        (40, 30, 20, 30),
        (400, 3000, 200, 6000),
    ]
    for M, N, K, nnz in problem_sizes:
        spmat = sp.csr_matrix(random_lil((M, N), sparse_dtype, nnz))
        mat = numpy.asarray(numpy.random.randn(N, K), dense_dtype)

        t0 = time.time()
        theano_result = f(spmat, mat)
        t1 = time.time()
        scipy_result = spmat * mat
        t2 = time.time()

        theano_time = t1 - t0
        scipy_time = t2 - t1

        # One line of output: both timings separated by single spaces.
        print(' '.join(['theano took', str(theano_time),
                        'scipy took', str(scipy_time)]))

        self.failUnless(numpy.allclose(theano_result, scipy_result))
        time_budget = overhead_rtol * scipy_time + overhead_tol
        self.failIf(theano_time > time_budget)
if __name__ == '__main__':
unittest.main()
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论