提交 84ac684c authored 作者: lamblin's avatar lamblin

Merge pull request #412 from lamblin/sparse_indexing

Sparse indexing
......@@ -1330,11 +1330,16 @@ class _Linker(gof.link.LocalLinker):
r_vals_initialized = []
for r in storage_map:
if (r.owner is None):
if (storage_map[r][0] is None):
raise Exception('Missing input', r)
if not r.type.is_valid_value(storage_map[r][0]):
# None may be a valid input value (for instance,
# for a Generic object). We only want to raise
# an error if it is not valid.
if (storage_map[r][0] is None):
raise InvalidValueError(r, storage_map[r][0],
hint="Graph Input '%s' is missing" % str(r))
raise InvalidValueError(r, storage_map[r][0],
hint="Graph Input '%s' is missing" % str(r))
hint=("Graph Input '%s' has invalid value "
"%s" % (r, storage_map[r][0])))
r_vals[r] = storage_map[r][0]
storage_map[r][0] = None
r_vals_initialized.append(r)
......@@ -1577,7 +1582,8 @@ class _Linker(gof.link.LocalLinker):
#print storage_map
for r in storage_map:
if (r.owner is None):
assert storage_map[r][0] is not None
if not r.type.is_valid_value(None):
assert storage_map[r][0] is not None
###############
......
......@@ -391,7 +391,11 @@ class Constant(Value):
def __str__(self):
    """Return a short, human-readable label for this constant.

    An explicitly assigned ``name`` is returned unchanged.  Otherwise the
    label is ``Constant{<data>}``, where ``<data>`` is ``str(self.data)``,
    abbreviated to its first ten and last ten characters (joined by
    ``...``) whenever it is longer than 20 characters.
    """
    if self.name is not None:
        return self.name
    name = str(self.data)
    if len(name) > 20:
        # Keep the head and the tail of the text.  The tail needs the
        # slice ``[-10:]``: plain ``[-10]`` would keep one character only.
        name = name[:10] + '...' + name[-10:]
    return 'Constant{%s}' % name
def clone(self):
"""
We clone this object, but we don't clone the data to lower memory requirement
......
......@@ -423,4 +423,7 @@ class Generic(SingletonType):
Py_INCREF(py_%(name)s);
""" % locals()
def __str__(self):
    """Use the name of the instance's class as its string form."""
    cls = self.__class__
    return cls.__name__
generic = Generic()
......@@ -188,13 +188,11 @@ class _sparse_py_operators:
if not isinstance(args, tuple):
args = args,
scalar_var = tensor.iscalar()
if len(args) == 2:
scalar_arg_1 = (numpy.isscalar(args[0]) or
getattr(args[0], 'type', None) == scalar_var.type)
getattr(args[0], 'type', None) == tensor.iscalar)
scalar_arg_2 = (numpy.isscalar(args[1]) or
getattr(args[1], 'type', None) == scalar_var.type)
getattr(args[1], 'type', None) == tensor.iscalar)
if scalar_arg_1 and scalar_arg_2:
ret = get_item_scalar(self, args)
else:
......@@ -202,8 +200,8 @@ class _sparse_py_operators:
else:
ret = get_item_2d(self, args)
return ret
class SparseVariable(gof.Variable, _sparse_py_operators):
dtype = property(lambda self: self.type.dtype)
format = property(lambda self: self.type.format)
......@@ -681,35 +679,57 @@ class GetItem2d(gof.op.Op):
assert len(index) in [1, 2]
input_op = [x]
generic_None = theano.gof.Constant(theano.gof.generic, None)
for ind in index:
if isinstance(ind, slice):
# in case of slice is written in theano variable
start = ind.start
stop = ind.stop
# in case of slice is written in python int
if isinstance(start, int):
start = theano.tensor.constant(start)
if isinstance(stop, int):
stop = theano.tensor.constant(stop)
#in case of indexing using python int
#elif isinstance(ind,int):
# start = theano.tensor.constant(ind)
# stop = start + 1
#elif ind.ndim == 0:
# start = ind
# stop = ind + 1
else:
raise NotImplemented(
if ind.step is not None:
raise ValueError((
"Using a slice with non-default step when "
"indexing into a sparse matrix is not supported. "),
ind, ind.step)
# If start or stop are None, make them a Generic constant
# Else, they should be converted to Tensor Variables of
# dimension 0 (scalars) and int/uint dtype.
if start is None:
start = generic_None
else:
if not isinstance(start, gof.Variable):
start = tensor.as_tensor_variable(start)
if not (start.ndim == 0 and start.dtype in tensor.discrete_dtypes):
raise ValueError((
"Impossible to index into a sparse matrix with "
"slice where start=%s" % start),
start.ndim, start.dtype)
if stop is None:
stop = generic_None
else:
if not isinstance(stop, gof.Variable):
stop = tensor.as_tensor_variable(stop)
if not (stop.ndim == 0 and stop.dtype in tensor.discrete_dtypes):
raise ValueError((
"Impossible to index into a sparse matrix with "
"slice where stop=%s" % stop),
stop.ndim, stop.dtype)
elif ((isinstance(ind, gof.Variable) and
getattr(ind, 'ndim', -1) == 0)
or numpy.isscalar(ind)):
raise NotImplementedError(
'Theano has no sparse vector' +
'Use X[a:b,c:d], X[a:b,c:c+1] or X[a:b] instead.')
else:
raise ValueError((
'Advanced indexing is not implemented for sparse '
'matrices. Argument not supported: %s' % ind))
input_op += [start, stop]
if len(index) == 1:
i = theano.gof.Constant(theano.gof.generic, None)
input_op += [i, i]
input_op += [generic_None, generic_None]
return gof.Apply(self, input_op, [x.type()])
......@@ -765,7 +785,7 @@ class GetItemScalar(gof.op.Op):
def perform(self, node, (x, ind1, ind2), (out, )):
assert _is_sparse(x)
out[0] = x[ind1, ind2]
out[0] = theano._asarray(x[ind1, ind2], x.dtype)
def __str__(self):
    """Use the name of the instance's class as its string form."""
    cls = self.__class__
    return cls.__name__
......
......@@ -930,129 +930,171 @@ def test_size():
check()
def test_GetItem2D():
    """Compare symbolic 2-d slicing of sparse matrices with direct slicing.

    For both the 'csc' and the 'csr' format, a random 100x100 binary matrix
    is sliced once through a compiled theano function and once through the
    matrix's own indexing; shapes and dense contents must agree.
    """

    def same_matrix(computed, expected):
        # Both arguments are sparse results: compare the shapes first,
        # then the densified contents.
        assert computed.shape == expected.shape
        assert numpy.all(expected.toarray() == computed.toarray())

    for fmt in ('csc', 'csr'):
        x = theano.sparse.matrix(fmt)
        a, b, c, d = (theano.tensor.iscalar() for _ in range(4))

        # Concrete index values fed to the compiled functions.
        m, n, p, q = 1, 5, 10, 15

        dense = numpy.random.binomial(1, 0.5, (100, 100))
        vx = as_sparse_format(dense, fmt).astype(theano.config.floatX)

        # Every bound of both slices is a symbolic scalar.
        f1 = theano.function([x, a, b, c, d], x[a:b, c:d])
        same_matrix(f1(vx, m, n, p, q), vx[m:n, p:q])

        # Important: based on a discussion with both Fred and James, the
        # forms x[a:b, c], x[a, b:c], x[1:2, 3] and x[50] are deliberately
        # not exercised.  They are not supported because the returned value
        # would be a sparse matrix rather than a sparse vector, which is a
        # deviation from the numpy indexing rule.  This decision is made
        # largely for keeping the consistency between numpy and theano.

        # Only the rows are sliced.
        f4 = theano.function([x, a, b], x[a:b])
        same_matrix(f4(vx, m, n), vx[m:n])

        # Slice bounds given as plain python ints.
        f6 = theano.function([x], x[1:10, 10:20])
        same_matrix(f6(vx), vx[1:10, 10:20])

        # Slice bounds mixing theano variables and python ints.
        f8 = theano.function([x, a, b], x[a:b, 10:20])
        same_matrix(f8(vx, m, n), vx[m:n, 10:20])

        f9 = theano.function([x, a, b], x[1:a, 1:b])
        same_matrix(f9(vx, p, q), vx[1:p, 1:q])
def test_GetItemScalar():
    """Check scalar indexing ``x[i, j]`` of sparse matrices (csc and csr).

    For each format, a random 100x100 binary matrix is indexed through
    compiled theano functions, using symbolic indices, python-int indices
    and a mix of both, and each result is compared with the value obtained
    by indexing the matrix directly.
    """
    for fmt in ('csc', 'csr'):
        # Build the symbolic input in the format under test.  A hard-coded
        # csc input would leave the csr iteration without a csr-typed
        # variable to exercise.
        x = theano.sparse.matrix(fmt, name='x')
        a = theano.tensor.iscalar()
        b = theano.tensor.iscalar()

        m = 50
        n = 50

        vx = as_sparse_format(numpy.random.binomial(1, 0.5, (100, 100)),
                              fmt).astype(theano.config.floatX)

        # Both indices symbolic.
        f1 = theano.function([x, a, b], x[a, b])
        r1 = f1(vx, 10, 10)
        t1 = vx[10, 10]
        assert r1.shape == t1.shape
        assert numpy.all(t1 == r1)

        # Python int row, symbolic column.
        f2 = theano.function([x, a], x[50, a])
        r2 = f2(vx, m)
        t2 = vx[50, m]
        assert r2.shape == t2.shape
        assert numpy.all(t2 == r2)

        # Symbolic row, python int column.
        f3 = theano.function([x, a], x[a, 50])
        r3 = f3(vx, m)
        t3 = vx[m, 50]
        assert r3.shape == t3.shape
        assert numpy.all(t3 == r3)

        # Both indices python ints.
        f4 = theano.function([x], x[50, 50])
        r4 = f4(vx)
        t4 = vx[m, n]
        # Compare the shapes of this case's own results (r4/t4), not the
        # ones of the previous case.
        assert r4.shape == t4.shape
        assert numpy.all(t4 == r4)
class Test_getitem(unittest.TestCase):
    """Tests for indexing (``__getitem__``) on symbolic sparse matrices."""

    def setUp(self):
        # Random generator seeded from utt.fetch_seed(), used to build the
        # test matrices.
        self.rng = numpy.random.RandomState(utt.fetch_seed())

    def test_GetItem2D(self):
        """Slicing must match direct slicing; unsupported indices raise."""

        def same_matrix(computed, expected):
            # Both arguments are sparse results: compare the shapes first,
            # then the densified contents.
            assert computed.shape == expected.shape
            assert numpy.all(expected.toarray() == computed.toarray())

        sparse_formats = ('csc', 'csr')
        for format in sparse_formats:
            x = theano.sparse.matrix(format, name='x')
            a = theano.tensor.iscalar('a')
            b = theano.tensor.iscalar('b')
            c = theano.tensor.iscalar('c')
            d = theano.tensor.iscalar('d')

            # Concrete index values fed to the compiled functions.
            m = 1
            n = 5
            p = 10
            q = 15

            # Non-square on purpose (100 rows, 97 columns).
            vx = as_sparse_format(self.rng.binomial(1, 0.5, (100, 97)),
                                  format).astype(theano.config.floatX)

            # Every bound of both slices is a symbolic scalar.
            f1 = theano.function([x, a, b, c, d], x[a:b, c:d])
            same_matrix(f1(vx, m, n, p, q), vx[m:n, p:q])

            # Important: based on a discussion with both Fred and James,
            # the forms x[a:b, c], x[a, b:c], x[1:2, 3] and x[50] are
            # deliberately not exercised.  They are not supported because
            # the returned value would be a sparse matrix rather than a
            # sparse vector, which is a deviation from the numpy indexing
            # rule.  This decision is made largely for keeping the
            # consistency between numpy and theano.

            # Only the rows are sliced.
            f4 = theano.function([x, a, b], x[a:b])
            same_matrix(f4(vx, m, n), vx[m:n])

            #-----------------------------------------------------------
            # test cases using int indexing instead of theano variable
            f6 = theano.function([x], x[1:10, 10:20])
            same_matrix(f6(vx), vx[1:10, 10:20])

            #----------------------------------------------------------
            # test cases with indexing both with theano variable and int
            f8 = theano.function([x, a, b], x[a:b, 10:20])
            same_matrix(f8(vx, m, n), vx[m:n, 10:20])

            f9 = theano.function([x, a, b], x[1:a, 1:b])
            same_matrix(f9(vx, p, q), vx[1:p, 1:q])

            #-----------------------------------------------------------
            # Test mixing None and variables
            f10 = theano.function([x, a, b], x[:a, :b])
            same_matrix(f10(vx, p, q), vx[:p, :q])

            f11 = theano.function([x, a], x[:, a:])
            same_matrix(f11(vx, p), vx[:, p:])

            #------------------------------------------------------------
            # Invalid things
            # The syntax is a bit awkward because assertRaises forbids
            # the [] shortcut for getitem.
            # x[a:b, c] is not accepted because we don't have sparse vectors
            self.assertRaises(NotImplementedError,
                              x.__getitem__, (slice(a, b), c))

            # x[a:b:step, c:d] is not accepted because scipy silently drops
            # the step (!)
            self.assertRaises(ValueError,
                              x.__getitem__,
                              (slice(a, b, -1), slice(c, d)))
            self.assertRaises(ValueError,
                              x.__getitem__,
                              (slice(a, b), slice(c, d, 2)))

            # Advanced indexing is not supported
            self.assertRaises(ValueError,
                              x.__getitem__,
                              (tensor.ivector('l'), slice(a, b)))

            # Indexing with random things is not supported either
            self.assertRaises(ValueError,
                              x.__getitem__,
                              slice(tensor.fscalar('f'), None))
            self.assertRaises(ValueError,
                              x.__getitem__,
                              (slice(None), slice([1, 3, 4], None)))

    def test_GetItemScalar(self):
        """Scalar indexing ``x[i, j]`` must match direct indexing."""
        sparse_formats = ('csc', 'csr')
        for format in sparse_formats:
            # Build the symbolic input in the format under test.  A
            # hard-coded csc input would leave the csr iteration without a
            # csr-typed variable to exercise.
            x = theano.sparse.matrix(format, name='x')
            a = theano.tensor.iscalar()
            b = theano.tensor.iscalar()

            m = 50
            n = 42

            # Non-square on purpose (97 rows, 100 columns).
            vx = as_sparse_format(self.rng.binomial(1, 0.5, (97, 100)),
                                  format).astype(theano.config.floatX)

            # Both indices symbolic.
            f1 = theano.function([x, a, b], x[a, b])
            r1 = f1(vx, 10, 10)
            t1 = vx[10, 10]
            assert r1.shape == t1.shape
            assert numpy.all(t1 == r1)

            # Python int row, symbolic column.
            f2 = theano.function([x, a], x[50, a])
            r2 = f2(vx, m)
            t2 = vx[50, m]
            assert r2.shape == t2.shape
            assert numpy.all(t2 == r2)

            # Symbolic row, python int column.
            f3 = theano.function([x, a], x[a, 50])
            r3 = f3(vx, m)
            t3 = vx[m, 50]
            assert r3.shape == t3.shape
            assert numpy.all(t3 == r3)

            # Both indices python ints.
            f4 = theano.function([x], x[50, 42])
            r4 = f4(vx)
            t4 = vx[m, n]
            # Compare the shapes of this case's own results (r4/t4), not
            # the ones of the previous case.
            assert r4.shape == t4.shape
            assert numpy.all(t4 == r4)
import theano.tensor.tests.test_sharedvar
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论