提交 4bb1a152 authored 作者: nouiz's avatar nouiz

Merge pull request #426 from dwf/sparse_infer_shape

Add shape inference for sparse/basic.py Ops
...@@ -660,6 +660,13 @@ class CSMGrad(gof.op.Op): ...@@ -660,6 +660,13 @@ class CSMGrad(gof.op.Op):
grad = numpy.zeros_like(data) grad = numpy.zeros_like(data)
grad[self.kmap] = gout_data grad[self.kmap] = gout_data
g_data[0] = grad g_data[0] = grad
def infer_shape(self, node, shapes):
    """Infer the shape of the single output from the input shapes.

    :param node: the Apply node being inspected (unused).
    :param shapes: sequence with one shape per input of the node.
    :return: a one-element list holding the output shape: the shape of
        the second input when ``self.kmap`` is ``None``, otherwise the
        shape of the first input.
    """
    # NOTE(review): presumably input 0 is the original ``data`` vector and
    # input 1 the incoming gradient data; confirm against make_node.
    if self.kmap is None:
        return [shapes[1]]
    return [shapes[0]]
csm_grad = CSMGrad csm_grad = CSMGrad
...@@ -719,8 +726,9 @@ class DenseFromSparse(gof.op.Op): ...@@ -719,8 +726,9 @@ class DenseFromSparse(gof.op.Op):
else: else:
return [SparseFromDense(x.type.format)(gz)] return [SparseFromDense(x.type.format)(gz)]
def infer_shape(self, node, (ishape,)): def infer_shape(self, node, shapes):
return [ishape] return [shapes[0]]
dense_from_sparse = DenseFromSparse() dense_from_sparse = DenseFromSparse()
...@@ -754,8 +762,9 @@ class SparseFromDense(gof.op.Op): ...@@ -754,8 +762,9 @@ class SparseFromDense(gof.op.Op):
def grad(self, (x, ), (gz, )): def grad(self, (x, ), (gz, )):
return dense_from_sparse(gz), return dense_from_sparse(gz),
def infer_shape(self, node, (ishape,)): def infer_shape(self, node, shapes):
return [ishape] return [shapes[0]]
csr_from_dense = SparseFromDense('csr') csr_from_dense = SparseFromDense('csr')
csc_from_dense = SparseFromDense('csc') csc_from_dense = SparseFromDense('csc')
...@@ -875,7 +884,7 @@ class GetItemScalar(gof.op.Op): ...@@ -875,7 +884,7 @@ class GetItemScalar(gof.op.Op):
def __hash__(self): def __hash__(self):
return hash(type(self)) return hash(type(self))
def infer_shape(self, node, i0_shapes): def infer_shape(self, node, shapes):
return [()] return [()]
def make_node(self, x, index): def make_node(self, x, index):
...@@ -939,6 +948,10 @@ class Transpose(gof.op.Op): ...@@ -939,6 +948,10 @@ class Transpose(gof.op.Op):
def grad(self, (x,), (gz,)): def grad(self, (x,), (gz,)):
assert _is_sparse_variable(x) and _is_sparse_variable(gz) assert _is_sparse_variable(x) and _is_sparse_variable(gz)
return transpose(gz), return transpose(gz),
def infer_shape(self, node, shapes):
    """Return the first input's shape with its dimensions reversed.

    :param node: the Apply node being inspected (unused).
    :param shapes: sequence with one shape per input of the node.
    :return: a one-element list holding the reversed input shape.
    """
    return [shapes[0][::-1]]
transpose = Transpose() transpose = Transpose()
...@@ -960,6 +973,10 @@ class Neg(gof.op.Op): ...@@ -960,6 +973,10 @@ class Neg(gof.op.Op):
def grad(self, (x,), (gz,)): def grad(self, (x,), (gz,)):
assert _is_sparse_variable(x) and _is_sparse_variable(gz) assert _is_sparse_variable(x) and _is_sparse_variable(gz)
return -gz, return -gz,
def infer_shape(self, node, shapes):
    """Return the first input's shape as the shape of the only output.

    :param node: the Apply node being inspected (unused).
    :param shapes: sequence with one shape per input of the node.
    :return: a one-element list holding ``shapes[0]`` unchanged.
    """
    return [shapes[0]]
neg = Neg() neg = Neg()
...@@ -992,6 +1009,10 @@ class AddSS(gof.op.Op): ...@@ -992,6 +1009,10 @@ class AddSS(gof.op.Op):
assert _is_sparse_variable(x) and _is_sparse_variable(y) assert _is_sparse_variable(x) and _is_sparse_variable(y)
assert _is_sparse_variable(gz) assert _is_sparse_variable(gz)
return gz, gz return gz, gz
def infer_shape(self, node, shapes):
    """Return the first input's shape as the shape of the only output.

    :param node: the Apply node being inspected (unused).
    :param shapes: sequence with one shape per input of the node.
    :return: a one-element list holding ``shapes[0]`` unchanged.
    """
    # NOTE(review): assumes both operands share one shape, so either
    # would do; confirm that this is enforced where the sum is computed.
    return [shapes[0]]
add_s_s = AddSS() add_s_s = AddSS()
...@@ -1026,6 +1047,10 @@ class AddSD(gof.op.Op): ...@@ -1026,6 +1047,10 @@ class AddSD(gof.op.Op):
assert _is_sparse_variable(x) and _is_dense_variable(y) assert _is_sparse_variable(x) and _is_dense_variable(y)
assert _is_dense_variable(gz) assert _is_dense_variable(gz)
return sp_ones_like(x) * gz, gz return sp_ones_like(x) * gz, gz
def infer_shape(self, node, shapes):
    """Return the first input's shape as the shape of the only output.

    :param node: the Apply node being inspected (unused).
    :param shapes: sequence with one shape per input of the node.
    :return: a one-element list holding ``shapes[0]`` unchanged.
    """
    # NOTE(review): assumes the dense operand has the same shape as the
    # sparse one (input 0); confirm against perform.
    return [shapes[0]]
add_s_d = AddSD() add_s_d = AddSD()
...@@ -1083,6 +1108,10 @@ class MulSS(gof.op.Op): ...@@ -1083,6 +1108,10 @@ class MulSS(gof.op.Op):
def grad(self, (x, y), (gz,)): def grad(self, (x, y), (gz,)):
return y * gz, x * gz return y * gz, x * gz
def infer_shape(self, node, shapes):
    """Return the first input's shape as the shape of the only output.

    :param node: the Apply node being inspected (unused).
    :param shapes: sequence with one shape per input of the node.
    :return: a one-element list holding ``shapes[0]`` unchanged.
    """
    # NOTE(review): assumes both operands share one shape; confirm that
    # this is enforced where the product is computed.
    return [shapes[0]]
mul_s_s = MulSS() mul_s_s = MulSS()
...@@ -1158,6 +1187,10 @@ class MulSD(gof.op.Op): ...@@ -1158,6 +1187,10 @@ class MulSD(gof.op.Op):
assert _is_sparse_variable(x) and _is_dense_variable(y) assert _is_sparse_variable(x) and _is_dense_variable(y)
assert _is_sparse_variable(gz) assert _is_sparse_variable(gz)
return y * gz, x * gz return y * gz, x * gz
def infer_shape(self, node, shapes):
    """Return the first input's shape as the shape of the only output.

    :param node: the Apply node being inspected (unused).
    :param shapes: sequence with one shape per input of the node.
    :return: a one-element list holding ``shapes[0]`` unchanged.
    """
    # NOTE(review): the second (dense) operand's shape is ignored here;
    # presumably the result always follows the sparse operand. Confirm.
    return [shapes[0]]
mul_s_d = MulSD() mul_s_d = MulSD()
...@@ -1262,6 +1295,10 @@ class StructuredDot(gof.Op): ...@@ -1262,6 +1295,10 @@ class StructuredDot(gof.Op):
# ga = g_out x b.T # ga = g_out x b.T
# gb = a.T x g_out # gb = a.T x g_out
return [structured_dot_grad(a, b, g_out), structured_dot(a.T, g_out)] return [structured_dot_grad(a, b, g_out), structured_dot(a.T, g_out)]
def infer_shape(self, node, shapes):
    """Infer the shape of a matrix product from its two operand shapes.

    :param node: the Apply node being inspected (unused).
    :param shapes: sequence with one shape per input of the node; the
        first two entries are indexed as 2-d shapes.
    :return: a one-element list holding ``(rows of input 0,
        columns of input 1)``.
    """
    return [(shapes[0][0], shapes[1][1])]
_structured_dot = StructuredDot() _structured_dot = StructuredDot()
...@@ -1668,7 +1705,7 @@ class StructuredDotGradCSC(gof.Op): ...@@ -1668,7 +1705,7 @@ class StructuredDotGradCSC(gof.Op):
ind1 = a_indptr[j + 1] ind1 = a_indptr[j + 1]
for i_idx in xrange(ind0, ind1): for i_idx in xrange(ind0, ind1):
i = a_indices[i_idx] i = a_indices[i_idx]
g_a_data[i_idx] = numpy.dot(g_ab[i], b[j]) g_a_data[i_idx] = numpy.dot(g_ab[i], b[j].T)[0, 0]
out[0] = g_a_data out[0] = g_a_data
def c_code(self, node, name, (_indices, _indptr, _d, _g), (_zout, ), sub): def c_code(self, node, name, (_indices, _indptr, _d, _g), (_zout, ), sub):
...@@ -1756,6 +1793,10 @@ class StructuredDotGradCSC(gof.Op): ...@@ -1756,6 +1793,10 @@ class StructuredDotGradCSC(gof.Op):
} }
""" % dict(locals(), **sub) """ % dict(locals(), **sub)
def infer_shape(self, node, shapes):
    """Return the first input's shape as the shape of the only output.

    :param node: the Apply node being inspected (unused).
    :param shapes: sequence with one shape per input of the node.
    :return: a one-element list holding ``shapes[0]`` unchanged.
    """
    # NOTE(review): presumably input 0 is the ``indices`` vector, which
    # has one entry per stored value of the gradient data; confirm
    # against make_node.
    return [shapes[0]]
sdg_csc = StructuredDotGradCSC() sdg_csc = StructuredDotGradCSC()
...@@ -1779,7 +1820,7 @@ class StructuredDotGradCSR(gof.Op): ...@@ -1779,7 +1820,7 @@ class StructuredDotGradCSR(gof.Op):
for j_idx in xrange(ind0, ind1): for j_idx in xrange(ind0, ind1):
j = a_indices[j_idx] j = a_indices[j_idx]
# grad is dot product of i-th row of gradient with j-th row of b # grad is dot product of i-th row of gradient with j-th row of b
g_a_data[j_idx] = numpy.dot(g_ab[i], b[j]) g_a_data[j_idx] = numpy.dot(g_ab[i], b[j].T)[0, 0]
out[0] = g_a_data out[0] = g_a_data
def c_code(self, node, name, (_indices, _indptr, _d, _g), (_zout, ), sub): def c_code(self, node, name, (_indices, _indptr, _d, _g), (_zout, ), sub):
...@@ -1869,6 +1910,8 @@ class StructuredDotGradCSR(gof.Op): ...@@ -1869,6 +1910,8 @@ class StructuredDotGradCSR(gof.Op):
""" % dict(locals(), **sub) """ % dict(locals(), **sub)
def infer_shape(self, node, shapes):
    """Return the first input's shape as the shape of the only output.

    :param node: the Apply node being inspected (unused).
    :param shapes: sequence with one shape per input of the node.
    :return: a one-element list holding ``shapes[0]`` unchanged.
    """
    # NOTE(review): presumably input 0 is the ``indices`` vector, which
    # has one entry per stored value of the gradient data; confirm
    # against make_node.
    return [shapes[0]]
sdg_csr = StructuredDotGradCSR() sdg_csr = StructuredDotGradCSR()
......
...@@ -20,11 +20,12 @@ if enable_sparse == False: ...@@ -20,11 +20,12 @@ if enable_sparse == False:
from theano.sparse.basic import _is_dense, _is_sparse, _mtypes from theano.sparse.basic import _is_dense, _is_sparse, _mtypes
from theano.sparse.basic import _is_dense_variable, _is_sparse_variable from theano.sparse.basic import _is_dense_variable, _is_sparse_variable
from theano.sparse import as_sparse_variable, CSC, CSR, CSM, CSMProperties from theano.sparse import as_sparse_variable, CSC, CSR, CSM, CSMProperties
from theano.sparse import SparseType, StructuredDotCSC from theano.sparse import SparseType, StructuredDotCSC, CSMGrad
from theano.sparse import AddSS, AddSD, MulSS, MulSD, Transpose, Neg
from theano.sparse import add, mul, structured_dot, transpose from theano.sparse import add, mul, structured_dot, transpose
from theano.sparse import csc_from_dense, csr_from_dense, dense_from_sparse from theano.sparse import csc_from_dense, csr_from_dense, dense_from_sparse
from theano.sparse import Dot, Usmm, UsmmCscDense from theano.sparse import Dot, Usmm, UsmmCscDense
from theano.sparse import get_item_2d, get_item_scalar #from theano.sparse import get_item_2d, get_item_scalar
from theano.tests import unittest_tools as utt from theano.tests import unittest_tools as utt
from theano import tensor from theano import tensor
...@@ -91,6 +92,103 @@ class T_transpose(unittest.TestCase): ...@@ -91,6 +92,103 @@ class T_transpose(unittest.TestCase):
self.assertTrue(vta.shape == (3, 5)) self.assertTrue(vta.shape == (3, 5))
class SparseInferShapeTester(unittest.TestCase):
    """Check that ``infer_shape`` of sparse Ops matches real output shapes."""

    def setUp(self):
        utt.seed_rng()

    def _compile_and_check(self, inputs, outputs, numeric_inputs, cls):
        """Compile a value function and a shape function and compare them.

        :param inputs: symbolic inputs of the graph.
        :param outputs: list of symbolic outputs whose shapes are checked.
        :param numeric_inputs: concrete values, one per entry of ``inputs``.
        :param cls: Op class that must be present in the value graph and
            absent from the shape-only graph.
        """
        outputs_function = theano.function(inputs, outputs)
        shapes_function = theano.function(inputs,
                                          [o.shape for o in outputs])

        # The Op must be gone from the graph that only computes shapes ...
        topo_shape = shapes_function.maker.env.toposort()
        self.assertFalse(any(isinstance(t.op, cls) for t in topo_shape))
        # ... but still present in the graph that computes the values,
        # otherwise the check above would pass trivially.
        topo_out = outputs_function.maker.env.toposort()
        self.assertTrue(any(isinstance(t.op, cls) for t in topo_out))

        # The inferred shapes must agree with the shapes actually produced.
        numeric_outputs = outputs_function(*numeric_inputs)
        numeric_shapes = shapes_function(*numeric_inputs)
        for out, shape in zip(numeric_outputs, numeric_shapes):
            self.assertTrue(numpy.all(out.shape == shape))

    def test_getitem_2d(self):
        raise SkipTest('infer_shape not implemented for GetItem2d yet')

    def test_csm_grad(self):
        for sparsetype in ('csr', 'csc'):
            x = tensor.vector()
            y = tensor.ivector()
            z = tensor.ivector()
            s = tensor.ivector()
            call = getattr(sp, sparsetype + '_matrix')
            spm = call(random_lil((300, 400), config.floatX, 5))
            out = tensor.grad(dense_from_sparse(
                CSM(sparsetype)(x, y, z, s)
            ).sum(), x)
            self._compile_and_check([x, y, z, s],
                                    [out],
                                    [spm.data, spm.indices, spm.indptr,
                                     spm.shape],
                                    CSMGrad)

    def test_transpose(self):
        x = SparseType('csr', dtype=config.floatX)()
        self._compile_and_check([x],
                                [x.T],
                                [sp.csr_matrix(random_lil((10, 40),
                                                          config.floatX, 3))],
                                Transpose)

    def test_neg(self):
        x = SparseType('csr', dtype=config.floatX)()
        self._compile_and_check([x],
                                [-x],
                                [sp.csr_matrix(random_lil((10, 40),
                                                          config.floatX, 3))],
                                Neg)

    def test_add_ss(self):
        x = SparseType('csr', dtype=config.floatX)()
        y = SparseType('csr', dtype=config.floatX)()
        self._compile_and_check([x, y],
                                [x + y],
                                [sp.csr_matrix(random_lil((10, 40),
                                                          config.floatX, 3)),
                                 sp.csr_matrix(random_lil((10, 40),
                                                          config.floatX, 3))],
                                AddSS)

    def test_add_sd(self):
        x = SparseType('csr', dtype=config.floatX)()
        y = tensor.matrix()
        # randn() always yields float64; cast so the value matches the
        # floatX dtype of ``tensor.matrix()`` when floatX is float32.
        self._compile_and_check([x, y],
                                [x + y],
                                [sp.csr_matrix(random_lil((10, 40),
                                                          config.floatX, 3)),
                                 numpy.random.randn(10, 40).astype(
                                     config.floatX)],
                                AddSD)

    def test_mul_ss(self):
        x = SparseType('csr', dtype=config.floatX)()
        y = SparseType('csr', dtype=config.floatX)()
        # The same matrix object is deliberately passed for both operands.
        self._compile_and_check([x, y],
                                [x * y],
                                [sp.csr_matrix(random_lil((10, 40),
                                                          config.floatX, 3)),
                                 ] * 2,
                                MulSS)

    def test_mul_sd(self):
        x = SparseType('csr', dtype=config.floatX)()
        y = tensor.matrix()
        # randn() always yields float64; cast so the value matches the
        # floatX dtype of ``tensor.matrix()`` when floatX is float32.
        self._compile_and_check([x, y],
                                [x * y],
                                [sp.csr_matrix(random_lil((10, 40),
                                                          config.floatX, 3)),
                                 numpy.random.randn(10, 40).astype(
                                     config.floatX)],
                                MulSD)
class T_AddMul(unittest.TestCase): class T_AddMul(unittest.TestCase):
def testAddSS(self): def testAddSS(self):
self._testSS(add) self._testSS(add)
...@@ -363,6 +461,22 @@ class test_structureddot(unittest.TestCase): ...@@ -363,6 +461,22 @@ class test_structureddot(unittest.TestCase):
utt.verify_grad(buildgraph, utt.verify_grad(buildgraph,
[spmat.data, mat]) [spmat.data, mat])
def test_infer_shape_csr_csc_grad(self):
# For each sparse format, compile a function that returns only the shapes
# of the gradients of structured_dot(a, b) with respect to a and b, then
# compare those shapes with the shapes of the numeric inputs.
for sparsetype in ('csr', 'csc'):
a = SparseType(sparsetype, dtype=config.floatX)()
b = SparseType(sparsetype, dtype=config.floatX)()
grads = tensor.grad(dense_from_sparse(structured_dot(a, b)).sum(),
[a, b])
f = theano.function([a, b], [g.shape for g in grads])
topo = f.maker.env.toposort()
# NOTE(review): this compares the entries returned by toposort() with the
# test case's own class, so it looks like it can never fail. A check of
# the form isinstance(t.op, <Op class>) was probably intended; confirm
# which Ops are expected to be absent from the shape graph before
# tightening it.
assert not any(isinstance(t, self.__class__) for t in topo)
call = getattr(sp, sparsetype + '_matrix')
x = call(random_lil((500, 300), config.floatX, 10))
y = call(random_lil((300, 400), config.floatX, 5))
out1, out2 = f(x, y)
# Each gradient must have the same shape as the input it belongs to.
assert numpy.all(out1 == x.shape)
assert numpy.all(out2 == y.shape)
def test_upcast(self): def test_upcast(self):
typenames = ('float32', 'int64', 'int8', 'int32', typenames = ('float32', 'int64', 'int8', 'int32',
...@@ -553,6 +667,16 @@ class test_structureddot(unittest.TestCase): ...@@ -553,6 +667,16 @@ class test_structureddot(unittest.TestCase):
self.assertFalse(theano_time > overhead_rtol * scipy_time + self.assertFalse(theano_time > overhead_rtol * scipy_time +
overhead_tol) overhead_tol)
def test_infer_shape(self):
    """The shape of ``structured_dot(a, b)`` is computed without the Op.

    Compiles a function returning only the output shape, checks that no
    ``StructuredDot`` node remains in its graph, and compares the result
    with the expected ``(4, 3)`` for a ``(4, 5)`` by ``(5, 3)`` product.
    """
    # Imported locally so this test does not rely on the Op class being
    # part of the module-level imports.
    from theano.sparse.basic import StructuredDot

    a = SparseType('csc', dtype=config.floatX)()
    b = SparseType('csc', dtype=config.floatX)()
    f = theano.function([a, b], structured_dot(a, b).shape)
    topo = f.maker.env.toposort()
    # Inspect each node's ``op``: comparing the nodes themselves with the
    # TestCase class could never match, which made the check vacuous.
    assert not any(isinstance(t.op, StructuredDot) for t in topo)
    x = sp.csc_matrix((4, 5), dtype=config.floatX)
    y = sp.csc_matrix((5, 3), dtype=config.floatX)
    assert numpy.all(f(x, y) == numpy.array((4, 3)))
class DotTests(unittest.TestCase): class DotTests(unittest.TestCase):
def setUp(self): def setUp(self):
...@@ -1028,7 +1152,7 @@ class Test_getitem(unittest.TestCase): ...@@ -1028,7 +1152,7 @@ class Test_getitem(unittest.TestCase):
assert r10.shape == t10.shape assert r10.shape == t10.shape
assert numpy.all(r10.toarray() == t10.toarray()) assert numpy.all(r10.toarray() == t10.toarray())
f11 = theano.function([x, a], x[:,a:]) f11 = theano.function([x, a], x[:, a:])
r11 = f11(vx, p) r11 = f11(vx, p)
t11 = vx[:, p:] t11 = vx[:, p:]
assert r11.shape == t11.shape assert r11.shape == t11.shape
...@@ -1057,7 +1181,7 @@ class Test_getitem(unittest.TestCase): ...@@ -1057,7 +1181,7 @@ class Test_getitem(unittest.TestCase):
self.assertRaises(ValueError, self.assertRaises(ValueError,
x.__getitem__, slice(tensor.fscalar('f'), None)) x.__getitem__, slice(tensor.fscalar('f'), None))
self.assertRaises(ValueError, self.assertRaises(ValueError,
x.__getitem__, (slice(None), slice([1,3,4], None))) x.__getitem__, (slice(None), slice([1, 3, 4], None)))
def test_GetItemScalar(self): def test_GetItemScalar(self):
sparse_formats = ('csc', 'csr') sparse_formats = ('csc', 'csr')
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论