Merge pull request #426 from dwf/sparse_infer_shape

Add shape inference for sparse/basic.py Ops

Merge pull request #426 from dwf/sparse_infer_shape
4bb1a152 · nouiz · 7071ddef · 613c3547 · 4bb1a152 · 4bb1a152
--- a/theano/sparse/basic.py
+++ b/theano/sparse/basic.py
@@ -660,6 +660,13 @@ class CSMGrad(gof.op.Op):
            grad = numpy.zeros_like(data)
            grad[self.kmap] = gout_data
            g_data[0] = grad
+
+    def infer_shape(self, node, shapes):  # shapes: one entry per input
+        if self.kmap is None:
+            return [shapes[1]]  # no kmap: same shape as the second input
+        else:
+            return [shapes[0]]  # kmap: same shape as the first input
+
 csm_grad = CSMGrad


@@ -719,8 +726,9 @@ class DenseFromSparse(gof.op.Op):
        else:
            return [SparseFromDense(x.type.format)(gz)]

-    def infer_shape(self, node, (ishape,)):
-        return [ishape]
+    def infer_shape(self, node, shapes):
+        return [shapes[0]]  # the output keeps the shape of the input
+
 dense_from_sparse = DenseFromSparse()


@@ -754,8 +762,9 @@ class SparseFromDense(gof.op.Op):
    def grad(self, (x, ), (gz, )):
        return dense_from_sparse(gz),

-    def infer_shape(self, node, (ishape,)):
-        return [ishape]
+    def infer_shape(self, node, shapes):
+        return [shapes[0]]  # the output keeps the shape of the input
+
 csr_from_dense = SparseFromDense('csr')
 csc_from_dense = SparseFromDense('csc')

@@ -875,7 +884,7 @@ class GetItemScalar(gof.op.Op):
    def __hash__(self):
        return hash(type(self))

-    def infer_shape(self, node, i0_shapes):
+    def infer_shape(self, node, shapes):  # always the empty shape ()
        return [()]

    def make_node(self, x, index):
@@ -939,6 +948,10 @@ class Transpose(gof.op.Op):
    def grad(self, (x,), (gz,)):
        assert _is_sparse_variable(x) and _is_sparse_variable(gz)
        return transpose(gz),
+
+    def infer_shape(self, node, shapes):
+        return [shapes[0][::-1]]  # the input shape, reversed
+
 transpose = Transpose()


@@ -960,6 +973,10 @@ class Neg(gof.op.Op):
    def grad(self, (x,), (gz,)):
        assert _is_sparse_variable(x) and _is_sparse_variable(gz)
        return -gz,
+
+    def infer_shape(self, node, shapes):
+        return [shapes[0]]  # same shape as the input
+
 neg = Neg()


@@ -992,6 +1009,10 @@ class AddSS(gof.op.Op):
        assert _is_sparse_variable(x) and _is_sparse_variable(y)
        assert _is_sparse_variable(gz)
        return gz, gz
+
+    def infer_shape(self, node, shapes):
+        return [shapes[0]]  # same shape as x, the first operand
+
 add_s_s = AddSS()


@@ -1026,6 +1047,10 @@ class AddSD(gof.op.Op):
        assert _is_sparse_variable(x) and _is_dense_variable(y)
        assert _is_dense_variable(gz)
        return sp_ones_like(x) * gz, gz
+
+    def infer_shape(self, node, shapes):
+        return [shapes[0]]  # same shape as x, the first operand
+
 add_s_d = AddSD()


@@ -1083,6 +1108,10 @@ class MulSS(gof.op.Op):

    def grad(self, (x, y), (gz,)):
        return y * gz, x * gz
+
+    def infer_shape(self, node, shapes):
+        return [shapes[0]]  # same shape as x, the first operand
+
 mul_s_s = MulSS()


@@ -1158,6 +1187,10 @@ class MulSD(gof.op.Op):
        assert _is_sparse_variable(x) and _is_dense_variable(y)
        assert _is_sparse_variable(gz)
        return y * gz, x * gz
+
+    def infer_shape(self, node, shapes):
+        return [shapes[0]]  # same shape as x, the first operand
+
 mul_s_d = MulSD()


@@ -1262,6 +1295,10 @@ class StructuredDot(gof.Op):
        # ga = g_out x b.T
        # gb = a.T x g_out
        return [structured_dot_grad(a, b, g_out), structured_dot(a.T, g_out)]
+
+    def infer_shape(self, node, shapes):
+        return [(shapes[0][0], shapes[1][1])]  # (rows of a, columns of b)
+
 _structured_dot = StructuredDot()


@@ -1668,7 +1705,7 @@ class StructuredDotGradCSC(gof.Op):
            ind1 = a_indptr[j + 1]
            for i_idx in xrange(ind0, ind1):
                i = a_indices[i_idx]
-                g_a_data[i_idx] = numpy.dot(g_ab[i], b[j])
+                g_a_data[i_idx] = numpy.dot(g_ab[i], b[j].T)[0, 0]
        out[0] = g_a_data

    def c_code(self, node, name, (_indices, _indptr, _d, _g), (_zout, ), sub):
@@ -1756,6 +1793,10 @@ class StructuredDotGradCSC(gof.Op):
        }

        """ % dict(locals(), **sub)
+
+    def infer_shape(self, node, shapes):
+        return [shapes[0]]  # same shape as the first input (the indices)
+
 sdg_csc = StructuredDotGradCSC()


@@ -1779,7 +1820,7 @@ class StructuredDotGradCSR(gof.Op):
            for j_idx in xrange(ind0, ind1):
                j = a_indices[j_idx]
                # grad is dot product of i-th row of gradient with j-th row of b
-                g_a_data[j_idx] = numpy.dot(g_ab[i], b[j])
+                g_a_data[j_idx] = numpy.dot(g_ab[i], b[j].T)[0, 0]
        out[0] = g_a_data

    def c_code(self, node, name, (_indices, _indptr, _d, _g), (_zout, ), sub):
@@ -1869,6 +1910,8 @@ class StructuredDotGradCSR(gof.Op):

        """ % dict(locals(), **sub)

+    def infer_shape(self, node, shapes):
+        return [shapes[0]]  # same shape as the first input (the indices)

 sdg_csr = StructuredDotGradCSR()


--- a/theano/sparse/tests/test_basic.py
+++ b/theano/sparse/tests/test_basic.py
@@ -20,11 +20,12 @@ if enable_sparse == False:
 from theano.sparse.basic import _is_dense, _is_sparse, _mtypes
 from theano.sparse.basic import _is_dense_variable, _is_sparse_variable
 from theano.sparse import as_sparse_variable, CSC, CSR, CSM, CSMProperties
-from theano.sparse import SparseType, StructuredDotCSC
+from theano.sparse import SparseType, StructuredDotCSC, CSMGrad
+from theano.sparse import AddSS, AddSD, MulSS, MulSD, Transpose, Neg
 from theano.sparse import add, mul, structured_dot, transpose
 from theano.sparse import csc_from_dense, csr_from_dense, dense_from_sparse
 from theano.sparse import Dot, Usmm, UsmmCscDense
-from theano.sparse import get_item_2d, get_item_scalar
+#from theano.sparse import get_item_2d, get_item_scalar

 from theano.tests import unittest_tools as utt
 from theano import tensor
@@ -91,6 +92,125 @@ class T_transpose(unittest.TestCase):
        self.assertTrue(vta.shape == (3, 5))


+class SparseInferShapeTester(unittest.TestCase):
+    """Check the `infer_shape` method of the sparse Ops.
+
+    Each test compiles the graph twice, once for the outputs and once
+    for their shapes only, and compares the two.
+    """
+
+    def setUp(self):
+        utt.seed_rng()
+
+    def _compile_and_check(self, inputs, outputs, numeric_inputs, cls):
+        """Check the inferred shapes of `outputs` against the real ones.
+
+        :param inputs: symbolic inputs of the graph.
+        :param outputs: list of symbolic outputs to check.
+        :param numeric_inputs: values to feed, one per entry of `inputs`.
+        :param cls: Op class that must appear in the graph computing
+            `outputs` and must not appear in the one computing only
+            their shapes.
+        """
+        outputs_function = theano.function(inputs, outputs)
+        shapes_function = theano.function(inputs, [o.shape for o in outputs])
+        # Check that the Op is removed from the compiled function.
+        topo_shape = shapes_function.maker.env.toposort()
+        assert not any(isinstance(t.op, cls) for t in topo_shape)
+        topo_out = outputs_function.maker.env.toposort()
+        assert any(isinstance(t.op, cls) for t in topo_out)
+        # Check that the shape produced agrees with the actual shape.
+        numeric_outputs = outputs_function(*numeric_inputs)
+        numeric_shapes = shapes_function(*numeric_inputs)
+        for out, shape in zip(numeric_outputs, numeric_shapes):
+            assert numpy.all(out.shape == shape)
+
+    def test_getitem_2d(self):
+        raise SkipTest('infer_shape not implemented for GetItem2d yet')
+
+    def test_csm_grad(self):
+        # The gradient w.r.t. the data vector x is expected to use CSMGrad.
+        for sparsetype in ('csr', 'csc'):
+            x = tensor.vector()
+            y = tensor.ivector()
+            z = tensor.ivector()
+            s = tensor.ivector()
+            call = getattr(sp, sparsetype + '_matrix')
+            spm = call(random_lil((300, 400), config.floatX, 5))
+            out = tensor.grad(dense_from_sparse(
+                CSM(sparsetype)(x, y, z, s)
+            ).sum(), x)
+            self._compile_and_check([x, y, z, s],
+                                    [out],
+                                    [spm.data, spm.indices, spm.indptr,
+                                     spm.shape],
+                                    CSMGrad
+                                   )
+
+    def test_transpose(self):
+        x = SparseType('csr', dtype=config.floatX)()
+        self._compile_and_check([x],
+                                [x.T],
+                                [sp.csr_matrix(random_lil((10, 40),
+                                               config.floatX, 3))],
+                                Transpose)
+
+    def test_neg(self):
+        x = SparseType('csr', dtype=config.floatX)()
+        self._compile_and_check([x],
+                                [-x],
+                                [sp.csr_matrix(random_lil((10, 40),
+                                               config.floatX, 3))],
+                                Neg)
+
+    def test_add_ss(self):
+        x = SparseType('csr', dtype=config.floatX)()
+        y = SparseType('csr', dtype=config.floatX)()
+        self._compile_and_check([x, y],
+                                [x + y],
+                                [sp.csr_matrix(random_lil((10, 40),
+                                               config.floatX, 3)),
+                                 sp.csr_matrix(random_lil((10, 40),
+                                               config.floatX, 3))],
+                                AddSS)
+
+    def test_add_sd(self):
+        x = SparseType('csr', dtype=config.floatX)()
+        y = tensor.matrix()
+        # tensor.matrix() has dtype floatX: cast the float64 sample so
+        # that it is also accepted when floatX is float32.
+        dense = numpy.random.randn(10, 40).astype(config.floatX)
+        self._compile_and_check([x, y],
+                                [x + y],
+                                [sp.csr_matrix(random_lil((10, 40),
+                                               config.floatX, 3)),
+                                 dense],
+                                AddSD)
+
+    def test_mul_ss(self):
+        x = SparseType('csr', dtype=config.floatX)()
+        y = SparseType('csr', dtype=config.floatX)()
+        self._compile_and_check([x, y],
+                                [x * y],
+                                [sp.csr_matrix(random_lil((10, 40),
+                                               config.floatX, 3)),
+                                ] * 2,
+                                MulSS)
+
+    def test_mul_sd(self):
+        x = SparseType('csr', dtype=config.floatX)()
+        y = tensor.matrix()
+        # tensor.matrix() has dtype floatX: cast the float64 sample so
+        # that it is also accepted when floatX is float32.
+        dense = numpy.random.randn(10, 40).astype(config.floatX)
+        self._compile_and_check([x, y],
+                                [x * y],
+                                [sp.csr_matrix(random_lil((10, 40),
+                                               config.floatX, 3)),
+                                 dense],
+                                MulSD)
+
+
 class T_AddMul(unittest.TestCase):
    def testAddSS(self):
        self._testSS(add)
@@ -363,6 +461,32 @@ class test_structureddot(unittest.TestCase):
        utt.verify_grad(buildgraph,
                    [spmat.data, mat])

+    def test_infer_shape_csr_csc_grad(self):
+        """Gradients of structured_dot have the shapes of its inputs.
+
+        Only the shapes of the gradients are requested, so no Op of the
+        StructuredDot family should be left in the compiled graph.
+        """
+        for sparsetype in ('csr', 'csc'):
+            a = SparseType(sparsetype, dtype=config.floatX)()
+            b = SparseType(sparsetype, dtype=config.floatX)()
+            grads = tensor.grad(dense_from_sparse(structured_dot(a, b)).sum(),
+                                [a, b])
+            f = theano.function([a, b], [g.shape for g in grads])
+            # Inspect the Op of every node: the nodes themselves are never
+            # instances of this test class, so testing them proves nothing.
+            # NOTE(review): this needs infer_shape on every Op between the
+            # inputs and the gradients (e.g. CSM) -- confirm.
+            op_names = [type(t.op).__name__ for t in f.maker.env.toposort()]
+            assert not any(name.startswith('StructuredDot')
+                           for name in op_names)
+            call = getattr(sp, sparsetype + '_matrix')
+            x = call(random_lil((500, 300), config.floatX, 10))
+            y = call(random_lil((300, 400), config.floatX, 5))
+            out1, out2 = f(x, y)
+            assert numpy.all(out1 == x.shape)
+            assert numpy.all(out2 == y.shape)
+
    def test_upcast(self):

        typenames = ('float32', 'int64', 'int8', 'int32',
@@ -553,6 +667,23 @@ class test_structureddot(unittest.TestCase):
                self.assertFalse(theano_time > overhead_rtol * scipy_time +
                                 overhead_tol)

+    def test_infer_shape(self):
+        """The shape of structured_dot(a, b) is (a.shape[0], b.shape[1]).
+
+        Only the shape is requested, so no Op of the StructuredDot
+        family should be left in the compiled graph.
+        """
+        a = SparseType('csc', dtype=config.floatX)()
+        b = SparseType('csc', dtype=config.floatX)()
+        f = theano.function([a, b], structured_dot(a, b).shape)
+        # Inspect the Op of every node: the nodes themselves are never
+        # instances of this test class, so testing them proves nothing.
+        op_names = [type(t.op).__name__ for t in f.maker.env.toposort()]
+        assert not any(name.startswith('StructuredDot') for name in op_names)
+        x = sp.csc_matrix((4, 5), dtype=config.floatX)
+        y = sp.csc_matrix((5, 3), dtype=config.floatX)
+        assert numpy.all(f(x, y) == numpy.array((4, 3)))
+

 class DotTests(unittest.TestCase):
    def setUp(self):
@@ -1028,7 +1152,7 @@ class Test_getitem(unittest.TestCase):
            assert r10.shape == t10.shape
            assert numpy.all(r10.toarray() == t10.toarray())

-            f11 = theano.function([x, a], x[:,a:])
+            f11 = theano.function([x, a], x[:, a:])
            r11 = f11(vx, p)
            t11 = vx[:, p:]
            assert r11.shape == t11.shape
@@ -1057,7 +1181,7 @@ class Test_getitem(unittest.TestCase):
            self.assertRaises(ValueError,
                    x.__getitem__, slice(tensor.fscalar('f'), None))
            self.assertRaises(ValueError,
-                    x.__getitem__, (slice(None), slice([1,3,4], None)))
+                    x.__getitem__, (slice(None), slice([1, 3, 4], None)))

    def test_GetItemScalar(self):
        sparse_formats = ('csc', 'csr')