Add tests for BlockSparse gemv and outer

7748ec15 · Alexandre de Brebisson · Xavier Bouthillier · 658bf2ef · 7748ec15 · 7748ec15
--- a/theano/sandbox/cuda/tests/test_blocksparse.py
+++ b/theano/sandbox/cuda/tests/test_blocksparse.py
 import numpy
-from numpy.random import randn
-from unittest import TestCase
 from nose.plugins.skip import SkipTest
 import theano
 from theano import tensor
 import theano.tests.unittest_tools as utt
+import theano.sandbox.tests.test_blocksparse
 import theano.sandbox.cuda as cuda_ndarray
 if not cuda_ndarray.cuda_available:
    raise SkipTest('Optional package cuda disabled')
+from theano.sandbox.cuda.blocksparse import (GpuSparseBlockOuter,
-from theano.sandbox.cuda.basic_ops import (GpuDimShuffle,
+                                             gpu_sparse_block_gemv,
-                                           as_cuda_ndarray_variable)
+                                             gpu_sparse_block_outer)
-from theano.sandbox.cuda.blocksparse import (sparse_block_dot_SS,
-                                             sparse_block_gemv_ss,
-                                             sparse_block_outer_ss,
-                                             sparse_block_outer_ss_inplace,
-                                             SparseBlockOuterSS)
 from theano.sandbox.cuda.var import float32_shared_constructor
@@ -29,187 +21,56 @@ else:
    mode_with_gpu = theano.compile.mode.get_default_mode().including('gpu')
-def setup():
+class BlockSparse_Gemv_and_Outer(
-    utt.seed_rng()
+        theano.sandbox.tests.test_blocksparse.BlockSparse_Gemv_and_Outer):
+    def setUp(self):
+        utt.seed_rng()
-def blocksparse_data():
+        self.mode = mode_with_gpu.excluding('constant_folding')
-    nInputBlock = 128
+        self.gemv_op = gpu_sparse_block_gemv
-    nOutputBlock = 64
+        self.outer_op = gpu_sparse_block_outer
-    inputSize = 40
-    outputSize = 30
-    inputWindowSize = 7
-    outputWindowSize = 9
-    batchSize = 2
-    input = randn(batchSize, inputWindowSize, inputSize).astype('float32')
-    permutation = numpy.random.permutation
-    inputIndice = numpy.vstack(permutation(nInputBlock)[:inputWindowSize]
-                               for _ in range(batchSize))
-    outputIndice = numpy.vstack(permutation(nOutputBlock)[:outputWindowSize]
-                                for _ in range(batchSize))
-    weight = randn(nInputBlock, nOutputBlock,
-                   inputSize, outputSize).astype('float32')
-    bias = randn(nOutputBlock, outputSize).astype('float32')
-    return weight, input, inputIndice, bias, outputIndice
-def blocksparse(W, h, iIdx, b, oIdx):
-    o = b.take(oIdx, axis=0)
-    for b in range(o.shape[0]):
-        for j in range(o.shape[1]):
-            outputIdx = oIdx[b, j]
-            for i in range(h.shape[1]):
-                inputIdx = iIdx[b, i]
-                w = W[inputIdx, outputIdx]
-                # this below is a gemv I think
-                o[b, j, :] += numpy.dot(h[b, i], w)
-    return o
-def test_blocksparse():
-    b = tensor.fmatrix()
-    W = tensor.ftensor4()
-    h = tensor.ftensor3()
-    iIdx = tensor.lmatrix()
-    oIdx = tensor.lmatrix()
-    o = sparse_block_dot_SS(W, h, iIdx, b, oIdx)
-    f = theano.function([W, h, iIdx, b, oIdx], o, mode=mode_with_gpu)
-    W_val, h_val, iIdx_val, b_val, oIdx_val = blocksparse_data()
-    th_out = f(W_val, h_val, iIdx_val, b_val, oIdx_val)
-    ref_out = blocksparse(W_val, h_val, iIdx_val, b_val, oIdx_val)
-    utt.assert_allclose(ref_out, th_out)
-test_blocksparse.setup = setup
-# test the fortan order for W (which can happen in the grad for some graphs).
-def test_blocksparseF():
-    b = tensor.fmatrix()
-    W = tensor.ftensor4()
-    h = tensor.ftensor3()
-    iIdx = tensor.lmatrix()
-    oIdx = tensor.lmatrix()
-    o = sparse_block_dot_SS(GpuDimShuffle((False, False, False, False),
-                                          (0, 1, 3, 2))(
-                                              as_cuda_ndarray_variable(W)),
-                            h, iIdx, b, oIdx)
-    f = theano.function([W, h, iIdx, b, oIdx], o, mode=mode_with_gpu)
-    W_val, h_val, iIdx_val, b_val, oIdx_val = blocksparse_data()
-    th_out = f(numpy.swapaxes(W_val, 2, 3), h_val, iIdx_val, b_val, oIdx_val)
-    ref_out = blocksparse(W_val, h_val, iIdx_val, b_val, oIdx_val)
-    utt.assert_allclose(ref_out, th_out)
-def test_blocksparse_grad():
-    h_val = randn(1, 2, 3).astype('float32')
-    iIdx_val = numpy.random.permutation(3)[:2][None, :]
-    oIdx_val = numpy.random.permutation(3)[:2][None, :]
-    W_val = randn(3, 3, 3, 4).astype('float32')
-    b_val = randn(3, 4).astype('float32')
-    iIdx = theano.tensor.constant(iIdx_val)
-    oIdx = theano.tensor.constant(oIdx_val)
-    def f(b, h, W):
-        return sparse_block_gemv_ss(b.take(oIdx, axis=0), W, h, iIdx, oIdx)
-    utt.verify_grad(f, [b_val, h_val, W_val], mode=mode_with_gpu)
-def test_blocksparse_grad_1():
-    # This tests that we correctly handle cases where dimensions are 1.
-    h_val = randn(1, 1, 1).astype('float32')
-    iIdx_val = numpy.random.permutation(1)[:1][None, :]
-    oIdx_val = numpy.random.permutation(1)[:1][None, :]
-    W_val = randn(1, 1, 1, 1).astype('float32')
-    b_val = randn(1, 1).astype('float32')
-    iIdx = theano.tensor.constant(iIdx_val)
-    oIdx = theano.tensor.constant(oIdx_val)
-    def f(b, h, W):
-        return sparse_block_gemv_ss(b.take(oIdx, axis=0), W, h, iIdx, oIdx)
-    utt.verify_grad(f, [b_val, h_val, W_val], mode=mode_with_gpu)
-def test_blocksparse_grad_shape():
-    b = tensor.fmatrix()
-    W = tensor.ftensor4()
-    h = tensor.ftensor3()
-    iIdx = tensor.lmatrix()
-    oIdx = tensor.lmatrix()
-    o = sparse_block_gemv_ss(b.take(oIdx, axis=0), W, h, iIdx, oIdx)
-    go = theano.grad(o.sum(), [b, W, h])
-    f = theano.function([W, h, iIdx, b, oIdx], go, mode=mode_with_gpu)
-    W_val, h_val, iIdx_val, b_val, oIdx_val = blocksparse_data()
-    # just make sure that it runs correcly and all the shapes are ok.
-    b_g, W_g, h_g = f(W_val, h_val, iIdx_val, b_val, oIdx_val)
-    assert b_g.shape == b_val.shape
-    assert h_g.shape == h_val.shape
-    assert W_g.shape == W_val.shape
-# This test is temporarily disabled since we disabled the output_merge
+    # This test is temporarily disabled since we disabled the output_merge
-# and alpha_merge optimizations for blocksparse due to brokeness.
+    # and alpha_merge optimizations for blocksparse due to brokenness.
-# Re-enable when those are re-added.
+    # Re-enable when those are re-added.
-def Xtest_blocksparse_grad_merge():
+    def Xtest_blocksparse_grad_merge(self):
-    b = tensor.fmatrix()
+        b = tensor.fmatrix()
-    h = tensor.ftensor3()
+        h = tensor.ftensor3()
-    iIdx = tensor.lmatrix()
+        iIdx = tensor.lmatrix()
-    oIdx = tensor.lmatrix()
+        oIdx = tensor.lmatrix()
-    W_val, h_val, iIdx_val, b_val, oIdx_val = blocksparse_data()
+        W_val, h_val, iIdx_val, b_val, oIdx_val = self.gemv_data()
-    W = float32_shared_constructor(W_val)
+        W = float32_shared_constructor(W_val)
-    o = sparse_block_gemv_ss(b.take(oIdx, axis=0), W, h, iIdx, oIdx)
+        o = gpu_sparse_block_gemv(b.take(oIdx, axis=0), W, h, iIdx, oIdx)
-    gW = theano.grad(o.sum(), W)
+        gW = theano.grad(o.sum(), W)
-    lr = numpy.asarray(0.05, dtype='float32')
+        lr = numpy.asarray(0.05, dtype='float32')
-    upd = W - lr * gW
+        upd = W - lr * gW
-    f1 = theano.function([h, iIdx, b, oIdx], updates=[(W, upd)],
+        f1 = theano.function([h, iIdx, b, oIdx], updates=[(W, upd)],
-                         mode=mode_with_gpu)
+                             mode=mode_with_gpu)
-    # Make sure the lr update was merged.
+        # Make sure the lr update was merged.
-    assert isinstance(f1.maker.fgraph.outputs[0].owner.op, SparseBlockOuterSS)
+        assert isinstance(f1.maker.fgraph.outputs[0].owner.op,
+                          GpuSparseBlockOuter)
-    # Exclude the merge optimizations.
+        # Exclude the merge optimizations.
-    mode = mode_with_gpu.excluding('local_merge_blocksparse_alpha')
+        mode = mode_with_gpu.excluding('local_merge_blocksparse_alpha')
-    mode = mode.excluding('local_merge_blocksparse_output')
+        mode = mode.excluding('local_merge_blocksparse_output')
-    f2 = theano.function([h, iIdx, b, oIdx], updates=[(W, upd)], mode=mode)
+        f2 = theano.function([h, iIdx, b, oIdx], updates=[(W, upd)], mode=mode)
-    # Make sure the lr update is not merged.
+        # Make sure the lr update is not merged.
-    assert not isinstance(f2.maker.fgraph.outputs[0].owner.op,
+        assert not isinstance(f2.maker.fgraph.outputs[0].owner.op,
-                          SparseBlockOuterSS)
+                              GpuSparseBlockOuter)
-    f2(h_val, iIdx_val, b_val, oIdx_val)
+        f2(h_val, iIdx_val, b_val, oIdx_val)
-    W_ref = W.get_value()
+        W_ref = W.get_value()
-    # reset the var
+        # reset the var
-    W.set_value(W_val)
+        W.set_value(W_val)
-    f1(h_val, iIdx_val, b_val, oIdx_val)
+        f1(h_val, iIdx_val, b_val, oIdx_val)
-    W_opt = W.get_value()
+        W_opt = W.get_value()
-    utt.assert_allclose(W_ref, W_opt)
+        utt.assert_allclose(W_ref, W_opt)
--- a/theano/sandbox/tests/__init__.py
+++ b/theano/sandbox/tests/__init__.py
--- a/theano/sandbox/tests/test_blocksparse.py
+++ b/theano/sandbox/tests/test_blocksparse.py
+"""
+    Tests for block sparse dot
+"""
+import unittest
+import time
+import numpy
+from numpy.random import randn
+import theano
+from theano import tensor
+import theano.tests.unittest_tools as utt
+from theano.sandbox.blocksparse import sparse_block_dot, cpu_sparse_block_gemv, \
+    cpu_sparse_block_outer
+class BlockSparse_Gemv_and_Outer(unittest.TestCase):
+    def runTest(self):
+        pass
+    def setUp(self):
+        utt.seed_rng()
+        self.mode = theano.compile.get_default_mode().excluding(
+            'constant_folding'
+        )
+        self.gemv_op = cpu_sparse_block_gemv
+        self.outer_op = cpu_sparse_block_outer
+    @staticmethod
+    def gemv_data():
+        nInputBlock = 8
+        nOutputBlock = 7
+        inputSize = 6
+        outputSize = 5
+        inputWindowSize = 4
+        outputWindowSize = 3
+        batchSize = 2
+#        nInputBlock = 2
+#        nOutputBlock = 2
+#        inputSize = 2
+#        outputSize = 2
+#        inputWindowSize = 1
+#        outputWindowSize = 1
+#        batchSize = 1
+        input = randn(batchSize, inputWindowSize, inputSize).astype('float32')
+        permutation = numpy.random.permutation
+        inputIndice = numpy.vstack(permutation(nInputBlock)[:inputWindowSize]
+                                   for _ in range(batchSize)).astype('int32')
+        outputIndice = numpy.vstack(
+            permutation(nOutputBlock)[:outputWindowSize]
+            for _ in range(batchSize)).astype('int32')
+        weight = randn(nInputBlock, nOutputBlock,
+                       inputSize, outputSize).astype('float32')
+        bias = randn(nOutputBlock, outputSize).astype('float32')
+        return weight, input, inputIndice, bias, outputIndice
+    @staticmethod
+    def outer_data():
+        nInputBlock = 8
+        nOutputBlock = 7
+        xSize = 6
+        ySize = 5
+        xWindowSize = 4
+        yWindowSize = 3
+        batchSize = 2
+        o = randn(nInputBlock, nOutputBlock, xSize, ySize).astype('float32')
+        x = randn(batchSize, xWindowSize, xSize).astype('float32')
+        y = randn(batchSize, yWindowSize, ySize).astype('float32')
+        randint = numpy.random.randint
+        xIdx = numpy.vstack(randint(0, nInputBlock, size=xWindowSize)
+                            for _ in range(batchSize)).astype('int32')
+        yIdx = numpy.vstack(randint(0, nOutputBlock, size=yWindowSize)
+                            for _ in range(batchSize)).astype('int32')
+        return o, x, y, xIdx, yIdx
+    @staticmethod
+    def gemv_numpy(o, W, h, iIdx, oIdx):
+        for b in range(o.shape[0]):
+            for j in range(o.shape[1]):
+                outputIdx = oIdx[b, j]
+                for i in range(h.shape[1]):
+                    inputIdx = iIdx[b, i]
+                    w = W[inputIdx, outputIdx]
+                    o[b, j, :] += numpy.dot(h[b, i], w)
+        return o
+    @staticmethod
+    def gemv_numpy2(o, W, h, iIdx, oIdx):
+        from numpy import ix_
+        for b in range(o.shape[0]):
+            w = W[ix_(iIdx[b], oIdx[b])].swapaxes(1, 2)
+            w = w.reshape((w.shape[0] * w.shape[1], w.shape[2] * w.shape[3]))
+            o[b] += numpy.dot(h[b].ravel(), w).reshape(o.shape[1:])
+        return o
+    @staticmethod
+    def gemv_numpy3(o, W, h, iIdx, oIdx):
+        from numpy import ix_
+        for b in range(o.shape[0]):
+            w = W[ix_(iIdx[b], oIdx[b])]
+            # o[b] += (h[b][:, None, :, None] * w).sum(axis=(0, 2))
+            # o[b] += numpy.tensordot(h[b], w, [(0,1),(0,2)])
+            o[b] += numpy.einsum('ik,ijkl', h[b], w)
+        return o
+    @staticmethod
+    def gemv_data2():
+        nInputBlock = 100
+        nOutputBlock = 100
+        inputSize = 50
+        outputSize = 50
+        inputWindowSize = 30
+        outputWindowSize = 30
+        batchSize = 1
+        input = randn(batchSize, inputWindowSize, inputSize).astype('float32')
+        permutation = numpy.random.permutation
+        inputIndice = numpy.vstack(permutation(nInputBlock)[:inputWindowSize]
+                                   for _ in range(batchSize)).astype('int32')
+        outputIndice = numpy.vstack(
+            permutation(nOutputBlock)[:outputWindowSize]
+            for _ in range(batchSize)).astype('int32')
+        weight = randn(nInputBlock, nOutputBlock,
+                       inputSize, outputSize).astype('float32')
+        bias = randn(nOutputBlock, outputSize).astype('float32')
+        return weight, input, inputIndice, bias, outputIndice
+    @staticmethod
+    def compare():
+        W_val, h_val, iIdx_val, b_val, oIdx_val = \
+            BlockSparse_Gemv_and_Outer.gemv_data2()
+        start = time.clock()
+        ref_out = BlockSparse_Gemv_and_Outer.gemv_numpy(
+             b_val.take(oIdx_val, axis=0), W_val, h_val, iIdx_val, oIdx_val)
+        v1 = time.clock()
+        ref_out_2 = BlockSparse_Gemv_and_Outer.gemv_numpy2(
+             b_val.take(oIdx_val, axis=0), W_val, h_val, iIdx_val, oIdx_val)
+        v2 = time.clock()
+        ref_out_3 = BlockSparse_Gemv_and_Outer.gemv_numpy3(
+             b_val.take(oIdx_val, axis=0), W_val, h_val, iIdx_val, oIdx_val)
+        v3 = time.clock()
+        print v1 - start
+        print v2 - v1
+        print v3 - v2
+        # utt.assert_allclose(ref_out, ref_out_2)
+    @staticmethod
+    def outer_numpy(o, x, y, xIdx, yIdx):
+        for b in range(x.shape[0]):
+            for i in range(xIdx.shape[1]):
+                for j in range(yIdx.shape[1]):
+                    o[xIdx[b, i], yIdx[b, j]] += numpy.outer(x[b,  i, :],
+                                                             y[b, j, :])
+        return o
+    def test_sparseblockdot(self):
+        """
+        Compares the numpy version of sparseblockgemv to sparse_block_dot.
+        """
+        b = tensor.fmatrix()
+        W = tensor.ftensor4()
+        h = tensor.ftensor3()
+        iIdx = tensor.imatrix()
+        oIdx = tensor.imatrix()
+        o = sparse_block_dot(W, h, iIdx, b, oIdx)
+        f = theano.function([W, h, iIdx, b, oIdx], o, mode=self.mode)
+        W_val, h_val, iIdx_val, b_val, oIdx_val = \
+            BlockSparse_Gemv_and_Outer.gemv_data()
+        th_out = f(W_val, h_val, iIdx_val, b_val, oIdx_val)
+        ref_out = BlockSparse_Gemv_and_Outer.gemv_numpy(
+             b_val.take(oIdx_val, axis=0), W_val, h_val, iIdx_val, oIdx_val)
+        utt.assert_allclose(ref_out, th_out)
+    def test_sparseblockgemv(self):
+        """
+        Compares the numpy and theano versions of sparseblockgemv.
+        """
+        b = tensor.fmatrix()
+        W = tensor.ftensor4()
+        h = tensor.ftensor3()
+        iIdx = tensor.imatrix()
+        oIdx = tensor.imatrix()
+        o = self.gemv_op(b.take(oIdx, axis=0), W, h, iIdx, oIdx)
+        f = theano.function([W, h, iIdx, b, oIdx], o, mode=self.mode)
+        W_val, h_val, iIdx_val, b_val, oIdx_val = \
+            BlockSparse_Gemv_and_Outer.gemv_data()
+        th_out = f(W_val, h_val, iIdx_val, b_val, oIdx_val)
+        ref_out = BlockSparse_Gemv_and_Outer.gemv_numpy(
+             b_val.take(oIdx_val, axis=0), W_val, h_val, iIdx_val, oIdx_val)
+        utt.assert_allclose(ref_out, th_out)
+    def test_sparseblockgemvF(self):
+        """
+            Test the Fortran order for W (which can happen in the grad for some
+            graphs).
+        """
+        b = tensor.fmatrix()
+        W = tensor.ftensor4()
+        h = tensor.ftensor3()
+        iIdx = tensor.imatrix()
+        oIdx = tensor.imatrix()
+        o = self.gemv_op(b.take(oIdx, axis=0),
+            tensor.DimShuffle((False, False, False, False),
+                              (0, 1, 3, 2))(tensor.as_tensor_variable(W)),
+            h, iIdx, oIdx)
+        f = theano.function([W, h, iIdx, b, oIdx], o, mode=self.mode)
+        W_val, h_val, iIdx_val, b_val, oIdx_val = \
+            BlockSparse_Gemv_and_Outer.gemv_data()
+        th_out = f(numpy.swapaxes(W_val, 2, 3), h_val, iIdx_val, b_val,
+                   oIdx_val)
+        ref_out = BlockSparse_Gemv_and_Outer.gemv_numpy(
+             b_val.take(oIdx_val, axis=0), W_val, h_val, iIdx_val, oIdx_val)
+        utt.assert_allclose(ref_out, th_out)
+    def test_sparseblockgemv_grad(self):
+        W_val, h_val, iIdx_val, b_val, oIdx_val = \
+            BlockSparse_Gemv_and_Outer.gemv_data()
+        h_val = randn(1, 1, 1).astype('float32')
+        iIdx_val = numpy.random.permutation(1)[:1][None, :]
+        oIdx_val = numpy.random.permutation(1)[:1][None, :]
+        W_val = randn(1, 1, 1, 1).astype('float32')
+        b_val = randn(1, 1).astype('float32')
+        iIdx = theano.tensor.constant(iIdx_val)
+        oIdx = theano.tensor.constant(oIdx_val)
+        def metaop(b, h, W):
+            return sparse_block_dot(W, h, iIdx, b, oIdx)
+        def op(b, h, W):
+            return self.gemv_op(b.take(oIdx, axis=0), W, h, iIdx, oIdx)
+        utt.verify_grad(metaop, [b_val, h_val, W_val], mode=self.mode)
+        utt.verify_grad(op, [b_val, h_val, W_val], mode=self.mode)
+    def test_sparseblockgemv_grad_1(self):
+        """
+            Test that we correctly handle cases where dimensions are 1.
+        """
+        h_val = randn(1, 1, 1).astype('float32')
+        iIdx_val = numpy.random.permutation(1)[:1][None, :]
+        oIdx_val = numpy.random.permutation(1)[:1][None, :]
+        W_val = randn(1, 1, 1, 1).astype('float32')
+        b_val = randn(1, 1).astype('float32')
+        iIdx = theano.tensor.constant(iIdx_val)
+        oIdx = theano.tensor.constant(oIdx_val)
+        def metaop(b, h, W):
+            return sparse_block_dot(W, h, iIdx, b, oIdx)
+        def op(b, h, W):
+            return self.gemv_op(b.take(oIdx, axis=0), W, h, iIdx, oIdx)
+        utt.verify_grad(metaop, [b_val, h_val, W_val], mode=self.mode)
+        utt.verify_grad(op, [b_val, h_val, W_val], mode=self.mode)
+    def test_sparseblockgemv_grad_shape(self):
+        b = tensor.fmatrix()
+        W = tensor.ftensor4()
+        h = tensor.ftensor3()
+        iIdx = tensor.imatrix()
+        oIdx = tensor.imatrix()
+        o = self.gemv_op(b.take(oIdx, axis=0), W, h, iIdx, oIdx)
+        go = theano.grad(o.sum(), [b, W, h])
+        f = theano.function([W, h, iIdx, b, oIdx], go, mode=self.mode)
+        W_val, h_val, iIdx_val, b_val, oIdx_val = \
+            BlockSparse_Gemv_and_Outer.gemv_data()
+        # just make sure that it runs correctly and all the shapes are ok.
+        b_g, W_g, h_g = f(W_val, h_val, iIdx_val, b_val, oIdx_val)
+        assert b_g.shape == b_val.shape
+        assert h_g.shape == h_val.shape
+        assert W_g.shape == W_val.shape
+    def test_sparseblockouter(self):
+        o = tensor.ftensor4()
+        x = tensor.ftensor3()
+        y = tensor.ftensor3()
+        xIdx = tensor.imatrix()
+        yIdx = tensor.imatrix()
+        out = self.outer_op(o, x, y, xIdx, yIdx)
+        f = theano.function([o, x, y, xIdx, yIdx], out, on_unused_input="warn")
+        o_val, x_val, y_val, xIdx_val, yIdx_val = \
+            BlockSparse_Gemv_and_Outer.outer_data()
+        th_out = f(o_val, x_val, y_val, xIdx_val, yIdx_val)
+        ref_out = BlockSparse_Gemv_and_Outer.outer_numpy(
+            o_val, x_val, y_val, xIdx_val, yIdx_val)
+        utt.assert_allclose(ref_out, th_out)