Add a test for the op version, sparse_block_gemv_ds (no test for its gradient yet).

493e71a4 · Arnaud Bergeron · fabf1fdf · 493e71a4 · 493e71a4
--- a/theano/sandbox/cuda/blocksparse.py
+++ b/theano/sandbox/cuda/blocksparse.py
@@ -39,13 +39,13 @@ def gemm_batched(Al, Bl, Cl, m, n, k, lda, ldb, ldc,


 def gemv(alpha, A, x, beta, y):
-    assert A.shape[0] == x.shape[0]
-    assert A.shape[1] == y.shape[0]
+    assert A.shape[1] == x.shape[0]
+    assert A.shape[0] == y.shape[0]

    handle = scikits.cuda.misc._global_cublas_handle

    cublas.cublasSgemv(handle, 't', A.shape[1], A.shape[0], alpha,
-                       A.gpudata, A.strides[1], x.gpudata, x.strides[0],
+                       A.gpudata, A.strides[0], x.gpudata, x.strides[0],
                       beta, y.gpudata, y.strides[0])


@@ -90,6 +90,7 @@ class SparseBlockGemvDS(GpuOp):

    def perform(self, node, inputs, outputs):
        o, W, h, inputIdx, outputIdx = inputs
+        out = outputs[0]

        if not self.inplace:
            o = o.copy()
@@ -98,7 +99,7 @@ class SparseBlockGemvDS(GpuOp):
            out_id = outputIdx[j]
            for i in range(h.shape[0]):
                inp_id = inputIdx[i]
-                gemv(numpy.float32(1.0), W[out_id, inp_id],
+                gemv(numpy.float32(1.0), W[inp_id, out_id],
                     h[i], numpy.float32(1.0), o[j])

        out[0] = o

--- a/theano/sandbox/cuda/tests/test_blocksparse.py
+++ b/theano/sandbox/cuda/tests/test_blocksparse.py
@@ -5,7 +5,8 @@ import theano.tests.unittest_tools as utt
 import numpy
 from numpy.random import randn

-from theano.sandbox.cuda.blocksparse import sparse_block_dot_DS
+from theano.sandbox.cuda.blocksparse import (sparse_block_dot_DS,
+                                             sparse_block_gemv_ds)

 def blocksparse_data():
    nInputBlock = 128
@@ -55,3 +56,22 @@ def test_blocksparse():

    utt.assert_allclose(ref_out, th_out)

+
+def test_blocksparse_op():
+    b = tensor.fmatrix()
+    W = tensor.ftensor4()
+    h = tensor.fmatrix()
+    iIdx = tensor.lvector()
+    oIdx = tensor.lvector()
+
+
+    o = sparse_block_gemv_ds(b.take(oIdx, axis=0), W, h, iIdx, oIdx)
+
+    f = theano.function([W, h, iIdx, b, oIdx], o)
+
+    W_val, h_val, iIdx_val, b_val, oIdx_val = blocksparse_data()
+
+    th_out = f(W_val, h_val, iIdx_val, b_val, oIdx_val)
+    ref_out = blocksparse(W_val, h_val, iIdx_val, b_val, oIdx_val)
+
+    utt.assert_allclose(ref_out, th_out)