Merge pull request #4066 from cooijmanstim/big_batched_dot

GpuBatchedDot: streams implementation (WIP)

Merge pull request #4066 from cooijmanstim/big_batched_dot
08857dc5 · abergeron · 3b1c665f · 96ef4da1 · 08857dc5 · 08857dc5
--- a/theano/sandbox/cuda/blas.py
+++ b/theano/sandbox/cuda/blas.py
--- a/theano/sandbox/cuda/tests/test_blas.py
+++ b/theano/sandbox/cuda/tests/test_blas.py
@@ -48,6 +48,9 @@ class TestBatchedDot(unittest_tools.InferShapeTester):
    mode = mode_with_gpu
    def test_batched_dot_correctness(self):
+        # test both implementations
+        for threshold in [0, 100]:
+            batched_dot = GpuBatchedDot(stream_threshold=threshold)
            def cmp(a_shp, b_shp):
@@ -109,8 +112,9 @@ class TestBatchedDot(unittest_tools.InferShapeTester):
        self.assertRaises(RuntimeError, fail, (5,4,3), (5,2,2))
    def test_batched_dot_gradient(self):
+        for threshold in [0, 100]:
            unittest_tools.verify_grad(
-            batched_dot,
+                GpuBatchedDot(stream_threshold=threshold),
                [numpy.random.randn(5,7,2).astype(numpy.float32),
                 numpy.random.randn(5,2,6).astype(numpy.float32)],
                mode=mode_with_gpu)