提交 25f0dee3 authored 作者: Pascal Lamblin's avatar Pascal Lamblin 提交者: GitHub

Merge pull request #5134 from abergeron/fix_random

Fix a problem where the samples buffer would not be filled.
...@@ -74,6 +74,7 @@ export PYTHONPATH=${PYTHONPATH}:$LIBDIR/lib/python ...@@ -74,6 +74,7 @@ export PYTHONPATH=${PYTHONPATH}:$LIBDIR/lib/python
THEANO_GPUARRAY_TESTS="theano/gpuarray/tests \ THEANO_GPUARRAY_TESTS="theano/gpuarray/tests \
theano/sandbox/tests/test_rng_mrg.py:test_consistency_GPUA_serial \ theano/sandbox/tests/test_rng_mrg.py:test_consistency_GPUA_serial \
theano/sandbox/tests/test_rng_mrg.py:test_consistency_GPUA_parallel \ theano/sandbox/tests/test_rng_mrg.py:test_consistency_GPUA_parallel \
theano/sandbox/tests/test_rng_mrg.py:test_GPUA_full_fill \
theano/scan_module/tests/test_scan.py:T_Scan_Gpuarray" theano/scan_module/tests/test_scan.py:T_Scan_Gpuarray"
FLAGS="init_gpu_device=$DEVICE,gpuarray.preallocate=1000,mode=FAST_RUN" FLAGS="init_gpu_device=$DEVICE,gpuarray.preallocate=1000,mode=FAST_RUN"
THEANO_FLAGS=${FLAGS} time nosetests -v --with-xunit --xunit-file=theanogpuarray_tests.xml ${THEANO_GPUARRAY_TESTS} THEANO_FLAGS=${FLAGS} time nosetests -v --with-xunit --xunit-file=theanogpuarray_tests.xml ${THEANO_GPUARRAY_TESTS}
...@@ -1033,21 +1033,18 @@ class GPUA_mrg_uniform(GpuKernelBase, mrg_uniform_base): ...@@ -1033,21 +1033,18 @@ class GPUA_mrg_uniform(GpuKernelBase, mrg_uniform_base):
n_streams = n_elements; n_streams = n_elements;
{ {
void *args[4];
size_t ls = 0, gs = 0; size_t ls = 0, gs = 0;
args[0] = %(o_sample)s->ga.data; int err = GpuKernel_sched(&%(kname)s, n_streams, &ls, &gs);
args[1] = %(o_rstate)s->ga.data;
args[2] = &n_elements;
args[3] = &n_streams;
int err = GpuKernel_sched(&%(kname)s, n_elements, &ls, &gs);
if (err != GA_NO_ERROR) { if (err != GA_NO_ERROR) {
PyErr_Format(PyExc_RuntimeError, "GpuKernel_sched: %%s\\n", PyErr_Format(PyExc_RuntimeError, "GpuKernel_sched: %%s\\n",
GpuKernel_error(&%(kname)s, err)); GpuKernel_error(&%(kname)s, err));
%(fail)s %(fail)s
} }
err = GpuKernel_call(&%(kname)s, 1, &ls, &gs, 0, args); // Make sure we run as many blocks as we need to cover the whole n_streams
gs = (n_streams + ls - 1)/ls;
err = mrg_uniform_call(1, &ls, &gs, 0, %(o_sample)s->ga.data, %(o_rstate)s->ga.data, n_elements, n_streams);
if (err != GA_NO_ERROR) { if (err != GA_NO_ERROR) {
PyErr_Format(PyExc_RuntimeError, "GpuKernel_call: %%s\\n", PyErr_Format(PyExc_RuntimeError, "mrg_uniform_call: %%s\\n",
GpuKernel_error(&%(kname)s, err)); GpuKernel_error(&%(kname)s, err));
%(fail)s %(fail)s
} }
...@@ -1055,7 +1052,7 @@ class GPUA_mrg_uniform(GpuKernelBase, mrg_uniform_base): ...@@ -1055,7 +1052,7 @@ class GPUA_mrg_uniform(GpuKernelBase, mrg_uniform_base):
""" % locals() """ % locals()
def c_code_cache_version(self): def c_code_cache_version(self):
return (11,) return (12,)
def guess_n_streams(size, warn=False): def guess_n_streams(size, warn=False):
......
...@@ -361,13 +361,9 @@ def test_GPU_nstreams_limit(): ...@@ -361,13 +361,9 @@ def test_GPU_nstreams_limit():
def test_consistency_GPUA_serial(): def test_consistency_GPUA_serial():
""" # Verify that the random numbers generated by GPUA_mrg_uniform, serially,
Verify that the random numbers generated by GPUA_mrg_uniform, serially, # are the same as the reference (Java) implementation by L'Ecuyer et al.
are the same as the reference (Java) implementation by L'Ecuyer et al. from theano.gpuarray.tests.config import mode_with_gpu as mode
"""
from theano.gpuarray.tests.test_basic_ops import \
mode_with_gpu as mode
from theano.gpuarray.type import gpuarray_shared_constructor from theano.gpuarray.type import gpuarray_shared_constructor
seed = 12345 seed = 12345
...@@ -415,14 +411,10 @@ def test_consistency_GPUA_serial(): ...@@ -415,14 +411,10 @@ def test_consistency_GPUA_serial():
def test_consistency_GPUA_parallel(): def test_consistency_GPUA_parallel():
""" # Verify that the random numbers generated by GPUA_mrg_uniform, in
Verify that the random numbers generated by GPUA_mrg_uniform, in # parallel, are the same as the reference (Java) implementation by
parallel, are the same as the reference (Java) implementation by # L'Ecuyer et al.
L'Ecuyer et al. from theano.gpuarray.tests.config import mode_with_gpu as mode
"""
from theano.gpuarray.tests.test_basic_ops import \
mode_with_gpu as mode
from theano.gpuarray.type import gpuarray_shared_constructor from theano.gpuarray.type import gpuarray_shared_constructor
seed = 12345 seed = 12345
...@@ -468,6 +460,29 @@ def test_consistency_GPUA_parallel(): ...@@ -468,6 +460,29 @@ def test_consistency_GPUA_parallel():
assert(numpy.allclose(samples, java_samples)) assert(numpy.allclose(samples, java_samples))
def test_GPUA_full_fill():
    # Regression test: the GPU kernel used to leave the tail of large
    # sample buffers unfilled.  Draw a big uniform sample on the GPU and
    # check it matches the CPU implementation element-wise.
    import theano.gpuarray.tests.config
    from theano.gpuarray.type import gpuarray_shared_constructor

    # A large buffer is required to reproduce the original problem on GPU.
    shape = (10, 1000)
    rng = MRG_RandomStreams(234, use_cuda=False)

    # CPU reference path.
    cpu_uniform = rng.uniform(shape, nstreams=60 * 256)
    f_cpu = theano.function([], cpu_uniform)

    # GPU path, seeded from the same stream state the CPU draw used.
    rstate = gpuarray_shared_constructor(
        rng.state_updates[-1][0].get_value())
    updated_state, gpu_sample = rng_mrg.GPUA_mrg_uniform.new(
        rstate, ndim=None, dtype='float32', size=shape)
    rstate.default_update = updated_state
    f_gpu = theano.function([], gpu_sample)

    utt.assert_allclose(f_cpu(), f_gpu())
def basictest(f, steps, sample_size, prefix="", allow_01=False, inputs=None, def basictest(f, steps, sample_size, prefix="", allow_01=False, inputs=None,
target_avg=0.5, target_std=None, mean_rtol=0.01, std_tol=0.01): target_avg=0.5, target_std=None, mean_rtol=0.01, std_tol=0.01):
if inputs is None: if inputs is None:
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论