提交 25f0dee3 authored 作者: Pascal Lamblin's avatar Pascal Lamblin 提交者: GitHub

Merge pull request #5134 from abergeron/fix_random

Fix a problem where the samples buffer would not be filled.
...@@ -74,6 +74,7 @@ export PYTHONPATH=${PYTHONPATH}:$LIBDIR/lib/python ...@@ -74,6 +74,7 @@ export PYTHONPATH=${PYTHONPATH}:$LIBDIR/lib/python
THEANO_GPUARRAY_TESTS="theano/gpuarray/tests \ THEANO_GPUARRAY_TESTS="theano/gpuarray/tests \
theano/sandbox/tests/test_rng_mrg.py:test_consistency_GPUA_serial \ theano/sandbox/tests/test_rng_mrg.py:test_consistency_GPUA_serial \
theano/sandbox/tests/test_rng_mrg.py:test_consistency_GPUA_parallel \ theano/sandbox/tests/test_rng_mrg.py:test_consistency_GPUA_parallel \
theano/sandbox/tests/test_rng_mrg.py:test_GPUA_full_fill \
theano/scan_module/tests/test_scan.py:T_Scan_Gpuarray" theano/scan_module/tests/test_scan.py:T_Scan_Gpuarray"
FLAGS="init_gpu_device=$DEVICE,gpuarray.preallocate=1000,mode=FAST_RUN" FLAGS="init_gpu_device=$DEVICE,gpuarray.preallocate=1000,mode=FAST_RUN"
THEANO_FLAGS=${FLAGS} time nosetests -v --with-xunit --xunit-file=theanogpuarray_tests.xml ${THEANO_GPUARRAY_TESTS} THEANO_FLAGS=${FLAGS} time nosetests -v --with-xunit --xunit-file=theanogpuarray_tests.xml ${THEANO_GPUARRAY_TESTS}
...@@ -1033,21 +1033,18 @@ class GPUA_mrg_uniform(GpuKernelBase, mrg_uniform_base): ...@@ -1033,21 +1033,18 @@ class GPUA_mrg_uniform(GpuKernelBase, mrg_uniform_base):
n_streams = n_elements; n_streams = n_elements;
{ {
void *args[4];
size_t ls = 0, gs = 0; size_t ls = 0, gs = 0;
args[0] = %(o_sample)s->ga.data; int err = GpuKernel_sched(&%(kname)s, n_streams, &ls, &gs);
args[1] = %(o_rstate)s->ga.data;
args[2] = &n_elements;
args[3] = &n_streams;
int err = GpuKernel_sched(&%(kname)s, n_elements, &ls, &gs);
if (err != GA_NO_ERROR) { if (err != GA_NO_ERROR) {
PyErr_Format(PyExc_RuntimeError, "GpuKernel_sched: %%s\\n", PyErr_Format(PyExc_RuntimeError, "GpuKernel_sched: %%s\\n",
GpuKernel_error(&%(kname)s, err)); GpuKernel_error(&%(kname)s, err));
%(fail)s %(fail)s
} }
err = GpuKernel_call(&%(kname)s, 1, &ls, &gs, 0, args); // Make sure we run as many blocks as we need to cover the whole n_streams
gs = (n_streams + ls - 1)/ls;
err = mrg_uniform_call(1, &ls, &gs, 0, %(o_sample)s->ga.data, %(o_rstate)s->ga.data, n_elements, n_streams);
if (err != GA_NO_ERROR) { if (err != GA_NO_ERROR) {
PyErr_Format(PyExc_RuntimeError, "GpuKernel_call: %%s\\n", PyErr_Format(PyExc_RuntimeError, "mrg_uniform_call: %%s\\n",
GpuKernel_error(&%(kname)s, err)); GpuKernel_error(&%(kname)s, err));
%(fail)s %(fail)s
} }
...@@ -1055,7 +1052,7 @@ class GPUA_mrg_uniform(GpuKernelBase, mrg_uniform_base): ...@@ -1055,7 +1052,7 @@ class GPUA_mrg_uniform(GpuKernelBase, mrg_uniform_base):
""" % locals() """ % locals()
def c_code_cache_version(self): def c_code_cache_version(self):
return (11,) return (12,)
def guess_n_streams(size, warn=False): def guess_n_streams(size, warn=False):
......
...@@ -361,13 +361,9 @@ def test_GPU_nstreams_limit(): ...@@ -361,13 +361,9 @@ def test_GPU_nstreams_limit():
def test_consistency_GPUA_serial(): def test_consistency_GPUA_serial():
""" # Verify that the random numbers generated by GPUA_mrg_uniform, serially,
Verify that the random numbers generated by GPUA_mrg_uniform, serially, # are the same as the reference (Java) implementation by L'Ecuyer et al.
are the same as the reference (Java) implementation by L'Ecuyer et al. from theano.gpuarray.tests.config import mode_with_gpu as mode
"""
from theano.gpuarray.tests.test_basic_ops import \
mode_with_gpu as mode
from theano.gpuarray.type import gpuarray_shared_constructor from theano.gpuarray.type import gpuarray_shared_constructor
seed = 12345 seed = 12345
...@@ -415,14 +411,10 @@ def test_consistency_GPUA_serial(): ...@@ -415,14 +411,10 @@ def test_consistency_GPUA_serial():
def test_consistency_GPUA_parallel(): def test_consistency_GPUA_parallel():
""" # Verify that the random numbers generated by GPUA_mrg_uniform, in
Verify that the random numbers generated by GPUA_mrg_uniform, in # parallel, are the same as the reference (Java) implementation by
parallel, are the same as the reference (Java) implementation by # L'Ecuyer et al.
L'Ecuyer et al. from theano.gpuarray.tests.config import mode_with_gpu as mode
"""
from theano.gpuarray.tests.test_basic_ops import \
mode_with_gpu as mode
from theano.gpuarray.type import gpuarray_shared_constructor from theano.gpuarray.type import gpuarray_shared_constructor
seed = 12345 seed = 12345
...@@ -468,6 +460,29 @@ def test_consistency_GPUA_parallel(): ...@@ -468,6 +460,29 @@ def test_consistency_GPUA_parallel():
assert(numpy.allclose(samples, java_samples)) assert(numpy.allclose(samples, java_samples))
def test_GPUA_full_fill():
    # Regression test: the GPU kernel used to leave the tail of large
    # sample buffers unfilled.  Draw a big uniform sample on the GPU and
    # check it matches the CPU implementation element-wise.
    import theano.gpuarray.tests.config
    from theano.gpuarray.type import gpuarray_shared_constructor

    # A large buffer is required to reproduce the original problem on GPU.
    shape = (10, 1000)
    rng = MRG_RandomStreams(234, use_cuda=False)

    # CPU reference path.
    cpu_uniform = rng.uniform(shape, nstreams=60 * 256)
    f_cpu = theano.function([], cpu_uniform)

    # GPU path, seeded from the same stream state the CPU draw used.
    rstate = gpuarray_shared_constructor(
        rng.state_updates[-1][0].get_value())
    updated_state, gpu_sample = rng_mrg.GPUA_mrg_uniform.new(
        rstate, ndim=None, dtype='float32', size=shape)
    rstate.default_update = updated_state
    f_gpu = theano.function([], gpu_sample)

    utt.assert_allclose(f_cpu(), f_gpu())
def basictest(f, steps, sample_size, prefix="", allow_01=False, inputs=None, def basictest(f, steps, sample_size, prefix="", allow_01=False, inputs=None,
target_avg=0.5, target_std=None, mean_rtol=0.01, std_tol=0.01): target_avg=0.5, target_std=None, mean_rtol=0.01, std_tol=0.01):
if inputs is None: if inputs is None:
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论