提交 c2aa9c43 authored 作者: test_rng_mrg.py's avatar test_rng_mrg.py

Fred pointed out that unit tests should not explicitly compare C & Python

implementations (on CPU). DebugMode already takes care of this.
上级 90ddfc45
...@@ -535,6 +535,7 @@ class GPU_mrg_uniform(mrg_uniform_base): ...@@ -535,6 +535,7 @@ class GPU_mrg_uniform(mrg_uniform_base):
{ {
unsigned int threads_per_block = std::min(n_streams, (unsigned int)NUM_VECTOR_OP_THREADS_PER_BLOCK); unsigned int threads_per_block = std::min(n_streams, (unsigned int)NUM_VECTOR_OP_THREADS_PER_BLOCK);
unsigned int n_blocks = std::min(ceil_intdiv(n_streams, threads_per_block), (unsigned int)NUM_VECTOR_OP_BLOCKS); unsigned int n_blocks = std::min(ceil_intdiv(n_streams, threads_per_block), (unsigned int)NUM_VECTOR_OP_BLOCKS);
if (threads_per_block * n_blocks < n_streams) if (threads_per_block * n_blocks < n_streams)
{ {
fprintf(stderr, "WARNING: unused streams above %%i (Tune GPU_mrg get_n_streams)\\n", threads_per_block * n_blocks ); fprintf(stderr, "WARNING: unused streams above %%i (Tune GPU_mrg get_n_streams)\\n", threads_per_block * n_blocks );
...@@ -630,8 +631,11 @@ class MRG_RandomStreams(object): ...@@ -630,8 +631,11 @@ class MRG_RandomStreams(object):
for s in size: for s in size:
r *= s r *= s
if r > 6: if r > 6:
return r/6 # chosen as fastest for rbm_benchmark r = r/6 # chosen as fastest for rbm_benchmark
else:
# make sure its a multiple of 256 so that CPU and GPU work the same way
r = numpy.ceil(r/256.) * 256
return r return r
print >> sys.stderr, "MRG_RandomStreams Can't determine #streams from size (%s), guessing 30*256"%str(size) print >> sys.stderr, "MRG_RandomStreams Can't determine #streams from size (%s), guessing 30*256"%str(size)
...@@ -662,6 +666,7 @@ class MRG_RandomStreams(object): ...@@ -662,6 +666,7 @@ class MRG_RandomStreams(object):
if nstreams is None: if nstreams is None:
nstreams = self.n_streams(size) nstreams = self.n_streams(size)
if self.use_cuda and dtype=='float32': if self.use_cuda and dtype=='float32':
rstates = self.get_substream_rstates(nstreams) rstates = self.get_substream_rstates(nstreams)
rstates = rstates.flatten() rstates = rstates.flatten()
......
...@@ -350,7 +350,7 @@ def test_uniform(): ...@@ -350,7 +350,7 @@ def test_uniform():
for node in f.maker.env.toposort()]) for node in f.maker.env.toposort()])
theano.printing.debugprint(f) theano.printing.debugprint(f)
cpu_c_out = f(*input) cpu_c_out = f(*input)
#pickle.dump(cpu_c_out, open('debug_rng_cpu_c.pkl','w')) pickle.dump(cpu_c_out, open('debug_rng_cpu_c.pkl','w'))
print 'random?[:10]\n' print 'random?[:10]\n'
print cpu_c_out[0,0:10] print cpu_c_out[0,0:10]
...@@ -358,24 +358,6 @@ def test_uniform(): ...@@ -358,24 +358,6 @@ def test_uniform():
#print 'random?[-1,-10:]\n', cpu_c_out[-1,-10:] #print 'random?[-1,-10:]\n', cpu_c_out[-1,-10:]
basictest(f, steps, sample_size, prefix='mrg cpu (C)', inputs=input) basictest(f, steps, sample_size, prefix='mrg cpu (C)', inputs=input)
#### TEST CPU (PYTHON) IMPLEMENTATION ####
print ''
print 'ON CPU (Python) with size=(%s):'%str(size)
R = MRG_RandomStreams(234, use_cuda=False)
u = R.uniform(size=size)
f = theano.function(var_input, u, mode=theano.Mode(linker='py'))
assert any([isinstance(node.op,theano.sandbox.rng_mrg.mrg_uniform)
for node in f.maker.env.toposort()])
theano.printing.debugprint(f)
cpu_py_out = f(*input)
#pickle.dump(cpu_py_out, open('debug_rng_cpu_py.pkl','w'))
print 'random?[:10]\n'
print cpu_py_out[0,0:10]
print cpu_py_out[-1,0:10]
#print 'random?[-1,-10:]\n', cpu_py_out[-1,-10:]
#basictest(f, steps, sample_size, prefix='mrg cpu (Python)', inputs=input)
if mode!='FAST_COMPILE' and cuda_available: if mode!='FAST_COMPILE' and cuda_available:
print '' print ''
print 'ON GPU with size=(%s):'%str(size) print 'ON GPU with size=(%s):'%str(size)
...@@ -389,7 +371,7 @@ def test_uniform(): ...@@ -389,7 +371,7 @@ def test_uniform():
for node in f.maker.env.toposort()]) for node in f.maker.env.toposort()])
theano.printing.debugprint(f) theano.printing.debugprint(f)
gpu_out = numpy.asarray(f(*input)) gpu_out = numpy.asarray(f(*input))
#pickle.dump(gpu_out, open('debug_rng_gpu.pkl','w')) pickle.dump(gpu_out, open('debug_rng_gpu.pkl','w'))
print 'random?[:10]\n' print 'random?[:10]\n'
print gpu_out[0,0:10] print gpu_out[0,0:10]
...@@ -397,9 +379,7 @@ def test_uniform(): ...@@ -397,9 +379,7 @@ def test_uniform():
#print 'random?[-1,-10:]\n', gpu_out[-1,-10:] #print 'random?[-1,-10:]\n', gpu_out[-1,-10:]
basictest(f, steps, sample_size, prefix='mrg gpu', inputs=input) basictest(f, steps, sample_size, prefix='mrg gpu', inputs=input)
numpy.testing.assert_array_almost_equal(cpu_c_out, gpu_out, decimal=6)
numpy.testing.assert_array_almost_equal(cpu_c_out, cpu_py_out, decimal=4)
numpy.testing.assert_array_almost_equal(cpu_c_out, gpu_out, decimal=4)
print '' print ''
print 'ON CPU w Numpy with size=(%s):'%str(size) print 'ON CPU w Numpy with size=(%s):'%str(size)
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论