Fred pointed out that unit tests should not explicitly compare C & Python

implementations (on CPU). DebugMode already takes care of this.

Fred pointed out that unit tests should not explicitly compare C & Python
c2aa9c43 · test_rng_mrg.py · 90ddfc45 · c2aa9c43 · c2aa9c43
--- a/theano/sandbox/rng_mrg.py
+++ b/theano/sandbox/rng_mrg.py
@@ -535,6 +535,7 @@ class GPU_mrg_uniform(mrg_uniform_base):
        {
            unsigned int threads_per_block = std::min(n_streams, (unsigned int)NUM_VECTOR_OP_THREADS_PER_BLOCK);
            unsigned int n_blocks = std::min(ceil_intdiv(n_streams, threads_per_block), (unsigned int)NUM_VECTOR_OP_BLOCKS);
            if (threads_per_block * n_blocks < n_streams)
            {
                fprintf(stderr, "WARNING: unused streams above %%i (Tune GPU_mrg get_n_streams)\\n", threads_per_block * n_blocks );
@@ -630,8 +631,11 @@ class MRG_RandomStreams(object):
            for s in size:
                r *= s
            if r > 6:
-                return r/6 # chosen as fastest for rbm_benchmark
+                r = r/6 # chosen as fastest for rbm_benchmark
-            else:
+            # make sure its a multiple of 256 so that CPU and GPU work the same way
+            r = numpy.ceil(r/256.) * 256
            return r
        print >> sys.stderr, "MRG_RandomStreams Can't determine #streams from size (%s), guessing 30*256"%str(size)
@@ -662,6 +666,7 @@ class MRG_RandomStreams(object):
        if nstreams is None:
            nstreams = self.n_streams(size)
        if self.use_cuda and dtype=='float32':
            rstates = self.get_substream_rstates(nstreams)
            rstates = rstates.flatten()

--- a/theano/sandbox/test_rng_mrg.py
+++ b/theano/sandbox/test_rng_mrg.py
@@ -350,7 +350,7 @@ def test_uniform():
                    for node in f.maker.env.toposort()])
        theano.printing.debugprint(f)
        cpu_c_out = f(*input)
-        #pickle.dump(cpu_c_out, open('debug_rng_cpu_c.pkl','w'))
+        pickle.dump(cpu_c_out, open('debug_rng_cpu_c.pkl','w'))
        print 'random?[:10]\n'
        print cpu_c_out[0,0:10]
@@ -358,24 +358,6 @@ def test_uniform():
        #print 'random?[-1,-10:]\n', cpu_c_out[-1,-10:]
        basictest(f, steps, sample_size, prefix='mrg cpu (C)', inputs=input)
-        #### TEST CPU (PYTHON) IMPLEMENTATION ####
-        print ''
-        print 'ON CPU (Python) with size=(%s):'%str(size)
-        R = MRG_RandomStreams(234, use_cuda=False)
-        u = R.uniform(size=size)
-        f = theano.function(var_input, u, mode=theano.Mode(linker='py'))
-        assert any([isinstance(node.op,theano.sandbox.rng_mrg.mrg_uniform) 
-                    for node in f.maker.env.toposort()])
-        theano.printing.debugprint(f)
-        cpu_py_out = f(*input)
-        #pickle.dump(cpu_py_out, open('debug_rng_cpu_py.pkl','w'))
-        print 'random?[:10]\n'
-        print cpu_py_out[0,0:10]
-        print cpu_py_out[-1,0:10]
-        #print 'random?[-1,-10:]\n', cpu_py_out[-1,-10:]
-        #basictest(f, steps, sample_size, prefix='mrg cpu (Python)', inputs=input)
        if mode!='FAST_COMPILE' and cuda_available:
            print ''
            print 'ON GPU with size=(%s):'%str(size)
@@ -389,7 +371,7 @@ def test_uniform():
                        for node in f.maker.env.toposort()])
            theano.printing.debugprint(f)
            gpu_out = numpy.asarray(f(*input))
-            #pickle.dump(gpu_out, open('debug_rng_gpu.pkl','w'))
+            pickle.dump(gpu_out, open('debug_rng_gpu.pkl','w'))
            print 'random?[:10]\n'
            print gpu_out[0,0:10]
@@ -397,9 +379,7 @@ def test_uniform():
            #print 'random?[-1,-10:]\n', gpu_out[-1,-10:]
            basictest(f, steps, sample_size, prefix='mrg  gpu', inputs=input)
+        numpy.testing.assert_array_almost_equal(cpu_c_out, gpu_out, decimal=6)
-        numpy.testing.assert_array_almost_equal(cpu_c_out, cpu_py_out, decimal=4)
-        numpy.testing.assert_array_almost_equal(cpu_c_out, gpu_out, decimal=4)
        print ''
        print 'ON CPU w Numpy with size=(%s):'%str(size)