merge

58783627 · James Bergstra · a51e621e · 31380660 · 58783627 · 58783627
--- a/doc/library/scan.txt
+++ b/doc/library/scan.txt
@@ -32,7 +32,7 @@ The equivalent Theano code would be
  # Symbolic description of the result
  result,updates = theano.scan(fn = lambda x_tm1,A: x_tm1*A,\
-                       info_outputs = T.ones_like(A),\
+                       outputs_info = T.ones_like(A),\
                       non_sequences  = A, \
                       n_steps        = k)
@@ -112,7 +112,7 @@ the Theano variables needed we construct our RNN as follows :
   ([x_vals, y_vals],updates) = theano.scan(fn = oneStep, \
                                sequences    = dict(input = u, taps= [-4,-0]), \
-                                info_outputs = [dict(initial = x0, taps = [-3,-1]),y0], \
+                                outputs_info = [dict(initial = x0, taps = [-3,-1]),y0], \
                                non_sequences  = [W,W_in_1,W_in_2,W_feedback, W_out])
        # for second input y, scan adds -1 in output_taps by default
@@ -155,7 +155,7 @@ the following:
 sample = theano.tensor.vector()
- values, updates = theano.scan( OneStep, info_outputs = sample, n_steps = 10 )
+ values, updates = theano.scan( OneStep, outputs_info = sample, n_steps = 10 )
 gibbs10 = theano.function([sample], values[-1], updates = updates)

--- a/theano/compile/profilemode.py
+++ b/theano/compile/profilemode.py
@@ -340,7 +340,7 @@ class ProfileMode(Mode):
            print "<fct name> <input name> <input type> <str input>"
            for fct in fct_call.keys():
                for i in fct.input_storage:
-                    if i.type.dtype=='float64':
+                    if hasattr(i.type, 'dtype') and i.type.dtype=='float64':
                        print fct.name, i.name, i.type, i
 register_mode('PROFILE_MODE',ProfileMode())

--- a/theano/sandbox/cuda/basic_ops.py
+++ b/theano/sandbox/cuda/basic_ops.py
@@ -832,7 +832,7 @@ class GpuSum(Op):
                            NUM_VECTOR_OP_THREADS_PER_BLOCK));
            %(threads_y)s
            %(threads_z)s
-            dim3 n_blocks(CudaNdarray_HOST_DIMS(%(x)s)[0]);
+            dim3 n_blocks(std::min(CudaNdarray_HOST_DIMS(%(x)s)[0],NUM_VECTOR_OP_BLOCKS));
            %(makecall)s
        }
        """ %locals()

--- a/theano/sandbox/rng_mrg.py
+++ b/theano/sandbox/rng_mrg.py
@@ -49,7 +49,7 @@ MASK12 = numpy.int32(511)       #2^9 - 1
 MASK13 = numpy.int32(16777215)  #2^24 - 1
 MASK2 = numpy.int32(65535)      #2^16 - 1
 MULT2 = numpy.int32(21069)
-NORM = 4.656612873077392578125e-10;
+NORM = 4.656612873077392578125e-10; #1./2^31
 A1p0 = numpy.asarray([[0, 4194304, 129], [1, 0, 0], [0, 1, 0]])
 A2p0 = numpy.asarray([[32768, 0, 32769], [1, 0, 0], [0, 1, 0]])
@@ -593,6 +593,7 @@ class MRG_RandomStreams(object):
        return rval
    def n_streams(self, size):
+        # TODO: a smart way of choosing the number of streams
        if isinstance(size, (tuple, list)):
            r = 1
            for s in size:
@@ -601,12 +602,7 @@ class MRG_RandomStreams(object):
                return r/6 # chosen as fastest for rbm_benchmark
            else:
                return r
-        try:
-            rval =  int(size)
-            assert rval > 0
-            return rval
-        except:
-            pass
        print >> sys.stderr, "MRG_RandomStreams Can't determine #streams from size (%s), guessing 30*256"%str(size)
        return 30*256
@@ -616,7 +612,7 @@ class MRG_RandomStreams(object):
        node_rstate.default_update = new_rstate
        return sample
-    def uniform(self, size=None, low=0.0, high=1.0, ndim=None, dtype=config.floatX):
+    def uniform(self, size=None, low=0.0, high=1.0, ndim=None, dtype=config.floatX, nstreams=None):
        """
        Sample a tensor of given size whose element from a uniform
        distribution between low and high.
@@ -625,8 +621,10 @@ class MRG_RandomStreams(object):
        ndim may be a plain integer to supplement the missing
        information.
        """
+        if nstreams is None:
+            nstreams = self.n_streams(size)
        if self.use_cuda and dtype=='float32':
-            rstates = self.get_substream_rstates(self.n_streams(size))
+            rstates = self.get_substream_rstates(nstreams)
            rstates = rstates.flatten()
            # HACK - we use fact that int32 and float32 have same size to 
            # sneak ints into the CudaNdarray type.
@@ -643,11 +641,11 @@ class MRG_RandomStreams(object):
            u = self.pretty_return(node_rstate, 
                    *GPU_mrg_uniform.new(node_rstate, ndim, dtype, size))
        else:
-            node_rstate = shared(self.get_substream_rstates(self.n_streams(size)))
+            node_rstate = shared(self.get_substream_rstates(nstreams))
-            u = self.pretty_return(node_rstate, 
+            u = self.pretty_return(node_rstate,
                    *mrg_uniform.new(node_rstate, ndim, dtype, size))
        r = u * (high-low) + low
        if u.type.broadcastable != r.type.broadcastable:
            raise NotImplementedError( 'Increase the size to match the broadcasting pattern of `low` and `high` arguments')
        return  r

--- a/theano/sandbox/samples_MRG31k3p_12_7_5.txt
+++ b/theano/sandbox/samples_MRG31k3p_12_7_5.txt
+0.7353244530968368
+0.6142074400559068
+0.11007806099951267
+0.6487741703167558
+0.36619443260133266
+0.2585685825906694
+0.9489980279468
+0.4309556516818702
+0.12257590936496854
+0.9760319022461772
+0.6940806899219751
+0.18046841165050864
+0.003993193618953228
+0.5351603352464736
+0.02472442388534546
+0.7705746139399707
+0.8138928869739175
+0.9650539481081069
+0.24507411010563374
+0.35767574002966285
+0.4939101580530405
+0.9027785388752818
+0.27498403564095497
+0.03848231676965952
+0.3081609820947051
+0.9062023567967117
+0.009030417073518038
+0.7953705741092563
+0.5061718439683318
+0.5975547162815928
+0.5435514179989696
+0.330895590595901
+0.49919482320547104
+0.9409166998229921
+0.8276205519214272
+0.5180770065635443
+0.2319392478093505
+0.36197659047320485
+0.11120751267299056
+0.5018561617471278
+0.47852187464013696
+0.7188052111305296
+0.3030327311716974
+0.6756376498378813
+0.03624899685382843
+0.34987151669338346
+0.031225718092173338
+0.06772322440519929
+0.06820952938869596
+0.9987128847278655
+0.08330700965598226
+0.9731874465942383
+0.6345655219629407
+0.7169904578477144
+0.5793502484448254
+0.7396790678612888
+0.9926023166626692
+0.7522463691420853
+0.6768838302232325
+0.3253784184344113
+0.05375300580635667
+0.4912636987864971
+0.6485021142289042
+0.3043024237267673
+0.24868384934961796
+0.8166692252270877
+0.5274319797754288
+0.31434731651097536
+0.9961257497780025
+0.3549888739362359
+0.8423425843939185
+0.21591948671266437
+0.8698299624957144
+0.17033040337264538
+0.22816143138334155
+0.11795765580609441
+0.7024209997616708
+0.15607220400124788
+0.5493582566268742
+0.5827712984755635
+0.8592293248511851
+0.785309090744704
+0.6115233600139618
+0.019046304281800985
+0.2573754615150392
+0.03130705002695322
+0.6572857238352299
+0.2033171127550304
+0.5058645992539823
+0.15793190989643335
+0.6273676953278482
+0.7285307059064507
+0.265245848800987
+0.6073522809892893
+0.3896624594926834
+0.27189663611352444
+0.705508322454989
+0.12823439668864012
+0.39648046158254147
+0.6584051586687565
+0.07818163838237524
+0.33628708589822054
+0.20613654889166355
+0.4277639244683087
+0.5401185592636466
+0.07513022050261497
+0.4920963351614773
+0.18214095244184136
+0.3235122123733163
+0.29958881670609117
+0.7304665613919497
+0.05146520072594285
+0.2471711952239275
+0.8797005712985992
+0.5029069227166474
+0.526974250562489
+0.15968210343271494
+0.4696163134649396
+0.17607332626357675
+0.362843859475106
+0.7626461815088987
+0.960180682130158
+0.2536660563200712
+0.710880630183965
+0.28728525526821613
+0.78940424695611
+0.5242114691063762
+0.8314367309212685
+0.5898511232808232
+0.015212591737508774
+0.4944482510909438
+0.06396882887929678
+0.519745257217437
+0.3558214954100549
+0.04566589882597327
+0.8368005948141217
+0.979805170558393
+0.7622401369735599
+0.2578657674603164
+0.5378834479488432
+0.9926298237405717
+0.4013678622432053
+0.510077933780849
+0.018817965406924486
+0.21481098141521215
+0.5357040031813085
+0.8512061606161296
+0.009026535786688328
+0.27302876580506563
+0.21162108704447746
+0.5273029855452478
+0.1086404686793685
+0.14079083362594247
+0.14331109775230289
+0.8190496540628374
+0.3947252375073731
+0.28109811525791883
+0.4066850380040705
+0.9154577874578536
+0.8929708409123123
+0.13500721845775843
+0.6328344400972128
+0.5668322211131454
+0.5448646773584187
+0.5418433886952698
+0.1141617177054286
+0.15885689994320273
+0.3867143443785608
+0.5574855520389974
+0.9173167692497373
+0.22908265376463532
+0.2047420055605471
+0.05979115655645728
+0.44121386017650366
+0.9507057839073241
+0.15352962678298354
+0.23290937673300505
+0.46427791472524405
+8.519855327904224E-4
+0.7947354763746262
+0.6385304923169315
+0.8696001935750246
+0.6022149357013404
+0.02299323584884405
+0.5036068987101316
+0.7541037476621568
+0.9995524706318974
+0.5888469088822603
+0.3318097642622888
+0.32492663664743304
+0.6643895329907537
+0.3656829949468374
+0.4912424306385219
+0.1900841724127531
+0.5945985522121191
+0.5709856003522873
+0.35780346347019076
+0.388774358201772
+0.9446004652418196
+0.14594348100945354
+0.6250799335539341
+0.5504232128150761
+0.16380576323717833
+0.7428167965263128
+0.5522975320927799
+0.655389194842428
+0.47579632699489594
+0.29743909696117043
+0.6319712968543172
+0.8178138644434512
+0.2785301594994962
+0.46813122322782874
+0.2898342702537775
+0.3287009159103036
+0.12909299414604902
+0.5859099281951785
+0.1891166502609849
+0.14497734932228923
+0.5543341124430299
+0.11846801871433854
+0.8499364419840276
+0.6603211951442063
+0.35630465345457196
+0.9680569358170033
+0.6639338186942041
+0.24408268369734287
+0.030771974939852953
+0.17226932244375348
+0.7909302446059883
+0.4327161009423435
+0.6732332338578999
+0.0849734228104353
+0.7278832173906267
+0.5536605608649552
+0.7091806619428098
+0.01754110073670745
+0.8406045655719936
+0.4815619965083897
+0.0535086034797132
+0.9874794147908688
+0.07097038673236966
+0.023544831201434135
+0.42413365049287677
+0.2970325672067702
+0.48028060607612133
+0.1990663455799222
+0.6099434774369001
+0.5050413520075381
+0.7814605687744915
+0.2650358658283949
+0.5148864723742008
+0.7807142282836139
+0.0976667134091258
+0.1516015767119825
+0.6566055505536497
+0.3946392172947526
+0.8052488421089947
+0.2964451564475894
+0.07394864456728101
+0.6961450576782227
+0.01576960226520896
+0.3434433783404529
+0.08799878368154168
+0.785557022318244
+0.7494717631489038
+0.45548726338893175
+0.7672475459985435
+0.5134695749729872
+0.7000438082031906
+0.49818582693114877
+0.4293400440365076
+0.9961911663413048
+0.016769078094512224
+0.013044610153883696
+0.8661804771982133
+0.7819683295674622
+0.33438047766685486
+0.966121535282582
+0.7259743176400661
+0.9887824659235775
+0.9494950002990663
+0.037431647535413504
+0.8268285538069904
+0.7355263698846102
+0.3120658891275525
+0.3588241692632437
+0.471130283549428
+0.7047113911248744
+0.980073744431138
+0.6762627908028662
+0.869295812677592
+0.9070576094090939
+0.7852784115821123
+0.16342713963240385
+0.06330870278179646
+0.6165989111177623
+0.342802997212857
+0.8414176292717457
+0.6921333004720509
+0.2594374935142696
+0.4386491202749312
+0.555369642097503
+0.3660965468734503
+0.6484139142557979
+0.9005299550481141
+0.25335891311988235
+0.23852926725521684
+0.9044205779209733
+0.8694673446007073
+0.46783560374751687
+0.34727911837399006
+0.19556640228256583
+0.8798208390362561
+0.3131108647212386
+0.6312824171036482
+0.5722001581452787
+0.9441223978064954
+0.7707183314487338
+0.17464511329308152
+0.08897313429042697
+0.5044040409848094
+0.5735817537643015
+0.4467783076688647
+0.19051036844030023
+0.4578995378687978
+0.6395204453729093
+0.460110604763031
+0.576092894654721
+0.7038368303328753
+0.5555814192630351
+0.4171535111963749
+0.8905360852368176
+0.12811446748673916
+0.6814800254069269
+0.8502416326664388
+0.12028768053278327
+0.16715052351355553
+0.3563938206061721
+0.049810963682830334
+0.27328392397612333
+0.2407418810762465
+0.6631906591355801
+0.674483266659081
+0.10489491606131196
+0.04698043642565608
+0.0812066881917417
+0.312124056275934
+0.6798701109364629
+0.7286937129683793
+0.9784366562962532
+0.5650205011479557
+0.833059043623507
+0.8976074242964387
+0.9441233519464731
+0.6146679543890059
+0.9019614770077169
+0.5529476394876838
+0.7665416682139039
+0.39598167687654495
+0.26307358546182513
+0.14862705068662763
+0.9521124185994267
+0.17644333699718118
+0.7684473628178239
+0.4274347145110369
+0.6102834036573768
+0.9328651092946529
+0.058630190789699554
+0.04729347629472613
+0.9597438890486956
+0.6761234584264457
+0.21832499839365482
+0.20707347383722663
+0.7274158899672329
+0.9477886455133557
+0.7821800266392529
+0.07305240212008357
+0.40399201214313507
+0.22684293938800693
+0.053185423370450735
+0.330069282092154
+0.6862794999033213
+0.7821815954521298
+0.22617859859019518
+0.8118352359160781
+0.015444065444171429
+0.6732339109294116
+0.9980663135647774
+0.8833195753395557
+0.21191661106422544
+0.32638366147875786
+0.5747208022512496
+0.07515769777819514
+0.02952938713133335
+0.4980746121145785
+0.8762881984002888
+0.17386484891176224
+0.10696181375533342
+0.5474299816414714
+0.016154434997588396
+0.6960771018639207
+0.47133891424164176
+0.9015861176885664
+0.782880718819797
+0.6602211343124509
+0.6578835439868271
+0.6049443730153143
+0.17169494135305285
+0.9915955001488328
+0.10519243823364377
+0.37815978936851025
+0.20879409136250615
+0.45666090911254287
+0.6456936108879745
+0.684759714640677
+0.8762755445204675
+0.8020628895610571
+0.1663151141256094
+0.31246642768383026
+0.18852565623819828
--- a/theano/sandbox/test_rng_mrg.py
+++ b/theano/sandbox/test_rng_mrg.py
+import os
 import sys, time
 import numpy
 import theano
+from theano import tensor, config
+from theano.sandbox import rng_mrg
 from theano.sandbox.rng_mrg import MRG_RandomStreams
+from theano.sandbox.cuda import cuda_available, cuda_enabled
+if cuda_available:
+    from theano.sandbox.cuda import float32_shared_constructor
+import unittest
+from theano.tests import unittest_tools as utt
 #TODO: test gpu
+# Done in test_consistency_GPU_{serial,parallel}
 #TODO: test MRG_RandomStreams
+# Partly done in test_consistency_randomstreams
 #TODO: test optimizer mrg_random_make_inplace
-#def test_rng_mrg_cpu():
 #TODO: make tests work when no flags gived. Now need: THEANO_FLAGS=device=gpu0,floatX=float32
+# Partly done, in test_consistency_GPU_{serial,parallel}
 #TODO: bug fix test_normal0, in normal() fct, n_samples currently need to be numpy.prod(size) not self.n_streams(size)
 mode = theano.config.mode
+utt.seed_rng()
+## Results generated by Java code using L'Ecuyer et al.'s code, with:
+# main seed: [12345]*6 (default)
+# 12 streams
+# 7 substreams for each stream
+# 5 samples drawn from each substream
+# The reference file sits next to this test module; locate it relative to
+# the module instead of the current working directory so that the tests can
+# be launched from any directory.
+java_samples = numpy.loadtxt(os.path.join(
+    os.path.dirname(os.path.abspath(__file__)),
+    'samples_MRG31k3p_12_7_5.txt'))
+def test_deterministic():
+    '''Check that MRG_RandomStreams output is reproducible from its seed.
+    Two successive calls of one compiled function must give different
+    draws, while a second generator built from the same seed must give
+    the same first and second draws as the first generator.
+    '''
+    rng_seed = utt.fetch_seed()
+    shape = (10, 20)
+    def draw_two(with_cuda):
+        # Build a fresh generator from the seed and sample it twice.
+        streams = MRG_RandomStreams(seed=rng_seed, use_cuda=with_cuda)
+        sampler = theano.function([], streams.uniform(size=shape))
+        first = sampler()
+        second = sampler()
+        return first, second
+    # The CPU path is always checked; the cuda path only when enabled.
+    cuda_settings = [False]
+    if cuda_enabled:
+        cuda_settings = cuda_settings + [True]
+    for use_cuda in cuda_settings:
+        print 'use_cuda =', use_cuda
+        first_a, second_a = draw_two(use_cuda)
+        assert not numpy.allclose(first_a, second_a)
+        first_b, second_b = draw_two(use_cuda)
+        assert numpy.allclose(first_a, first_b)
+        assert numpy.allclose(second_a, second_b)
+def test_consistency_randomstreams():
+    '''Verify that the random numbers generated by MRG_RandomStreams
+    are the same as the reference (Java) implementation by L'Ecuyer et al.
+    The check always runs with use_cuda=False and, when cuda is enabled,
+    a second time with use_cuda=True.
+    '''
+    seed = 12345
+    n_samples = 5
+    n_streams = 12
+    n_substreams = 7
+    test_use_cuda = [False]
+    if cuda_enabled:
+        test_use_cuda.append(True)
+    for use_cuda in test_use_cuda:
+        print 'use_cuda =', use_cuda
+        samples = []
+        # Pass the loop variable through: a constant False here would make
+        # the cuda pass silently repeat the CPU check.
+        rng = MRG_RandomStreams(seed = seed, use_cuda=use_cuda)
+        for i in range(n_streams):
+            stream_samples = []
+            # One uniform variable per stream, drawing n_substreams values
+            # per call with an explicit number of streams.
+            u = rng.uniform(size=(n_substreams,), nstreams=n_substreams)
+            f = theano.function([], u)
+            for j in range(n_samples):
+                s = f()
+                stream_samples.append(s)
+            stream_samples = numpy.array(stream_samples)
+            # Rows are calls and columns are substreams; transpose so each
+            # substream's samples are contiguous before flattening.
+            stream_samples = stream_samples.T.flatten()
+            samples.append(stream_samples)
+        samples = numpy.array(samples).flatten()
+        assert(numpy.allclose(samples, java_samples))
+def test_consistency_cpu_serial():
+    '''Compare serially drawn mrg_uniform samples with the reference
+    (Java) implementation by L'Ecuyer et al.
+    Every (stream, substream) pair gets its own one-row state and its own
+    compiled function, from which n_samples values are drawn in turn.
+    '''
+    seed = 12345
+    n_samples = 5
+    n_streams = 12
+    n_substreams = 7
+    drawn = []
+    stream_start = numpy.array([seed] * 6, dtype='int32')
+    for _stream in range(n_streams):
+        substream_start = stream_start.copy()
+        for _substream in range(n_substreams):
+            state_var = tensor.shared(
+                    numpy.array([substream_start.copy()], dtype='int32'))
+            next_state, draw = rng_mrg.mrg_uniform.new(
+                    state_var, ndim=None, dtype=config.floatX, size=(1,))
+            # Not really necessary, just mimicking
+            # rng_mrg.MRG_RandomStreams' behavior
+            draw.rstate = state_var
+            draw.update = (state_var, next_state)
+            state_var.default_update = next_state
+            sampler = theano.function([], draw)
+            drawn.extend([sampler() for _k in range(n_samples)])
+            # next substream
+            substream_start = rng_mrg.ff_2p72(substream_start)
+        # next stream
+        stream_start = rng_mrg.ff_2p134(stream_start)
+    drawn = numpy.array(drawn).flatten()
+    assert(numpy.allclose(drawn, java_samples))
+def test_consistency_cpu_parallel():
+    '''Compare mrg_uniform samples drawn in parallel with the reference
+    (Java) implementation by L'Ecuyer et al.
+    For each stream, the states of all its substreams are stacked into one
+    shared variable so that a single call draws one value per substream.
+    '''
+    seed = 12345
+    n_samples = 5
+    n_streams = 12
+    n_substreams = 7 # 7 samples will be drawn in parallel
+    drawn = []
+    stream_start = numpy.array([seed]*6, dtype='int32')
+    for _stream in range(n_streams):
+        # One state row per substream, each derived from the previous row.
+        rows = [stream_start.copy()]
+        while len(rows) < n_substreams:
+            rows.append(rng_mrg.ff_2p72(rows[-1]))
+        state_var = tensor.shared(numpy.asarray(rows))
+        next_state, draw = rng_mrg.mrg_uniform.new(state_var, ndim=None,
+                dtype=config.floatX, size=(n_substreams,))
+        # Not really necessary, just mimicking
+        # rng_mrg.MRG_RandomStreams' behavior
+        draw.rstate = state_var
+        draw.update = (state_var, next_state)
+        state_var.default_update = next_state
+        sampler = theano.function([], draw)
+        per_call = [sampler() for _k in range(n_samples)]
+        # Rows are calls and columns are substreams; transpose so each
+        # substream's samples are contiguous before flattening.
+        drawn.append(numpy.array(per_call).T.flatten())
+        # next stream
+        stream_start = rng_mrg.ff_2p134(stream_start)
+    drawn = numpy.array(drawn).flatten()
+    assert(numpy.allclose(drawn, java_samples))
+def test_consistency_GPU_serial():
+    '''Verify that the random numbers generated by GPU_mrg_uniform, serially,
+    are the same as the reference (Java) implementation by L'Ecuyer et al.
+    Raises unittest.SkipTest when cuda is not available.
+    '''
+    if not cuda_available:
+        # Reach SkipTest through the imported unittest module; this file's
+        # visible imports do not bind a bare `SkipTest` name.
+        # NOTE(review): unittest.SkipTest requires Python >= 2.7.
+        raise unittest.SkipTest('Optional package cuda not available')
+    # NOTE(review): FAST_COMPILE is swapped for FAST_RUN, presumably because
+    # the GPU ops need the optimizations of FAST_RUN -- confirm.
+    if config.mode == 'FAST_COMPILE':
+        mode = 'FAST_RUN'
+    else:
+        mode = config.mode
+    seed = 12345
+    n_samples = 5
+    n_streams = 12
+    n_substreams = 7
+    samples = []
+    curr_rstate = numpy.array([seed] * 6, dtype='int32')
+    for i in range(n_streams):
+        stream_rstate = curr_rstate.copy()
+        for j in range(n_substreams):
+            substream_rstate = numpy.array(stream_rstate.copy(), dtype='int32')
+            # HACK - we transfer these int32 to the GPU memory as float32
+            # (reinterpret_cast)
+            tmp_float_buf = numpy.frombuffer(substream_rstate.data, dtype='float32')
+            rstate = float32_shared_constructor(tmp_float_buf) # Transfer to device
+            new_rstate, sample = rng_mrg.GPU_mrg_uniform.new(rstate, ndim=None,
+                    dtype='float32', size=(1,))
+            rstate.default_update = new_rstate
+            # Not really necessary, just mimicking rng_mrg.MRG_RandomStreams' behavior
+            sample.rstate = rstate
+            sample.update = (rstate, new_rstate)
+            # We need the sample back in the main memory
+            cpu_sample = tensor.as_tensor_variable(sample)
+            f = theano.function([], cpu_sample, mode=mode)
+            for k in range(n_samples):
+                s = f()
+                samples.append(s)
+            # next substream
+            stream_rstate = rng_mrg.ff_2p72(stream_rstate)
+        # next stream
+        curr_rstate = rng_mrg.ff_2p134(curr_rstate)
+    samples = numpy.array(samples).flatten()
+    assert(numpy.allclose(samples, java_samples))
+def test_consistency_GPU_parallel():
+    '''Verify that the random numbers generated by GPU_mrg_uniform, in parallel,
+    are the same as the reference (Java) implementation by L'Ecuyer et al.
+    Raises unittest.SkipTest when cuda is not available.
+    '''
+    if not cuda_available:
+        # Reach SkipTest through the imported unittest module; this file's
+        # visible imports do not bind a bare `SkipTest` name.
+        # NOTE(review): unittest.SkipTest requires Python >= 2.7.
+        raise unittest.SkipTest('Optional package cuda not available')
+    # NOTE(review): FAST_COMPILE is swapped for FAST_RUN, presumably because
+    # the GPU ops need the optimizations of FAST_RUN -- confirm.
+    if config.mode == 'FAST_COMPILE':
+        mode = 'FAST_RUN'
+    else:
+        mode = config.mode
+    seed = 12345
+    n_samples = 5
+    n_streams = 12
+    n_substreams = 7 # 7 samples will be drawn in parallel
+    samples = []
+    curr_rstate = numpy.array([seed]*6, dtype='int32')
+    for i in range(n_streams):
+        stream_samples = []
+        # One state per substream, each derived from the previous one.
+        rstate = [curr_rstate.copy()]
+        for j in range(1, n_substreams):
+            rstate.append(rng_mrg.ff_2p72(rstate[-1]))
+        rstate = numpy.asarray(rstate).flatten()
+        # HACK - transfer these int32 to the GPU memory as float32
+        # (reinterpret_cast)
+        tmp_float_buf = numpy.frombuffer(rstate.data, dtype='float32')
+        rstate = float32_shared_constructor(tmp_float_buf) # Transfer to device
+        new_rstate, sample = rng_mrg.GPU_mrg_uniform.new(rstate, ndim=None,
+                dtype='float32', size=(n_substreams,))
+        rstate.default_update = new_rstate
+        # Not really necessary, just mimicking rng_mrg.MRG_RandomStreams' behavior
+        sample.rstate = rstate
+        sample.update = (rstate, new_rstate)
+        # We need the sample back in the main memory
+        cpu_sample = tensor.as_tensor_variable(sample)
+        # Compile with the mode selected above, as the serial GPU test does.
+        f = theano.function([], cpu_sample, mode=mode)
+        for k in range(n_samples):
+            s = f()
+            stream_samples.append(s)
+        # Rows are calls and columns are substreams; transpose so each
+        # substream's samples are contiguous before flattening.
+        samples.append(numpy.array(stream_samples).T.flatten())
+        # next stream
+        curr_rstate = rng_mrg.ff_2p134(curr_rstate)
+    samples = numpy.array(samples).flatten()
+    assert(numpy.allclose(samples, java_samples))
 def test_rng0():

--- a/theano/scan.py
+++ b/theano/scan.py
@@ -639,7 +639,10 @@ def scan(fn, sequences=[], outputs_info=[], non_sequences=[],
    notshared_other_args_copies = []
    for non_seq in non_seqs:
        if not isinstance(non_seq, SharedVariable):
-            non_seq_copy = non_seq.type()
+            if n_fixed_steps not in [-1,1]:
+                non_seq_copy = non_seq.type()
+            else:
+                non_seq_copy = non_seq
            notshared_other_args += [non_seq]
            notshared_other_args_copies += [non_seq_copy]
            new_non_seqs += [non_seq_copy]