提交 58783627 authored 作者: James Bergstra's avatar James Bergstra

merge

...@@ -32,7 +32,7 @@ The equivalent Theano code would be ...@@ -32,7 +32,7 @@ The equivalent Theano code would be
# Symbolic description of the result # Symbolic description of the result
result,updates = theano.scan(fn = lambda x_tm1,A: x_tm1*A,\ result,updates = theano.scan(fn = lambda x_tm1,A: x_tm1*A,\
info_outputs = T.ones_like(A),\ outputs_info = T.ones_like(A),\
non_sequences = A, \ non_sequences = A, \
n_steps = k) n_steps = k)
...@@ -112,7 +112,7 @@ the Theano variables needed we construct our RNN as follows : ...@@ -112,7 +112,7 @@ the Theano variables needed we construct our RNN as follows :
([x_vals, y_vals],updates) = theano.scan(fn = oneStep, \ ([x_vals, y_vals],updates) = theano.scan(fn = oneStep, \
sequences = dict(input = u, taps= [-4,-0]), \ sequences = dict(input = u, taps= [-4,-0]), \
info_outputs = [dict(initial = x0, taps = [-3,-1]),y0], \ outputs_info = [dict(initial = x0, taps = [-3,-1]),y0], \
non_sequences = [W,W_in_1,W_in_2,W_feedback, W_out]) non_sequences = [W,W_in_1,W_in_2,W_feedback, W_out])
# for second input y, scan adds -1 in output_taps by default # for second input y, scan adds -1 in output_taps by default
...@@ -155,7 +155,7 @@ the following: ...@@ -155,7 +155,7 @@ the following:
sample = theano.tensor.vector() sample = theano.tensor.vector()
values, updates = theano.scan( OneStep, info_outputs = sample, n_steps = 10 ) values, updates = theano.scan( OneStep, outputs_info = sample, n_steps = 10 )
gibbs10 = theano.function([sample], values[-1], updates = updates) gibbs10 = theano.function([sample], values[-1], updates = updates)
......
...@@ -340,7 +340,7 @@ class ProfileMode(Mode): ...@@ -340,7 +340,7 @@ class ProfileMode(Mode):
print "<fct name> <input name> <input type> <str input>" print "<fct name> <input name> <input type> <str input>"
for fct in fct_call.keys(): for fct in fct_call.keys():
for i in fct.input_storage: for i in fct.input_storage:
if i.type.dtype=='float64': if hasattr(i.type, 'dtype') and i.type.dtype=='float64':
print fct.name, i.name, i.type, i print fct.name, i.name, i.type, i
register_mode('PROFILE_MODE',ProfileMode()) register_mode('PROFILE_MODE',ProfileMode())
......
...@@ -832,7 +832,7 @@ class GpuSum(Op): ...@@ -832,7 +832,7 @@ class GpuSum(Op):
NUM_VECTOR_OP_THREADS_PER_BLOCK)); NUM_VECTOR_OP_THREADS_PER_BLOCK));
%(threads_y)s %(threads_y)s
%(threads_z)s %(threads_z)s
dim3 n_blocks(CudaNdarray_HOST_DIMS(%(x)s)[0]); dim3 n_blocks(std::min(CudaNdarray_HOST_DIMS(%(x)s)[0],NUM_VECTOR_OP_BLOCKS));
%(makecall)s %(makecall)s
} }
""" %locals() """ %locals()
......
...@@ -49,7 +49,7 @@ MASK12 = numpy.int32(511) #2^9 - 1 ...@@ -49,7 +49,7 @@ MASK12 = numpy.int32(511) #2^9 - 1
MASK13 = numpy.int32(16777215) #2^24 - 1 MASK13 = numpy.int32(16777215) #2^24 - 1
MASK2 = numpy.int32(65535) #2^16 - 1 MASK2 = numpy.int32(65535) #2^16 - 1
MULT2 = numpy.int32(21069) MULT2 = numpy.int32(21069)
NORM = 4.656612873077392578125e-10; NORM = 4.656612873077392578125e-10; #1./2^31
A1p0 = numpy.asarray([[0, 4194304, 129], [1, 0, 0], [0, 1, 0]]) A1p0 = numpy.asarray([[0, 4194304, 129], [1, 0, 0], [0, 1, 0]])
A2p0 = numpy.asarray([[32768, 0, 32769], [1, 0, 0], [0, 1, 0]]) A2p0 = numpy.asarray([[32768, 0, 32769], [1, 0, 0], [0, 1, 0]])
...@@ -593,6 +593,7 @@ class MRG_RandomStreams(object): ...@@ -593,6 +593,7 @@ class MRG_RandomStreams(object):
return rval return rval
def n_streams(self, size): def n_streams(self, size):
# TODO: a smart way of choosing the number of streams
if isinstance(size, (tuple, list)): if isinstance(size, (tuple, list)):
r = 1 r = 1
for s in size: for s in size:
...@@ -601,12 +602,7 @@ class MRG_RandomStreams(object): ...@@ -601,12 +602,7 @@ class MRG_RandomStreams(object):
return r/6 # chosen as fastest for rbm_benchmark return r/6 # chosen as fastest for rbm_benchmark
else: else:
return r return r
try:
rval = int(size)
assert rval > 0
return rval
except:
pass
print >> sys.stderr, "MRG_RandomStreams Can't determine #streams from size (%s), guessing 30*256"%str(size) print >> sys.stderr, "MRG_RandomStreams Can't determine #streams from size (%s), guessing 30*256"%str(size)
return 30*256 return 30*256
...@@ -616,7 +612,7 @@ class MRG_RandomStreams(object): ...@@ -616,7 +612,7 @@ class MRG_RandomStreams(object):
node_rstate.default_update = new_rstate node_rstate.default_update = new_rstate
return sample return sample
def uniform(self, size=None, low=0.0, high=1.0, ndim=None, dtype=config.floatX): def uniform(self, size=None, low=0.0, high=1.0, ndim=None, dtype=config.floatX, nstreams=None):
""" """
Sample a tensor of given size whose element from a uniform Sample a tensor of given size whose element from a uniform
distribution between low and high. distribution between low and high.
...@@ -625,8 +621,10 @@ class MRG_RandomStreams(object): ...@@ -625,8 +621,10 @@ class MRG_RandomStreams(object):
ndim may be a plain integer to supplement the missing ndim may be a plain integer to supplement the missing
information. information.
""" """
if nstreams is None:
nstreams = self.n_streams(size)
if self.use_cuda and dtype=='float32': if self.use_cuda and dtype=='float32':
rstates = self.get_substream_rstates(self.n_streams(size)) rstates = self.get_substream_rstates(nstreams)
rstates = rstates.flatten() rstates = rstates.flatten()
# HACK - we use fact that int32 and float32 have same size to # HACK - we use fact that int32 and float32 have same size to
# sneak ints into the CudaNdarray type. # sneak ints into the CudaNdarray type.
...@@ -643,11 +641,11 @@ class MRG_RandomStreams(object): ...@@ -643,11 +641,11 @@ class MRG_RandomStreams(object):
u = self.pretty_return(node_rstate, u = self.pretty_return(node_rstate,
*GPU_mrg_uniform.new(node_rstate, ndim, dtype, size)) *GPU_mrg_uniform.new(node_rstate, ndim, dtype, size))
else: else:
node_rstate = shared(self.get_substream_rstates(self.n_streams(size))) node_rstate = shared(self.get_substream_rstates(nstreams))
u = self.pretty_return(node_rstate, u = self.pretty_return(node_rstate,
*mrg_uniform.new(node_rstate, ndim, dtype, size)) *mrg_uniform.new(node_rstate, ndim, dtype, size))
r = u * (high-low) + low r = u * (high-low) + low
if u.type.broadcastable != r.type.broadcastable: if u.type.broadcastable != r.type.broadcastable:
raise NotImplementedError( 'Increase the size to match the broadcasting pattern of `low` and `high` arguments') raise NotImplementedError( 'Increase the size to match the broadcasting pattern of `low` and `high` arguments')
return r return r
......
0.7353244530968368
0.6142074400559068
0.11007806099951267
0.6487741703167558
0.36619443260133266
0.2585685825906694
0.9489980279468
0.4309556516818702
0.12257590936496854
0.9760319022461772
0.6940806899219751
0.18046841165050864
0.003993193618953228
0.5351603352464736
0.02472442388534546
0.7705746139399707
0.8138928869739175
0.9650539481081069
0.24507411010563374
0.35767574002966285
0.4939101580530405
0.9027785388752818
0.27498403564095497
0.03848231676965952
0.3081609820947051
0.9062023567967117
0.009030417073518038
0.7953705741092563
0.5061718439683318
0.5975547162815928
0.5435514179989696
0.330895590595901
0.49919482320547104
0.9409166998229921
0.8276205519214272
0.5180770065635443
0.2319392478093505
0.36197659047320485
0.11120751267299056
0.5018561617471278
0.47852187464013696
0.7188052111305296
0.3030327311716974
0.6756376498378813
0.03624899685382843
0.34987151669338346
0.031225718092173338
0.06772322440519929
0.06820952938869596
0.9987128847278655
0.08330700965598226
0.9731874465942383
0.6345655219629407
0.7169904578477144
0.5793502484448254
0.7396790678612888
0.9926023166626692
0.7522463691420853
0.6768838302232325
0.3253784184344113
0.05375300580635667
0.4912636987864971
0.6485021142289042
0.3043024237267673
0.24868384934961796
0.8166692252270877
0.5274319797754288
0.31434731651097536
0.9961257497780025
0.3549888739362359
0.8423425843939185
0.21591948671266437
0.8698299624957144
0.17033040337264538
0.22816143138334155
0.11795765580609441
0.7024209997616708
0.15607220400124788
0.5493582566268742
0.5827712984755635
0.8592293248511851
0.785309090744704
0.6115233600139618
0.019046304281800985
0.2573754615150392
0.03130705002695322
0.6572857238352299
0.2033171127550304
0.5058645992539823
0.15793190989643335
0.6273676953278482
0.7285307059064507
0.265245848800987
0.6073522809892893
0.3896624594926834
0.27189663611352444
0.705508322454989
0.12823439668864012
0.39648046158254147
0.6584051586687565
0.07818163838237524
0.33628708589822054
0.20613654889166355
0.4277639244683087
0.5401185592636466
0.07513022050261497
0.4920963351614773
0.18214095244184136
0.3235122123733163
0.29958881670609117
0.7304665613919497
0.05146520072594285
0.2471711952239275
0.8797005712985992
0.5029069227166474
0.526974250562489
0.15968210343271494
0.4696163134649396
0.17607332626357675
0.362843859475106
0.7626461815088987
0.960180682130158
0.2536660563200712
0.710880630183965
0.28728525526821613
0.78940424695611
0.5242114691063762
0.8314367309212685
0.5898511232808232
0.015212591737508774
0.4944482510909438
0.06396882887929678
0.519745257217437
0.3558214954100549
0.04566589882597327
0.8368005948141217
0.979805170558393
0.7622401369735599
0.2578657674603164
0.5378834479488432
0.9926298237405717
0.4013678622432053
0.510077933780849
0.018817965406924486
0.21481098141521215
0.5357040031813085
0.8512061606161296
0.009026535786688328
0.27302876580506563
0.21162108704447746
0.5273029855452478
0.1086404686793685
0.14079083362594247
0.14331109775230289
0.8190496540628374
0.3947252375073731
0.28109811525791883
0.4066850380040705
0.9154577874578536
0.8929708409123123
0.13500721845775843
0.6328344400972128
0.5668322211131454
0.5448646773584187
0.5418433886952698
0.1141617177054286
0.15885689994320273
0.3867143443785608
0.5574855520389974
0.9173167692497373
0.22908265376463532
0.2047420055605471
0.05979115655645728
0.44121386017650366
0.9507057839073241
0.15352962678298354
0.23290937673300505
0.46427791472524405
8.519855327904224E-4
0.7947354763746262
0.6385304923169315
0.8696001935750246
0.6022149357013404
0.02299323584884405
0.5036068987101316
0.7541037476621568
0.9995524706318974
0.5888469088822603
0.3318097642622888
0.32492663664743304
0.6643895329907537
0.3656829949468374
0.4912424306385219
0.1900841724127531
0.5945985522121191
0.5709856003522873
0.35780346347019076
0.388774358201772
0.9446004652418196
0.14594348100945354
0.6250799335539341
0.5504232128150761
0.16380576323717833
0.7428167965263128
0.5522975320927799
0.655389194842428
0.47579632699489594
0.29743909696117043
0.6319712968543172
0.8178138644434512
0.2785301594994962
0.46813122322782874
0.2898342702537775
0.3287009159103036
0.12909299414604902
0.5859099281951785
0.1891166502609849
0.14497734932228923
0.5543341124430299
0.11846801871433854
0.8499364419840276
0.6603211951442063
0.35630465345457196
0.9680569358170033
0.6639338186942041
0.24408268369734287
0.030771974939852953
0.17226932244375348
0.7909302446059883
0.4327161009423435
0.6732332338578999
0.0849734228104353
0.7278832173906267
0.5536605608649552
0.7091806619428098
0.01754110073670745
0.8406045655719936
0.4815619965083897
0.0535086034797132
0.9874794147908688
0.07097038673236966
0.023544831201434135
0.42413365049287677
0.2970325672067702
0.48028060607612133
0.1990663455799222
0.6099434774369001
0.5050413520075381
0.7814605687744915
0.2650358658283949
0.5148864723742008
0.7807142282836139
0.0976667134091258
0.1516015767119825
0.6566055505536497
0.3946392172947526
0.8052488421089947
0.2964451564475894
0.07394864456728101
0.6961450576782227
0.01576960226520896
0.3434433783404529
0.08799878368154168
0.785557022318244
0.7494717631489038
0.45548726338893175
0.7672475459985435
0.5134695749729872
0.7000438082031906
0.49818582693114877
0.4293400440365076
0.9961911663413048
0.016769078094512224
0.013044610153883696
0.8661804771982133
0.7819683295674622
0.33438047766685486
0.966121535282582
0.7259743176400661
0.9887824659235775
0.9494950002990663
0.037431647535413504
0.8268285538069904
0.7355263698846102
0.3120658891275525
0.3588241692632437
0.471130283549428
0.7047113911248744
0.980073744431138
0.6762627908028662
0.869295812677592
0.9070576094090939
0.7852784115821123
0.16342713963240385
0.06330870278179646
0.6165989111177623
0.342802997212857
0.8414176292717457
0.6921333004720509
0.2594374935142696
0.4386491202749312
0.555369642097503
0.3660965468734503
0.6484139142557979
0.9005299550481141
0.25335891311988235
0.23852926725521684
0.9044205779209733
0.8694673446007073
0.46783560374751687
0.34727911837399006
0.19556640228256583
0.8798208390362561
0.3131108647212386
0.6312824171036482
0.5722001581452787
0.9441223978064954
0.7707183314487338
0.17464511329308152
0.08897313429042697
0.5044040409848094
0.5735817537643015
0.4467783076688647
0.19051036844030023
0.4578995378687978
0.6395204453729093
0.460110604763031
0.576092894654721
0.7038368303328753
0.5555814192630351
0.4171535111963749
0.8905360852368176
0.12811446748673916
0.6814800254069269
0.8502416326664388
0.12028768053278327
0.16715052351355553
0.3563938206061721
0.049810963682830334
0.27328392397612333
0.2407418810762465
0.6631906591355801
0.674483266659081
0.10489491606131196
0.04698043642565608
0.0812066881917417
0.312124056275934
0.6798701109364629
0.7286937129683793
0.9784366562962532
0.5650205011479557
0.833059043623507
0.8976074242964387
0.9441233519464731
0.6146679543890059
0.9019614770077169
0.5529476394876838
0.7665416682139039
0.39598167687654495
0.26307358546182513
0.14862705068662763
0.9521124185994267
0.17644333699718118
0.7684473628178239
0.4274347145110369
0.6102834036573768
0.9328651092946529
0.058630190789699554
0.04729347629472613
0.9597438890486956
0.6761234584264457
0.21832499839365482
0.20707347383722663
0.7274158899672329
0.9477886455133557
0.7821800266392529
0.07305240212008357
0.40399201214313507
0.22684293938800693
0.053185423370450735
0.330069282092154
0.6862794999033213
0.7821815954521298
0.22617859859019518
0.8118352359160781
0.015444065444171429
0.6732339109294116
0.9980663135647774
0.8833195753395557
0.21191661106422544
0.32638366147875786
0.5747208022512496
0.07515769777819514
0.02952938713133335
0.4980746121145785
0.8762881984002888
0.17386484891176224
0.10696181375533342
0.5474299816414714
0.016154434997588396
0.6960771018639207
0.47133891424164176
0.9015861176885664
0.782880718819797
0.6602211343124509
0.6578835439868271
0.6049443730153143
0.17169494135305285
0.9915955001488328
0.10519243823364377
0.37815978936851025
0.20879409136250615
0.45666090911254287
0.6456936108879745
0.684759714640677
0.8762755445204675
0.8020628895610571
0.1663151141256094
0.31246642768383026
0.18852565623819828
import sys, time import sys, time
import numpy import numpy
import theano import theano
from theano import tensor, config
from theano.sandbox import rng_mrg
from theano.sandbox.rng_mrg import MRG_RandomStreams from theano.sandbox.rng_mrg import MRG_RandomStreams
from theano.sandbox.cuda import cuda_available, cuda_enabled
if cuda_available:
from theano.sandbox.cuda import float32_shared_constructor
import unittest
from theano.tests import unittest_tools as utt
#TODO: test gpu #TODO: test gpu
# Done in test_consistency_GPU_{serial,parallel}
#TODO: test MRG_RandomStreams #TODO: test MRG_RandomStreams
# Partly done in test_consistency_randomstreams
#TODO: test optimizer mrg_random_make_inplace #TODO: test optimizer mrg_random_make_inplace
#def test_rng_mrg_cpu():
#TODO: make tests work when no flags are given. Now need: THEANO_FLAGS=device=gpu0,floatX=float32 #TODO: make tests work when no flags are given. Now need: THEANO_FLAGS=device=gpu0,floatX=float32
# Partly done, in test_consistency_GPU_{serial,parallel}
#TODO: bug fix test_normal0, in normal() fct, n_samples currently needs to be numpy.prod(size) not self.n_streams(size) #TODO: bug fix test_normal0, in normal() fct, n_samples currently needs to be numpy.prod(size) not self.n_streams(size)
mode = theano.config.mode mode = theano.config.mode
utt.seed_rng()
## Results generated by Java code using L'Ecuyer et al.'s code, with:
# main seed: [12345]*6 (default)
# 12 streams
# 7 substreams for each stream
# 5 samples drawn from each substream
#
# The consistency tests below compare their flattened samples against this
# array, so it is expected to hold 12 * 7 * 5 = 420 values.
# NOTE(review): the file name is relative, so it is resolved against the
# current working directory at import time -- the tests presumably have to
# be launched from the directory containing the data file; confirm.
java_samples = numpy.loadtxt('samples_MRG31k3p_12_7_5.txt')
def test_deterministic():
seed = utt.fetch_seed()
sample_size = (10, 20)
test_use_cuda = [False]
if cuda_enabled:
test_use_cuda.append(True)
for use_cuda in test_use_cuda:
print 'use_cuda =', use_cuda
R = MRG_RandomStreams(seed=seed, use_cuda=use_cuda)
u = R.uniform(size=sample_size)
f = theano.function([], u)
fsample1 = f()
fsample2 = f()
assert not numpy.allclose(fsample1, fsample2)
R2 = MRG_RandomStreams(seed=seed, use_cuda=use_cuda)
u2 = R2.uniform(size=sample_size)
g = theano.function([], u2)
gsample1 = g()
gsample2 = g()
assert numpy.allclose(fsample1, gsample1)
assert numpy.allclose(fsample2, gsample2)
def test_consistency_randomstreams():
'''Verify that the random numbers generated by MRG_RandomStreams
are the same as the reference (Java) implementation by L'Ecuyer et al.
'''
seed = 12345
n_samples = 5
n_streams = 12
n_substreams = 7
test_use_cuda = [False]
if cuda_enabled:
test_use_cuda.append(True)
for use_cuda in test_use_cuda:
print 'use_cuda =', use_cuda
samples = []
rng = MRG_RandomStreams(seed = seed, use_cuda=False)
for i in range(n_streams):
stream_samples = []
u = rng.uniform(size=(n_substreams,), nstreams=n_substreams)
f = theano.function([], u)
for j in range(n_samples):
s = f()
stream_samples.append(s)
stream_samples = numpy.array(stream_samples)
stream_samples = stream_samples.T.flatten()
samples.append(stream_samples)
samples = numpy.array(samples).flatten()
assert(numpy.allclose(samples, java_samples))
def test_consistency_cpu_serial():
    '''Compare mrg_uniform, sampled one substream at a time, with the
    reference (Java) implementation by L'Ecuyer et al.

    Every (stream, substream) pair gets its own shared state and its own
    compiled function, from which n_samples values are drawn in sequence.
    '''
    seed = 12345
    n_samples = 5
    n_streams = 12
    n_substreams = 7

    drawn = []
    stream_start = numpy.array([seed] * 6, dtype='int32')

    for stream_idx in range(n_streams):
        substream_start = stream_start.copy()

        for substream_idx in range(n_substreams):
            # A single state row: one generator sampled serially.
            state = tensor.shared(
                numpy.array([substream_start.copy()], dtype='int32'))
            next_state, draw = rng_mrg.mrg_uniform.new(
                state, ndim=None, dtype=config.floatX, size=(1,))

            # Not really necessary, just mimicking
            # rng_mrg.MRG_RandomStreams' behavior
            draw.rstate = state
            draw.update = (state, next_state)
            state.default_update = next_state

            sampler = theano.function([], draw)
            for sample_idx in range(n_samples):
                drawn.append(sampler())

            # next substream
            substream_start = rng_mrg.ff_2p72(substream_start)

        # next stream
        stream_start = rng_mrg.ff_2p134(stream_start)

    flat = numpy.array(drawn).flatten()
    assert numpy.allclose(flat, java_samples)
def test_consistency_cpu_parallel():
    '''Compare mrg_uniform, sampling all substreams of a stream at once,
    with the reference (Java) implementation by L'Ecuyer et al.

    For every stream, the states of its n_substreams substreams are stacked
    into one shared array, so that each call of the compiled function draws
    one value per substream.
    '''
    seed = 12345
    n_samples = 5
    n_streams = 12
    n_substreams = 7  # 7 samples will be drawn in parallel

    per_stream = []
    stream_start = numpy.array([seed] * 6, dtype='int32')

    for stream_idx in range(n_streams):
        # One state row per substream; each row is derived from the
        # previous one with ff_2p72.
        states = [stream_start.copy()]
        while len(states) < n_substreams:
            states.append(rng_mrg.ff_2p72(states[-1]))
        state = tensor.shared(numpy.asarray(states))

        next_state, draw = rng_mrg.mrg_uniform.new(
            state, ndim=None, dtype=config.floatX, size=(n_substreams,))

        # Not really necessary, just mimicking
        # rng_mrg.MRG_RandomStreams' behavior
        draw.rstate = state
        draw.update = (state, next_state)
        state.default_update = next_state

        sampler = theano.function([], draw)
        draws = []
        for sample_idx in range(n_samples):
            draws.append(sampler())

        # Rows are successive draws, columns are substreams: transpose so
        # that each substream's draws are contiguous once flattened.
        per_stream.append(numpy.array(draws).T.flatten())

        # next stream
        stream_start = rng_mrg.ff_2p134(stream_start)

    flat = numpy.array(per_stream).flatten()
    assert numpy.allclose(flat, java_samples)
def test_consistency_GPU_serial():
    '''Verify that the random numbers generated by GPU_mrg_uniform, serially,
    are the same as the reference (Java) implementation by L'Ecuyer et al.

    The test is skipped when cuda is not available.  Every
    (stream, substream) pair gets its own device-side state and its own
    compiled function, from which n_samples values are drawn in sequence.
    '''
    if not cuda_available:
        # `SkipTest` was never imported in this module; use the one exposed
        # by the already-imported unittest package (Python >= 2.7).
        raise unittest.SkipTest('Optional package cuda not available')
    # float32_shared_constructor is only imported when cuda_available is
    # true, hence the guard above.

    # FAST_COMPILE is replaced by FAST_RUN for this test.
    # NOTE(review): presumably FAST_COMPILE does not move the graph to the
    # GPU -- confirm.
    if config.mode == 'FAST_COMPILE':
        mode = 'FAST_RUN'
    else:
        mode = config.mode

    seed = 12345
    n_samples = 5
    n_streams = 12
    n_substreams = 7

    samples = []
    curr_rstate = numpy.array([seed] * 6, dtype='int32')

    for i in range(n_streams):
        stream_rstate = curr_rstate.copy()
        for j in range(n_substreams):
            substream_rstate = numpy.array(stream_rstate.copy(),
                                           dtype='int32')
            # HACK - we transfer these int32 to the GPU memory as float32
            # (reinterpret_cast)
            tmp_float_buf = numpy.frombuffer(substream_rstate.data,
                                             dtype='float32')
            # Transfer to device
            rstate = float32_shared_constructor(tmp_float_buf)

            new_rstate, sample = rng_mrg.GPU_mrg_uniform.new(
                rstate, ndim=None, dtype='float32', size=(1,))
            rstate.default_update = new_rstate

            # Not really necessary, just mimicking
            # rng_mrg.MRG_RandomStreams' behavior
            sample.rstate = rstate
            sample.update = (rstate, new_rstate)

            # We need the sample back in the main memory
            cpu_sample = tensor.as_tensor_variable(sample)
            f = theano.function([], cpu_sample, mode=mode)
            for k in range(n_samples):
                samples.append(f())

            # next substream
            stream_rstate = rng_mrg.ff_2p72(stream_rstate)

        # next stream
        curr_rstate = rng_mrg.ff_2p134(curr_rstate)

    samples = numpy.array(samples).flatten()
    assert numpy.allclose(samples, java_samples)
def test_consistency_GPU_parallel():
    '''Verify that the random numbers generated by GPU_mrg_uniform, in parallel,
    are the same as the reference (Java) implementation by L'Ecuyer et al.

    The test is skipped when cuda is not available.  For every stream, the
    states of its n_substreams substreams are packed into one device-side
    buffer, so that each call of the compiled function draws one value per
    substream.
    '''
    if not cuda_available:
        # `SkipTest` was never imported in this module; use the one exposed
        # by the already-imported unittest package (Python >= 2.7).
        raise unittest.SkipTest('Optional package cuda not available')
    # float32_shared_constructor is only imported when cuda_available is
    # true, hence the guard above.

    # FAST_COMPILE is replaced by FAST_RUN for this test.
    # NOTE(review): presumably FAST_COMPILE does not move the graph to the
    # GPU -- confirm.
    if config.mode == 'FAST_COMPILE':
        mode = 'FAST_RUN'
    else:
        mode = config.mode

    seed = 12345
    n_samples = 5
    n_streams = 12
    n_substreams = 7  # 7 samples will be drawn in parallel

    samples = []
    curr_rstate = numpy.array([seed] * 6, dtype='int32')

    for i in range(n_streams):
        stream_samples = []
        # One state per substream; each is derived from the previous one
        # with ff_2p72, then all are flattened into a single buffer.
        rstate = [curr_rstate.copy()]
        for j in range(1, n_substreams):
            rstate.append(rng_mrg.ff_2p72(rstate[-1]))
        rstate = numpy.asarray(rstate).flatten()
        # HACK - transfer these int32 to the GPU memory as float32
        # (reinterpret_cast)
        tmp_float_buf = numpy.frombuffer(rstate.data, dtype='float32')
        # Transfer to device
        rstate = float32_shared_constructor(tmp_float_buf)

        new_rstate, sample = rng_mrg.GPU_mrg_uniform.new(
            rstate, ndim=None, dtype='float32', size=(n_substreams,))
        rstate.default_update = new_rstate

        # Not really necessary, just mimicking
        # rng_mrg.MRG_RandomStreams' behavior
        sample.rstate = rstate
        sample.update = (rstate, new_rstate)

        # We need the sample back in the main memory
        cpu_sample = tensor.as_tensor_variable(sample)
        # Compile with the mode selected above; it used to be computed but
        # never passed, unlike in the serial GPU test.
        f = theano.function([], cpu_sample, mode=mode)

        for k in range(n_samples):
            stream_samples.append(f())

        # Rows are successive draws, columns are substreams: transpose so
        # that each substream's draws are contiguous once flattened.
        samples.append(numpy.array(stream_samples).T.flatten())

        # next stream
        curr_rstate = rng_mrg.ff_2p134(curr_rstate)

    samples = numpy.array(samples).flatten()
    assert numpy.allclose(samples, java_samples)
def test_rng0(): def test_rng0():
......
...@@ -639,7 +639,10 @@ def scan(fn, sequences=[], outputs_info=[], non_sequences=[], ...@@ -639,7 +639,10 @@ def scan(fn, sequences=[], outputs_info=[], non_sequences=[],
notshared_other_args_copies = [] notshared_other_args_copies = []
for non_seq in non_seqs: for non_seq in non_seqs:
if not isinstance(non_seq, SharedVariable): if not isinstance(non_seq, SharedVariable):
non_seq_copy = non_seq.type() if n_fixed_steps not in [-1,1]:
non_seq_copy = non_seq.type()
else:
non_seq_copy = non_seq
notshared_other_args += [non_seq] notshared_other_args += [non_seq]
notshared_other_args_copies += [non_seq_copy] notshared_other_args_copies += [non_seq_copy]
new_non_seqs += [non_seq_copy] new_non_seqs += [non_seq_copy]
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论