提交 b58fbb85 authored 作者: Frederic Bastien's avatar Frederic Bastien

merge

......@@ -32,7 +32,7 @@ The equivalent Theano code would be
# Symbolic description of the result
result,updates = theano.scan(fn = lambda x_tm1,A: x_tm1*A,\
info_outputs = T.ones_like(A),\
outputs_info = T.ones_like(A),\
non_sequences = A, \
n_steps = k)
......@@ -112,7 +112,7 @@ the Theano variables needed we construct our RNN as follows :
([x_vals, y_vals],updates) = theano.scan(fn = oneStep, \
sequences = dict(input = u, taps= [-4,-0]), \
info_outputs = [dict(initial = x0, taps = [-3,-1]),y0], \
outputs_info = [dict(initial = x0, taps = [-3,-1]),y0], \
non_sequences = [W,W_in_1,W_in_2,W_feedback, W_out])
# for second input y, scan adds -1 in output_taps by default
......@@ -155,7 +155,7 @@ the following:
sample = theano.tensor.vector()
values, updates = theano.scan( OneStep, info_outputs = sample, n_steps = 10 )
values, updates = theano.scan( OneStep, outputs_info = sample, n_steps = 10 )
gibbs10 = theano.function([sample], values[-1], updates = updates)
......
......@@ -340,7 +340,7 @@ class ProfileMode(Mode):
print "<fct name> <input name> <input type> <str input>"
for fct in fct_call.keys():
for i in fct.input_storage:
if i.type.dtype=='float64':
if hasattr(i.type, 'dtype') and i.type.dtype=='float64':
print fct.name, i.name, i.type, i
register_mode('PROFILE_MODE',ProfileMode())
......
......@@ -344,6 +344,8 @@ CudaNdarray_conv_valid(const CudaNdarray *img, const CudaNdarray * kern,
!work_complete) //conv_rows_stack2
{
// version 9:we preload the full kernel
// version 10: load only a few rows at a time.
int nb_row=1;
int max_threads=512;
int version_back = version;
......
......@@ -29,11 +29,17 @@ def debug(*args):
_logger.debug("DEBUG: "+' '.join(str(a) for a in args))
nvcc_path = 'nvcc'
nvcc_version = None
def is_nvcc_available():
"""Return True iff the nvcc compiler is found."""
try:
subprocess.call(['nvcc', '--version'], stdout=subprocess.PIPE,
stderr=subprocess.PIPE)
p = subprocess.Popen(['nvcc', '--version'], stdout=subprocess.PIPE,
stderr=subprocess.PIPE)
p.wait()
s = p.stdout.readlines()[-1].split(',')[1].strip().split()
assert s[0]=='release'
global nvcc_version
nvcc_version = s[1]
return True
except:
#try to find nvcc in cuda.root
......@@ -43,7 +49,7 @@ def is_nvcc_available():
nvcc_path = p
return True
else: return False
is_nvcc_available()#to set nvcc_path correctly.
is_nvcc_available()#to set nvcc_path correctly and get the version
def nvcc_module_compile_str(module_name, src_code, location=None, include_dirs=[], lib_dirs=[], libs=[],
preargs=[]):
......
......@@ -78,10 +78,12 @@ def _params_allgood(ishape, kshape, mode, subsample=(1,1), img_stride=(1,1), ker
kern = cuda_ndarray.CudaNdarray(npy_kern)
#we take the stride after the transfer as we make c_contiguous data on the GPU.
img=img[:,:,::img_stride[0],::img_stride[1]]
kern=kern[:,:,::kern_stride[0],::kern_stride[1]]
npy_img = npy_img[:,:,::img_stride[0],::img_stride[1]]
npy_kern = npy_kern[:,:,::kern_stride[0],::kern_stride[1]]
if img_stride!=(1,1):
img=img[:,:,::img_stride[0],::img_stride[1]]
npy_img = npy_img[:,:,::img_stride[0],::img_stride[1]]
if kern_stride!=(1,1):
kern=kern[:,:,::kern_stride[0],::kern_stride[1]]
npy_kern = npy_kern[:,:,::kern_stride[0],::kern_stride[1]]
t2 = None
rval = True
......@@ -265,12 +267,12 @@ def get_valid_shapes():
, ((60,20,12,12),(30,20,5,5), (1,1), (1,1), (1,1))#test_lenet_28 2 layers
, ((60,30,8,8),(20,30,5,5), (1,1), (1,1), (1,1))#test_lenet_28 bprop 1 full
, ((20,60,12,12),(30,60,8,8), (1,1), (1,1), (1,1))#test_lenet_28 bprop 2 valid
, ((1,60,28,28),(20,60,24,24), (1,1), (1,1), (1,1))#test_lenet_28 bprop 2 valid
# , ((1,60,28,28),(20,60,24,24), (1,1), (1,1), (1,1))#test_lenet_28 bprop 2 valid
, ((10,1,64,64),(20,1,7,7), (1,1), (1,1), (1,1))#test_lenet_64 1 layers
, ((10,20,29,29),(30,20,7,7), (1,1), (1,1), (1,1))#test_lenet_64 2 layers
, ((10,30,23,23),(20,30,7,7), (1,1), (1,1), (1,1))#test_lenet_64 full
, ((20,10,29,29),(30,10,23,23), (1,1), (1,1), (1,1))#test_lenet_64 bprop 1
, ((1,10,64,64),(20,10,58,58), (1,1), (1,1), (1,1))#test_lenet_64 bprop 2
# , ((20,10,29,29),(30,10,23,23), (1,1), (1,1), (1,1))#test_lenet_64 bprop 1
# , ((1,10,64,64),(20,10,58,58), (1,1), (1,1), (1,1))#test_lenet_64 bprop 2
]
return shapes
......
......@@ -49,7 +49,7 @@ MASK12 = numpy.int32(511) #2^9 - 1
MASK13 = numpy.int32(16777215) #2^24 - 1
MASK2 = numpy.int32(65535) #2^16 - 1
MULT2 = numpy.int32(21069)
NORM = 4.656612873077392578125e-10;
NORM = 4.656612873077392578125e-10; #1./2^31
A1p0 = numpy.asarray([[0, 4194304, 129], [1, 0, 0], [0, 1, 0]])
A2p0 = numpy.asarray([[32768, 0, 32769], [1, 0, 0], [0, 1, 0]])
......@@ -593,6 +593,7 @@ class MRG_RandomStreams(object):
return rval
def n_streams(self, size):
# TODO: a smart way of choosing the number of streams
if isinstance(size, (tuple, list)):
r = 1
for s in size:
......@@ -601,12 +602,7 @@ class MRG_RandomStreams(object):
return r/6 # chosen as fastest for rbm_benchmark
else:
return r
try:
rval = int(size)
assert rval > 0
return rval
except:
pass
print >> sys.stderr, "MRG_RandomStreams Can't determine #streams from size (%s), guessing 30*256"%str(size)
return 30*256
......@@ -616,7 +612,7 @@ class MRG_RandomStreams(object):
node_rstate.default_update = new_rstate
return sample
def uniform(self, size=None, low=0.0, high=1.0, ndim=None, dtype=config.floatX):
def uniform(self, size=None, low=0.0, high=1.0, ndim=None, dtype=config.floatX, nstreams=None):
"""
Sample a tensor of the given size whose elements are drawn from a
uniform distribution between low and high.
......@@ -625,8 +621,10 @@ class MRG_RandomStreams(object):
ndim may be a plain integer to supplement the missing
information.
"""
if nstreams is None:
nstreams = self.n_streams(size)
if self.use_cuda and dtype=='float32':
rstates = self.get_substream_rstates(self.n_streams(size))
rstates = self.get_substream_rstates(nstreams)
rstates = rstates.flatten()
# HACK - we use fact that int32 and float32 have same size to
# sneak ints into the CudaNdarray type.
......@@ -643,11 +641,11 @@ class MRG_RandomStreams(object):
u = self.pretty_return(node_rstate,
*GPU_mrg_uniform.new(node_rstate, ndim, dtype, size))
else:
node_rstate = shared(self.get_substream_rstates(self.n_streams(size)))
u = self.pretty_return(node_rstate,
node_rstate = shared(self.get_substream_rstates(nstreams))
u = self.pretty_return(node_rstate,
*mrg_uniform.new(node_rstate, ndim, dtype, size))
r = u * (high-low) + low
if u.type.broadcastable != r.type.broadcastable:
raise NotImplementedError( 'Increase the size to match the broadcasting pattern of `low` and `high` arguments')
return r
......@@ -664,7 +662,7 @@ class MRG_RandomStreams(object):
# second half our U2's. See Wikipedia page:
# http://en.wikipedia.org/wiki/Box%E2%80%93Muller_transform
n_samples = self.n_streams(size)
n_samples = numpy.prod(size)
evened = False
if n_samples % 2 == 1:
......@@ -684,8 +682,8 @@ class MRG_RandomStreams(object):
#normal_samples[n_samples/2:] = sqrt_ln_U1 * sin(2.0*numpy.pi*U2)
# so trying this instead
first_half = sqrt_ln_U1 * cos(2.0*numpy.pi*U2)
second_half = sqrt_ln_U1 * sin(2.0*numpy.pi*U2)
first_half = sqrt_ln_U1 * cos(2.0*cast(numpy.pi,dtype)*U2)
second_half = sqrt_ln_U1 * sin(2.0*cast(numpy.pi,dtype)*U2)
normal_samples = join(0, first_half, second_half)
final_samples = None
......
0.7353244530968368
0.6142074400559068
0.11007806099951267
0.6487741703167558
0.36619443260133266
0.2585685825906694
0.9489980279468
0.4309556516818702
0.12257590936496854
0.9760319022461772
0.6940806899219751
0.18046841165050864
0.003993193618953228
0.5351603352464736
0.02472442388534546
0.7705746139399707
0.8138928869739175
0.9650539481081069
0.24507411010563374
0.35767574002966285
0.4939101580530405
0.9027785388752818
0.27498403564095497
0.03848231676965952
0.3081609820947051
0.9062023567967117
0.009030417073518038
0.7953705741092563
0.5061718439683318
0.5975547162815928
0.5435514179989696
0.330895590595901
0.49919482320547104
0.9409166998229921
0.8276205519214272
0.5180770065635443
0.2319392478093505
0.36197659047320485
0.11120751267299056
0.5018561617471278
0.47852187464013696
0.7188052111305296
0.3030327311716974
0.6756376498378813
0.03624899685382843
0.34987151669338346
0.031225718092173338
0.06772322440519929
0.06820952938869596
0.9987128847278655
0.08330700965598226
0.9731874465942383
0.6345655219629407
0.7169904578477144
0.5793502484448254
0.7396790678612888
0.9926023166626692
0.7522463691420853
0.6768838302232325
0.3253784184344113
0.05375300580635667
0.4912636987864971
0.6485021142289042
0.3043024237267673
0.24868384934961796
0.8166692252270877
0.5274319797754288
0.31434731651097536
0.9961257497780025
0.3549888739362359
0.8423425843939185
0.21591948671266437
0.8698299624957144
0.17033040337264538
0.22816143138334155
0.11795765580609441
0.7024209997616708
0.15607220400124788
0.5493582566268742
0.5827712984755635
0.8592293248511851
0.785309090744704
0.6115233600139618
0.019046304281800985
0.2573754615150392
0.03130705002695322
0.6572857238352299
0.2033171127550304
0.5058645992539823
0.15793190989643335
0.6273676953278482
0.7285307059064507
0.265245848800987
0.6073522809892893
0.3896624594926834
0.27189663611352444
0.705508322454989
0.12823439668864012
0.39648046158254147
0.6584051586687565
0.07818163838237524
0.33628708589822054
0.20613654889166355
0.4277639244683087
0.5401185592636466
0.07513022050261497
0.4920963351614773
0.18214095244184136
0.3235122123733163
0.29958881670609117
0.7304665613919497
0.05146520072594285
0.2471711952239275
0.8797005712985992
0.5029069227166474
0.526974250562489
0.15968210343271494
0.4696163134649396
0.17607332626357675
0.362843859475106
0.7626461815088987
0.960180682130158
0.2536660563200712
0.710880630183965
0.28728525526821613
0.78940424695611
0.5242114691063762
0.8314367309212685
0.5898511232808232
0.015212591737508774
0.4944482510909438
0.06396882887929678
0.519745257217437
0.3558214954100549
0.04566589882597327
0.8368005948141217
0.979805170558393
0.7622401369735599
0.2578657674603164
0.5378834479488432
0.9926298237405717
0.4013678622432053
0.510077933780849
0.018817965406924486
0.21481098141521215
0.5357040031813085
0.8512061606161296
0.009026535786688328
0.27302876580506563
0.21162108704447746
0.5273029855452478
0.1086404686793685
0.14079083362594247
0.14331109775230289
0.8190496540628374
0.3947252375073731
0.28109811525791883
0.4066850380040705
0.9154577874578536
0.8929708409123123
0.13500721845775843
0.6328344400972128
0.5668322211131454
0.5448646773584187
0.5418433886952698
0.1141617177054286
0.15885689994320273
0.3867143443785608
0.5574855520389974
0.9173167692497373
0.22908265376463532
0.2047420055605471
0.05979115655645728
0.44121386017650366
0.9507057839073241
0.15352962678298354
0.23290937673300505
0.46427791472524405
8.519855327904224E-4
0.7947354763746262
0.6385304923169315
0.8696001935750246
0.6022149357013404
0.02299323584884405
0.5036068987101316
0.7541037476621568
0.9995524706318974
0.5888469088822603
0.3318097642622888
0.32492663664743304
0.6643895329907537
0.3656829949468374
0.4912424306385219
0.1900841724127531
0.5945985522121191
0.5709856003522873
0.35780346347019076
0.388774358201772
0.9446004652418196
0.14594348100945354
0.6250799335539341
0.5504232128150761
0.16380576323717833
0.7428167965263128
0.5522975320927799
0.655389194842428
0.47579632699489594
0.29743909696117043
0.6319712968543172
0.8178138644434512
0.2785301594994962
0.46813122322782874
0.2898342702537775
0.3287009159103036
0.12909299414604902
0.5859099281951785
0.1891166502609849
0.14497734932228923
0.5543341124430299
0.11846801871433854
0.8499364419840276
0.6603211951442063
0.35630465345457196
0.9680569358170033
0.6639338186942041
0.24408268369734287
0.030771974939852953
0.17226932244375348
0.7909302446059883
0.4327161009423435
0.6732332338578999
0.0849734228104353
0.7278832173906267
0.5536605608649552
0.7091806619428098
0.01754110073670745
0.8406045655719936
0.4815619965083897
0.0535086034797132
0.9874794147908688
0.07097038673236966
0.023544831201434135
0.42413365049287677
0.2970325672067702
0.48028060607612133
0.1990663455799222
0.6099434774369001
0.5050413520075381
0.7814605687744915
0.2650358658283949
0.5148864723742008
0.7807142282836139
0.0976667134091258
0.1516015767119825
0.6566055505536497
0.3946392172947526
0.8052488421089947
0.2964451564475894
0.07394864456728101
0.6961450576782227
0.01576960226520896
0.3434433783404529
0.08799878368154168
0.785557022318244
0.7494717631489038
0.45548726338893175
0.7672475459985435
0.5134695749729872
0.7000438082031906
0.49818582693114877
0.4293400440365076
0.9961911663413048
0.016769078094512224
0.013044610153883696
0.8661804771982133
0.7819683295674622
0.33438047766685486
0.966121535282582
0.7259743176400661
0.9887824659235775
0.9494950002990663
0.037431647535413504
0.8268285538069904
0.7355263698846102
0.3120658891275525
0.3588241692632437
0.471130283549428
0.7047113911248744
0.980073744431138
0.6762627908028662
0.869295812677592
0.9070576094090939
0.7852784115821123
0.16342713963240385
0.06330870278179646
0.6165989111177623
0.342802997212857
0.8414176292717457
0.6921333004720509
0.2594374935142696
0.4386491202749312
0.555369642097503
0.3660965468734503
0.6484139142557979
0.9005299550481141
0.25335891311988235
0.23852926725521684
0.9044205779209733
0.8694673446007073
0.46783560374751687
0.34727911837399006
0.19556640228256583
0.8798208390362561
0.3131108647212386
0.6312824171036482
0.5722001581452787
0.9441223978064954
0.7707183314487338
0.17464511329308152
0.08897313429042697
0.5044040409848094
0.5735817537643015
0.4467783076688647
0.19051036844030023
0.4578995378687978
0.6395204453729093
0.460110604763031
0.576092894654721
0.7038368303328753
0.5555814192630351
0.4171535111963749
0.8905360852368176
0.12811446748673916
0.6814800254069269
0.8502416326664388
0.12028768053278327
0.16715052351355553
0.3563938206061721
0.049810963682830334
0.27328392397612333
0.2407418810762465
0.6631906591355801
0.674483266659081
0.10489491606131196
0.04698043642565608
0.0812066881917417
0.312124056275934
0.6798701109364629
0.7286937129683793
0.9784366562962532
0.5650205011479557
0.833059043623507
0.8976074242964387
0.9441233519464731
0.6146679543890059
0.9019614770077169
0.5529476394876838
0.7665416682139039
0.39598167687654495
0.26307358546182513
0.14862705068662763
0.9521124185994267
0.17644333699718118
0.7684473628178239
0.4274347145110369
0.6102834036573768
0.9328651092946529
0.058630190789699554
0.04729347629472613
0.9597438890486956
0.6761234584264457
0.21832499839365482
0.20707347383722663
0.7274158899672329
0.9477886455133557
0.7821800266392529
0.07305240212008357
0.40399201214313507
0.22684293938800693
0.053185423370450735
0.330069282092154
0.6862794999033213
0.7821815954521298
0.22617859859019518
0.8118352359160781
0.015444065444171429
0.6732339109294116
0.9980663135647774
0.8833195753395557
0.21191661106422544
0.32638366147875786
0.5747208022512496
0.07515769777819514
0.02952938713133335
0.4980746121145785
0.8762881984002888
0.17386484891176224
0.10696181375533342
0.5474299816414714
0.016154434997588396
0.6960771018639207
0.47133891424164176
0.9015861176885664
0.782880718819797
0.6602211343124509
0.6578835439868271
0.6049443730153143
0.17169494135305285
0.9915955001488328
0.10519243823364377
0.37815978936851025
0.20879409136250615
0.45666090911254287
0.6456936108879745
0.684759714640677
0.8762755445204675
0.8020628895610571
0.1663151141256094
0.31246642768383026
0.18852565623819828
......@@ -366,7 +366,6 @@ def scan(fn, sequences=[], outputs_info=[], non_sequences=[],
updates rules for all shared variables used in the scan
operation; this dictionary should be passed to ``theano.function``
"""
# General observation : this code is executed only once, at creation
# of the computational graph, so we don't yet need to be smart about
# anything ( to speed things up)
......@@ -404,7 +403,6 @@ def scan(fn, sequences=[], outputs_info=[], non_sequences=[],
# compute number of sequences and number of outputs
n_seqs = len(seqs)
n_outs = len(outs_info)
# initialize the inplace map, sequences map and
# outputs map
''' Details:
......@@ -629,10 +627,27 @@ def scan(fn, sequences=[], outputs_info=[], non_sequences=[],
# remove shared variables from the non sequences list
# such that we can compile the function ( the user has the option to add them when writing
# scan, because in some situations this might make the code more readable)
# Also duplicate the list of non-sequence arguments so that it contains copies of the
# non-shared inputs ( this fixes the case when one of these inputs has a default update
# attached to it that belongs to some shared random stream ).
#
# Note : In that case, scan assumes that you do not want to draw new numbers at every call ( you
# would have made the internal function do that explicitly if you wanted to) but rather to
# use that initial draw as a matrix of values
new_non_seqs = []
notshared_other_args = []
notshared_other_args_copies = []
for non_seq in non_seqs:
if not isinstance(non_seq, SharedVariable):
if n_fixed_steps not in [-1,1]:
non_seq_copy = non_seq.type()
else:
non_seq_copy = non_seq
notshared_other_args += [non_seq]
notshared_other_args_copies += [non_seq_copy]
new_non_seqs += [non_seq_copy]
else:
new_non_seqs += [non_seq]
# add only the not shared variables to the arguments of the dummy
# function [ a function should not get shared variables as input ]
......@@ -640,10 +655,10 @@ def scan(fn, sequences=[], outputs_info=[], non_sequences=[],
for arg in args:
if not isinstance(arg, SharedVariable):
dummy_args += [arg]
dummy_args += notshared_other_args
dummy_args += notshared_other_args_copies
# arguments for the lambda expression that gives us the output
# of the inner function
args += non_seqs
args += new_non_seqs
# when we apply the lambda expression we get a mixture of update rules
# and outputs that needs to be separated
......@@ -704,14 +719,6 @@ def scan(fn, sequences=[], outputs_info=[], non_sequences=[],
# make the compilation as fast as possible by not applying any optimization
# or conversion to C [ note this region is not important for performance
# so we can do stuff as unoptimal as we wish ]
'''
Why did I use gof.graph.inputs to pick the inputs here ??
dummy_f = function(filter(lambda x: isinstance(x,gof.Variable) and \
not isinstance(x,SharedVariable) and not isinstance(x,gof.Constant), \
reversed(gof.graph.inputs(dummy_args))), outputs, updates = updates, mode = \
compile.mode.Mode(linker = 'py', optimizer = None) )
'''
if n_fixed_steps in [-1,1]:
''' We do have a special case here, namely is so might happen that
whatever we have in dummy_args is not sufficient to compile the
......@@ -726,6 +733,7 @@ def scan(fn, sequences=[], outputs_info=[], non_sequences=[],
not isinstance(x,SharedVariable) and not isinstance(x,gof.Constant), \
gof.graph.inputs(dummy_args)), outputs, updates = updates, mode = compile.mode.Mode(linker='py',optimizer=None))
else:
dummy_f = function(filter(lambda x: isinstance(x, gof.Variable) and \
not isinstance(x,SharedVariable) and not isinstance(x,gof.Constant), \
dummy_args), outputs, updates = updates, mode = compile.mode.Mode(linker='py',optimizer=None))
......@@ -859,22 +867,8 @@ def scan(fn, sequences=[], outputs_info=[], non_sequences=[],
else:
# If we do not actually need scan
for pos, inner_out in enumerate(inner_fn_outs):
# check if we are suppose to return just the last step
# we treat this case differently because the tensor we return
# in this case is different (it has one dimension less)
if return_steps.has_key(pos):
if return_steps[pos] != 1:
# if we return more then one step, we need to add
# one more dimension to our output and make it
# unbroadcastable
inner_fn_outs[pos] = tensor.unbroadcast(
tensor.shape_padleft(inner_out),0)
else:
# same if we do not have any information about how many
# steps we should return (to read return everything in this
# case
inner_fn_outs[pos] = tensor.unbroadcast(
tensor.shape_padleft(inner_out),0)
if isinstance(inner_out.type, tensor.TensorType):
inner_fn_outs[pos] = tensor.unbroadcast( tensor.shape_padleft(inner_out),0)
values = inner_fn_outs
......
......@@ -83,93 +83,6 @@ def conv2d(input, filters, image_shape=None, filter_shape=None,
return op(input, filters)
def conv2d_offset(input, filters, image_shape=None, filter_shape=None,
                  border_mode='valid', subsample=(1, 1), offsets=None,
                  **kargs):
    """
    Build a graph for nnet convolutions with subsampling and offsetting.

    Refer to conv2d for a general explanation of the context.

    *Subsampling* means, we only compute convolutions at a fraction of the
    sites. *Offsetting* convolutions in the context of subsampling means not
    computing all of them at the same sites (starting in upper left corner).
    This function allows offsets to be used.

    Most parameters are shared with conv2d, except:

    :param image_shape: 4-element shape of `input`; required here because
        the cropped input size is derived from entries 2 and 3.
    :param filter_shape: 4-element shape of `filters`; required here because
        entry 0 (number of filters) is split across the offsets.
    :param offsets: list of 2-tuples that specify sites wrt the subsampling
        scheme. The filters are split evenly across the different sites. For
        example with subsample (2,2) we can use the sites [(0,0), (0,1), (1,0),
        (1,1)]. None or an empty list is interpreted as all offsets.

    :return: the list ``[output, outshp]`` where ``output`` is the per-offset
        conv2d results joined on axis 1 and ``outshp`` is the value returned
        by ``ConvOp.getOutputShape`` for the cropped image.

    :raises ValueError: if there is no subsampling, a shape is missing, the
        subsampling exceeds the filter shape, an offset lies outside
        ``[0, subsample)``, or the number of filters is not a multiple of
        the number of offsets.
    """
    # There should be subsampling for offsets to make any sense.
    if not (subsample[0] > 1 or subsample[1] > 1):
        raise ValueError('conv2d_offset requires subsampling.')

    # Both shapes are indexed below; fail with a clear message instead of
    # an opaque "NoneType is not subscriptable".
    if image_shape is None or filter_shape is None:
        raise ValueError(
            'conv2d_offset requires image_shape and filter_shape.')

    # Haven't thought about this case.
    if numpy.any(numpy.array(subsample) > filter_shape[2:]):
        raise ValueError('conv2d_offset subsample greater than filter shape. Not supported?')

    # No offsets specified is interpreted as all offsets.
    if offsets is None or len(offsets) == 0:
        offsets = [(i, j)
                   for i in range(subsample[0])
                   for j in range(subsample[1])]
    else:
        # Work on a private copy so the caller's sequence is never touched.
        offsets = list(offsets)

    # Every site must lie inside one subsampling cell; a negative offset
    # would silently turn into a from-the-end slice further down.
    for offset in offsets:
        if not (0 <= offset[0] < subsample[0] and
                0 <= offset[1] < subsample[1]):
            raise ValueError('conv2d_offset: invalid offset sites.')

    # Find the largest offsets in both image dimensions. Used to determine
    # the size of the image used.
    max_offset = [max(offset[0] for offset in offsets),
                  max(offset[1] for offset in offsets)]

    # Determine the reduced size of input so all feature maps get an input
    # of the same size.
    sub_image_shape = list(image_shape)
    sub_image_shape[2] -= max_offset[0]
    sub_image_shape[3] -= max_offset[1]

    # Determine number of filters per offset position.
    if (filter_shape[0] % len(offsets)) != 0:
        raise ValueError(
            'conv2d_offset: invalid number of filters wrt offsets '
            '(nfilts %d, noffsets %d).' % (filter_shape[0], len(offsets)))
    # Divisibility was checked above, so floor division is exact.
    n_filters = filter_shape[0] // len(offsets)
    sub_filter_shape = list(filter_shape)
    sub_filter_shape[0] = n_filters

    # Call conv2d at each offset using same fraction of kernels.
    outputs = []
    for i, offset in enumerate(offsets):
        # Crop the input so all offsets get an input of the same size.
        sub_input = input[:, :,
                          offset[0]:sub_image_shape[2] + offset[0],
                          offset[1]:sub_image_shape[3] + offset[1]]
        # Grab part of the filters.
        sub_filters = filters[i * n_filters:(i + 1) * n_filters]
        out = conv2d(sub_input, sub_filters, sub_image_shape,
                     sub_filter_shape, border_mode=border_mode,
                     subsample=subsample, **kargs)
        outputs.append(out)

    # Join the outputs on axis 1 (a single output needs no join).
    if len(outputs) > 1:
        output = tensor.join(1, *outputs)
    else:
        output = outputs[0]

    outshp = ConvOp.getOutputShape(sub_image_shape[2:], filter_shape[2:],
                                   subsample, border_mode)
    return [output, outshp]
class ConvOp(Op):
"""
......
......@@ -1888,6 +1888,27 @@ def local_log1p(node):
else:
return _fill_chain(T.log1p(T.add(*nonconsts)), scalar_inputs)
#TODO: in canonicalize, change log10 and log2 -> log
@register_stabilize
@gof.local_optimizer([T.log])
def local_log_add(node):
    """
    Rewrite log(exp(x) + exp(y)) into a form that does not overflow.

    Derivation, supposing x >= y:
        log(exp(x) + exp(y))
        log(exp(x) * (1 + exp(y)/exp(x)))
        x + log(1 + exp(y)/exp(x))
        x + log1p(exp(y)/exp(x))
        x + log1p(exp(y-x))

    :param node: the Apply node being examined.
    :return: a one-element list holding the replacement expression when
        `node` is a log of a sum of exactly two exp terms, None otherwise.
    """
    if not (node.op == T.log):
        return
    z = node.inputs[0]
    if not (z.owner and z.owner.op == T.add):
        return
    zi = z.owner.inputs
    if len(zi) != 2:
        # The rewrite below sums the (p - max) terms before exponentiating;
        # that sum equals (min - max) only when there are exactly two terms,
        # and T.maximum is called with the unpacked terms. Leave any other
        # sum untouched rather than emit a wrong or failing graph.
        return
    pre_exp = [x.owner.inputs[0] for x in zi
               if x.owner and x.owner.op == T.exp]
    if len(pre_exp) != len(zi):
        # not all arguments to add are exp(<something>)
        return
    max_pre = T.maximum(*pre_exp)
    # One of the two differences is zero, so their sum is min - max <= 0
    # and the exp cannot overflow.
    return [max_pre + T.log1p(T.exp(T.add(*[p - max_pre for p in pre_exp])))]
def add_calculate(num, denum, aslist = False, out_type=None):
#TODO: make sure that this function and mul_calculate are similar
......
......@@ -930,6 +930,36 @@ def test_log1p():
theano.printing.debugprint(f)
assert [node.op for node in f.maker.env.toposort()] == [T.log1p]
def test_log_add():
    """Check that log of a sum of exps is evaluated without overflowing."""
    # FAST_COMPILE is swapped for FAST_RUN (presumably because the rewrite
    # under test is not applied in FAST_COMPILE -- confirm); fusion is
    # excluded from whichever mode ends up being used.
    configured = theano.config.mode
    mode_name = 'FAST_RUN' if configured == 'FAST_COMPILE' else configured
    mode = compile.mode.get_mode(mode_name).excluding('fusion')

    # check some basic cases
    a = dvector()
    b = dvector()
    two_terms = function([a, b], T.log(T.exp(a) + T.exp(b)), mode=mode)
    theano.printing.debugprint(two_terms)
    # causes overflow if handled incorrectly
    print(two_terms([10000], [10000]))
    assert numpy.allclose(two_terms([10000], [10000]),
                          10000 + numpy.log1p(1))

    # test that it also works with more than two args, (this currently fails)
    a = dvector()
    b = dvector()
    four_terms = function(
        [a, b],
        T.log(T.exp(a) + T.exp(b) + T.exp(a - b) + T.exp(a + b)),
        mode=mode)
    theano.printing.debugprint(four_terms)
    # causes overflow if handled incorrectly
    print(four_terms([10000], [10000]))
    assert numpy.allclose(four_terms([10000], [10000]), 20000)

    #TODO: test that the optimization works in the presence of broadcasting.

    #TODO: (write and) test that the optimization works with Sum in addition to working with Add.
class test_local_subtensor_unary(unittest.TestCase):
def test0(self):
......
......@@ -922,7 +922,24 @@ class T_Scan(unittest.TestCase):
assert len(analytic_grad[0]) == 3
def test_draw_as_input_to_scan(self):
    """A random draw passed to scan as a non-sequence is reused at every
    step of one call, while separate calls yield different draws."""
    streams = theano.tensor.shared_randomstreams.RandomStreams(123)
    probs = theano.tensor.matrix('x')
    draw = streams.binomial(size=probs.shape, p=probs)
    # The inner function just returns the non-sequence, for two steps.
    repeated, scan_updates = theano.scan(lambda d: d,
                                         non_sequences=draw,
                                         n_steps=2)
    compiled = theano.function([probs], [draw, repeated],
                               updates=scan_updates)

    np_rng = numpy.random.RandomState(utt.fetch_seed())
    probs_val = np_rng.uniform(size=(10, 10))
    draw_a, repeated_a = compiled(probs_val)
    draw_b, repeated_b = compiled(probs_val)

    # Within one call both scan steps must equal that call's draw ...
    assert numpy.allclose([draw_a, draw_a], repeated_a)
    assert numpy.allclose([draw_b, draw_b], repeated_b)
    # ... and the two calls must not return the same draw.
    assert not numpy.allclose(draw_a, draw_b)
if __name__ == '__main__':
unittest.main()
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论