提交 9d786083 authored 作者: Frederic Bastien's avatar Frederic Bastien

imported patch dx

上级 544ab6c3
......@@ -8,7 +8,7 @@ def getFilterOutShp(inshp, kshp, (dx,dy)=(1,1), mode='valid'):
s = -1 if mode=='valid' else 1
inshp, kshp = N.array(inshp), N.array(kshp)
return N.int64(N.ceil((inshp[1:] + s*kshp - s*1)/\
N.array([dy,dx], dtype='float')))
N.array([dx,dy], dtype='float')))
class ConvOp(Op):
"""
......@@ -55,9 +55,12 @@ class ConvOp(Op):
else:
print "OPTIMISATION WARNING: in ConvOp.__init__() unroll_kern(%s) should be 0 or a multiple of nkern(%s)We revert it to 1. This won't change the result, but may make it slower."%(str(self.unroll_kern),str(self.nkern))
self.unroll_kern=1
if self.dx!=1 or self.dy!=1:
print "Warning, dx!=1 or dy!=1 only supported in python mode!"
raise NotImplementedError()
print "OPTIMISATION WARNING: in ConvOp.__init__() unroll_kern(%s) should be 0 or a multiple of nkern(%s)We revert it to 1. This won't change the result, but may make it slower."%(str(self.unroll_kern),str(self.nkern))
if (self.dx!=1 or self.dy!=1) and self.unroll_batch==0 and self.unroll_kern==0:
print "WARNING: dx!=1 or dy!=1 is only supported with unrolling! We will unroll by 1"
self.unroll_kern=1
self.unroll_batch=1
self.outshp = getFilterOutShp(self.imshp, kshp, (dx,dy), output_mode)
self.out_mode = output_mode
if not self.out_mode in ["valid", "full"]:
......@@ -132,6 +135,8 @@ class ConvOp(Op):
* will crash if filter the same size as input image
"""
assert self.dx==1 and self.dy==1#We didn't implemented the grad for that case. Can this be done?
####### Determine gradient on kernels ########
if inputs.ndim == 3:
inputs = tensor.shape_padleft(inputs,1)
......@@ -846,8 +851,12 @@ if ((!%(z)s)
}
int Os[2];
if (mode == FULL) {Os[0] = dim_im[0]+dim_ker[0]-1; Os[1] = dim_im[1]+dim_ker[1]-1;}
else {Os[0] = dim_im[0]-dim_ker[0]+1; Os[1] = dim_im[1]-dim_ker[1]+1;}
Os[0]=%(self_outshp0)s;
Os[1]=%(self_outshp1)s;
//I keep the formula to calculte Os in case we need it in the futur.
//if (mode == FULL) {Os[0] = (int)ceil((dim_im[0]+dim_ker[0]-1)/float(%(self_dx)s)); Os[1] = ceil((dim_im[1]+dim_ker[1]-1)/float(%(self_dy)s));}
//else {Os[0] = (int)ceil((dim_im[0]-dim_ker[0]+1)/float(%(self_dx)s)); Os[1] = (int)ceil((dim_im[1]-dim_ker[1]+1)/float(%(self_dy)s));}
for(int b=0;b< %(self_bsize)s ;b+=%(unroll_bsize)s){
for(int n_kern=0;n_kern<%(self_nkern)s;n_kern+=%(unroll_ksize)s){
......@@ -868,12 +877,14 @@ for(int b=0;b< %(self_bsize)s ;b+=%(unroll_bsize)s){
int new_m;
for (int m=0; m < Os[0]; m++) {
for (int iter_m=0; iter_m < Os[0]; iter_m++) {
// Reposition index into input image based on requested output size
if (mode == FULL) new_m = m ;
else new_m = (m+dim_ker[0]-1);
int pos_m = iter_m*%(self_dx)s;//The position of the patch in the image
if (mode == FULL) new_m = pos_m ;
else new_m = (pos_m+dim_ker[0]-1);
for (int n=0; n < Os[1]; n++) { // loop over columns
for (int iter_n=0; iter_n < Os[1]; iter_n++) { // loop over columns
int pos_n=iter_n*%(self_dy)s;
"""%d
ret+=my_dup("%(type)s sum%(unroll_iter)s=0;", unroll_bsize*unroll_ksize)
ret+="""
......@@ -897,7 +908,7 @@ for(int b=0;b< %(self_bsize)s ;b+=%(unroll_bsize)s){
}else{
//do the part where kernel is to the right of the img
int k=0,max_k=max((int)(n-dim_im[1])+1,0);
int k=0,max_k=max((int)(pos_n-dim_im[1])+1,0);
if(fill_value!=0){
for(k=0;k<max_k;k++){
......@@ -908,11 +919,11 @@ for(int b=0;b< %(self_bsize)s ;b+=%(unroll_bsize)s){
}else {k=max_k;}
//do the part where the kernel is on the img
max_k=min(n+1,(int)dim_ker[1]);
max_k=min(pos_n+1,(int)dim_ker[1]);
"""%d
ret+=my_dup("const %(type)s * idx_in%(unroll_iter)s=&in%(unroll_iter)s[ind0*dim_im[1]];", unroll_bsize)
ret+="""
for (int ind1=n-k; k<max_k; k++,ind1--) {
for (int ind1=pos_n-k; k<max_k; k++,ind1--) {
"""%d
ret+=my_dup2("sum%(unroll_iter)s+= idx_hvals%(unroll_kiter)s[k] * idx_in%(unroll_biter)s[ind1];")
......@@ -931,7 +942,7 @@ for(int b=0;b< %(self_bsize)s ;b+=%(unroll_bsize)s){
ret+=my_dup("const %(type)s* idx_in%(unroll_iter)s=&in%(unroll_iter)s[ind0*dim_im[1]];", unroll_bsize)
ret+=my_dup("const %(type)s* idx_hvals%(unroll_iter)s=&hvals%(unroll_iter)s[j*dim_ker[1]];",unroll_ksize)
ret+="""
int new_n = (n+dim_ker[1]-1);
int new_n = (pos_n+dim_ker[1]-1);
for (int k=0,last=new_n; k < dim_ker[1]; k++,last--) {
"""%d
......@@ -942,7 +953,7 @@ for(int b=0;b< %(self_bsize)s ;b+=%(unroll_bsize)s){
}//for j
"""%d
ret+=my_dup("out%(unroll_iter)s[m*dim_zz[1]+n] %(affectation)s sum%(unroll_iter)s;", unroll_bsize*unroll_ksize)
ret+=my_dup("out%(unroll_iter)s[iter_m*dim_zz[1]+iter_n] %(affectation)s sum%(unroll_iter)s;", unroll_bsize*unroll_ksize)
ret+="""
}//for n
}//for m
......
......@@ -90,16 +90,18 @@ def exec_multilayer_conv_nnet(conv_mode, ss, bsize, imshp, kshps, nkerns, unroll
####### test with new sp.convolve2 function ######
time1 = time.time()
hid, outshp2 = convolve2(kern, kshp, nkern, img, imshp,
bsize, (1,1), mode=conv_mode)
bsize, (ss[0],ss[1]), mode=conv_mode)
propup = function([kern, img], hid)
propup1 = function([kern, img], hid,mode=Mode(linker="py"))
hidval = propup(w_flip.reshape(nkern,-1), imgval.reshape(bsize,-1))
hidval = hidval.reshape(bsize,nkern,outshp2[-2],outshp2[-1])[:,:,::ss[0],::ss[1]]
hidval = hidval.reshape(bsize,nkern,outshp2[-2],outshp2[-1])
# hidval = hidval[:,:,::ss[0],::ss[1]]
hidval = hidval.reshape(bsize, -1)
for i in range(repeat):
hidval1 = propup1(w_flip.reshape(nkern,-1), imgval.reshape(bsize,-1))
hidval1 = hidval1.reshape(bsize,nkern,outshp2[-2],outshp2[-1])[:,:,::ss[0],::ss[1]]
hidval1 = hidval1.reshape(bsize,nkern,outshp2[-2],outshp2[-1])
# hidval1 = hidval1[:,:,::ss[0],::ss[1]]
hidval1 = hidval1.reshape(bsize, -1)
assert (N.abs(hidval-hidval1)<1e-5).all()
......@@ -113,7 +115,7 @@ def exec_multilayer_conv_nnet(conv_mode, ss, bsize, imshp, kshps, nkerns, unroll
hidval1=outval.copy()
# ConvOp
conv_op = ConvOp(imshp, kshp, nkern, bsize, 1,1, conv_mode, unroll_batch=unroll_batch, unroll_kern=unroll_kern)(inputs4, kerns4)
conv_op = ConvOp(imshp, kshp, nkern, bsize, ss[0],ss[1], conv_mode, unroll_batch=unroll_batch, unroll_kern=unroll_kern)(inputs4, kerns4)
l1shp=N.hstack((nkern,
getFilterOutShp(imshp, kshp, ss, conv_mode)))
propup2 = function([inputs4, kerns4], conv_op)
......@@ -122,14 +124,14 @@ def exec_multilayer_conv_nnet(conv_mode, ss, bsize, imshp, kshps, nkerns, unroll
time1 = time.time()
for i in range(repeat):
hidval2_ = propup2(imgval,w_flip)
hidval2 = hidval2_[:,:,0::ss[0],0::ss[1]]
hidval2 = hidval2_#[:,:,0::ss[0],0::ss[1]]
tctot += time.time() - time1
if conv_op_py:
time1 = time.time()
for i in range(repeat):
hidval3_ = propup3(imgval,w_flip)
hidval3 = hidval3_[:,:,0::ss[0],0::ss[1]]
hidval3 = hidval3_#[:,:,0::ss[0],0::ss[1]]
tpytot += time.time() - time1
assert (N.abs(hidval2-hidval3)<1e-5).all()
else:
......@@ -235,7 +237,7 @@ class TestConvOp(unittest.TestCase):
# compute with new convolve2 (no timing info)
output4, outshp4 = convolve2(kerns, kshp, nkern, input,\
imshp, bsize, (1,1), bias=bias, mode=conv_mode)
imshp, bsize, (ss[0],ss[1]), bias=bias, mode=conv_mode)
# print 'output4', output4
ttime1 = time.time()
......@@ -244,7 +246,7 @@ class TestConvOp(unittest.TestCase):
# print 'out4', out4, img1d, filtersflipped
tconv2 += [time.time() - ttime1]
out4 = out4.reshape(bsize, nkern, outshp4[1], outshp4[2])
out4 = out4[:,:,0::ss[0],0::ss[1]]
out4 = out4#[:,:,0::ss[0],0::ss[1]]
out4 = out4.reshape(bsize, -1)
# compute with ConvOp
......@@ -252,18 +254,18 @@ class TestConvOp(unittest.TestCase):
inputs=dmatrix3()
kerns3=dmatrix3()
bia=T.dscalar()
conv_op = ConvOp(imshp, kshp, nkern, bsize, 1,1, conv_mode)(inputs, kerns3)
conv_op = ConvOp(imshp, kshp, nkern, bsize, ss[0],ss[1], conv_mode)(inputs, kerns3)
f2 = function([inputs, kerns3], conv_op, mode=Mode(linker="c"))
f3 = function([inputs, kerns3], conv_op, mode=Mode(linker="py"))
ttime1 = time.time()
out2_ = f2(img2d, filtersflipped)
out2__ = out2_[:,:,0::ss[0],0::ss[1]]
out2__ = out2_#[:,:,0::ss[0],0::ss[1]]
tconvop += [time.time() - ttime1]
out2___ = out2__.copy()
out2 = out2___ + biasvals.reshape(1,nkern,1,1)
out3_ = f3(img2d, filtersflipped)
out3__ = out3_[:,:,0::ss[0],0::ss[1]]
out3__ = out3_#[:,:,0::ss[0],0::ss[1]]
out3___ = out3__.copy()
out3 = out3___ + biasvals.reshape(1,nkern,1,1)
assert (N.abs(out2_-out3_)<1e-5).all()
......@@ -303,6 +305,7 @@ class TestConvOp(unittest.TestCase):
def test_multilayer_conv(self):
# fixed parameters
# test multiple configurations at the same time
bsizes = [6,6] # batch size
imshp_starts = [(1,13,14),(1,4,3)]
kshpss = ([[5,6],[7,4]],[[2,2],[2,2]])
......@@ -311,6 +314,7 @@ class TestConvOp(unittest.TestCase):
convmodes = ['valid','full']
do_convolve2=True
unroll = [(0,0),(1,1),(2,2),(3,2)]#(batch,kern)
do_speed_test = False
# TODO: this version shows a bug that was fixed;
# the test for it is included in the test above.
......@@ -319,15 +323,6 @@ class TestConvOp(unittest.TestCase):
# nkerns = [2,2] # per output pixel
# ssizes = [(1,1),(2,2)]#2,2)]
#test speed
# bsize = 10 # batch size
# imshp_start = (1,50,49)#un square shape to test more corner case.
# kshps = ([11,12],[12,11])#un square shape to test more corner case.
# nkerns = [20,20] # per output pixel
# ssizes = [(1,1),]#(1,1)]#(2,2) bugged
# convmodes = ['valid','full']
# do_convolve2=False
N.set_printoptions(threshold=N.nan)
# symbolic stuff
......@@ -338,7 +333,7 @@ class TestConvOp(unittest.TestCase):
for i in range(len(kshpss)):
assert len(kshpss[i])==len(nkernss[i])==len(kerns)
if False:
if do_speed_test:
# calculate the speed-up of different combinations of unroll
# set the parameters to the same values you will try.
......@@ -420,9 +415,47 @@ class TestConvOp(unittest.TestCase):
def test_ConvOpGrad(self):
<<<<<<< /u/bastienf/repos/Theano.ConvOp.dx/theano/sandbox/test_conv.py
"""
test the gradient in float and double
"""
=======
nkern = 3
bsize = 2
imgs = T.dmatrix('imgs')
kerns = T.dmatrix('kerns')
kshps = [(3,3), (5,5)]
for mode in 'valid', 'full':
for imshp in (5,5),(2,3,3),(3,6,6): # (12,10), (3,12,11):
# 'full' mode should support kernels bigger than the input
if mode == 'valid' and (kshps[0] > imshp[1]):
continue
visdim = 1 if len(imshp)!=3 else imshp[0]
for kshp in kshps:
imgvals = N.random.random(N.hstack((bsize,imshp)))
# print 'imgvals.shape = ', imgvals.shape
imgvals = imgvals.reshape(bsize,-1)
if visdim == 1:
kernvals = N.random.rand(nkern,kshp[0],kshp[1])
else:
kernvals = N.random.rand(nkern,visdim,kshp[0],kshp[1])
kernvals = kernvals.reshape(nkern,-1)
def testf(imgs, kerns):
out, outshp = convolve2(kerns, kshp, nkern, imgs,
imshp, bsize, mode=mode)
return out
try:
utt.verify_grad(testf, [imgvals, kernvals])
except NotImplementedError, e:
print e
def test_ConvOpGrad32(self):
nkern = 4
bsize = 3
types = ["float32", "float64"]
......@@ -468,19 +501,22 @@ class TestConvOp(unittest.TestCase):
tol=None if typ!="float32" else 0.16)
if __name__ == '__main__':
# t = TestConvOp("test_convolution")
t = TestConvOp("test_convolution")
# t.test_convolution()
# t.test_multilayer_conv()
t.test_multilayer_conv()
# from theano.tests import main
# main("test_sp")
bsize = 20 # batch size
imshp_start = (1,100,100)#un square shape to test more corner case.
kshps = ([11,12],[12,11])#un square shape to test more corner case.
nkerns = [20,20] # per output pixel
ssizes = [(1,1),]#(1,1)]#(2,2) bugged
convmodes = ['valid','full']
unroll_batch = 5
unroll_kern = 2
ctot=0
tctot, tpytot, ntot = exec_multilayer_conv_nnet(convmodes[1], ssizes[0], bsize, imshp_start, kshps, nkerns, unroll_batch=unroll_batch, unroll_kern=unroll_kern, validate=False, do_print=False,repeat=5)
print "total exec time %.3fs"%tctot
if False:
#used to launch 8 jobs at the same time.
bsize = 20 # batch size
imshp_start = (1,100,100)#un square shape to test more corner case.
kshps = ([11,12],[12,11])#un square shape to test more corner case.
nkerns = [20,20] # per output pixel
ssizes = [(1,1),]#(1,1)]#(2,2) bugged
convmodes = ['valid','full']
unroll_batch = 5
unroll_kern = 2
ctot=0
tctot, tpytot, ntot = exec_multilayer_conv_nnet(convmodes[1], ssizes[0], bsize, imshp_start, kshps, nkerns, unroll_batch=unroll_batch, unroll_kern=unroll_kern, validate=False, do_print=False,repeat=5)
print "total exec time %.3fs"%tctot
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论