提交 ee1dbd1c authored 作者: Frederic Bastien's avatar Frederic Bastien

in ConvOp.__init__, if the unroll value are not good, we try to find a good one.…

in ConvOp.__init__, if the unroll values are not good, we try to find good ones. Otherwise we set them to 1 and print an OPTIMISATION WARNING. We do this because it is probably safer to fall back to default values for the optimisation parameters when they are bad and keep executing, than to fail outright.
上级 96b7efa1
...@@ -43,9 +43,17 @@ class ConvOp(Op): ...@@ -43,9 +43,17 @@ class ConvOp(Op):
self.unroll_kern=unroll_kern self.unroll_kern=unroll_kern
if self.unroll_batch>0 and self.bsize % self.unroll_batch!=0: if self.unroll_batch>0 and self.bsize % self.unroll_batch!=0:
raise Exception("unroll_batch(%s) should be 0 or a multiple of bsize(%s)"%(str(self.unroll_batch),str(self.bsize))) if self.bsize<self.unroll_batch:
self.unroll_batch = self.bsize
else:
self.unroll_batch=1
print "OPTIMISATION WARNING: in ConvOp.__init__() unroll_batch(%s) must be 0 or a multiple of bsize(%s). We revert it to 1. This won't change the result, but may make it slower."%(str(self.unroll_batch),str(self.bsize))
if self.unroll_kern>0 and self.nkern % unroll_kern!=0: if self.unroll_kern>0 and self.nkern % unroll_kern!=0:
raise Exception("unroll_kern(%s) should be 0 or a multiple of nkern(%s)"%(str(self.unroll_kern),str(self.nkern))) if self.nkern<self.unroll_kern:
self.unroll_kern = self.nkern
else:
self.unroll_kern=1
print "OPTIMISATION WARNING: in ConvOp.__init__() unroll_kern(%s) should be 0 or a multiple of nkern(%s)We revert it to 1. This won't change the result, but may make it slower."%(str(self.unroll_kern),str(self.nkern))
if self.dx!=1 or self.dy!=1: if self.dx!=1 or self.dy!=1:
print "Warning, dx!=1 or dy!=1 only supported in python mode!" print "Warning, dx!=1 or dy!=1 only supported in python mode!"
raise NotImplementedError() raise NotImplementedError()
...@@ -146,13 +154,13 @@ class ConvOp(Op): ...@@ -146,13 +154,13 @@ class ConvOp(Op):
un_b = bsize un_b = bsize
else: else:
un_b = 1 un_b = 1
print "WARNING, can't determine a good unroll value for the batch in the gradient. Maybe you can optimize this!" print "OPTIMISATION WARNING: in ConvOp.grad() we can't determine a good unroll value for the batch. Maybe you can optimize this!"
if un_k!=0 and nkern%un_k!=0: if un_k!=0 and nkern%un_k!=0:
if nkern<un_k: if nkern<un_k:
un_k = nkern un_k = nkern
else: else:
un_k = 1 un_k = 1
print "WARNING, can't determine a good unroll value for the kerner in the gradient. Maybe you can optimize this!" print "OPTIMISATION WARNING: in ConvOp.grad() we can't determine a good unroll value for the kernel. Maybe you can optimize this!"
dw = ConvOp(imshp, kshp, nkern, bsize, 1,1, output_mode='valid', dw = ConvOp(imshp, kshp, nkern, bsize, 1,1, output_mode='valid',
unroll_batch=un_b, unroll_kern=un_k)(img,filters) unroll_batch=un_b, unroll_kern=un_k)(img,filters)
......
...@@ -32,6 +32,123 @@ def flip(kern, kshp): ...@@ -32,6 +32,123 @@ def flip(kern, kshp):
return flip return flip
# Fixed-seed RNG shared by the tests below so runs are reproducible.
global_rng = N.random.RandomState(3423489)
# 4-D float64 tensor type (batch, channel, row, col) used for ConvOp inputs/kernels.
dmatrix4=T.TensorType('float64', (False, False, False, False))
def exec_multilayer_conv_nnet(conv_mode, ss, bsize, imshp, kshps, nkerns, unroll_batch=0, unroll_kern=0, img=T.dmatrix(), validate=True, conv_op_py=False, do_convolve2=False, do_print=True, repeat=1):
    """Run a multi-layer convolution benchmark/validation pass.

    For each layer described by (kshps[i], nkerns[i]) this builds a reference
    convolution with scipy's private ``_convolve2d`` (when ``validate``),
    optionally the ``convolve2`` sparse implementation (when ``do_convolve2``),
    and the ``ConvOp`` implementation (C linker, and the python linker too when
    ``conv_op_py``), asserting all results agree to within 1e-5.

    conv_mode      -- 'valid' or 'full'
    ss             -- (row, col) subsampling steps applied to the outputs
    bsize          -- batch size
    imshp          -- input image shape (nchannels, rows, cols)
    kshps, nkerns  -- per-layer kernel shapes and kernel counts (same length)
    unroll_batch, unroll_kern -- unroll parameters forwarded to ConvOp
    img            -- symbolic image input used by convolve2
                      # NOTE(review): default is evaluated once at def time and
                      # shared across calls — presumably fine for a symbolic
                      # variable, but confirm.
    validate       -- compare every implementation against the scipy reference
    conv_op_py     -- also time/check ConvOp under the python linker
    do_convolve2   -- also time/check the convolve2 code path
    do_print       -- print a per-layer banner
    repeat         -- number of timed repetitions of each compiled function

    Returns (tctot, tpytot, ntot): accumulated wall-clock seconds for the
    ConvOp C path, the ConvOp python path, and the numpy/scipy reference.
    """
    # build actual input images
    imgval = global_rng.rand(bsize, imshp[0], imshp[1], imshp[2])

    a=T.dmatrix()
    # NOTE(review): every layer shares the same symbolic kernel variable `a`;
    # looks intentional since each layer compiles its own function.
    kerns = [a for i in nkerns]
    inputs4=dmatrix4()
    kerns4=dmatrix4()
    # per-implementation time accumulators
    ntot=0      # numpy/scipy reference time
    tctot=0     # ConvOp (C linker) time
    tpytot=0    # ConvOp (python linker) time

    # for each layer
    for kshp, kern, nkern, n_layer in zip(kshps, kerns, nkerns, range(len(nkerns))):
        if do_print:
            print '************* layer %i ***************' % n_layer
            print conv_mode, ss, n_layer, kshp, nkern

        # actual kernel values; flipped copy gives true convolution when the
        # implementation computes correlation
        w = global_rng.random_sample(N.r_[nkern,imshp[0],kshp])
        w_flip = flip(w,kshp).reshape(w.shape)

        ## manual reference implementation
        # 'full' mode: zero-pad the image by kshp-1 on each side
        padimg = imgval
        if conv_mode == 'full':
            padimg_shp = N.array(imshp[1:]) + 2*(N.array(kshp) - N.array([1,1]))
            padimg = N.zeros(N.r_[bsize,imshp[0],padimg_shp])
            padimg[:, :, kshp[0]-1:-kshp[0]+1,
                   kshp[1]-1:-kshp[1]+1] = imgval
        outshp = N.hstack((nkern, getFilterOutShp(imshp, kshp, ss, conv_mode)))

        time1 = time.time()
        outval = N.zeros(N.r_[bsize,outshp])
        if validate:
            # importing scipy's private helpers here causes an atexit problem
            # if done at module level
            from scipy.signal.sigtools import _convolve2d
            from scipy.signal.signaltools import _valfrommode, _bvalfromboundary
            val = _valfrommode(conv_mode)
            bval = _bvalfromboundary('fill')
            for b in range(bsize): # loop over batches
                for n in range(nkern): # loop over filters
                    for i in range(imshp[0]): # loop over input feature maps
                        # sum over input maps, then subsample by ss
                        outval[b,n,...] += _convolve2d(\
                            imgval[b,i,...], w_flip[n,i,...],1,val, bval, 0)[0::ss[0],0::ss[1]]
        ntot += time.time() - time1

        if do_convolve2:
            ####### test with new sp.convolve2 function ######
            time1 = time.time()
            # convolve2 works on flattened (bsize, -1) images/kernels
            hid, outshp2 = convolve2(kern, kshp, nkern, img, imshp,
                                     bsize, (1,1), mode=conv_mode)
            propup = function([kern, img], hid)
            propup1 = function([kern, img], hid,mode=Mode(linker="py"))

            hidval = propup(w_flip.reshape(nkern,-1), imgval.reshape(bsize,-1))
            hidval = hidval.reshape(bsize,nkern,outshp2[-2],outshp2[-1])[:,:,::ss[0],::ss[1]]
            hidval = hidval.reshape(bsize, -1)
            # python-linker version, repeated for timing; only the last result
            # is checked
            for i in range(repeat):
                hidval1 = propup1(w_flip.reshape(nkern,-1), imgval.reshape(bsize,-1))
            hidval1 = hidval1.reshape(bsize,nkern,outshp2[-2],outshp2[-1])[:,:,::ss[0],::ss[1]]
            hidval1 = hidval1.reshape(bsize, -1)
            assert (N.abs(hidval-hidval1)<1e-5).all()

            temp = N.abs(outval.reshape(bsize,-1) - hidval)
            if validate:
                assert (temp < 1e-5).all()
        else:
            hid = img #we don't need it, but it makes the flow easier
            hidval=outval.copy()#to keep the same memory
            hidval1=outval.copy()

        # ConvOp
        conv_op = ConvOp(imshp, kshp, nkern, bsize, 1,1, conv_mode, unroll_batch=unroll_batch, unroll_kern=unroll_kern)(inputs4, kerns4)
        l1shp=N.hstack((nkern,
                        getFilterOutShp(imshp, kshp, ss, conv_mode)))
        propup2 = function([inputs4, kerns4], conv_op)
        propup3 = function([inputs4, kerns4], conv_op, mode=Mode(linker="py"))

        # C-linker timing: subsampling is done post-hoc on the full output
        time1 = time.time()
        for i in range(repeat):
            hidval2_ = propup2(imgval,w_flip)
        hidval2 = hidval2_[:,:,0::ss[0],0::ss[1]]
        tctot += time.time() - time1

        if conv_op_py:
            time1 = time.time()
            for i in range(repeat):
                hidval3_ = propup3(imgval,w_flip)
            hidval3 = hidval3_[:,:,0::ss[0],0::ss[1]]
            tpytot += time.time() - time1
            assert (N.abs(hidval2-hidval3)<1e-5).all()
        else:
            tpytot += 0
        if validate:
            temp = N.abs(outval - hidval2)
            assert (temp < 1e-5).all()
        if validate and conv_op_py:
            temp = N.abs(outval - hidval3)
            assert (temp < 1e-5).all()

        # chain: this layer's output becomes the next layer's input
        img, imshp = hid, tuple(outshp)
        imgval = outval.reshape(bsize,outshp[0],outshp[1],outshp[2])
    return tctot, tpytot, ntot
class TestConvOp(unittest.TestCase): class TestConvOp(unittest.TestCase):
def setUp(self): def setUp(self):
utt.seed_rng() utt.seed_rng()
...@@ -185,24 +302,22 @@ class TestConvOp(unittest.TestCase): ...@@ -185,24 +302,22 @@ class TestConvOp(unittest.TestCase):
print 'speed up ConvOp vs convolve2d: %.3f'%d.mean(),d print 'speed up ConvOp vs convolve2d: %.3f'%d.mean(),d
def test_multilayer_conv(self): def test_multilayer_conv(self):
# causes an atexit problem
from scipy.signal.sigtools import _convolve2d
from scipy.signal.signaltools import _valfrommode, _bvalfromboundary
# fixed parameters # fixed parameters
bsize = 1 # batch size bsizes = [6,6] # batch size
imshp_start = (1,28,28) imshp_starts = [(1,28,28),(1,4,4)]
kshps = ([5,6],[7,4]) kshpss = ([[5,6],[7,4]],[[2,2],[2,2]])
nkerns = [20,40] # per output pixel nkernss = [[20,40],[2,2]] # per output pixel
ssizes = [(1,1),(2,2)] ssizess = [[(1,1),(2,2)],[(1,1),(2,2)]]
convmodes = ['valid','full'] convmodes = ['valid','full']
do_theano=True do_convolve2=True
unroll = [(0,0),(1,1),(2,2),(3,2)]#(batch,kern)
# TODO: this version show a bug. # TODO: this version show a bug that was fixed
imshp_start = (1,4,4) # the test is included in the upper test.
kshps = ([2,2],[2,2])#,[7,4]) # imshp_start = (1,4,4)
nkerns = [2,2] # per output pixel # kshps = ([2,2],[2,2])#,[7,4])
ssizes = [(1,1),(2,2)]#2,2)] # nkerns = [2,2] # per output pixel
# ssizes = [(1,1),(2,2)]#2,2)]
#test speed #test speed
# bsize = 10 # batch size # bsize = 10 # batch size
...@@ -211,7 +326,7 @@ class TestConvOp(unittest.TestCase): ...@@ -211,7 +326,7 @@ class TestConvOp(unittest.TestCase):
# nkerns = [20,20] # per output pixel # nkerns = [20,20] # per output pixel
# ssizes = [(1,1),]#(1,1)]#(2,2) bugged # ssizes = [(1,1),]#(1,1)]#(2,2) bugged
# convmodes = ['valid','full'] # convmodes = ['valid','full']
# do_theano=False # do_convolve2=False
N.set_printoptions(threshold=N.nan) N.set_printoptions(threshold=N.nan)
...@@ -220,113 +335,8 @@ class TestConvOp(unittest.TestCase): ...@@ -220,113 +335,8 @@ class TestConvOp(unittest.TestCase):
img = T.dmatrix() img = T.dmatrix()
rng = N.random.RandomState(3423489) rng = N.random.RandomState(3423489)
tctot, tpytot, ntot = [], [], [] tctot, tpytot, ntot = [], [], []
for i in range(len(kshpss)):
dmatrix4=T.TensorType('float64', (False, False, False, False)) assert len(kshpss[i])==len(nkernss[i])==len(kerns)
inputs4=dmatrix4()
kerns4=dmatrix4()
assert len(kshps)==len(nkerns)==len(kerns)
def do_test(conv_mode, ss, unroll_batch=0, unroll_kern=0, img=img, validate=True,conv_op_py=False):
# build actual input images
imgval = rng.rand(bsize, imshp_start[0], imshp_start[1], imshp_start[2])
imshp=imshp_start
# for each layer
ntot=0
tctot=0
tpytot=0
for kshp, kern, nkern, n_layer in zip(kshps, kerns, nkerns, range(len(kerns))):
print '************* layer %i ***************' % n_layer
print conv_mode, ss, n_layer, kshp, nkern
# actual values
w = rng.random_sample(N.r_[nkern,imshp[0],kshp])
w_flip = flip(w,kshp).reshape(w.shape)
## manual implementation
# check first stage
padimg = imgval
if conv_mode == 'full':
padimg_shp = N.array(imshp[1:]) + 2*(N.array(kshp) - N.array([1,1]))
padimg = N.zeros(N.r_[bsize,imshp[0],padimg_shp])
padimg[:, :, kshp[0]-1:-kshp[0]+1,
kshp[1]-1:-kshp[1]+1] = imgval
outshp = N.hstack((nkern, getFilterOutShp(imshp, kshp, ss, conv_mode)))
time1 = time.time()
outval = N.zeros(N.r_[bsize,outshp])
if validate:
val = _valfrommode(conv_mode)
bval = _bvalfromboundary('fill')
for b in range(bsize): # loop over batches
for n in range(nkern): # loop over filters
for i in range(imshp[0]): # loop over input feature maps
outval[b,n,...] += _convolve2d(\
imgval[b,i,...], w_flip[n,i,...],1,val, bval, 0)[0::ss[0],0::ss[1]]
ntot += time.time() - time1
if do_theano:
####### test with new sp.convolve2 function ######
time1 = time.time()
hid, outshp2 = convolve2(kern, kshp, nkern, img, imshp,
bsize, (1,1), mode=conv_mode)
propup = function([kern, img], hid)
propup1 = function([kern, img], hid,mode=Mode(linker="py"))
hidval = propup(w_flip.reshape(nkern,-1), imgval.reshape(bsize,-1))
hidval = hidval.reshape(bsize,nkern,outshp2[-2],outshp2[-1])[:,:,::ss[0],::ss[1]]
hidval = hidval.reshape(bsize, -1)
hidval1 = propup1(w_flip.reshape(nkern,-1), imgval.reshape(bsize,-1))
hidval1 = hidval1.reshape(bsize,nkern,outshp2[-2],outshp2[-1])[:,:,::ss[0],::ss[1]]
hidval1 = hidval1.reshape(bsize, -1)
assert (N.abs(hidval-hidval1)<1e-5).all()
temp = N.abs(outval.reshape(bsize,-1) - hidval)
if validate:
assert (temp < 1e-5).all()
else:
hid = img #we don't need it, but it make the flow easier flow
hidval=outval.copy()#to keep the same memory
hidval1=outval.copy()
# ConvOp
conv_op = ConvOp(imshp, kshp, nkern, bsize, 1,1, conv_mode, unroll_batch=unroll_batch, unroll_kern=unroll_kern)(inputs4, kerns4)
l1shp=N.hstack((nkern,
getFilterOutShp(imshp, kshp, ss, conv_mode)))
propup2 = function([inputs4, kerns4], conv_op)
propup3 = function([inputs4, kerns4], conv_op, mode=Mode(linker="py"))
time1 = time.time()
hidval2_ = propup2(imgval,w_flip)
hidval2 = hidval2_[:,:,0::ss[0],0::ss[1]]
tctot += time.time() - time1
if conv_op_py:
time1 = time.time()
hidval3_ = propup3(imgval,w_flip)
hidval3 = hidval3_[:,:,0::ss[0],0::ss[1]]
tpytot += time.time() - time1
assert (N.abs(hidval2-hidval3)<1e-5).all()
else:
tpytot += 0
if validate:
temp = N.abs(outval - hidval2)
assert (temp < 1e-5).all()
if validate and conv_op_py:
temp = N.abs(outval - hidval3)
assert (temp < 1e-5).all()
img, imshp = hid, tuple(outshp)
imgval = outval.reshape(bsize,outshp[0],outshp[1],outshp[2])
return tctot, tpytot, ntot
if False: if False:
# calculate the speed up of different combination of unroll # calculate the speed up of different combination of unroll
...@@ -336,16 +346,16 @@ class TestConvOp(unittest.TestCase): ...@@ -336,16 +346,16 @@ class TestConvOp(unittest.TestCase):
unroll_batch = [1,2,4,5,10,20] unroll_batch = [1,2,4,5,10,20]
unroll_kern = [1,2,4,5,10,20] unroll_kern = [1,2,4,5,10,20]
# unroll_batch = [1,2,5] unroll_batch = [1,2,5]
# unroll_kern = [1,2,5] unroll_kern = [1,2,5]
bsize = 20 # batch size bsize = 20 # batch size
imshp_start = (1,50,49)#un square shape to test more corner case. imshp_start = (1,48,48)#un square shape to test more corner case.
kshps = ([11,12],[12,11])#un square shape to test more corner case. kshps = ([11,12],[12,11])#un square shape to test more corner case.
nkerns = [20,20] # per output pixel nkerns = [20,20] # per output pixel
ssizes = [(1,1),]#(1,1)]#(2,2) bugged ssizes = [(1,1),]#(1,1)]#(2,2) bugged
convmodes = ['valid','full'] convmodes = ['valid','full']
do_theano=False do_convolve2=False
a=T.dmatrix() a=T.dmatrix()
kerns = [a for i in nkerns] kerns = [a for i in nkerns]
...@@ -360,7 +370,7 @@ class TestConvOp(unittest.TestCase): ...@@ -360,7 +370,7 @@ class TestConvOp(unittest.TestCase):
tctot, tpytot, ntot=[],[],[] tctot, tpytot, ntot=[],[],[]
for conv_mode, n_mode in zip(convmodes,range(len(convmodes))): for conv_mode, n_mode in zip(convmodes,range(len(convmodes))):
for ss, n_ss in zip(ssizes,range(len(ssizes))): for ss, n_ss in zip(ssizes,range(len(ssizes))):
tctot_, tpytot_, ntot_ = do_test(conv_mode, ss, unroll_batch=unroll_b, unroll_kern=unroll_k, validate=validate) tctot_, tpytot_, ntot_ = exec_multilayer_conv_nnet(conv_mode, ss, bsize, imshp_start, kshps, nkerns, unroll_batch=unroll_b, unroll_kern=unroll_k, validate=validate)
tctot+=[tctot_] tctot+=[tctot_]
tpytot+=[tpytot_] tpytot+=[tpytot_]
ntot+=[ntot_] ntot+=[ntot_]
...@@ -370,7 +380,7 @@ class TestConvOp(unittest.TestCase): ...@@ -370,7 +380,7 @@ class TestConvOp(unittest.TestCase):
tctot,tpytot,ntot=0,0,0 tctot,tpytot,ntot=0,0,0
for conv_mode, n_mode in zip(convmodes,range(len(convmodes))): for conv_mode, n_mode in zip(convmodes,range(len(convmodes))):
for ss, n_ss in zip(ssizes,range(len(ssizes))): for ss, n_ss in zip(ssizes,range(len(ssizes))):
tctot_, tpytot_, ntot_ = do_test(conv_mode, ss, unroll_batch=0, unroll_kern=0, validate=validate) tctot_, tpytot_, ntot_ = exec_multilayer_conv_nnet(conv_mode, ss, bsize, imshp_start, kshps, nkerns, unroll_batch=0, unroll_kern=0, validate=validate)
tctot+=tctot_ tctot+=tctot_
tpytot+=tpytot_ tpytot+=tpytot_
ntot+=ntot_ ntot+=ntot_
...@@ -385,13 +395,19 @@ class TestConvOp(unittest.TestCase): ...@@ -385,13 +395,19 @@ class TestConvOp(unittest.TestCase):
print "min %.3fs"%t.min(), "min param(batch unloop size/kernel unloop size)", t_b_k[t.argmin()] print "min %.3fs"%t.min(), "min param(batch unloop size/kernel unloop size)", t_b_k[t.argmin()]
print "speedup vs (1/1)%.3fx, vs old %.3fx"% (t.max()/t.min(),tctot/t.min()) print "speedup vs (1/1)%.3fx, vs old %.3fx"% (t.max()/t.min(),tctot/t.min())
return return
for conv_mode, n_mode in zip(convmodes,range(len(convmodes))): for i in range(len(kshpss)):
for ss, n_ss in zip(ssizes,range(len(ssizes))): for conv_mode, n_mode in zip(convmodes,range(len(convmodes))):
tctot_, tpytot_, ntot_ = do_test(conv_mode, ss) for ss, n_ss in zip(ssizess[i],range(len(ssizess[i]))):
tctot+=[tctot_] for un_b, un_k in unroll:
tpytot+=[tpytot_] tctot_, tpytot_, ntot_ = exec_multilayer_conv_nnet(
ntot+=[ntot_] conv_mode, ss, bsizes[i], imshp_starts[i],
kshpss[i], nkernss[i],
img=img, unroll_batch=un_b, unroll_kern=un_k,
validate=True)
tctot+=[tctot_]
tpytot+=[tpytot_]
ntot+=[ntot_]
print '**** Multilayer Convolution Profiling Results ****' print '**** Multilayer Convolution Profiling Results ****'
print 'Numpy convolve2d processing time: %.3fs'%sum(ntot),ntot print 'Numpy convolve2d processing time: %.3fs'%sum(ntot),ntot
...@@ -465,8 +481,19 @@ class TestConvOp(unittest.TestCase): ...@@ -465,8 +481,19 @@ class TestConvOp(unittest.TestCase):
utt.verify_grad(testf, [imgvals, kernvals]) utt.verify_grad(testf, [imgvals, kernvals])
if __name__ == '__main__': if __name__ == '__main__':
t = TestConvOp("test_convolution") # t = TestConvOp("test_convolution")
t.test_convolution() # t.test_convolution()
# t.test_multilayer_conv() # t.test_multilayer_conv()
# from theano.tests import main # from theano.tests import main
# main("test_sp") # main("test_sp")
bsize = 20 # batch size
imshp_start = (1,100,100)#un square shape to test more corner case.
kshps = ([11,12],[12,11])#un square shape to test more corner case.
nkerns = [20,20] # per output pixel
ssizes = [(1,1),]#(1,1)]#(2,2) bugged
convmodes = ['valid','full']
unroll_batch = 5
unroll_kern = 2
ctot=0
tctot, tpytot, ntot = exec_multilayer_conv_nnet(convmodes[1], ssizes[0], bsize, imshp_start, kshps, nkerns, unroll_batch=unroll_batch, unroll_kern=unroll_kern, validate=False, do_print=False,repeat=5)
print "total exec time %.3fs"%tctot
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论