提交 4cc3b5f2 作者: Olivier Delalleau

Merged

...@@ -704,12 +704,18 @@ class CLinker(link.Linker): ...@@ -704,12 +704,18 @@ class CLinker(link.Linker):
instantiate.customize.add_support_code(self.struct_code) instantiate.customize.add_support_code(self.struct_code)
instantiate.customize.add_support_code(static) instantiate.customize.add_support_code(static)
for extra_arg in ( for extra_arg in (
"-O2", "-O3",
# "-fno-signaling-nans",
#"-fno-finite-math-only",
#"-fmath-errno", "-fno-unsafe-math-optimizations", "-fno-finite-math-only", "-frounding-math", "-fsignaling-nans","-fno-cx-limited-range","-fno-fast-math",
"-ffast-math", "-ffast-math",
#"-fno-finite-math-only",
# "-fno-signaling-nans",
#"-fmath-errno", "-fno-unsafe-math-optimizations", "-fno-finite-math-only", "-frounding-math", "-fsignaling-nans","-fno-cx-limited-range","-fno-fast-math",
#"-fprefetch-loop-arrays", #"-fprefetch-loop-arrays",
#"-ftree-vect-loop-version", #"-ftree-vect-loop-version",
#"-ftree-loop-optimize", #"-ftree-loop-optimize",
#"-ftree-vectorize"): #"-ftree-vectorize",
"-w" #-w means supress all warnings "-w" #-w means supress all warnings
): ):
instantiate.customize.add_extra_compile_arg(extra_arg) instantiate.customize.add_extra_compile_arg(extra_arg)
......
差异被折叠。
...@@ -90,16 +90,18 @@ def exec_multilayer_conv_nnet(conv_mode, ss, bsize, imshp, kshps, nkerns, unroll ...@@ -90,16 +90,18 @@ def exec_multilayer_conv_nnet(conv_mode, ss, bsize, imshp, kshps, nkerns, unroll
####### test with new sp.convolve2 function ###### ####### test with new sp.convolve2 function ######
time1 = time.time() time1 = time.time()
hid, outshp2 = convolve2(kern, kshp, nkern, img, imshp, hid, outshp2 = convolve2(kern, kshp, nkern, img, imshp,
bsize, (1,1), mode=conv_mode) bsize, (ss[0],ss[1]), mode=conv_mode)
propup = function([kern, img], hid) propup = function([kern, img], hid)
propup1 = function([kern, img], hid,mode=Mode(linker="py")) propup1 = function([kern, img], hid,mode=Mode(linker="py"))
hidval = propup(w_flip.reshape(nkern,-1), imgval.reshape(bsize,-1)) hidval = propup(w_flip.reshape(nkern,-1), imgval.reshape(bsize,-1))
hidval = hidval.reshape(bsize,nkern,outshp2[-2],outshp2[-1])[:,:,::ss[0],::ss[1]] hidval = hidval.reshape(bsize,nkern,outshp2[-2],outshp2[-1])
# hidval = hidval[:,:,::ss[0],::ss[1]]
hidval = hidval.reshape(bsize, -1) hidval = hidval.reshape(bsize, -1)
for i in range(repeat): for i in range(repeat):
hidval1 = propup1(w_flip.reshape(nkern,-1), imgval.reshape(bsize,-1)) hidval1 = propup1(w_flip.reshape(nkern,-1), imgval.reshape(bsize,-1))
hidval1 = hidval1.reshape(bsize,nkern,outshp2[-2],outshp2[-1])[:,:,::ss[0],::ss[1]] hidval1 = hidval1.reshape(bsize,nkern,outshp2[-2],outshp2[-1])
# hidval1 = hidval1[:,:,::ss[0],::ss[1]]
hidval1 = hidval1.reshape(bsize, -1) hidval1 = hidval1.reshape(bsize, -1)
assert (N.abs(hidval-hidval1)<1e-5).all() assert (N.abs(hidval-hidval1)<1e-5).all()
...@@ -113,7 +115,7 @@ def exec_multilayer_conv_nnet(conv_mode, ss, bsize, imshp, kshps, nkerns, unroll ...@@ -113,7 +115,7 @@ def exec_multilayer_conv_nnet(conv_mode, ss, bsize, imshp, kshps, nkerns, unroll
hidval1=outval.copy() hidval1=outval.copy()
# ConvOp # ConvOp
conv_op = ConvOp(imshp, kshp, nkern, bsize, 1,1, conv_mode, unroll_batch=unroll_batch, unroll_kern=unroll_kern)(inputs4, kerns4) conv_op = ConvOp(imshp, kshp, nkern, bsize, ss[0],ss[1], conv_mode, unroll_batch=unroll_batch, unroll_kern=unroll_kern)(inputs4, kerns4)
l1shp=N.hstack((nkern, l1shp=N.hstack((nkern,
getFilterOutShp(imshp, kshp, ss, conv_mode))) getFilterOutShp(imshp, kshp, ss, conv_mode)))
propup2 = function([inputs4, kerns4], conv_op) propup2 = function([inputs4, kerns4], conv_op)
...@@ -122,14 +124,14 @@ def exec_multilayer_conv_nnet(conv_mode, ss, bsize, imshp, kshps, nkerns, unroll ...@@ -122,14 +124,14 @@ def exec_multilayer_conv_nnet(conv_mode, ss, bsize, imshp, kshps, nkerns, unroll
time1 = time.time() time1 = time.time()
for i in range(repeat): for i in range(repeat):
hidval2_ = propup2(imgval,w_flip) hidval2_ = propup2(imgval,w_flip)
hidval2 = hidval2_[:,:,0::ss[0],0::ss[1]] hidval2 = hidval2_#[:,:,0::ss[0],0::ss[1]]
tctot += time.time() - time1 tctot += time.time() - time1
if conv_op_py: if conv_op_py:
time1 = time.time() time1 = time.time()
for i in range(repeat): for i in range(repeat):
hidval3_ = propup3(imgval,w_flip) hidval3_ = propup3(imgval,w_flip)
hidval3 = hidval3_[:,:,0::ss[0],0::ss[1]] hidval3 = hidval3_#[:,:,0::ss[0],0::ss[1]]
tpytot += time.time() - time1 tpytot += time.time() - time1
assert (N.abs(hidval2-hidval3)<1e-5).all() assert (N.abs(hidval2-hidval3)<1e-5).all()
else: else:
...@@ -235,7 +237,7 @@ class TestConvOp(unittest.TestCase): ...@@ -235,7 +237,7 @@ class TestConvOp(unittest.TestCase):
# compute with new convolve2 (no timing info) # compute with new convolve2 (no timing info)
output4, outshp4 = convolve2(kerns, kshp, nkern, input,\ output4, outshp4 = convolve2(kerns, kshp, nkern, input,\
imshp, bsize, (1,1), bias=bias, mode=conv_mode) imshp, bsize, (ss[0],ss[1]), bias=bias, mode=conv_mode)
# print 'output4', output4 # print 'output4', output4
ttime1 = time.time() ttime1 = time.time()
...@@ -244,7 +246,7 @@ class TestConvOp(unittest.TestCase): ...@@ -244,7 +246,7 @@ class TestConvOp(unittest.TestCase):
# print 'out4', out4, img1d, filtersflipped # print 'out4', out4, img1d, filtersflipped
tconv2 += [time.time() - ttime1] tconv2 += [time.time() - ttime1]
out4 = out4.reshape(bsize, nkern, outshp4[1], outshp4[2]) out4 = out4.reshape(bsize, nkern, outshp4[1], outshp4[2])
out4 = out4[:,:,0::ss[0],0::ss[1]] out4 = out4#[:,:,0::ss[0],0::ss[1]]
out4 = out4.reshape(bsize, -1) out4 = out4.reshape(bsize, -1)
# compute with ConvOp # compute with ConvOp
...@@ -252,18 +254,18 @@ class TestConvOp(unittest.TestCase): ...@@ -252,18 +254,18 @@ class TestConvOp(unittest.TestCase):
inputs=dmatrix3() inputs=dmatrix3()
kerns3=dmatrix3() kerns3=dmatrix3()
bia=T.dscalar() bia=T.dscalar()
conv_op = ConvOp(imshp, kshp, nkern, bsize, 1,1, conv_mode)(inputs, kerns3) conv_op = ConvOp(imshp, kshp, nkern, bsize, ss[0],ss[1], conv_mode)(inputs, kerns3)
f2 = function([inputs, kerns3], conv_op, mode=Mode(linker="c")) f2 = function([inputs, kerns3], conv_op, mode=Mode(linker="c"))
f3 = function([inputs, kerns3], conv_op, mode=Mode(linker="py")) f3 = function([inputs, kerns3], conv_op, mode=Mode(linker="py"))
ttime1 = time.time() ttime1 = time.time()
out2_ = f2(img2d, filtersflipped) out2_ = f2(img2d, filtersflipped)
out2__ = out2_[:,:,0::ss[0],0::ss[1]] out2__ = out2_#[:,:,0::ss[0],0::ss[1]]
tconvop += [time.time() - ttime1] tconvop += [time.time() - ttime1]
out2___ = out2__.copy() out2___ = out2__.copy()
out2 = out2___ + biasvals.reshape(1,nkern,1,1) out2 = out2___ + biasvals.reshape(1,nkern,1,1)
out3_ = f3(img2d, filtersflipped) out3_ = f3(img2d, filtersflipped)
out3__ = out3_[:,:,0::ss[0],0::ss[1]] out3__ = out3_#[:,:,0::ss[0],0::ss[1]]
out3___ = out3__.copy() out3___ = out3__.copy()
out3 = out3___ + biasvals.reshape(1,nkern,1,1) out3 = out3___ + biasvals.reshape(1,nkern,1,1)
assert (N.abs(out2_-out3_)<1e-5).all() assert (N.abs(out2_-out3_)<1e-5).all()
...@@ -302,15 +304,21 @@ class TestConvOp(unittest.TestCase): ...@@ -302,15 +304,21 @@ class TestConvOp(unittest.TestCase):
print 'speed up ConvOp vs convolve2d: %.3f'%d.mean(),d print 'speed up ConvOp vs convolve2d: %.3f'%d.mean(),d
def test_multilayer_conv(self): def test_multilayer_conv(self):
print '\n\n*************************************************'
print ' TEST MULTILAYER CONVOLUTION'
print '*************************************************'
# fixed parameters # fixed parameters
# test multiple configuration at the same time
bsizes = [6,6] # batch size bsizes = [6,6] # batch size
imshp_starts = [(1,28,28),(1,4,4)] imshp_starts = [(1,13,14),(1,4,5)]
kshpss = ([[5,6],[7,4]],[[2,2],[2,2]]) kshpss = ([[5,6],[7,4]],[[2,2],[2,2]])
nkernss = [[20,40],[2,2]] # per output pixel nkernss = [[20,40],[2,2]] # per output pixel
ssizess = [[(1,1),(2,2)],[(1,1),(2,2)]] ssizess = [[(1,1),(1,2)],[(1,1),(2,2)]]
convmodes = ['valid','full'] convmodes = ['valid','full']
do_convolve2=True do_convolve2=True
unroll = [(0,0),(1,1),(2,2),(3,2)]#(batch,kern) unroll = [(0,0),(1,1),(2,2),(3,2)]#(batch,kern)
do_speed_test = False
# TODO: this version show a bug that was fixed # TODO: this version show a bug that was fixed
# the test is included in the upper test. # the test is included in the upper test.
...@@ -319,15 +327,6 @@ class TestConvOp(unittest.TestCase): ...@@ -319,15 +327,6 @@ class TestConvOp(unittest.TestCase):
# nkerns = [2,2] # per output pixel # nkerns = [2,2] # per output pixel
# ssizes = [(1,1),(2,2)]#2,2)] # ssizes = [(1,1),(2,2)]#2,2)]
#test speed
# bsize = 10 # batch size
# imshp_start = (1,50,49)#un square shape to test more corner case.
# kshps = ([11,12],[12,11])#un square shape to test more corner case.
# nkerns = [20,20] # per output pixel
# ssizes = [(1,1),]#(1,1)]#(2,2) bugged
# convmodes = ['valid','full']
# do_convolve2=False
N.set_printoptions(threshold=N.nan) N.set_printoptions(threshold=N.nan)
# symbolic stuff # symbolic stuff
...@@ -338,7 +337,7 @@ class TestConvOp(unittest.TestCase): ...@@ -338,7 +337,7 @@ class TestConvOp(unittest.TestCase):
for i in range(len(kshpss)): for i in range(len(kshpss)):
assert len(kshpss[i])==len(nkernss[i])==len(kerns) assert len(kshpss[i])==len(nkernss[i])==len(kerns)
if False: if do_speed_test:
# calculate the speed up of different combination of unroll # calculate the speed up of different combination of unroll
# put the paramter to the same you will try. # put the paramter to the same you will try.
...@@ -418,16 +417,19 @@ class TestConvOp(unittest.TestCase): ...@@ -418,16 +417,19 @@ class TestConvOp(unittest.TestCase):
d=N.asarray(ntot)/tpytot d=N.asarray(ntot)/tpytot
print 'speed up py theano(ConvOp) vs convolve2d: %.3fx'%d.mean(),d print 'speed up py theano(ConvOp) vs convolve2d: %.3fx'%d.mean(),d
def test_ConvOpGrad(self): def test_ConvOpGrad(self):
""" """
test the gradient in float and double test the gradient in float and double
""" """
print '\n\n*************************************************'
print ' TEST ConvOp.grad'
print '*************************************************'
nkern = 4 nkern = 4
bsize = 3 bsize = 3
types = ["float32", "float64"] types = ["float32", "float64"]
kshps = [(5,5), (6,7)] kshps = [(5,5), (6,7)]
imshps = [(1,5,5), (2,8,8), (3,8,7)] imshps = [(1,5,5), (2,8,7)]
modes = ['valid', 'full'] modes = ['valid', 'full']
unroll_batch=[0,1,3] unroll_batch=[0,1,3]
unroll_kern=[0,1,4] unroll_kern=[0,1,4]
...@@ -468,19 +470,22 @@ class TestConvOp(unittest.TestCase): ...@@ -468,19 +470,22 @@ class TestConvOp(unittest.TestCase):
tol=None if typ!="float32" else 0.16) tol=None if typ!="float32" else 0.16)
if __name__ == '__main__': if __name__ == '__main__':
# t = TestConvOp("test_convolution") t = TestConvOp("test_convolution")
# t.test_convolution() # t.test_convolution()
# t.test_multilayer_conv() t.test_multilayer_conv()
# from theano.tests import main # from theano.tests import main
# main("test_sp") # main("test_sp")
bsize = 20 # batch size if False:
imshp_start = (1,100,100)#un square shape to test more corner case. #used to lanch 8 jobs at the same time.
kshps = ([11,12],[12,11])#un square shape to test more corner case. bsize = 20 # batch size
nkerns = [20,20] # per output pixel imshp_start = (1,100,100)#un square shape to test more corner case.
ssizes = [(1,1),]#(1,1)]#(2,2) bugged kshps = ([11,12],[12,11])#un square shape to test more corner case.
convmodes = ['valid','full'] nkerns = [20,20] # per output pixel
unroll_batch = 5 ssizes = [(1,1),]#(1,1)]#(2,2) bugged
unroll_kern = 2 convmodes = ['valid','full']
ctot=0 unroll_batch = 5
tctot, tpytot, ntot = exec_multilayer_conv_nnet(convmodes[1], ssizes[0], bsize, imshp_start, kshps, nkerns, unroll_batch=unroll_batch, unroll_kern=unroll_kern, validate=False, do_print=False,repeat=5) unroll_kern = 2
print "total exec time %.3fs"%tctot ctot=0
tctot, tpytot, ntot = exec_multilayer_conv_nnet(convmodes[1], ssizes[0], bsize, imshp_start, kshps, nkerns, unroll_batch=unroll_batch, unroll_kern=unroll_kern, validate=False, do_print=False,repeat=5)
print "total exec time %.3fs"%tctot
...@@ -30,7 +30,7 @@ _mtypes = [sparse.csc_matrix, sparse.csr_matrix] ...@@ -30,7 +30,7 @@ _mtypes = [sparse.csc_matrix, sparse.csr_matrix]
_mtype_to_str = {sparse.csc_matrix: "csc", sparse.csr_matrix: "csr"} _mtype_to_str = {sparse.csc_matrix: "csc", sparse.csr_matrix: "csr"}
import scipy import scipy
if scipy.__version__ != '0.7.0': if not scipy.__version__.startswith('0.7.'):
sys.stderr.write("WARNING: scipy version = %s. We prefer version >=0.7.0 because it has bugs fixed in the sparse matrix code.\n" % scipy.__version__) sys.stderr.write("WARNING: scipy version = %s. We prefer version >=0.7.0 because it has bugs fixed in the sparse matrix code.\n" % scipy.__version__)
def _is_sparse_variable(x): def _is_sparse_variable(x):
......
...@@ -764,8 +764,10 @@ class CrossentropyCategorical1Hot(gof.Op): ...@@ -764,8 +764,10 @@ class CrossentropyCategorical1Hot(gof.Op):
_true_one_of_n = tensor.as_tensor_variable(true_one_of_n) _true_one_of_n = tensor.as_tensor_variable(true_one_of_n)
if _coding_dist.type.ndim != 2: if _coding_dist.type.ndim != 2:
raise TypeError('matrix required for argument: coding_dist') raise TypeError('matrix required for argument: coding_dist')
if _true_one_of_n.type != tensor.lvector: if _true_one_of_n.type not in (tensor.lvector, tensor.ivector):
raise TypeError('integer vector required for argument: true_one_of_n') raise TypeError('integer vector required for argument: true_one_of_n'
'(got type: %s instead of: %s)' % (_true_one_of_n.type,
tensor.lvector))
return gof.Apply(self, [_coding_dist, _true_one_of_n], [tensor.dvector()]) return gof.Apply(self, [_coding_dist, _true_one_of_n], [tensor.dvector()])
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
请 注册 或者 登录 后发表评论