提交 57489fbe authored 作者: Frederic Bastien's avatar Frederic Bastien

Added an option to ConvOp that allows using the unrolled version of the code. The…

Added an option to ConvOp that allows using the unrolled version of the code. The default is not to use this version of the code.
上级 fdd808d7
......@@ -16,8 +16,11 @@ class ConvOp(Op):
In development.
"""
def __init__(self, imshp, kshp, nkern, bsize, dx, dy, output_mode='valid'):
def __init__(self, imshp, kshp, nkern, bsize, dx, dy, output_mode='valid', unroll_batch=0, unroll_kern=0):
"""
unroll_batch. If >0, a code version that unrolls the batch loop by the value of this option is used. By default this version of the code is not used.
unroll_kern. Same as unroll_batch, but unrolls the kernel loop.
"""
imshp = tuple(imshp)
if len(imshp)==2:
self.imshp = (1,)+imshp
......@@ -31,6 +34,11 @@ class ConvOp(Op):
self.bsize=bsize
self.dx=dx
self.dy=dy
self.unroll_batch=unroll_batch
self.unroll_kern=unroll_kern
assert not(unroll_batch>0 and unroll_kern>0)
if self.dx!=1 or self.dy!=1:
print "Warning, dx!=1 or dy!=1 only supported in python mode!"
raise NotImplementedError()
......@@ -164,7 +172,9 @@ using namespace std;
if node.inputs[0].type.dtype=="float32": d["type"]="float"
elif node.inputs[0].type.dtype=="float64": d["type"]="double"
else: raise Exception("Type %s not implemented"%node.inputs[0].type.dtype)
if self.unroll_batch>0:
return gen_conv_code_unroll_bsize(d, self.unroll_batch)
#TODO: should we choose the unroll size automatically with the bigger divisor under 5? under 10?
if self.out_mode == 'valid':
return _conv_op_code_valid_gemm % d
else:
......@@ -617,6 +627,8 @@ Py_XDECREF(img2d);
def gen_conv_code_unroll_bsize(d,unloop_bsize=1):
""" c_code for ConvOp that unroll the batch size loop
"""
d["unloop_bsize"]=unloop_bsize
def my_dup(st):
s=""
......
......@@ -207,13 +207,13 @@ class TestConvOp(unittest.TestCase):
ssizes = [(1,1),(2,2)]#2,2)]
#test speed
# bsize = 10 # batch size
# imshp_start = (1,50,50)
# kshps = ([12,12],[12,12])
# nkerns = [20,20] # per output pixel
# ssizes = [(1,1),(1,1)]#(2,2) bugged
# convmodes = ['valid','full']
# do_theano=True
bsize = 10 # batch size
imshp_start = (1,50,50)
kshps = ([12,12],[12,12])
nkerns = [20,20] # per output pixel
ssizes = [(1,1),]#(1,1)]#(2,2) bugged
convmodes = ['valid','full']
do_theano=False
N.set_printoptions(threshold=N.nan)
......@@ -297,7 +297,7 @@ class TestConvOp(unittest.TestCase):
hidval1=outval.copy()
# ConvOp
conv_op = ConvOp(imshp, kshp, nkern, bsize, 1,1, conv_mode)(inputs4, kerns4)
conv_op = ConvOp(imshp, kshp, nkern, bsize, 1,1, conv_mode, unroll_batch=10)(inputs4, kerns4)
l1shp=N.hstack((nkern,
getFilterOutShp(imshp, kshp, ss, conv_mode)))
propup2 = function([inputs4, kerns4], conv_op)
......@@ -309,15 +309,15 @@ class TestConvOp(unittest.TestCase):
t2ctot += [time.time() - time1]
time1 = time.time()
hidval3_ = propup3(imgval,w_flip)
hidval3 = hidval3_[:,:,0::ss[0],0::ss[1]]
# hidval3_ = propup3(imgval,w_flip)
# hidval3 = hidval3_[:,:,0::ss[0],0::ss[1]]
t2pytot += [time.time() - time1]
assert (N.abs(hidval2-hidval3)<1e-5).all()
# assert (N.abs(hidval2-hidval3)<1e-5).all()
temp = N.abs(outval - hidval2)
assert (temp < 1e-5).all()
temp = N.abs(outval - hidval3)
assert (temp < 1e-5).all()
# temp = N.abs(outval - hidval3)
# assert (temp < 1e-5).all()
img, imshp = hid, tuple(outshp)
imgval = outval.reshape(bsize,outshp[0],outshp[1],outshp[2])
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论