提交 4bded893 authored 作者: James Bergstra's avatar James Bergstra

merge

......@@ -16,8 +16,11 @@ class ConvOp(Op):
In development.
"""
def __init__(self, imshp, kshp, nkern, bsize, dx, dy, output_mode='valid'):
def __init__(self, imshp, kshp, nkern, bsize, dx, dy, output_mode='valid', unroll_batch=0, unroll_kern=0):
"""
unroll_batch: if > 0, use a code version that unrolls the batch loop by that factor. By default (0) this version of the code is not used.
unroll_kern: same as unroll_batch, but unrolls the kernel loop instead.
"""
imshp = tuple(imshp)
if len(imshp)==2:
self.imshp = (1,)+imshp
......@@ -31,6 +34,14 @@ class ConvOp(Op):
self.bsize=bsize
self.dx=dx
self.dy=dy
self.unroll_batch=unroll_batch
self.unroll_kern=unroll_kern
if self.unroll_batch>0 and self.bsize % self.unroll_batch!=0:
raise Exception("unroll_batch(%s) should be 0 or a multiple of bsize(%s)"%(str(self.unroll_batch),str(self.bsize)))
if self.unroll_kern>0 and self.nkern % unroll_kern!=0:
raise Exception("unroll_kern(%s) should be 0 or a multiple of nkern(%s)"%(str(self.unroll_kern),str(self.nkern)))
if self.dx!=1 or self.dy!=1:
print "Warning, dx!=1 or dy!=1 only supported in python mode!"
raise NotImplementedError()
......@@ -164,7 +175,12 @@ using namespace std;
if node.inputs[0].type.dtype=="float32": d["type"]="float"
elif node.inputs[0].type.dtype=="float64": d["type"]="double"
else: raise Exception("Type %s not implemented"%node.inputs[0].type.dtype)
if self.unroll_kern>0 and self.unroll_batch>0:
print "return unrolled batch and kern code by",self.unroll_batch, self.unroll_kern
return gen_conv_code_unroll_batch_kern(d, self.unroll_batch,
self.unroll_kern)
#TODO: should we choose the unroll size automatically with the bigger divisor under 5? under 10?
if self.out_mode == 'valid':
return _conv_op_code_valid_gemm % d
else:
......@@ -344,11 +360,11 @@ for(int b=0;b< %(self_bsize)s;b++){
int ind0 = (new_m-j);
if(mode==FULL){
const %(type)s * idx2=&hvals[j*dim_ker[1]];
const %(type)s * idx_hvals=&hvals[j*dim_ker[1]];
if(ind0 < 0 || ind0 >= dim_im[0]){
if(fill_value!=0)
for (int k=0; k < dim_ker[1]; k++) {
sum+= idx2[k] * fill_value;
sum+= idx_hvals[k] * fill_value;
}
}else{
//do the part where kernel is to the right of the img
......@@ -357,27 +373,27 @@ for(int b=0;b< %(self_bsize)s;b++){
if(fill_value!=0){
for(k=0;k<max_k;k++){
sum+= idx2[k]*fill_value;
sum+= idx_hvals[k]*fill_value;
}
}else {k=max_k;}
//do the part where the kernel is on the img
max_k=min(n+1,(int)dim_ker[1]);
const %(type)s * idx1=&in[ind0*dim_im[1]];
const %(type)s * idx_in=&in[ind0*dim_im[1]];
for (int ind1=n-k; k<max_k; k++,ind1--) {
sum+= idx2[k] * idx1[ind1];
sum+= idx_hvals[k] * idx_in[ind1];
}
//do the part to the left of the img
if(fill_value!=0)
for(;k<dim_ker[1];k++) sum+= idx2[k]*fill_value;
for(;k<dim_ker[1];k++) sum+= idx_hvals[k]*fill_value;
}
}else{
const %(type)s* idx1=&in[ind0*dim_im[1]]; //JB: should be dim_im[1] right? (was dim_im[0])
const %(type)s* idx2=&hvals[j*dim_ker[1]];
const %(type)s* idx_in=&in[ind0*dim_im[1]]; //JB: should be dim_im[1] right? (was dim_im[0])
const %(type)s* idx_hvals=&hvals[j*dim_ker[1]];
int new_n = (n+dim_ker[1]-1);
for (int k=0,last=new_n; k < dim_ker[1]; k++,last--) {
sum+=idx2[k]*idx1[last];
sum+=idx_hvals[k]*idx_in[last];
}
}
}//for j
......@@ -614,3 +630,250 @@ free(kbuf);
}
Py_XDECREF(img2d);
"""
def gen_conv_code_unroll_batch_kern(d,unroll_bsize=1, unroll_ksize=1):
    """Return C code for ConvOp with the batch and kernel loops unrolled.

    The returned string is a C implementation (intended as a Theano
    ``c_code`` body) in which the loop over the batch dimension advances
    ``unroll_bsize`` elements per iteration and the loop over the output
    kernels advances ``unroll_ksize`` elements per iteration, with the
    unrolled statements emitted explicitly.

    :param d: dict of ``%``-interpolation values used to fill the C
        templates; it must supply every name the templates reference
        (e.g. 'type', 'img2d', 'filtersflipped', 'z', 'fail',
        'self_bsize', 'self_nkern', 'self_imshp0', 'affectation', ...).
        NOTE: this dict is mutated in place — the keys 'unroll_bsize',
        'unroll_ksize', 'unroll_iter', 'unroll_biter' and 'unroll_kiter'
        are added/overwritten during generation.
    :param unroll_bsize: batch-loop unroll factor; must be > 0.
    :param unroll_ksize: kernel-loop unroll factor; must be > 0.
    :return: the generated C code as a single string.
    """
    assert unroll_bsize>0 and unroll_ksize>0
    d["unroll_bsize"]=unroll_bsize
    d["unroll_ksize"]=unroll_ksize
    def my_dup(st,size):
        # Emit `size` copies of the template `st`, interpolating
        # %(unroll_iter)s with 0..size-1 (one copy per unrolled slot).
        s=""
        for i in range(size):
            d["unroll_iter"]=i
            s+=st%d
        return s+"\n"
    def my_dup2(st):
        # Emit one copy of `st` for every (batch, kernel) unroll pair:
        # %(unroll_biter)s is the batch slot, %(unroll_kiter)s the kernel
        # slot, and %(unroll_iter)s a flat counter over both.
        # NOTE(review): the local `iter` shadows the builtin of the same
        # name (harmless here, but worth renaming in a behavior change).
        s=""
        iter=0
        for i in range(unroll_bsize):
            d["unroll_biter"]=i
            for j in range(unroll_ksize):
                d["unroll_kiter"]=j
                d["unroll_iter"]=iter
                iter+=1
                s+=st%d
        return s+"\n"
    # Prologue template: reshape both inputs to 4d, force C-contiguity,
    # decode the output mode, validate types and (re)allocate the output
    # %(z)s if its shape does not match, then open the unrolled b/n_kern
    # loops and assert the output strides are packed C-order.
    ret = """
int mode=-1,typenum=0, typenum_f=0;
PyArrayObject *ain1=NULL, *ain2=NULL, *filtersflipped_arr=NULL, *img2d_arr=NULL;
const %(type)s fill_value = 0;
int type_im=PyArray_TYPE(%(img2d)s);
int type_ker=PyArray_TYPE(%(filtersflipped)s);
npy_intp dim_zz[2]={%(self_outshp0)s,%(self_outshp1)s};
npy_intp dim_im[2]={%(self_imshp1)s,%(self_imshp2)s};
npy_intp dim_ker[2]={%(self_kshp0)s,%(self_kshp1)s};
PyArray_Dims img2d_shape;
npy_intp img2d_dim[4]={1,1,0,0};
img2d_shape.ptr=img2d_dim;
img2d_shape.len=4;
PyArray_Dims kerns_shape;
npy_intp kerns_dim[4]={1,1,0,0};
kerns_shape.ptr=kerns_dim;
kerns_shape.len=4;
PyObject *img2d=NULL, *contig, *filtersflipped=NULL;
string s="%(self_out_mode)s";
if(%(img2d)s->nd==2){
img2d_dim[3]=%(img2d)s->dimensions[1];
img2d_dim[2]=%(img2d)s->dimensions[0];
}else if(%(img2d)s->nd==3){
img2d_dim[3]=%(img2d)s->dimensions[2];
img2d_dim[2]=%(img2d)s->dimensions[1];
img2d_dim[0]=%(img2d)s->dimensions[0];
}else if(%(img2d)s->nd==4){
img2d_dim[3]=%(img2d)s->dimensions[3];
img2d_dim[2]=%(img2d)s->dimensions[2];
img2d_dim[1]=%(img2d)s->dimensions[1];
img2d_dim[0]=%(img2d)s->dimensions[0];
}else {
PyErr_SetString(PyExc_ValueError, "img don't have a good shape");
%(fail)s;
}
if(%(filtersflipped)s->nd==3){
kerns_dim[3]=%(filtersflipped)s->dimensions[2];
kerns_dim[2]=%(filtersflipped)s->dimensions[1];
kerns_dim[0]=%(filtersflipped)s->dimensions[0];
}else if(%(filtersflipped)s->nd==4){
kerns_dim[3]=%(filtersflipped)s->dimensions[3];
kerns_dim[2]=%(filtersflipped)s->dimensions[2];
kerns_dim[1]=%(filtersflipped)s->dimensions[1];
kerns_dim[0]=%(filtersflipped)s->dimensions[0];
}else{
PyErr_SetString(PyExc_ValueError, "kernel don't have a good shape");
%(fail)s;
}
img2d = PyArray_Newshape(%(img2d)s,&img2d_shape, PyArray_CORDER);
img2d_arr = (PyArrayObject*)img2d;
if ((img2d_arr->strides[3] != sizeof(%(type)s))
|| (img2d_arr->strides[2] != img2d_arr->dimensions[3]*sizeof(%(type)s))){
contig = (PyObject*)(PyArray_GETCONTIGUOUS((PyArrayObject*)img2d));
Py_DECREF(img2d);
img2d = contig;
if (!PyArray_ISCONTIGUOUS(img2d)){
PyErr_SetString(PyExc_ValueError, "img2d isn't contiguous");
%(fail)s;
}
}
img2d_arr = (PyArrayObject*)img2d;
filtersflipped = PyArray_Newshape(%(filtersflipped)s,&kerns_shape, PyArray_CORDER);
filtersflipped_arr = (PyArrayObject*)filtersflipped;
if ((filtersflipped_arr->strides[3] != sizeof(%(type)s))
|| (filtersflipped_arr->strides[2] != filtersflipped_arr->dimensions[3]*sizeof(%(type)s))){
contig = (PyObject*)(PyArray_GETCONTIGUOUS((PyArrayObject*)filtersflipped));
Py_DECREF(filtersflipped);
filtersflipped = contig;
if (!PyArray_ISCONTIGUOUS(filtersflipped)){
PyErr_SetString(PyExc_ValueError, "filtersflipped isn't contiguous");
%(fail)s;
}
}
filtersflipped_arr = (PyArrayObject*)filtersflipped;
if(s=="valid") mode=0;
else if(s=="full") mode=2;
else {PyErr_SetString(PyExc_ValueError, "invalid mode, only full and valid are supported"); %(fail)s;};
typenum = PyArray_ObjectType((PyObject*)%(img2d)s, 0);
typenum_f = PyArray_ObjectType((PyObject*)%(filtersflipped)s, 0);
if (typenum < 0) {PyErr_SetString(PyExc_ValueError, "Invalid type"); %(fail)s;}
if (typenum != typenum_f) {PyErr_SetString(PyExc_ValueError, "Input types must match"); %(fail)s;}
if (!img2d) %(fail)s;
if (!filtersflipped) %(fail)s;
if ((!%(z)s)
|| *PyArray_DIMS(%(z)s)!=4
||(%(z)s->dimensions[0] != %(self_bsize)s)
||(%(z)s->dimensions[1] != %(self_nkern)s)
||(%(z)s->dimensions[2] != dim_zz[0])
|| (%(z)s->dimensions[3] != dim_zz[1])
)
{
if (%(z)s) Py_DECREF(%(z)s);
npy_intp dims[4] = {0,0,0,0};
if(!dims) %(fail)s;
dims[0]=%(self_bsize)s;
dims[1]=%(self_nkern)s;
dims[2]=dim_zz[0];
dims[3]=dim_zz[1];
%(z)s = (PyArrayObject*) PyArray_ZEROS(4, dims, typenum,0);
}else{
//PyArray_FILLWBYTE((PyObject*)%(z)s,0);
}
int Os[2];
if (mode == FULL) {Os[0] = dim_im[0]+dim_ker[0]-1; Os[1] = dim_im[1]+dim_ker[1]-1;}
else {Os[0] = dim_im[0]-dim_ker[0]+1; Os[1] = dim_im[1]-dim_ker[1]+1;}
for(int b=0;b< %(self_bsize)s ;b+=%(unroll_bsize)s){
for(int n_kern=0;n_kern<%(self_nkern)s;n_kern+=%(unroll_ksize)s){
//assertions
if (%(z)s->strides[0] != %(z)s->dimensions[1] *%(z)s->dimensions[2] *%(z)s->dimensions[3] * sizeof(%(type)s)) %(fail)s;
if (%(z)s->strides[1] != %(z)s->dimensions[2] * %(z)s->dimensions[3] * sizeof(%(type)s)) %(fail)s;
if (%(z)s->strides[2] != %(z)s->dimensions[3] * sizeof(%(type)s)) %(fail)s;
if (%(z)s->strides[3] != sizeof(%(type)s)) %(fail)s;
"""%d
    # One output pointer per (batch, kernel) unroll slot, each zeroed.
    ret+=my_dup2("%(type)s * __restrict__ out%(unroll_iter)s=(%(type)s *)(PyArray_GETPTR2(%(z)s,b+%(unroll_biter)s,n_kern+%(unroll_kiter)s));")
    ret+=my_dup("for (int i = 0; i < dim_zz[0]*dim_zz[1]; ++i) out%(unroll_iter)s[i] = 0;",unroll_bsize*unroll_ksize)
    # Loop over the input feature maps ("stack"); fetch one input row
    # pointer per batch slot and one kernel pointer per kernel slot.
    ret+="""
for(int stack_size=0;stack_size<%(self_imshp0)s;stack_size++){
"""%d
    ret+=my_dup("const %(type)s * __restrict__ in%(unroll_iter)d=(%(type)s *)(PyArray_GETPTR2(img2d,b+%(unroll_iter)s,stack_size));", unroll_bsize)
    ret+=my_dup("const %(type)s * __restrict__ hvals%(unroll_iter)s=(%(type)s *)(PyArray_GETPTR2(filtersflipped,n_kern+%(unroll_iter)s,stack_size));",unroll_ksize)
    ret+="""
int new_m;
for (int m=0; m < Os[0]; m++) {
// Reposition index into input image based on requested output size
if (mode == FULL) new_m = m ;
else new_m = (m+dim_ker[0]-1);
for (int n=0; n < Os[1]; n++) { // loop over columns
"""%d
    # One accumulator per (batch, kernel) unroll slot.
    ret+=my_dup("%(type)s sum%(unroll_iter)s=0;", unroll_bsize*unroll_ksize)
    ret+="""
// Sum over kernel, if index into image is out of bounds
// fill with the value
for (int j=0; j < dim_ker[0]; j++) {
int ind0 = (new_m-j);
if(mode==FULL){
"""%d
    ret+=my_dup("const %(type)s * idx_hvals%(unroll_iter)s=&hvals%(unroll_iter)s[j*dim_ker[1]];",unroll_ksize)
    ret+="""
if(ind0 < 0 || ind0 >= dim_im[0]){
if(fill_value!=0)
for (int k=0; k < dim_ker[1]; k++) {
"""%d
    ret+=my_dup2("sum%(unroll_iter)s += idx_hvals%(unroll_kiter)s[k] * fill_value;")
    ret+="""
}
}else{
//do the part where kernel is to the right of the img
int k=0,max_k=max((int)(n-dim_im[1])+1,0);
if(fill_value!=0){
for(k=0;k<max_k;k++){
"""%d
    ret+=my_dup2("sum%(unroll_iter)s += idx_hvals%(unroll_kiter)s[k] * fill_value;")
    ret+="""
}
}else {k=max_k;}
//do the part where the kernel is on the img
max_k=min(n+1,(int)dim_ker[1]);
"""%d
    ret+=my_dup("const %(type)s * idx_in%(unroll_iter)s=&in%(unroll_iter)s[ind0*dim_im[1]];", unroll_bsize)
    ret+="""
for (int ind1=n-k; k<max_k; k++,ind1--) {
"""%d
    ret+=my_dup2("sum%(unroll_iter)s+= idx_hvals%(unroll_kiter)s[k] * idx_in%(unroll_biter)s[ind1];")
    ret+="""
}
//do the part to the left of the img
if(fill_value!=0)
for(;k<dim_ker[1];k++){
"""%d
    ret+=my_dup2("sum%(unroll_iter)s += idx_hvals%(unroll_kiter)s[k] * fill_value;")
    ret+="""
}
}
}else{//valid mode
"""%d
    ret+=my_dup("const %(type)s* idx_in%(unroll_iter)s=&in%(unroll_iter)s[ind0*dim_im[1]];", unroll_bsize)
    ret+=my_dup("const %(type)s* idx_hvals%(unroll_iter)s=&hvals%(unroll_iter)s[j*dim_ker[1]];",unroll_ksize)
    ret+="""
int new_n = (n+dim_ker[1]-1);
for (int k=0,last=new_n; k < dim_ker[1]; k++,last--) {
"""%d
    ret+=my_dup2("sum%(unroll_iter)s+=idx_hvals%(unroll_kiter)s[k]*idx_in%(unroll_biter)s[last];")
    ret+="""
}
}
}//for j
"""%d
    # Write each accumulator into its output slot (%(affectation)s is the
    # assignment operator supplied by the caller, e.g. "=" or "+=").
    # ret+=my_dup("out%(unroll_iter)s[m*dim_zz[1]+n] %(affectation)s sum%(unroll_iter)s;", unroll_bsize)
    ret+=my_dup("out%(unroll_iter)s[m*dim_zz[1]+n] %(affectation)s sum%(unroll_iter)s;", unroll_bsize*unroll_ksize)
    # ret+=my_dup("cout<<sum%(unroll_iter)s<<endl;",unroll_bsize)
    # Epilogue: close the loops and release the temporary reshaped arrays.
    ret+="""
}//for n
}//for m
}//for stack_size
}//for n_kern
}//for b
Py_XDECREF(img2d);
Py_XDECREF(filtersflipped);
"""
    return ret
......@@ -63,7 +63,6 @@ class TestConvOp(unittest.TestCase):
# fixed parameters
bsize = 7 # batch size
imshp = (5,4)# image shape
print >> sys.stderr, "WARNING: only square shape tested"
kshps = [(2,3)]
nkern = 6 # nb kernel
ssizes = [(1,1)] #step size
......@@ -72,7 +71,6 @@ class TestConvOp(unittest.TestCase):
# fixed parameters
bsize = 7 # batch size
imshp = (5,4)# image shape
print >> sys.stderr, "WARNING: only square shape tested"
kshps = [(2,3)]
nkern = 6 # nb kernel
ssizes = [(1,1)] #step size
......@@ -112,7 +110,7 @@ class TestConvOp(unittest.TestCase):
# now test with real values
img2d = 1 + N.arange(bsize*N.prod(imshp)).reshape((bsize,)+imshp)
print 'img2d', img2d
# print 'img2d', img2d
img1d = img2d.reshape(bsize,-1)
# create filters (need to be flipped to use convolve2d)
......@@ -121,12 +119,12 @@ class TestConvOp(unittest.TestCase):
# compute with new convolve2 (no timing info)
output4, outshp4 = convolve2(kerns, kshp, nkern, input,\
imshp, bsize, (1,1), bias=bias, mode=conv_mode)
print 'output4', output4
# print 'output4', output4
ttime1 = time.time()
f = function([kerns, bias, input], output4)
out4 = f(filtersflipped.reshape(nkern,-1), biasvals, img1d)
print 'out4', out4, img1d, filtersflipped
# print 'out4', out4, img1d, filtersflipped
tconv2 += [time.time() - ttime1]
out4 = out4.reshape(bsize, nkern, outshp4[1], outshp4[2])
out4 = out4[:,:,0::ss[0],0::ss[1]]
......@@ -208,12 +206,12 @@ class TestConvOp(unittest.TestCase):
#test speed
# bsize = 10 # batch size
# imshp_start = (1,50,50)
# kshps = ([12,12],[12,12])
# imshp_start = (1,50,49)#un square shape to test more corner case.
# kshps = ([11,12],[12,11])#un square shape to test more corner case.
# nkerns = [20,20] # per output pixel
# ssizes = [(1,1),(1,1)]#(2,2) bugged
# ssizes = [(1,1),]#(1,1)]#(2,2) bugged
# convmodes = ['valid','full']
# do_theano=True
# do_theano=False
N.set_printoptions(threshold=N.nan)
......@@ -221,23 +219,25 @@ class TestConvOp(unittest.TestCase):
kerns = [T.matrix(),T.dmatrix()]
img = T.dmatrix()
rng = N.random.RandomState(3423489)
tctot, tpytot, t2ctot, t2pytot, ntot, convtot = [], [], [], [], [], []
tctot, tpytot, ntot = [], [], []
dmatrix4=T.TensorType('float64', (False, False, False, False))
inputs4=dmatrix4()
kerns4=dmatrix4()
assert len(kshps)==len(nkerns)==len(kerns)
for conv_mode, n_mode in zip(convmodes,range(len(convmodes))):
for ss, n_ss in zip(ssizes,range(len(ssizes))):
def do_test(conv_mode, ss, unroll_batch=0, unroll_kern=0, img=img, validate=True,conv_op_py=False):
# build actual input images
imgval = rng.rand(bsize, imshp_start[0], imshp_start[1], imshp_start[2])
imshp=imshp_start
# for each layer
for kshp, kern, nkern, n_layer in zip(kshps, kerns, nkerns, range(len(kerns))):
ntot=0
tctot=0
tpytot=0
for kshp, kern, nkern, n_layer in zip(kshps, kerns, nkerns, range(len(kerns))):
print '************* layer %i ***************' % n_layer
print conv_mode, ss, n_layer, kshp, nkern
......@@ -259,14 +259,15 @@ class TestConvOp(unittest.TestCase):
time1 = time.time()
outval = N.zeros(N.r_[bsize,outshp])
val = _valfrommode(conv_mode)
bval = _bvalfromboundary('fill')
for b in range(bsize): # loop over batches
for n in range(nkern): # loop over filters
for i in range(imshp[0]): # loop over input feature maps
outval[b,n,...] += _convolve2d(\
imgval[b,i,...], w_flip[n,i,...],1,val, bval, 0)[0::ss[0],0::ss[1]]
ntot += [time.time() - time1]
if validate:
val = _valfrommode(conv_mode)
bval = _bvalfromboundary('fill')
for b in range(bsize): # loop over batches
for n in range(nkern): # loop over filters
for i in range(imshp[0]): # loop over input feature maps
outval[b,n,...] += _convolve2d(\
imgval[b,i,...], w_flip[n,i,...],1,val, bval, 0)[0::ss[0],0::ss[1]]
ntot += time.time() - time1
if do_theano:
####### test with new sp.convolve2 function ######
......@@ -286,18 +287,16 @@ class TestConvOp(unittest.TestCase):
assert (N.abs(hidval-hidval1)<1e-5).all()
temp = N.abs(outval.reshape(bsize,-1) - hidval)
assert (temp < 1e-5).all()
if validate:
assert (temp < 1e-5).all()
else:
hid = img #we don't need it, but it make the flow easier flow
convtot+=[-1]
tctot+=[-1]
tpytot+=[-1]
hidval=outval.copy()#to keep the same memory
hidval1=outval.copy()
# ConvOp
conv_op = ConvOp(imshp, kshp, nkern, bsize, 1,1, conv_mode)(inputs4, kerns4)
conv_op = ConvOp(imshp, kshp, nkern, bsize, 1,1, conv_mode, unroll_batch=unroll_batch, unroll_kern=unroll_kern)(inputs4, kerns4)
l1shp=N.hstack((nkern,
getFilterOutShp(imshp, kshp, ss, conv_mode)))
propup2 = function([inputs4, kerns4], conv_op)
......@@ -306,30 +305,90 @@ class TestConvOp(unittest.TestCase):
time1 = time.time()
hidval2_ = propup2(imgval,w_flip)
hidval2 = hidval2_[:,:,0::ss[0],0::ss[1]]
t2ctot += [time.time() - time1]
tctot += time.time() - time1
time1 = time.time()
hidval3_ = propup3(imgval,w_flip)
hidval3 = hidval3_[:,:,0::ss[0],0::ss[1]]
t2pytot += [time.time() - time1]
assert (N.abs(hidval2-hidval3)<1e-5).all()
if conv_op_py:
time1 = time.time()
hidval3_ = propup3(imgval,w_flip)
hidval3 = hidval3_[:,:,0::ss[0],0::ss[1]]
tpytot += time.time() - time1
assert (N.abs(hidval2-hidval3)<1e-5).all()
else:
tpytot += 0
temp = N.abs(outval - hidval2)
assert (temp < 1e-5).all()
temp = N.abs(outval - hidval3)
assert (temp < 1e-5).all()
if validate:
temp = N.abs(outval - hidval2)
assert (temp < 1e-5).all()
if validate and conv_op_py:
temp = N.abs(outval - hidval3)
assert (temp < 1e-5).all()
img, imshp = hid, tuple(outshp)
imgval = outval.reshape(bsize,outshp[0],outshp[1],outshp[2])
return tctot, tpytot, ntot
if False:
# calculate the speed up of different combination of unroll
# put the paramter to the same you will try.
validate=False# we don't validate the result to have it much faster!
unroll_batch = [0,1,2,4,5,10,20]
unroll_kern = [0,2,4,5,10,20]
# unroll_batch = [0,2,5]
# unroll_kern = [0,2,5]
bsize = 20 # batch size
imshp_start = (1,50,49)#un square shape to test more corner case.
kshps = ([11,12],[12,11])#un square shape to test more corner case.
nkerns = [20,20] # per output pixel
ssizes = [(1,1),]#(1,1)]#(2,2) bugged
convmodes = ['valid','full']
do_theano=False
a=T.dmatrix()
kerns = [a for i in nkerns]
assert len(kshps)==len(nkerns)==len(kerns)
timing = N.zeros((len(unroll_batch),len(unroll_kern),3))
t_b_k=[]
for unroll_b, n_b in zip(unroll_batch,range(len(unroll_batch))):
for unroll_k, n_k in zip(unroll_kern,range(len(unroll_kern))):
t_b_k+=[str(unroll_b)+"/"+str(unroll_k)]
tctot, tpytot, ntot=[],[],[]
for conv_mode, n_mode in zip(convmodes,range(len(convmodes))):
for ss, n_ss in zip(ssizes,range(len(ssizes))):
tctot_, tpytot_, ntot_ = do_test(conv_mode, ss, unroll_batch=unroll_b, unroll_kern=unroll_k, validate=validate)
tctot+=[tctot_]
tpytot+=[tpytot_]
ntot+=[ntot_]
timing[n_b,n_k]=[sum(tctot), sum(tpytot), sum(ntot)]
# print timing
t=timing[:,:,0]#We select only the c timing.
print t_b_k
print t
print "max %.3fs"%t.max(), "max param(batch unloop size/kernel unloop size)", t_b_k[t.argmax()]
print "min %.3fs"%t.min(), "min param(batch unloop size/kernel unloop size)", t_b_k[t.argmin()]
print "speedup %.3fx"% (t.max()/t.min())
return
for conv_mode, n_mode in zip(convmodes,range(len(convmodes))):
for ss, n_ss in zip(ssizes,range(len(ssizes))):
tctot_, tpytot_, ntot_ = do_test(conv_mode, ss)
tctot+=[tctot_]
tpytot+=[tpytot_]
ntot+=[ntot_]
print '**** Multilayer Convolution Profiling Results ****'
print 'Numpy convolve2d processing time: %.3fs'%sum(ntot),ntot
print 'c Theano(ConvOp) processing time: %.3fs'%sum(t2ctot),t2ctot
print 'py Theano(ConvOp) processing time: %.3fs'%sum(t2pytot),t2pytot
print 'convolve processing time: %.3fs'%sum(convtot),convtot
d=N.asarray(ntot)/t2ctot
print 'c Theano(ConvOp) processing time: %.3fs'%sum(tctot),tctot
print 'py Theano(ConvOp) processing time: %.3fs'%sum(tpytot),tpytot
d=N.asarray(ntot)/tctot
print 'speed up c theano(ConvOp) vs convolve2d: %.3f'%d.mean(),d
d=N.asarray(ntot)/t2pytot
d=N.asarray(ntot)/tpytot
print 'speed up py theano(ConvOp) vs convolve2d: %.3f'%d.mean(),d
......@@ -393,3 +452,10 @@ class TestConvOp(unittest.TestCase):
kernvals = kernvals.reshape(nkern,-1)
utt.verify_grad(testf, [imgvals, kernvals])
if __name__ == '__main__':
t = TestConvOp("test_convolution")
t.test_convolution()
# t.test_multilayer_conv()
# from theano.tests import main
# main("test_sp")
......@@ -1956,6 +1956,12 @@ def horizontal_stack(*args):
L{TensorType}s must have the same shape along all dimensions but the
second.
"""
# Note: 'horizontal_stack' and 'vertical_stack' do not behave exactly like
# Numpy's hstack and vstack functions. This is intended, because Numpy's
# functions have potentially confusing/incoherent behavior (try them on 1D
# arrays). If this is fixed in a future version of Numpy, it may be worth
# trying to get closer to Numpy's way of doing things. In the meantime,
# better keep different names to emphasize the implementation divergences.
assert len(args) >= 2
for arg in args: assert arg.type.ndim == 2
return concatenate(args, axis=1)
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论