提交 93b9b4ff authored 作者: Frederic Bastien's avatar Frederic Bastien

Use the gcc bug workaround in all cases to make it more consistent and fix a bad…

Use the gcc bug workaround in all cases, to make it more consistent and to fix a bad code generation case.
上级 6c713d33
...@@ -956,11 +956,11 @@ using namespace std; ...@@ -956,11 +956,11 @@ using namespace std;
d["dim_zz_const"]="" d["dim_zz_const"]=""
d["dim_zz_affect"]=""" d["dim_zz_affect"]="""
if (mode == FULL) { if (mode == FULL) {
dim_zz[0] = (int)ceil((dim_im[0]+dim_ker[0]-1)/float(%(self_dx)s)); dim_zz[0] = (int)ceil((dim_im[0]+dim_ker0-1)/float(%(self_dx)s));
dim_zz[1] = (int)ceil((dim_im[1]+dim_ker[1]-1)/float(%(self_dy)s)); dim_zz[1] = (int)ceil((dim_im[1]+dim_ker1-1)/float(%(self_dy)s));
} else { } else {
dim_zz[0] = (int)ceil((dim_im[0]-dim_ker[0]+1)/float(%(self_dx)s)); dim_zz[0] = (int)ceil((dim_im[0]-dim_ker0+1)/float(%(self_dx)s));
dim_zz[1] = (int)ceil((dim_im[1]-dim_ker[1]+1)/float(%(self_dy)s)); dim_zz[1] = (int)ceil((dim_im[1]-dim_ker1+1)/float(%(self_dy)s));
} }
"""% d """% d
...@@ -1258,7 +1258,8 @@ int type_ker=PyArray_TYPE(%(filtersflipped)s); ...@@ -1258,7 +1258,8 @@ int type_ker=PyArray_TYPE(%(filtersflipped)s);
npy_intp dim_zz[2]={%(self_outshp0)s,%(self_outshp1)s}; npy_intp dim_zz[2]={%(self_outshp0)s,%(self_outshp1)s};
npy_intp dim_im[2]={%(self_imshp1)s,%(self_imshp2)s}; npy_intp dim_im[2]={%(self_imshp1)s,%(self_imshp2)s};
npy_intp dim_ker[2]={%(self_kshp0)s,%(self_kshp1)s}; const npy_intp dim_ker0=%(self_kshp0)s;
const npy_intp dim_ker1=%(self_kshp1)s;
PyArray_Dims img2d_shape; PyArray_Dims img2d_shape;
npy_intp img2d_dim[4]={1,1,0,0}; npy_intp img2d_dim[4]={1,1,0,0};
...@@ -1354,8 +1355,8 @@ if ((!%(z)s) ...@@ -1354,8 +1355,8 @@ if ((!%(z)s)
} }
int Os[2]; int Os[2];
Os[0] = dim_im[0]-dim_ker[0]+1; Os[0] = dim_im[0]-dim_ker0+1;
Os[1] = dim_im[1]-dim_ker[1]+1; Os[1] = dim_im[1]-dim_ker1+1;
// allocate a temporary buffer for storing the inner product of each nth kernel row // allocate a temporary buffer for storing the inner product of each nth kernel row
// with each row of an image // with each row of an image
...@@ -1504,7 +1505,8 @@ int type_ker=PyArray_TYPE(%(filtersflipped)s); ...@@ -1504,7 +1505,8 @@ int type_ker=PyArray_TYPE(%(filtersflipped)s);
npy_intp dim_zz[2]={%(self_outshp0)s,%(self_outshp1)s}; npy_intp dim_zz[2]={%(self_outshp0)s,%(self_outshp1)s};
npy_intp dim_im[2]={%(self_imshp1)s,%(self_imshp2)s}; npy_intp dim_im[2]={%(self_imshp1)s,%(self_imshp2)s};
npy_intp dim_ker[2]={%(self_kshp0)s,%(self_kshp1)s}; const npy_intp dim_ker0=%(self_kshp0)s;
const npy_intp dim_ker1=%(self_kshp1)s;
PyArray_Dims img2d_shape; PyArray_Dims img2d_shape;
npy_intp img2d_dim[4]={1,1,0,0}; npy_intp img2d_dim[4]={1,1,0,0};
...@@ -1650,7 +1652,7 @@ for(int b=0;b< %(self_bsize)s ;b+=%(unroll_bsize)s){ ...@@ -1650,7 +1652,7 @@ for(int b=0;b< %(self_bsize)s ;b+=%(unroll_bsize)s){
// Reposition index into input image based on requested output size // Reposition index into input image based on requested output size
int pos_m = iter_m*%(self_dx)s;//The position of the patch in the image int pos_m = iter_m*%(self_dx)s;//The position of the patch in the image
if (mode == FULL) new_m = pos_m ; if (mode == FULL) new_m = pos_m ;
else new_m = (pos_m+dim_ker[0]-1); else new_m = (pos_m+dim_ker0-1);
for (int iter_n=0; iter_n < Os[1]; iter_n++) { // loop over columns for (int iter_n=0; iter_n < Os[1]; iter_n++) { // loop over columns
int pos_n=iter_n*%(self_dy)s; int pos_n=iter_n*%(self_dy)s;
...@@ -1660,16 +1662,16 @@ for(int b=0;b< %(self_bsize)s ;b+=%(unroll_bsize)s){ ...@@ -1660,16 +1662,16 @@ for(int b=0;b< %(self_bsize)s ;b+=%(unroll_bsize)s){
// Sum over kernel, if index into image is out of bounds // Sum over kernel, if index into image is out of bounds
// fill with the value // fill with the value
for (int j=0; j < dim_ker[0]; j++) { for (int j=0; j < dim_ker0; j++) {
int ind0 = (new_m-j); int ind0 = (new_m-j);
if(mode==FULL){ if(mode==FULL){
"""%d """%d
ret+=my_dup("const %(type)s * idx_hvals%(unroll_iter)s=&hvals%(unroll_iter)s[j*dim_ker[1]];",unroll_ksize) ret+=my_dup("const %(type)s * idx_hvals%(unroll_iter)s=&hvals%(unroll_iter)s[j*dim_ker1];",unroll_ksize)
ret+=""" ret+="""
if(ind0 < 0 || ind0 >= dim_im[0]){ if(ind0 < 0 || ind0 >= dim_im[0]){
if(fill_value!=0) if(fill_value!=0)
for (int k=0; k < dim_ker[1]; k++) { for (int k=0; k < dim_ker1; k++) {
"""%d """%d
ret+=my_dup2("sum%(unroll_iter)s += idx_hvals%(unroll_kiter)s[k] * fill_value;") ret+=my_dup2("sum%(unroll_iter)s += idx_hvals%(unroll_kiter)s[k] * fill_value;")
ret+=""" ret+="""
...@@ -1688,7 +1690,7 @@ for(int b=0;b< %(self_bsize)s ;b+=%(unroll_bsize)s){ ...@@ -1688,7 +1690,7 @@ for(int b=0;b< %(self_bsize)s ;b+=%(unroll_bsize)s){
}else {k=max_k;} }else {k=max_k;}
//do the part where the kernel is on the img //do the part where the kernel is on the img
max_k=min(pos_n+1,(int)dim_ker[1]); max_k=min(pos_n+1,(int)dim_ker1);
"""%d """%d
ret+=my_dup("const %(type)s * idx_in%(unroll_iter)s=&in%(unroll_iter)s[ind0*dim_im[1]];", unroll_bsize) ret+=my_dup("const %(type)s * idx_in%(unroll_iter)s=&in%(unroll_iter)s[ind0*dim_im[1]];", unroll_bsize)
ret+=""" ret+="""
...@@ -1700,7 +1702,7 @@ for(int b=0;b< %(self_bsize)s ;b+=%(unroll_bsize)s){ ...@@ -1700,7 +1702,7 @@ for(int b=0;b< %(self_bsize)s ;b+=%(unroll_bsize)s){
} }
//do the part to the left of the img //do the part to the left of the img
if(fill_value!=0) if(fill_value!=0)
for(;k<dim_ker[1];k++){ for(;k<dim_ker1;k++){
"""%d """%d
ret+=my_dup2("sum%(unroll_iter)s += idx_hvals%(unroll_kiter)s[k] * fill_value;") ret+=my_dup2("sum%(unroll_iter)s += idx_hvals%(unroll_kiter)s[k] * fill_value;")
ret+=""" ret+="""
...@@ -1709,11 +1711,11 @@ for(int b=0;b< %(self_bsize)s ;b+=%(unroll_bsize)s){ ...@@ -1709,11 +1711,11 @@ for(int b=0;b< %(self_bsize)s ;b+=%(unroll_bsize)s){
}else{//valid mode }else{//valid mode
"""%d """%d
ret+=my_dup("const %(type)s* idx_in%(unroll_iter)s=&in%(unroll_iter)s[ind0*dim_im[1]];", unroll_bsize) ret+=my_dup("const %(type)s* idx_in%(unroll_iter)s=&in%(unroll_iter)s[ind0*dim_im[1]];", unroll_bsize)
ret+=my_dup("const %(type)s* idx_hvals%(unroll_iter)s=&hvals%(unroll_iter)s[j*dim_ker[1]];",unroll_ksize) ret+=my_dup("const %(type)s* idx_hvals%(unroll_iter)s=&hvals%(unroll_iter)s[j*dim_ker1];",unroll_ksize)
ret+=""" ret+="""
int new_n = (pos_n+dim_ker[1]-1); int new_n = (pos_n+dim_ker1-1);
for (int k=0,last=new_n; k < dim_ker[1]; k++,last--) { for (int k=0,last=new_n; k < dim_ker1; k++,last--) {
"""%d """%d
ret+=my_dup2("sum%(unroll_iter)s+=idx_hvals%(unroll_kiter)s[k]*idx_in%(unroll_biter)s[last];") ret+=my_dup2("sum%(unroll_iter)s+=idx_hvals%(unroll_kiter)s[k]*idx_in%(unroll_biter)s[last];")
ret+=""" ret+="""
......
Markdown 格式
0%
您即将添加 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论