提交 8f094db5 authored 作者: Frederic Bastien's avatar Frederic Bastien

make a GPU convolution kernel more precise to don't have test fail due to random data.

上级 f7778651
......@@ -722,7 +722,9 @@ conv_rows_stack2( float* img, float* kern, float* out,
if(preload_full_kern) idx_kern=&d_kern[row*kern_wid];
else idx_kern=d_kern;
const float* idx_in=&d_img[((shared_row+row)%nb_rows)*img_wid+out_col];
convolutionRowNoFlip<KERN_WIDTH>(sum,idx_in,idx_kern,kern_wid);
float sum_ =0.0f;
convolutionRowNoFlip<KERN_WIDTH>(sum_,idx_in,idx_kern,kern_wid);
sum+=sum_;//We pass by an intermediate variable to have more precission.
}
}
}
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论