提交 f6ecd8bd authored 作者: Frederic Bastien's avatar Frederic Bastien

comment change.

上级 03e07dbc
......@@ -400,7 +400,7 @@ conv_patch_stack( float* img, float* kern, float* out,
//as we store the result of only the good thread.
//This was observed with nvcc 3.0 on a GTX470 card.
if(out_row<out_len)
convolutionRowNoFlip<KERN_WIDTH>(sum,idx_in,idx_kern,kern_wid);//idx_in,idx_in fail, idx_kern, idx_kern, wrong answer!
convolutionRowNoFlip<KERN_WIDTH>(sum,idx_in,idx_kern,kern_wid);
}
__syncthreads(); // ensure calculations have completed before any thread starts changing the shared memory
}
......@@ -724,6 +724,11 @@ conv_rows_stack2( float* img, float* kern, float* out,
}
__syncthreads();
//This `if` is needed on Fermi, where reading an out-of-bounds index from shared memory generates an error.
//It was not needed on earlier GPU generations, where such reads worked anyway. Removing the `if` still generates correct code,
//as only the results of the valid threads are stored.
//This was observed with nvcc 3.0 on a GTX470 card.
if(out_row<out_len){
const float* idx_kern;
if(preload_full_kern) idx_kern=&d_kern[row*kern_wid];
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论