提交 1ede3c8b 作者: James Bergstra

Merge pull request #586 from nouiz/small

some extra verbose prints in gpu conv code.
...@@ -52,6 +52,14 @@ CudaNdarray_conv_valid(const CudaNdarray *img, const CudaNdarray * kern, ...@@ -52,6 +52,14 @@ CudaNdarray_conv_valid(const CudaNdarray *img, const CudaNdarray * kern,
CudaNdarray_HOST_STRIDES(kern)[1], CudaNdarray_HOST_STRIDES(kern)[1],
CudaNdarray_HOST_STRIDES(kern)[2], CudaNdarray_HOST_STRIDES(kern)[2],
CudaNdarray_HOST_STRIDES(kern)[3]); CudaNdarray_HOST_STRIDES(kern)[3]);
fprintf(stderr,
"INFO: out dim: %i %i %i %i out stride: %i %i %i %i\n",
CudaNdarray_HOST_DIMS(out)[0], CudaNdarray_HOST_DIMS(out)[1],
CudaNdarray_HOST_DIMS(out)[2], CudaNdarray_HOST_DIMS(out)[3],
CudaNdarray_HOST_STRIDES(out)[0],
CudaNdarray_HOST_STRIDES(out)[1],
CudaNdarray_HOST_STRIDES(out)[2],
CudaNdarray_HOST_STRIDES(out)[3]);
fprintf(stderr, fprintf(stderr,
"INFO: subsample_rows=%d, subsample_cols=%d\n", "INFO: subsample_rows=%d, subsample_cols=%d\n",
subsample_rows, subsample_cols); subsample_rows, subsample_cols);
...@@ -646,18 +654,6 @@ CudaNdarray_conv_valid(const CudaNdarray *img, const CudaNdarray * kern, ...@@ -646,18 +654,6 @@ CudaNdarray_conv_valid(const CudaNdarray *img, const CudaNdarray * kern,
else if(!kern_flipped && !ccontig && split && !full_kern) f=conv_patch_stack_reduce<false,kern_wid,false, true, false>; else if(!kern_flipped && !ccontig && split && !full_kern) f=conv_patch_stack_reduce<false,kern_wid,false, true, false>;
CONV_PATCH_STACK_REDUCE_SPECIAL(THEANO_KERN_WID); CONV_PATCH_STACK_REDUCE_SPECIAL(THEANO_KERN_WID);
if (verbose)
fprintf(stderr,
"INFO: using 'conv_patch_stack_reduce' version"
" kern_flipped=%i ccontig=%i nb_split=%d,"
" preload_full_kern=%d\n",
kern_flipped, ccontig, nb_split, full_kern);
if (verbose>1)
fprintf(stderr,
"threads.x=%i, threads.y=%i, threads.z=%i, grid.x=%i,"
" grid.y=%i, shared_size=%i, nb_threads=%i\n",
threads.x, threads.y, threads.z, grid.x, grid.y,
shared_size, threads.x * threads.y * threads.z);
f<<< grid, threads, shared_size>>>(img->devdata, kern_data_unflipped, out->devdata, f<<< grid, threads, shared_size>>>(img->devdata, kern_data_unflipped, out->devdata,
img_len, img_wid, kern_len, kern_wid, img_len, img_wid, kern_len, kern_wid,
nkern, nstack, nkern, nstack,
...@@ -668,6 +664,19 @@ CudaNdarray_conv_valid(const CudaNdarray *img, const CudaNdarray * kern, ...@@ -668,6 +664,19 @@ CudaNdarray_conv_valid(const CudaNdarray *img, const CudaNdarray * kern,
cudaError_t sts = cudaGetLastError(); cudaError_t sts = cudaGetLastError();
if (cudaSuccess == sts) if (cudaSuccess == sts)
{ {
if (verbose>1)
fprintf(stderr,
"threads.x=%i, threads.y=%i, threads.z=%i, "
"grid.x=%i, grid.y=%i, shared_size=%i,"
" nb_threads=%i\n",
threads.x, threads.y, threads.z, grid.x, grid.y,
shared_size, threads.x * threads.y * threads.z);
if (verbose)
fprintf(stderr,
"INFO: used 'conv_patch_stack_reduce' version"
" kern_flipped=%i ccontig=%i nb_split=%d,"
" preload_full_kern=%d\n",
kern_flipped, ccontig, nb_split, full_kern);
work_complete = true; work_complete = true;
} }
else else
...@@ -991,6 +1000,13 @@ CudaNdarray_conv_full(const CudaNdarray *img, const CudaNdarray * kern, ...@@ -991,6 +1000,13 @@ CudaNdarray_conv_full(const CudaNdarray *img, const CudaNdarray * kern,
CudaNdarray_HOST_STRIDES(kern)[1], CudaNdarray_HOST_STRIDES(kern)[1],
CudaNdarray_HOST_STRIDES(kern)[2], CudaNdarray_HOST_STRIDES(kern)[2],
CudaNdarray_HOST_STRIDES(kern)[3]); CudaNdarray_HOST_STRIDES(kern)[3]);
printf("INFO: out dim: %i %i %i %i out stride: %i %i %i %i\n",
CudaNdarray_HOST_DIMS(out)[0], CudaNdarray_HOST_DIMS(out)[1],
CudaNdarray_HOST_DIMS(out)[2], CudaNdarray_HOST_DIMS(out)[3],
CudaNdarray_HOST_STRIDES(out)[0],
CudaNdarray_HOST_STRIDES(out)[1],
CudaNdarray_HOST_STRIDES(out)[2],
CudaNdarray_HOST_STRIDES(out)[3]);
} }
if (!subsample && if (!subsample &&
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
请 注册 或者 登录 后发表评论