提交 94abb92a authored 作者: Frederic Bastien's avatar Frederic Bastien

Add code to print the cudnn algo used.

上级 920194a7
...@@ -159,6 +159,36 @@ APPLY_SPECIFIC(conv_fwd)(CudaNdarray *input, CudaNdarray *kerns, ...@@ -159,6 +159,36 @@ APPLY_SPECIFIC(conv_fwd)(CudaNdarray *input, CudaNdarray *kerns,
chosen_algo = CONV_ALGO; chosen_algo = CONV_ALGO;
} }
// Debug aid, disabled by the constant-false condition: change the 0 to 1 to
// print which cuDNN forward-convolution algorithm ended up in chosen_algo.
if (0){
  // Start from a valid fallback label so that an algorithm value not listed
  // below (for example one compiled out by the version guard) never leaves
  // the pointer indeterminate when it is handed to printf.
  const char * a = "unknown";
  switch(chosen_algo){
  case CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_GEMM:
    a = "implicit gemm (0)";
    break;
  case CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM:
    a = "precomp gemm (1)";
    break;
  case CUDNN_CONVOLUTION_FWD_ALGO_GEMM:
    a = "gemm (2)";
    break;
  case CUDNN_CONVOLUTION_FWD_ALGO_DIRECT:
    a = "direct (3)";
    break;
  case CUDNN_CONVOLUTION_FWD_ALGO_FFT:
    a = "fft (4)";
    break;
  case CUDNN_CONVOLUTION_FWD_ALGO_FFT_TILING:
    a = "fft tiling (5)";
    break;
#if CUDNN_VERSION > 5000
  case CUDNN_CONVOLUTION_FWD_ALGO_WINOGRAD:
    a = "winograd (6)";
    break;
#endif
  default:
    // Keep the "unknown" label for any value without an explicit case.
    break;
  }
  printf("GpuDNNConv: algo %s\n", a);
}
// The FFT implementation (only in V3 and onward) does not support strides, // The FFT implementation (only in V3 and onward) does not support strides,
// 1x1 filters or inputs with a spatial dimension larger than 1024. // 1x1 filters or inputs with a spatial dimension larger than 1024.
// The tiled-FFT implementation (only in V4 onward) does not support // The tiled-FFT implementation (only in V4 onward) does not support
......
...@@ -158,6 +158,30 @@ APPLY_SPECIFIC(conv_gi)(CudaNdarray *kerns, CudaNdarray *output, ...@@ -158,6 +158,30 @@ APPLY_SPECIFIC(conv_gi)(CudaNdarray *kerns, CudaNdarray *output,
chosen_algo = CONV_ALGO; chosen_algo = CONV_ALGO;
} }
// Debug aid, disabled by the constant-false condition: change the 0 to 1 to
// print which cuDNN backward-data algorithm ended up in chosen_algo.
if (0){
  // Start from a valid fallback label so that an algorithm value not listed
  // below (for example one compiled out by the version guard) never leaves
  // the pointer indeterminate when it is handed to printf.
  const char * a = "unknown";
  switch(chosen_algo){
  // NOTE(review): the labels for ALGO_0 / ALGO_1 match the forward-pass
  // names; confirm they describe the backward-data algorithms correctly.
  case CUDNN_CONVOLUTION_BWD_DATA_ALGO_0:
    a = "implicit gemm (0)";
    break;
  case CUDNN_CONVOLUTION_BWD_DATA_ALGO_1:
    a = "precomp gemm (1)";
    break;
  case CUDNN_CONVOLUTION_BWD_DATA_ALGO_FFT:
    a = "fft (2)";
    break;
  case CUDNN_CONVOLUTION_BWD_DATA_ALGO_FFT_TILING:
    a = "fft tiling (3)";
    break;
#if CUDNN_VERSION > 5000
  case CUDNN_CONVOLUTION_BWD_DATA_ALGO_WINOGRAD:
    a = "winograd (4)";
    break;
#endif
  default:
    // Keep the "unknown" label for any value without an explicit case.
    break;
  }
  printf("GpuDNNConvGI: algo %s\n", a);
}
// The FFT implementation (only in V3 and onward) does not support strides, // The FFT implementation (only in V3 and onward) does not support strides,
// 1x1 filters or inputs with a spatial dimension larger than 1024. // 1x1 filters or inputs with a spatial dimension larger than 1024.
// The tiled-FFT implementation (only in V4 onward) does not support // The tiled-FFT implementation (only in V4 onward) does not support
......
...@@ -158,6 +158,25 @@ APPLY_SPECIFIC(conv_gw)(CudaNdarray *input, CudaNdarray *output, ...@@ -158,6 +158,25 @@ APPLY_SPECIFIC(conv_gw)(CudaNdarray *input, CudaNdarray *output,
chosen_algo = CONV_ALGO; chosen_algo = CONV_ALGO;
} }
// Debug aid, disabled by the constant-false condition: change the 0 to 1 to
// print which cuDNN backward-filter algorithm ended up in chosen_algo.
if (0){
  // Start from a valid fallback label so that an algorithm value not listed
  // below never leaves the pointer indeterminate when it is handed to printf.
  const char * a = "unknown";
  switch(chosen_algo){
  case CUDNN_CONVOLUTION_BWD_FILTER_ALGO_0:
    a = "algo 0 (0)";
    break;
  case CUDNN_CONVOLUTION_BWD_FILTER_ALGO_1:
    a = "algo 1 (1)";
    break;
  case CUDNN_CONVOLUTION_BWD_FILTER_ALGO_FFT:
    a = "fft (2)";
    break;
  case CUDNN_CONVOLUTION_BWD_FILTER_ALGO_3:
    a = "algo 3 (3)";
    break;
  default:
    // Keep the "unknown" label for any value without an explicit case.
    break;
  }
  printf("GpuDNNConvGW: algo %s\n", a);
}
// The FFT implementation (only in v3 and onward) does not support strides, // The FFT implementation (only in v3 and onward) does not support strides,
// 1x1 filters or inputs with a spatial dimension larger than 1024. // 1x1 filters or inputs with a spatial dimension larger than 1024.
// If the chosen implementation is FFT, validate that it can be used // If the chosen implementation is FFT, validate that it can be used
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论