Add code to print the cudnn algo used.

94abb92a · Frederic Bastien · 920194a7 · 94abb92a · 94abb92a · 94abb92a
--- a/theano/sandbox/cuda/dnn_fwd.c
+++ b/theano/sandbox/cuda/dnn_fwd.c
@@ -159,6 +159,36 @@ APPLY_SPECIFIC(conv_fwd)(CudaNdarray *input, CudaNdarray *kerns,
      chosen_algo = CONV_ALGO;
    }
+    if (0){  // Debug aid, disabled by default: change 0 to 1 to print the chosen forward algo.
+      const char * a = "unknown";  // fallback so printf never reads an unset pointer
+      switch(chosen_algo){
+      case CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_GEMM:
+	a = "implicit gemm (0)";
+	break;
+      case CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM:
+	a = "precomp gemm (1)";
+	break;
+      case CUDNN_CONVOLUTION_FWD_ALGO_GEMM:
+	a = "gemm (2)";
+	break;
+      case CUDNN_CONVOLUTION_FWD_ALGO_DIRECT:
+	a = "direct (3)";
+	break;
+      case CUDNN_CONVOLUTION_FWD_ALGO_FFT:
+	a = "fft (4)";
+	break;
+      case CUDNN_CONVOLUTION_FWD_ALGO_FFT_TILING:
+	a = "fft tiling (5)";
+	break;
+#if CUDNN_VERSION > 5000
+      case CUDNN_CONVOLUTION_FWD_ALGO_WINOGRAD:
+	a = "winograd (6)";
+	break;
+#endif
+      }  // no default case: values not listed above keep the "unknown" label
+      printf("GpuDNNConv: algo %s\n", a);
+    }
    // The FFT implementation (only in V3 and onward) does not support strides,
    // 1x1 filters or inputs with a spatial dimension larger than 1024.
    // The tiled-FFT implementation (only in V4 onward) does not support

--- a/theano/sandbox/cuda/dnn_gi.c
+++ b/theano/sandbox/cuda/dnn_gi.c
@@ -158,6 +158,30 @@ APPLY_SPECIFIC(conv_gi)(CudaNdarray *kerns, CudaNdarray *output,
        chosen_algo = CONV_ALGO;
    }
+    if (0){  // Debug aid, disabled by default: change 0 to 1 to print the chosen backward-data algo.
+      const char * a = "unknown";  // fallback so printf never reads an unset pointer
+      switch(chosen_algo){
+      case CUDNN_CONVOLUTION_BWD_DATA_ALGO_0:  // NOTE(review): ALGO_0/ALGO_1 labels match the forward op's strings - confirm they apply here
+	a = "implicit gemm (0)";
+	break;
+      case CUDNN_CONVOLUTION_BWD_DATA_ALGO_1:
+	a = "precomp gemm (1)";
+	break;
+      case CUDNN_CONVOLUTION_BWD_DATA_ALGO_FFT:
+	a = "fft (2)";
+	break;
+      case CUDNN_CONVOLUTION_BWD_DATA_ALGO_FFT_TILING:
+	a = "fft tiling (3)";
+	break;
+#if CUDNN_VERSION > 5000
+      case CUDNN_CONVOLUTION_BWD_DATA_ALGO_WINOGRAD:
+	a = "winograd (4)";
+	break;
+#endif
+      }  // no default case: values not listed above keep the "unknown" label
+      printf("GpuDNNConvGI: algo %s\n", a);
+    }
    // The FFT implementation (only in V3 and onward) does not support strides,
    // 1x1 filters or inputs with a spatial dimension larger than 1024.
    // The tiled-FFT implementation (only in V4 onward) does not support

--- a/theano/sandbox/cuda/dnn_gw.c
+++ b/theano/sandbox/cuda/dnn_gw.c
@@ -158,6 +158,25 @@ APPLY_SPECIFIC(conv_gw)(CudaNdarray *input, CudaNdarray *output,
        chosen_algo = CONV_ALGO;
    }
+    if (0){  // Debug aid, disabled by default: change 0 to 1 to print the chosen backward-filter algo.
+      const char * a = "unknown";  // fallback so printf never reads an unset pointer
+      switch(chosen_algo){
+      case CUDNN_CONVOLUTION_BWD_FILTER_ALGO_0:
+	a = "algo 0 (0)";
+	break;
+      case CUDNN_CONVOLUTION_BWD_FILTER_ALGO_1:
+	a = "algo 1 (1)";
+	break;
+      case CUDNN_CONVOLUTION_BWD_FILTER_ALGO_FFT:
+	a = "fft (2)";
+	break;
+      case CUDNN_CONVOLUTION_BWD_FILTER_ALGO_3:
+	a = "algo 3 (3)";
+	break;
+      }  // no default case: values not listed above keep the "unknown" label
+      printf("GpuDNNConvGW: algo %s\n", a);
+    }
    // The FFT implementation (only in v3 and onward) does not support strides,
    // 1x1 filters or inputs with a spatial dimension larger than 1024.
    // If the chosen implementation is FFT, validate that it can be used