Apply the large-batch (> 2^16) forward-convolution algo workaround only for cuDNN <= 6020.

e31d548c · notoraptor · 0cb8fbe7 · e31d548c
--- a/theano/gpuarray/dnn_fwd.c
+++ b/theano/gpuarray/dnn_fwd.c
@@ -170,8 +170,11 @@ APPLY_SPECIFIC(conv_fwd)(PyGpuArrayObject *input, PyGpuArrayObject *kerns,
        algo == CUDNN_CONVOLUTION_FWD_ALGO_FFT_TILING))
    algo = CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_GEMM;
-  /* Algo `small` seems to not work for a batch size > 2^16, with cuDNN >= V5.1. */
+  // Algo `small` does not work for a batch size > 2^16, with cuDNN >= V5.1.
-  if (algo == CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM && PyGpuArray_DIM(input, 0) > 65536)
+  // Issue should have been resolved for cuDNN > V6.0.20.
+  if (cudnnGetVersion() <= 6020 &&
+      algo == CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM &&
+      PyGpuArray_DIM(input, 0) > 65536)
      algo = CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_GEMM;
  // The FFT implementation does not support strides, 1x1 filters or inputs