Merge pull request #2555 from abergeron/cudnn_r2_bug

Work around cudnn bug.

Merge pull request #2555 from abergeron/cudnn_r2_bug
32f7abe2 · Pascal Lamblin · bcebb452 · ad527df1 · 32f7abe2
--- a/theano/sandbox/cuda/dnn.py
+++ b/theano/sandbox/cuda/dnn.py
@@ -565,7 +565,7 @@ class GpuDnnConvGradI(DnnBase, COp):


 def dnn_conv(img, kerns, border_mode='valid', subsample=(1, 1),
-             conv_mode='conv', direction_hint=None):
+             conv_mode='conv', direction_hint=None, workmem=None):
    """
    GPU convolution using cuDNN from NVIDIA.

@@ -587,6 +587,11 @@ def dnn_conv(img, kerns, border_mode='valid', subsample=(1, 1),
        *not* 'forward!', it will use GpuDnnConvGradI.
        This parameter is used internally by graph optimizers and may be
        removed at any time without a deprecation period. You have been warned.
+    :param workmem: Specify the amount of working memory allowed.
+      More memory is usually faster.  One of 'none', 'small' or
+      'large'.  (default is None which takes its value from
+      config.dnn.conv.workmem)
+

    :warning: The cuDNN library only works with GPU that have a compute
      capability of 3.0 or higer.  This means that older GPU will not
@@ -637,7 +642,13 @@ def dnn_conv(img, kerns, border_mode='valid', subsample=(1, 1),
    kerns = gpu_contiguous(kerns)
    desc = GpuDnnConvDesc(border_mode=border_mode, subsample=subsample,
                          conv_mode=conv_mode)(img.shape, kerns.shape)
-    return GpuDnnConv()(img, kerns, desc)
+    if conv_mode == 'cross' and subsample != (1, 1) and border_mode != 'valid':
+        # there is a bug in cudnn v2 rc1-3 which gives incorrect
+        # results in this case when using the workmem='small'
+        # algorithm.
+        if workmem is None or workmem == 'small':
+            workmem = 'none'
+    return GpuDnnConv(workmem=workmem)(img, kerns, desc)


 class GpuDnnPoolDesc(GpuOp):