提交 32f7abe2 authored 作者: Pascal Lamblin's avatar Pascal Lamblin

Merge pull request #2555 from abergeron/cudnn_r2_bug

Work around cudnn bug.
......@@ -565,7 +565,7 @@ class GpuDnnConvGradI(DnnBase, COp):
def dnn_conv(img, kerns, border_mode='valid', subsample=(1, 1),
conv_mode='conv', direction_hint=None):
conv_mode='conv', direction_hint=None, workmem=None):
"""
GPU convolution using cuDNN from NVIDIA.
......@@ -587,6 +587,11 @@ def dnn_conv(img, kerns, border_mode='valid', subsample=(1, 1),
*not* 'forward!', it will use GpuDnnConvGradI.
This parameter is used internally by graph optimizers and may be
removed at any time without a deprecation period. You have been warned.
:param workmem: Specify the amount of working memory allowed.
More memory is usually faster. One of 'none', 'small' or
'large'. (default is None which takes its value from
config.dnn.conv.workmem)
:warning: The cuDNN library only works with GPU that have a compute
capability of 3.0 or higer. This means that older GPU will not
......@@ -637,7 +642,13 @@ def dnn_conv(img, kerns, border_mode='valid', subsample=(1, 1),
kerns = gpu_contiguous(kerns)
desc = GpuDnnConvDesc(border_mode=border_mode, subsample=subsample,
conv_mode=conv_mode)(img.shape, kerns.shape)
return GpuDnnConv()(img, kerns, desc)
if conv_mode == 'cross' and subsample != (1, 1) and border_mode != 'valid':
# there is a bug in cudnn v2 rc1-3 which gives incorrect
# results in this case when using the workmem='small'
# algorithm.
if workmem is None or workmem == 'small':
workmem = 'none'
return GpuDnnConv(workmem=workmem)(img, kerns, desc)
class GpuDnnPoolDesc(GpuOp):
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论