提交 64439f41 作者: carriepl 提交者: Frederic

Update GpuDnnConv for CuDNN V4 (gpua backend)

上级 93f6f441
...@@ -370,7 +370,8 @@ class GpuDnnConv(DnnBase): ...@@ -370,7 +370,8 @@ class GpuDnnConv(DnnBase):
kernel kernel
descr descr
The convolution descriptor. The convolution descriptor.
algo : {'small', 'none', 'large', 'fft', 'guess_once', 'guess_on_shape_change', 'time_once', 'time_on_shape_change'} algo : {'small', 'none', 'large', 'fft', 'fft_tiling', 'guess_once',
'guess_on_shape_change', 'time_once', 'time_on_shape_change'}
Default is the value of :attr:`config.dnn.conv.algo_fwd`. Default is the value of :attr:`config.dnn.conv.algo_fwd`.
""" """
...@@ -399,9 +400,15 @@ class GpuDnnConv(DnnBase): ...@@ -399,9 +400,15 @@ class GpuDnnConv(DnnBase):
elif self.algo in ['time_once', 'time_on_shape_change']: elif self.algo in ['time_once', 'time_on_shape_change']:
raise RuntimeError("CuDNN convolution timing requires CuDNN v3") raise RuntimeError("CuDNN convolution timing requires CuDNN v3")
assert self.algo in ['none', 'small', 'large', 'fft', 'guess_once', # The fft_tiling implementation is only available from CuDNN V4 onward
'guess_on_shape_change', 'time_once', if version() < 4000:
'time_on_shape_change'] if self.algo == 'fft_tiling':
raise RuntimeError("CuDNN tiled-FFT convolution requires "
"CuDNN v4 or more recent")
assert self.algo in ['none', 'small', 'large', 'fft', 'fft_tiling',
'guess_once', 'guess_on_shape_change',
'time_once', 'time_on_shape_change']
def __setstate__(self, d): def __setstate__(self, d):
self.__dict__.update(d) self.__dict__.update(d)
...@@ -425,8 +432,13 @@ class GpuDnnConv(DnnBase): ...@@ -425,8 +432,13 @@ class GpuDnnConv(DnnBase):
alg = 'CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM' alg = 'CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM'
elif self.algo == 'large': elif self.algo == 'large':
alg = 'CUDNN_CONVOLUTION_FWD_ALGO_GEMM' alg = 'CUDNN_CONVOLUTION_FWD_ALGO_GEMM'
elif self.algo == 'direct':
alg = 'CUDNN_CONVOLUTION_FWD_ALGO_DIRECT'
elif self.algo == 'fft': elif self.algo == 'fft':
alg = 'CUDNN_CONVOLUTION_FWD_ALGO_FFT' alg = 'CUDNN_CONVOLUTION_FWD_ALGO_FFT'
elif self.algo == 'fft_tiling':
# need v4
alg = 'CUDNN_CONVOLUTION_FWD_ALGO_FFT_TILING'
defs.append(('CONV_ALGO', alg)) defs.append(('CONV_ALGO', alg))
if self.algo in ['guess_once', 'guess_on_shape_change', if self.algo in ['guess_once', 'guess_on_shape_change',
...@@ -456,9 +468,10 @@ class GpuDnnConv(DnnBase): ...@@ -456,9 +468,10 @@ class GpuDnnConv(DnnBase):
raise TypeError("The number of dimensions of " raise TypeError("The number of dimensions of "
"img, kern and output must match") "img, kern and output must match")
if img.type.ndim == 5 and self.algo == 'fft': if (img.type.ndim == 5 and
raise ValueError("convolution algo fft can't be used for " self.algo in ['small', 'large', 'fft', 'fft_tiling']):
"3d convolutions") raise ValueError("convolution algo %s can't be used for "
"3d convolutions", (self.algo,))
if (not isinstance(desc.type, CDataType) or if (not isinstance(desc.type, CDataType) or
desc.type.ctype != 'cudnnConvolutionDescriptor_t'): desc.type.ctype != 'cudnnConvolutionDescriptor_t'):
......
...@@ -137,7 +137,16 @@ APPLY_SPECIFIC(conv_fwd)(PyGpuArrayObject *input, PyGpuArrayObject *kerns, ...@@ -137,7 +137,16 @@ APPLY_SPECIFIC(conv_fwd)(PyGpuArrayObject *input, PyGpuArrayObject *kerns,
algo = CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_GEMM; algo = CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_GEMM;
#if CUDNN_VERSION > 3000 #if CUDNN_VERSION > 3000
if (algo == CUDNN_CONVOLUTION_FWD_ALGO_FFT) { // The FFT implementation does not support strides, 1x1 filters or inputs
// with a spatial dimension larger than 1024. The tiled-FFT implementation
// does not support strides.
// If the chosen implementation is FFT or tiled-FFT, validate that it can
// be used on the current data and default to a safe implementation if it
// can't.
// The following code is 2d-specific but it is fine as FFT and tiled-FFT are
// defined only for 2d filters
if ((algo == CUDNN_CONVOLUTION_FWD_ALGO_FFT ||
algo == CUDNN_CONVOLUTION_FWD_ALGO_FFT_TILING) && PyGpuArray_NDIM(input) == 4) {
int nd; int nd;
int pad[2]; int pad[2];
int stride[2]; int stride[2];
...@@ -153,10 +162,22 @@ APPLY_SPECIFIC(conv_fwd)(PyGpuArrayObject *input, PyGpuArrayObject *kerns, ...@@ -153,10 +162,22 @@ APPLY_SPECIFIC(conv_fwd)(PyGpuArrayObject *input, PyGpuArrayObject *kerns,
return 1; return 1;
} }
if (stride[0] != 1 || stride[1] != 1 || if (chosen_algo == CUDNN_CONVOLUTION_FWD_ALGO_FFT)
PyGpuArray_DIM(input, 2) > 1024 || PyGpuArray_DIM(input, 3) > 1024 || {
(PyGpuArray_DIM(kerns, 2) == 1 && PyGpuArray_DIM(kerns, 3) == 1)) { if (stride[0] != 1 || stride[1] != 1 ||
algo = CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM; PyGpuArray_DIM(input, 2) > 1024 || PyGpuArray_DIM(input, 3) > 1024 ||
(PyGpuArray_DIM(kerns, 2) == 1 && PyGpuArray_DIM(kerns, 3) == 1))
{
chosen_algo = CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_GEMM;
}
}
else
{
// chosen_algo == CUDNN_CONVOLUTION_FWD_ALGO_FFT_TILING
if (stride[0] != 1 || stride[1] != 1)
{
chosen_algo = CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_GEMM;
}
} }
} }
#endif #endif
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论