提交 2a857e3a authored 作者: carriepl 提交者: Frederic

Update GpuDnnConvGradI for CuDNN v4 (gpua backend)

上级 1e48b734
...@@ -667,16 +667,23 @@ class GpuDnnConvGradI(DnnBase): ...@@ -667,16 +667,23 @@ class GpuDnnConvGradI(DnnBase):
if self.inplace: if self.inplace:
self.destroy_map = {0: [2]} self.destroy_map = {0: [2]}
if algo is None: if algo is None:
algo = config.dnn.conv.algo_bwd algo = config.dnn.conv.algo_bwd_data
self.algo = algo self.algo = algo
assert self.algo in ['none', 'deterministic', 'fft', 'guess_once',
'guess_on_shape_change', 'time_once', # The small-workspace implementation is only available from CuDNN V4
'time_on_shape_change'] # onward.
if version() < (4000, 4000) and self.algo == 'fft_tiling':
raise RuntimeError("CuDNN's tiled-FFT convolution requires CuDNN "
"v4 or more recent")
assert self.algo in ['none', 'deterministic', 'fft', 'fft_tiling',
'guess_once', 'guess_on_shape_change',
'time_once', 'time_on_shape_change']
def __setstate__(self, d):
    """Restore pickled state, back-filling attributes added later.

    Instances pickled before the ``algo`` / ``inplace`` attributes
    existed lack them in ``d``; default them here so old pickles keep
    working after unpickling.
    """
    self.__dict__.update(d)
    if not hasattr(self, 'algo'):
        # Older pickles predate per-op algorithm selection; fall back to
        # the configured backward-data algorithm (this commit renames the
        # flag from config.dnn.conv.algo_bwd to algo_bwd_data).
        self.algo = config.dnn.conv.algo_bwd_data
    if not hasattr(self, 'inplace'):
        # Older pickles predate the inplace option; default to the safe,
        # non-destructive behaviour.
        self.inplace = False
...@@ -713,6 +720,9 @@ class GpuDnnConvGradI(DnnBase): ...@@ -713,6 +720,9 @@ class GpuDnnConvGradI(DnnBase):
alg = 'CUDNN_CONVOLUTION_BWD_DATA_ALGO_1' alg = 'CUDNN_CONVOLUTION_BWD_DATA_ALGO_1'
if self.algo == 'fft': if self.algo == 'fft':
alg = 'CUDNN_CONVOLUTION_BWD_DATA_ALGO_FFT' alg = 'CUDNN_CONVOLUTION_BWD_DATA_ALGO_FFT'
if self.algo == 'fft_tiling':
# big workspace but less than fft
alg = 'CUDNN_CONVOLUTION_BWD_DATA_ALGO_FFT_TILING'
if self.algo in ['guess_once', 'guess_on_shape_change', if self.algo in ['guess_once', 'guess_on_shape_change',
'time_once', 'time_on_shape_change']: 'time_once', 'time_on_shape_change']:
...@@ -743,7 +753,8 @@ class GpuDnnConvGradI(DnnBase): ...@@ -743,7 +753,8 @@ class GpuDnnConvGradI(DnnBase):
raise TypeError("The number of dimensions of " raise TypeError("The number of dimensions of "
"kern, topgrad and output must match") "kern, topgrad and output must match")
if kern.type.ndim == 5 and self.algo in ['fft', 'deterministic']: if (kern.type.ndim == 5 and
self.algo in ['fft', 'deterministic', 'fft_tiling']):
raise ValueError("convolution algo %s can't be used for " raise ValueError("convolution algo %s can't be used for "
"3d convolutions", (self.algo,)) "3d convolutions", (self.algo,))
......
...@@ -129,7 +129,16 @@ APPLY_SPECIFIC(conv_gi)(PyGpuArrayObject *kerns, PyGpuArrayObject *output, ...@@ -129,7 +129,16 @@ APPLY_SPECIFIC(conv_gi)(PyGpuArrayObject *kerns, PyGpuArrayObject *output,
#endif #endif
#if CUDNN_VERSION > 3000 #if CUDNN_VERSION > 3000
if (algo == CUDNN_CONVOLUTION_BWD_DATA_ALGO_FFT) { // The FFT implementation does not support strides, 1x1 filters or inputs
// with a spatial dimension larger than 1024. The tiled-FFT implementation
// does not support strides.
// If the chosen implementation is FFT or tiled-FFT, validate that it can
// be used on the current data and default to a safe implementation if it
// can't.
// The following code is 2d-specific but it is fine as FFT and tiled-FFT are
// defined only for 2d filters
if ((chosen_algo == CUDNN_CONVOLUTION_BWD_DATA_ALGO_FFT ||
chosen_algo == CUDNN_CONVOLUTION_BWD_DATA_ALGO_FFT_TILING) && PyGpuArray_NDIM(*input) == 4) {
int nd; int nd;
int pad[2]; int pad[2];
int stride[2]; int stride[2];
...@@ -145,10 +154,22 @@ APPLY_SPECIFIC(conv_gi)(PyGpuArrayObject *kerns, PyGpuArrayObject *output, ...@@ -145,10 +154,22 @@ APPLY_SPECIFIC(conv_gi)(PyGpuArrayObject *kerns, PyGpuArrayObject *output,
return 1; return 1;
} }
if (stride[0] != 1 || stride[1] != 1 || if (chosen_algo == CUDNN_CONVOLUTION_BWD_DATA_ALGO_FFT)
PyGpuArray_DIM(*input, 2) > 1024 || PyGpuArray_DIM(*input, 3) > 1024 || {
(PyGpuArray_DIM(kerns, 2) == 1 && PyGpuArray_DIM(kerns, 3) == 1)) { if (stride[0] != 1 || stride[1] != 1 ||
algo = CUDNN_CONVOLUTION_BWD_DATA_ALGO_0; PyGpuArray_DIM(*input, 2) > 1024 || PyGpuArray_DIM(*input, 3) > 1024 ||
(PyGpuArray_DIM(kerns, 2) == 1 && PyGpuArray_DIM(kerns, 3) == 1))
{
chosen_algo = CUDNN_CONVOLUTION_BWD_DATA_ALGO_0;
}
}
else
{
// chosen_algo == CUDNN_CONVOLUTION_BWD_DATA_ALGO_FFT_TILING
if (stride[0] != 1 || stride[1] != 1)
{
chosen_algo = CUDNN_CONVOLUTION_BWD_DATA_ALGO_0;
}
} }
} }
#endif #endif
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论