提交 6f1dc3d8 作者: carriepl 提交者: Frederic

Call cudnnGetConvolution2dDescriptor instead of cudnnGetConvolutionNdDescriptor

上级 bf918482
......@@ -147,13 +147,14 @@ APPLY_SPECIFIC(conv_fwd)(PyGpuArrayObject *input, PyGpuArrayObject *kerns,
// defined only for 2d filters
if ((algo == CUDNN_CONVOLUTION_FWD_ALGO_FFT ||
algo == CUDNN_CONVOLUTION_FWD_ALGO_FFT_TILING) && PyGpuArray_NDIM(input) == 4) {
int nd;
int pad[2];
int stride[2];
int upscale[2];
// Extract the properties of the convolution descriptor
int pad_h, pad_w, stride_v, stride_h, upscale_x, upscale_y;
cudnnConvolutionMode_t mode;
err = cudnnGetConvolutionNdDescriptor(desc, 2, &nd, pad, stride,
upscale, &mode);
err = cudnnGetConvolution2dDescriptor(desc, &pad_h, &pad_w,
&stride_v, &stride_h,
&upscale_x, &upscale_y,
&mode);
if (err != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_RuntimeError,
"error getting convolution properties: %s",
......@@ -162,21 +163,21 @@ APPLY_SPECIFIC(conv_fwd)(PyGpuArrayObject *input, PyGpuArrayObject *kerns,
return 1;
}
if (chosen_algo == CUDNN_CONVOLUTION_FWD_ALGO_FFT)
if (algo == CUDNN_CONVOLUTION_FWD_ALGO_FFT)
{
if (stride[0] != 1 || stride[1] != 1 ||
if (stride_v != 1 || stride_h != 1 ||
PyGpuArray_DIM(input, 2) > 1024 || PyGpuArray_DIM(input, 3) > 1024 ||
(PyGpuArray_DIM(kerns, 2) == 1 && PyGpuArray_DIM(kerns, 3) == 1))
{
chosen_algo = CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_GEMM;
algo = CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_GEMM;
}
}
else
{
// chosen_algo == CUDNN_CONVOLUTION_FWD_ALGO_FFT_TILING
if (stride[0] != 1 || stride[1] != 1)
// algo == CUDNN_CONVOLUTION_FWD_ALGO_FFT_TILING
if (stride_v != 1 || stride_h != 1)
{
chosen_algo = CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_GEMM;
algo = CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_GEMM;
}
}
}
......
......@@ -137,15 +137,16 @@ APPLY_SPECIFIC(conv_gi)(PyGpuArrayObject *kerns, PyGpuArrayObject *output,
// can't.
// The following code is 2d-specific but it is fine as FFT and tiled-FFT are
// defined only for 2d filters
if ((algo == CUDNN_CONVOLUTION_FWD_ALGO_FFT ||
algo == CUDNN_CONVOLUTION_FWD_ALGO_FFT_TILING) && PyGpuArray_NDIM(input) == 4) {
int nd;
int pad[2];
int stride[2];
int upscale[2];
if ((algo == CUDNN_CONVOLUTION_BWD_DATA_ALGO_FFT_TILING ||
algo == CUDNN_CONVOLUTION_BWD_DATA_ALGO_FFT) && PyGpuArray_NDIM(kerns) == 4) {
// Extract the properties of the convolution descriptor
int pad_h, pad_w, stride_v, stride_h, upscale_x, upscale_y;
cudnnConvolutionMode_t mode;
err = cudnnGetConvolutionNdDescriptor(desc, 2, &nd, pad, stride,
upscale, &mode);
err = cudnnGetConvolution2dDescriptor(desc, &pad_h, &pad_w,
&stride_v, &stride_h,
&upscale_x, &upscale_y,
&mode);
if (err != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_RuntimeError,
"error getting convolution properties: %s",
......@@ -154,21 +155,21 @@ APPLY_SPECIFIC(conv_gi)(PyGpuArrayObject *kerns, PyGpuArrayObject *output,
return 1;
}
if (chosen_algo == CUDNN_CONVOLUTION_FWD_ALGO_FFT)
if (algo == CUDNN_CONVOLUTION_BWD_DATA_ALGO_FFT)
{
if (stride[0] != 1 || stride[1] != 1 ||
if (stride_v != 1 || stride_h != 1 ||
PyGpuArray_DIM(*input, 2) > 1024 || PyGpuArray_DIM(*input, 3) > 1024 ||
(PyGpuArray_DIM(kerns, 2) == 1 && PyGpuArray_DIM(kerns, 3) == 1))
{
chosen_algo = CUDNN_CONVOLUTION_BWD_DATA_ALGO_0;
algo = CUDNN_CONVOLUTION_BWD_DATA_ALGO_0;
}
}
else
{
// chosen_algo == CUDNN_CONVOLUTION_FWD_ALGO_FFT_TILING
if (stride[0] != 1 || stride[1] != 1)
// algo == CUDNN_CONVOLUTION_BWD_DATA_ALGO_FFT_TILING
if (stride_v != 1 || stride_h != 1)
{
chosen_algo = CUDNN_CONVOLUTION_BWD_DATA_ALGO_0;
algo = CUDNN_CONVOLUTION_BWD_DATA_ALGO_0;
}
}
}
......
......@@ -130,7 +130,6 @@ APPLY_SPECIFIC(conv_gw)(PyGpuArrayObject *input, PyGpuArrayObject *output,
#endif
<<<<<<< HEAD
#if CUDNN_VERSION > 3000
// The FFT implementation does not support strides, 1x1 filters or inputs
// with a spatial dimension larger than 1024.
......@@ -141,13 +140,13 @@ APPLY_SPECIFIC(conv_gw)(PyGpuArrayObject *input, PyGpuArrayObject *output,
// defined only for 2d filters
if (algo == CUDNN_CONVOLUTION_BWD_FILTER_ALGO_FFT &&
PyGpuArray_NDIM(input) == 4) {
int nd;
int pad[2];
int stride[2];
int upscale[2];
// Extract the properties of the convolution descriptor
int pad_h, pad_w, stride_v, stride_h, upscale_x, upscale_y;
cudnnConvolutionMode_t mode;
err = cudnnGetConvolutionNdDescriptor(desc, 2, &nd, pad, stride,
upscale, &mode);
err = cudnnGetConvolution2dDescriptor(desc, &pad_h, &pad_w,
&stride_v, &stride_h,
&upscale_x, &upscale_y,
&mode);
if (err != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_RuntimeError,
"error getting convolution properties: %s",
......@@ -156,7 +155,7 @@ APPLY_SPECIFIC(conv_gw)(PyGpuArrayObject *input, PyGpuArrayObject *output,
return 1;
}
if (stride[0] != 1 || stride[1] != 1 ||
if (stride_v != 1 || stride_h != 1 ||
PyGpuArray_DIM(input, 2) > 1024 || PyGpuArray_DIM(input, 3) > 1024 ||
(PyGpuArray_DIM(*kerns, 2) == 1 && PyGpuArray_DIM(*kerns, 3) == 1)) {
algo = CUDNN_CONVOLUTION_BWD_FILTER_ALGO_0;
......
Markdown 格式
0%
您即将添加 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论