提交 6f1dc3d8 作者: carriepl 提交者: Frederic

Call cudnnGetConvolution2dDescriptor instead of cudnnGetConvolutionNdDescriptor

上级 bf918482
...@@ -147,13 +147,14 @@ APPLY_SPECIFIC(conv_fwd)(PyGpuArrayObject *input, PyGpuArrayObject *kerns, ...@@ -147,13 +147,14 @@ APPLY_SPECIFIC(conv_fwd)(PyGpuArrayObject *input, PyGpuArrayObject *kerns,
// defined only for 2d filters // defined only for 2d filters
if ((algo == CUDNN_CONVOLUTION_FWD_ALGO_FFT || if ((algo == CUDNN_CONVOLUTION_FWD_ALGO_FFT ||
algo == CUDNN_CONVOLUTION_FWD_ALGO_FFT_TILING) && PyGpuArray_NDIM(input) == 4) { algo == CUDNN_CONVOLUTION_FWD_ALGO_FFT_TILING) && PyGpuArray_NDIM(input) == 4) {
int nd;
int pad[2]; // Extract the properties of the convolution descriptor
int stride[2]; int pad_h, pad_w, stride_v, stride_h, upscale_x, upscale_y;
int upscale[2];
cudnnConvolutionMode_t mode; cudnnConvolutionMode_t mode;
err = cudnnGetConvolutionNdDescriptor(desc, 2, &nd, pad, stride, err = cudnnGetConvolution2dDescriptor(desc, &pad_h, &pad_w,
upscale, &mode); &stride_v, &stride_h,
&upscale_x, &upscale_y,
&mode);
if (err != CUDNN_STATUS_SUCCESS) { if (err != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_RuntimeError, PyErr_Format(PyExc_RuntimeError,
"error getting convolution properties: %s", "error getting convolution properties: %s",
...@@ -162,21 +163,21 @@ APPLY_SPECIFIC(conv_fwd)(PyGpuArrayObject *input, PyGpuArrayObject *kerns, ...@@ -162,21 +163,21 @@ APPLY_SPECIFIC(conv_fwd)(PyGpuArrayObject *input, PyGpuArrayObject *kerns,
return 1; return 1;
} }
if (chosen_algo == CUDNN_CONVOLUTION_FWD_ALGO_FFT) if (algo == CUDNN_CONVOLUTION_FWD_ALGO_FFT)
{ {
if (stride[0] != 1 || stride[1] != 1 || if (stride_v != 1 || stride_h != 1 ||
PyGpuArray_DIM(input, 2) > 1024 || PyGpuArray_DIM(input, 3) > 1024 || PyGpuArray_DIM(input, 2) > 1024 || PyGpuArray_DIM(input, 3) > 1024 ||
(PyGpuArray_DIM(kerns, 2) == 1 && PyGpuArray_DIM(kerns, 3) == 1)) (PyGpuArray_DIM(kerns, 2) == 1 && PyGpuArray_DIM(kerns, 3) == 1))
{ {
chosen_algo = CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_GEMM; algo = CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_GEMM;
} }
} }
else else
{ {
// chosen_algo == CUDNN_CONVOLUTION_FWD_ALGO_FFT_TILING // algo == CUDNN_CONVOLUTION_FWD_ALGO_FFT_TILING
if (stride[0] != 1 || stride[1] != 1) if (stride_v != 1 || stride_h != 1)
{ {
chosen_algo = CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_GEMM; algo = CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_GEMM;
} }
} }
} }
......
...@@ -137,15 +137,16 @@ APPLY_SPECIFIC(conv_gi)(PyGpuArrayObject *kerns, PyGpuArrayObject *output, ...@@ -137,15 +137,16 @@ APPLY_SPECIFIC(conv_gi)(PyGpuArrayObject *kerns, PyGpuArrayObject *output,
// can't. // can't.
// The following code is 2d-specific but it is fine as FFT and tiled-FFT are // The following code is 2d-specific but it is fine as FFT and tiled-FFT are
// defined only for 2d filters // defined only for 2d filters
if ((algo == CUDNN_CONVOLUTION_FWD_ALGO_FFT || if ((algo == CUDNN_CONVOLUTION_BWD_DATA_ALGO_FFT_TILING ||
algo == CUDNN_CONVOLUTION_FWD_ALGO_FFT_TILING) && PyGpuArray_NDIM(input) == 4) { algo == CUDNN_CONVOLUTION_BWD_DATA_ALGO_FFT) && PyGpuArray_NDIM(kerns) == 4) {
int nd;
int pad[2]; // Extract the properties of the convolution descriptor
int stride[2]; int pad_h, pad_w, stride_v, stride_h, upscale_x, upscale_y;
int upscale[2];
cudnnConvolutionMode_t mode; cudnnConvolutionMode_t mode;
err = cudnnGetConvolutionNdDescriptor(desc, 2, &nd, pad, stride, err = cudnnGetConvolution2dDescriptor(desc, &pad_h, &pad_w,
upscale, &mode); &stride_v, &stride_h,
&upscale_x, &upscale_y,
&mode);
if (err != CUDNN_STATUS_SUCCESS) { if (err != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_RuntimeError, PyErr_Format(PyExc_RuntimeError,
"error getting convolution properties: %s", "error getting convolution properties: %s",
...@@ -154,21 +155,21 @@ APPLY_SPECIFIC(conv_gi)(PyGpuArrayObject *kerns, PyGpuArrayObject *output, ...@@ -154,21 +155,21 @@ APPLY_SPECIFIC(conv_gi)(PyGpuArrayObject *kerns, PyGpuArrayObject *output,
return 1; return 1;
} }
if (chosen_algo == CUDNN_CONVOLUTION_FWD_ALGO_FFT) if (algo == CUDNN_CONVOLUTION_BWD_DATA_ALGO_FFT)
{ {
if (stride[0] != 1 || stride[1] != 1 || if (stride_v != 1 || stride_h != 1 ||
PyGpuArray_DIM(*input, 2) > 1024 || PyGpuArray_DIM(*input, 3) > 1024 || PyGpuArray_DIM(*input, 2) > 1024 || PyGpuArray_DIM(*input, 3) > 1024 ||
(PyGpuArray_DIM(kerns, 2) == 1 && PyGpuArray_DIM(kerns, 3) == 1)) (PyGpuArray_DIM(kerns, 2) == 1 && PyGpuArray_DIM(kerns, 3) == 1))
{ {
chosen_algo = CUDNN_CONVOLUTION_BWD_DATA_ALGO_0; algo = CUDNN_CONVOLUTION_BWD_DATA_ALGO_0;
} }
} }
else else
{ {
// chosen_algo == CUDNN_CONVOLUTION_FWD_ALGO_FFT_TILING // algo == CUDNN_CONVOLUTION_BWD_DATA_ALGO_FFT_TILING
if (stride[0] != 1 || stride[1] != 1) if (stride_v != 1 || stride_h != 1)
{ {
chosen_algo = CUDNN_CONVOLUTION_BWD_DATA_ALGO_0; algo = CUDNN_CONVOLUTION_BWD_DATA_ALGO_0;
} }
} }
} }
......
...@@ -130,7 +130,6 @@ APPLY_SPECIFIC(conv_gw)(PyGpuArrayObject *input, PyGpuArrayObject *output, ...@@ -130,7 +130,6 @@ APPLY_SPECIFIC(conv_gw)(PyGpuArrayObject *input, PyGpuArrayObject *output,
#endif #endif
<<<<<<< HEAD
#if CUDNN_VERSION > 3000 #if CUDNN_VERSION > 3000
// The FFT implementation does not support strides, 1x1 filters or inputs // The FFT implementation does not support strides, 1x1 filters or inputs
// with a spatial dimension larger than 1024. // with a spatial dimension larger than 1024.
...@@ -141,13 +140,13 @@ APPLY_SPECIFIC(conv_gw)(PyGpuArrayObject *input, PyGpuArrayObject *output, ...@@ -141,13 +140,13 @@ APPLY_SPECIFIC(conv_gw)(PyGpuArrayObject *input, PyGpuArrayObject *output,
// defined only for 2d filters // defined only for 2d filters
if (algo == CUDNN_CONVOLUTION_BWD_FILTER_ALGO_FFT && if (algo == CUDNN_CONVOLUTION_BWD_FILTER_ALGO_FFT &&
PyGpuArray_NDIM(input) == 4) { PyGpuArray_NDIM(input) == 4) {
int nd; // Extract the properties of the convolution descriptor
int pad[2]; int pad_h, pad_w, stride_v, stride_h, upscale_x, upscale_y;
int stride[2];
int upscale[2];
cudnnConvolutionMode_t mode; cudnnConvolutionMode_t mode;
err = cudnnGetConvolutionNdDescriptor(desc, 2, &nd, pad, stride, err = cudnnGetConvolution2dDescriptor(desc, &pad_h, &pad_w,
upscale, &mode); &stride_v, &stride_h,
&upscale_x, &upscale_y,
&mode);
if (err != CUDNN_STATUS_SUCCESS) { if (err != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_RuntimeError, PyErr_Format(PyExc_RuntimeError,
"error getting convolution properties: %s", "error getting convolution properties: %s",
...@@ -156,7 +155,7 @@ APPLY_SPECIFIC(conv_gw)(PyGpuArrayObject *input, PyGpuArrayObject *output, ...@@ -156,7 +155,7 @@ APPLY_SPECIFIC(conv_gw)(PyGpuArrayObject *input, PyGpuArrayObject *output,
return 1; return 1;
} }
if (stride[0] != 1 || stride[1] != 1 || if (stride_v != 1 || stride_h != 1 ||
PyGpuArray_DIM(input, 2) > 1024 || PyGpuArray_DIM(input, 3) > 1024 || PyGpuArray_DIM(input, 2) > 1024 || PyGpuArray_DIM(input, 3) > 1024 ||
(PyGpuArray_DIM(*kerns, 2) == 1 && PyGpuArray_DIM(*kerns, 3) == 1)) { (PyGpuArray_DIM(*kerns, 2) == 1 && PyGpuArray_DIM(*kerns, 3) == 1)) {
algo = CUDNN_CONVOLUTION_BWD_FILTER_ALGO_0; algo = CUDNN_CONVOLUTION_BWD_FILTER_ALGO_0;
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
请 注册 或者 登录 后发表评论