提交 877f4210 authored 作者: carriepl's avatar carriepl 提交者: Frederic

Avoid using the cudnnGetConvolution2dDescriptor() method

上级 ebd97785
...@@ -176,12 +176,14 @@ APPLY_SPECIFIC(conv_fwd)(CudaNdarray *input, CudaNdarray *kerns, ...@@ -176,12 +176,14 @@ APPLY_SPECIFIC(conv_fwd)(CudaNdarray *input, CudaNdarray *kerns,
{ {
// Extract the properties of the convolution descriptor // Extract the properties of the convolution descriptor
int pad_h, pad_w, stride_v, stride_h, upscale_x, upscale_y; int nd;
int pad[2];
int stride[2];
int upscale[2];
cudnnConvolutionMode_t mode; cudnnConvolutionMode_t mode;
err = cudnnGetConvolution2dDescriptor(desc, &pad_h, &pad_w, cudnnDataType_t data_type;
&stride_v, &stride_h, err = cudnnGetConvolutionNdDescriptor_v3(desc, 2, &nd, pad, stride,
&upscale_x, &upscale_y, upscale, &mode, &data_type);
&mode);
if (err != CUDNN_STATUS_SUCCESS) { if (err != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_RuntimeError, PyErr_Format(PyExc_RuntimeError,
...@@ -202,7 +204,7 @@ APPLY_SPECIFIC(conv_fwd)(CudaNdarray *input, CudaNdarray *kerns, ...@@ -202,7 +204,7 @@ APPLY_SPECIFIC(conv_fwd)(CudaNdarray *input, CudaNdarray *kerns,
// convolution. Fall back to a safe implementation otherwise. // convolution. Fall back to a safe implementation otherwise.
if (chosen_algo == CUDNN_CONVOLUTION_FWD_ALGO_FFT) if (chosen_algo == CUDNN_CONVOLUTION_FWD_ALGO_FFT)
{ {
if (stride_v != 1 || stride_h != 1 || input_h > 1024 || if (stride[0] != 1 || stride[1] != 1 || input_h > 1024 ||
input_w > 1024 || (filter_h == 1 && filter_w == 1)) input_w > 1024 || (filter_h == 1 && filter_w == 1))
{ {
chosen_algo = CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_GEMM; chosen_algo = CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_GEMM;
...@@ -211,7 +213,7 @@ APPLY_SPECIFIC(conv_fwd)(CudaNdarray *input, CudaNdarray *kerns, ...@@ -211,7 +213,7 @@ APPLY_SPECIFIC(conv_fwd)(CudaNdarray *input, CudaNdarray *kerns,
else else
{ {
// chosen_algo == CUDNN_CONVOLUTION_FWD_ALGO_FFT_TILING // chosen_algo == CUDNN_CONVOLUTION_FWD_ALGO_FFT_TILING
if (stride_v != 1 || stride_h != 1) if (stride[0] != 1 || stride[1] != 1)
{ {
chosen_algo = CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_GEMM; chosen_algo = CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_GEMM;
} }
......
...@@ -173,12 +173,14 @@ APPLY_SPECIFIC(conv_gi)(CudaNdarray *kerns, CudaNdarray *output, ...@@ -173,12 +173,14 @@ APPLY_SPECIFIC(conv_gi)(CudaNdarray *kerns, CudaNdarray *output,
{ {
// Extract the properties of the convolution descriptor // Extract the properties of the convolution descriptor
int pad_h, pad_w, stride_v, stride_h, upscale_x, upscale_y; int nd;
int pad[2];
int stride[2];
int upscale[2];
cudnnConvolutionMode_t mode; cudnnConvolutionMode_t mode;
err = cudnnGetConvolution2dDescriptor(desc, &pad_h, &pad_w, cudnnDataType_t data_type;
&stride_v, &stride_h, err = cudnnGetConvolutionNdDescriptor_v3(desc, 2, &nd, pad, stride,
&upscale_x, &upscale_y, upscale, &mode, &data_type);
&mode);
if (err != CUDNN_STATUS_SUCCESS) { if (err != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_RuntimeError, PyErr_Format(PyExc_RuntimeError,
...@@ -199,7 +201,7 @@ APPLY_SPECIFIC(conv_gi)(CudaNdarray *kerns, CudaNdarray *output, ...@@ -199,7 +201,7 @@ APPLY_SPECIFIC(conv_gi)(CudaNdarray *kerns, CudaNdarray *output,
// convolution. Fall back to a safe implementation otherwise. // convolution. Fall back to a safe implementation otherwise.
if (chosen_algo == CUDNN_CONVOLUTION_BWD_DATA_ALGO_FFT) if (chosen_algo == CUDNN_CONVOLUTION_BWD_DATA_ALGO_FFT)
{ {
if (stride_v != 1 || stride_h != 1 || input_h > 1024 || if (stride[0] != 1 || stride[1] != 1 || input_h > 1024 ||
input_w > 1024 || (filter_h == 1 && filter_w == 1)) input_w > 1024 || (filter_h == 1 && filter_w == 1))
{ {
chosen_algo = CUDNN_CONVOLUTION_BWD_DATA_ALGO_0; chosen_algo = CUDNN_CONVOLUTION_BWD_DATA_ALGO_0;
...@@ -208,7 +210,7 @@ APPLY_SPECIFIC(conv_gi)(CudaNdarray *kerns, CudaNdarray *output, ...@@ -208,7 +210,7 @@ APPLY_SPECIFIC(conv_gi)(CudaNdarray *kerns, CudaNdarray *output,
else else
{ {
// chosen_algo == CUDNN_CONVOLUTION_BWD_DATA_ALGO_FFT_TILING // chosen_algo == CUDNN_CONVOLUTION_BWD_DATA_ALGO_FFT_TILING
if (stride_v != 1 || stride_h != 1) if (stride[0] != 1 || stride[1] != 1)
{ {
chosen_algo = CUDNN_CONVOLUTION_BWD_DATA_ALGO_0; chosen_algo = CUDNN_CONVOLUTION_BWD_DATA_ALGO_0;
} }
......
...@@ -168,12 +168,14 @@ APPLY_SPECIFIC(conv_gw)(CudaNdarray *input, CudaNdarray *output, ...@@ -168,12 +168,14 @@ APPLY_SPECIFIC(conv_gw)(CudaNdarray *input, CudaNdarray *output,
{ {
// Extract the properties of the convolution descriptor // Extract the properties of the convolution descriptor
int pad_h, pad_w, stride_v, stride_h, upscale_x, upscale_y; int nd;
int pad[2];
int stride[2];
int upscale[2];
cudnnConvolutionMode_t mode; cudnnConvolutionMode_t mode;
err = cudnnGetConvolution2dDescriptor(desc, &pad_h, &pad_w, cudnnDataType_t data_type;
&stride_v, &stride_h, err = cudnnGetConvolutionNdDescriptor_v3(desc, 2, &nd, pad, stride,
&upscale_x, &upscale_y, upscale, &mode, &data_type);
&mode);
if (err != CUDNN_STATUS_SUCCESS) { if (err != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_RuntimeError, PyErr_Format(PyExc_RuntimeError,
...@@ -192,7 +194,7 @@ APPLY_SPECIFIC(conv_gw)(CudaNdarray *input, CudaNdarray *output, ...@@ -192,7 +194,7 @@ APPLY_SPECIFIC(conv_gw)(CudaNdarray *input, CudaNdarray *output,
// Ensure that the selected implementation supports the requested // Ensure that the selected implementation supports the requested
// convolution. Fall back to a safe implementation otherwise. // convolution. Fall back to a safe implementation otherwise.
if (stride_v != 1 || stride_h != 1 || input_h > 1024 || if (stride[0] != 1 || stride[1] != 1 || input_h > 1024 ||
input_w > 1024 || (filter_h == 1 && filter_w == 1)) input_w > 1024 || (filter_h == 1 && filter_w == 1))
{ {
chosen_algo = CUDNN_CONVOLUTION_BWD_FILTER_ALGO_0; chosen_algo = CUDNN_CONVOLUTION_BWD_FILTER_ALGO_0;
......
...@@ -149,12 +149,14 @@ APPLY_SPECIFIC(conv_fwd)(PyGpuArrayObject *input, PyGpuArrayObject *kerns, ...@@ -149,12 +149,14 @@ APPLY_SPECIFIC(conv_fwd)(PyGpuArrayObject *input, PyGpuArrayObject *kerns,
algo == CUDNN_CONVOLUTION_FWD_ALGO_FFT_TILING) && PyGpuArray_NDIM(input) == 4) { algo == CUDNN_CONVOLUTION_FWD_ALGO_FFT_TILING) && PyGpuArray_NDIM(input) == 4) {
// Extract the properties of the convolution descriptor // Extract the properties of the convolution descriptor
int pad_h, pad_w, stride_v, stride_h, upscale_x, upscale_y; int nd;
int pad[2];
int stride[2];
int upscale[2];
cudnnConvolutionMode_t mode; cudnnConvolutionMode_t mode;
err = cudnnGetConvolution2dDescriptor(desc, &pad_h, &pad_w, cudnnDataType_t data_type;
&stride_v, &stride_h, err = cudnnGetConvolutionNdDescriptor_v3(desc, 2, &nd, pad, stride,
&upscale_x, &upscale_y, upscale, &mode, &data_type);
&mode);
if (err != CUDNN_STATUS_SUCCESS) { if (err != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_RuntimeError, PyErr_Format(PyExc_RuntimeError,
"error getting convolution properties: %s", "error getting convolution properties: %s",
...@@ -165,7 +167,7 @@ APPLY_SPECIFIC(conv_fwd)(PyGpuArrayObject *input, PyGpuArrayObject *kerns, ...@@ -165,7 +167,7 @@ APPLY_SPECIFIC(conv_fwd)(PyGpuArrayObject *input, PyGpuArrayObject *kerns,
if (algo == CUDNN_CONVOLUTION_FWD_ALGO_FFT) if (algo == CUDNN_CONVOLUTION_FWD_ALGO_FFT)
{ {
if (stride_v != 1 || stride_h != 1 || if (stride[0] != 1 || stride[1] != 1 ||
PyGpuArray_DIM(input, 2) > 1024 || PyGpuArray_DIM(input, 3) > 1024 || PyGpuArray_DIM(input, 2) > 1024 || PyGpuArray_DIM(input, 3) > 1024 ||
(PyGpuArray_DIM(kerns, 2) == 1 && PyGpuArray_DIM(kerns, 3) == 1)) (PyGpuArray_DIM(kerns, 2) == 1 && PyGpuArray_DIM(kerns, 3) == 1))
{ {
...@@ -175,7 +177,7 @@ APPLY_SPECIFIC(conv_fwd)(PyGpuArrayObject *input, PyGpuArrayObject *kerns, ...@@ -175,7 +177,7 @@ APPLY_SPECIFIC(conv_fwd)(PyGpuArrayObject *input, PyGpuArrayObject *kerns,
else else
{ {
// algo == CUDNN_CONVOLUTION_FWD_ALGO_FFT_TILING // algo == CUDNN_CONVOLUTION_FWD_ALGO_FFT_TILING
if (stride_v != 1 || stride_h != 1) if (stride[0] != 1 || stride[1] != 1)
{ {
algo = CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_GEMM; algo = CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_GEMM;
} }
......
...@@ -141,12 +141,14 @@ APPLY_SPECIFIC(conv_gi)(PyGpuArrayObject *kerns, PyGpuArrayObject *output, ...@@ -141,12 +141,14 @@ APPLY_SPECIFIC(conv_gi)(PyGpuArrayObject *kerns, PyGpuArrayObject *output,
algo == CUDNN_CONVOLUTION_BWD_DATA_ALGO_FFT) && PyGpuArray_NDIM(kerns) == 4) { algo == CUDNN_CONVOLUTION_BWD_DATA_ALGO_FFT) && PyGpuArray_NDIM(kerns) == 4) {
// Extract the properties of the convolution descriptor // Extract the properties of the convolution descriptor
int pad_h, pad_w, stride_v, stride_h, upscale_x, upscale_y; int nd;
int pad[2];
int stride[2];
int upscale[2];
cudnnConvolutionMode_t mode; cudnnConvolutionMode_t mode;
err = cudnnGetConvolution2dDescriptor(desc, &pad_h, &pad_w, cudnnDataType_t data_type;
&stride_v, &stride_h, err = cudnnGetConvolutionNdDescriptor_v3(desc, 2, &nd, pad, stride,
&upscale_x, &upscale_y, upscale, &mode, &data_type);
&mode);
if (err != CUDNN_STATUS_SUCCESS) { if (err != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_RuntimeError, PyErr_Format(PyExc_RuntimeError,
"error getting convolution properties: %s", "error getting convolution properties: %s",
...@@ -157,7 +159,7 @@ APPLY_SPECIFIC(conv_gi)(PyGpuArrayObject *kerns, PyGpuArrayObject *output, ...@@ -157,7 +159,7 @@ APPLY_SPECIFIC(conv_gi)(PyGpuArrayObject *kerns, PyGpuArrayObject *output,
if (algo == CUDNN_CONVOLUTION_BWD_DATA_ALGO_FFT) if (algo == CUDNN_CONVOLUTION_BWD_DATA_ALGO_FFT)
{ {
if (stride_v != 1 || stride_h != 1 || if (stride[0] != 1 || stride[1] != 1 ||
PyGpuArray_DIM(*input, 2) > 1024 || PyGpuArray_DIM(*input, 3) > 1024 || PyGpuArray_DIM(*input, 2) > 1024 || PyGpuArray_DIM(*input, 3) > 1024 ||
(PyGpuArray_DIM(kerns, 2) == 1 && PyGpuArray_DIM(kerns, 3) == 1)) (PyGpuArray_DIM(kerns, 2) == 1 && PyGpuArray_DIM(kerns, 3) == 1))
{ {
...@@ -167,7 +169,7 @@ APPLY_SPECIFIC(conv_gi)(PyGpuArrayObject *kerns, PyGpuArrayObject *output, ...@@ -167,7 +169,7 @@ APPLY_SPECIFIC(conv_gi)(PyGpuArrayObject *kerns, PyGpuArrayObject *output,
else else
{ {
// algo == CUDNN_CONVOLUTION_BWD_DATA_ALGO_FFT_TILING // algo == CUDNN_CONVOLUTION_BWD_DATA_ALGO_FFT_TILING
if (stride_v != 1 || stride_h != 1) if (stride[0] != 1 || stride[1] != 1)
{ {
algo = CUDNN_CONVOLUTION_BWD_DATA_ALGO_0; algo = CUDNN_CONVOLUTION_BWD_DATA_ALGO_0;
} }
......
...@@ -141,12 +141,14 @@ APPLY_SPECIFIC(conv_gw)(PyGpuArrayObject *input, PyGpuArrayObject *output, ...@@ -141,12 +141,14 @@ APPLY_SPECIFIC(conv_gw)(PyGpuArrayObject *input, PyGpuArrayObject *output,
if (algo == CUDNN_CONVOLUTION_BWD_FILTER_ALGO_FFT && if (algo == CUDNN_CONVOLUTION_BWD_FILTER_ALGO_FFT &&
PyGpuArray_NDIM(input) == 4) { PyGpuArray_NDIM(input) == 4) {
// Extract the properties of the convolution descriptor // Extract the properties of the convolution descriptor
int pad_h, pad_w, stride_v, stride_h, upscale_x, upscale_y; int nd;
int pad[2];
int stride[2];
int upscale[2];
cudnnConvolutionMode_t mode; cudnnConvolutionMode_t mode;
err = cudnnGetConvolution2dDescriptor(desc, &pad_h, &pad_w, cudnnDataType_t data_type;
&stride_v, &stride_h, err = cudnnGetConvolutionNdDescriptor_v3(desc, 2, &nd, pad, stride,
&upscale_x, &upscale_y, upscale, &mode, &data_type);
&mode);
if (err != CUDNN_STATUS_SUCCESS) { if (err != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_RuntimeError, PyErr_Format(PyExc_RuntimeError,
"error getting convolution properties: %s", "error getting convolution properties: %s",
...@@ -155,7 +157,7 @@ APPLY_SPECIFIC(conv_gw)(PyGpuArrayObject *input, PyGpuArrayObject *output, ...@@ -155,7 +157,7 @@ APPLY_SPECIFIC(conv_gw)(PyGpuArrayObject *input, PyGpuArrayObject *output,
return 1; return 1;
} }
if (stride_v != 1 || stride_h != 1 || if (stride[0] != 1 || stride[1] != 1 ||
PyGpuArray_DIM(input, 2) > 1024 || PyGpuArray_DIM(input, 3) > 1024 || PyGpuArray_DIM(input, 2) > 1024 || PyGpuArray_DIM(input, 3) > 1024 ||
(PyGpuArray_DIM(*kerns, 2) == 1 && PyGpuArray_DIM(*kerns, 3) == 1)) { (PyGpuArray_DIM(*kerns, 2) == 1 && PyGpuArray_DIM(*kerns, 3) == 1)) {
algo = CUDNN_CONVOLUTION_BWD_FILTER_ALGO_0; algo = CUDNN_CONVOLUTION_BWD_FILTER_ALGO_0;
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论