提交 524438b6 authored 作者: carriepl's avatar carriepl 提交者: Frederic

Clean up V2 code

上级 877f4210
......@@ -3,35 +3,11 @@
#include <cudnn.h>
// If needed, define element of the V3 interface in terms of elements of
// previous versions
#if defined(CUDNN_VERSION) && CUDNN_VERSION < 3000
// Starting in V3, the cudnnSetConvolutionNdDescriptor has an additional
// parameter that determines the data type in which to do the computation.
// For versions older than V3, we need to define an alias for that function
// that will take the additional parameter as input but ignore it.
static inline cudnnStatus_t cudnnSetConvolutionNdDescriptor_v3(
cudnnConvolutionDescriptor_t convDesc,
int arrayLength,
int padA[],
int filterStrideA[]
int upscaleA[],
cudnnConvolutionMode_t mode,
cudnn_dataType_t dataType) {
return cudnnSetConvolutionNdDescriptor(convDesc, arrayLength, padA,
filterStrideA, upscaleA, mode);
}
#endif
// If needed, define element of the V4 interface in terms of elements of
// previous versions
#if defined(CUDNN_VERSION) && CUDNN_VERSION < 4000
// NOTE(review): values presumably mirror the V4 enum entries missing from
// pre-V4 headers — confirm against cudnn.h from cuDNN V4.
#define CUDNN_CONVOLUTION_FWD_ALGO_FFT_TILING 5
#define CUDNN_CONVOLUTION_BWD_FILTER_ALGO_3 3
#define CUDNN_CONVOLUTION_BWD_DATA_ALGO_FFT_TILING 3
#endif
......
......@@ -15,11 +15,8 @@ int APPLY_SPECIFIC(previous_kerns_shape)[5];
int APPLY_SPECIFIC(previous_output_shape)[5];
bool APPLY_SPECIFIC(previous_algo_set);
cudnnConvolutionFwdAlgo_t APPLY_SPECIFIC(previous_algo);
#if defined(CUDNN_VERSION) && CUDNN_VERSION >= 3000
cudnnConvolutionBwdFilterAlgo_t APPLY_SPECIFIC(previous_bwd_f_algo);
cudnnConvolutionBwdDataAlgo_t APPLY_SPECIFIC(previous_bwd_d_algo);
#endif
#section init_code_struct
......@@ -55,10 +52,8 @@ APPLY_SPECIFIC(previous_algo_set) = false;
// Select default implementations for the case where the convolution
// implementations should be selected based on the size of the data.
APPLY_SPECIFIC(previous_algo) = CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM;
#if defined(CUDNN_VERSION) && CUDNN_VERSION >= 3000
APPLY_SPECIFIC(previous_bwd_f_algo) = CUDNN_CONVOLUTION_BWD_FILTER_ALGO_0;
APPLY_SPECIFIC(previous_bwd_d_algo) = CUDNN_CONVOLUTION_BWD_DATA_ALGO_0;
#endif
#section cleanup_code_struct
......
......@@ -81,7 +81,6 @@ APPLY_SPECIFIC(conv_fwd)(CudaNdarray *input, CudaNdarray *kerns,
// CuDNN time every implementation and choose the best one.
if (CHOOSE_ALGO_TIME)
{
#if defined(CUDNN_VERSION) && CUDNN_VERSION >= 3000
// Time the different implementations to choose the best one
int requestedCount = 1;
int count;
......@@ -102,7 +101,6 @@ APPLY_SPECIFIC(conv_fwd)(CudaNdarray *input, CudaNdarray *kerns,
}
chosen_algo = choosen_algo_perf.algo;
#endif
}
else
{
......@@ -161,7 +159,6 @@ APPLY_SPECIFIC(conv_fwd)(CudaNdarray *input, CudaNdarray *kerns,
chosen_algo = CONV_ALGO;
}
#if defined(CUDNN_VERSION) && CUDNN_VERSION >= 3000
// The FFT implementation (only in V3 and onward) does not support strides,
// 1x1 filters or inputs with a spatial dimension larger than 1024.
// The tiled-FFT implementation (only in V4 onward) does not support
......@@ -219,30 +216,6 @@ APPLY_SPECIFIC(conv_fwd)(CudaNdarray *input, CudaNdarray *kerns,
}
}
}
#endif
#if defined(CUDNN_VERSION) && CUDNN_VERSION < 3000
// In versions before V3, CuDNN did not support kernels larger than the
// inputs in any spatial dimension, even if padding was used such that the
// padded inputs were larger than the kernels. If the kernels are larger
// then the inputs, raise an error message.
bool shape_mismatch = false;
for (int i=2; i < nb_dim; i++){
shape_mismatch = shape_mismatch || (CudaNdarray_HOST_DIMS(kerns)[i] >
CudaNdarray_HOST_DIMS(input)[i]);
}
if (shape_mismatch){
PyErr_Format(PyExc_RuntimeError,
"GpuDnnConv: the current version of CuDNN does not support "
"kernels larger than the inputs in any spatial dimension, "
"even if the inputs are padded such that the padded inputs "
"are larger than the kernels. Update your installation of "
"CuDNN to V3 or more recent to solve the issue.");
return 1;
}
#endif
err = cudnnGetConvolutionForwardWorkspaceSize(_handle,
APPLY_SPECIFIC(input),
......
......@@ -33,7 +33,6 @@ APPLY_SPECIFIC(conv_gi)(CudaNdarray *kerns, CudaNdarray *output,
if (c_set_tensorNd(*input, APPLY_SPECIFIC(input)) == -1)
return 1;
#if defined(CUDNN_VERSION) && CUDNN_VERSION >= 3000
{
size_t worksize;
void *workspace;
......@@ -249,16 +248,6 @@ APPLY_SPECIFIC(conv_gi)(CudaNdarray *kerns, CudaNdarray *output,
(void *)&beta,
APPLY_SPECIFIC(input), CudaNdarray_DEV_DATA(*input));
}
#else
err = cudnnConvolutionBackwardData(
_handle,
(void *)&alpha,
APPLY_SPECIFIC(kerns), CudaNdarray_DEV_DATA(kerns),
APPLY_SPECIFIC(output), CudaNdarray_DEV_DATA(output),
desc,
(void *)&beta,
APPLY_SPECIFIC(input), CudaNdarray_DEV_DATA(*input));
#endif
if (err != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_RuntimeError, "GpuDnnConvGradI: error doing operation: %s",
......
......@@ -33,7 +33,6 @@ APPLY_SPECIFIC(conv_gw)(CudaNdarray *input, CudaNdarray *output,
if (c_set_filterNd(*kerns, APPLY_SPECIFIC(kerns)) == -1)
return 1;
#if defined(CUDNN_VERSION) && CUDNN_VERSION >= 3000
{
size_t worksize;
void *workspace;
......@@ -234,16 +233,6 @@ APPLY_SPECIFIC(conv_gw)(CudaNdarray *input, CudaNdarray *output,
APPLY_SPECIFIC(kerns), CudaNdarray_DEV_DATA(*kerns));
}
#else
err = cudnnConvolutionBackwardFilter(
_handle,
(void *)&alpha,
APPLY_SPECIFIC(input), CudaNdarray_DEV_DATA(input),
APPLY_SPECIFIC(output), CudaNdarray_DEV_DATA(output),
desc,
(void *)&beta,
APPLY_SPECIFIC(kerns), CudaNdarray_DEV_DATA(*kerns));
#endif
if (err != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_RuntimeError, "GpuDnnConvGradW: error doing operation: %s",
......
......@@ -13,125 +13,11 @@ static inline int cudnnGetVersion() {
#include <assert.h>
#if CUDNN_VERSION < 3000
// Here we define the R3 API in terms of functions in the R2 interface
// This is only for what we use
// Minimal stand-in for the V3 backward-data algorithm enum so the V3-style
// signatures below compile against an R2 header; typedef'd to plain int.
typedef int cudnnConvolutionBwdDataAlgo_t;
// NOTE(review): numeric values presumably match the V3 enum — confirm
// against cudnn.h from cuDNN V3.
#define CUDNN_CONVOLUTION_BWD_DATA_ALGO_0 0
#define CUDNN_CONVOLUTION_BWD_DATA_ALGO_1 1
#define CUDNN_CONVOLUTION_BWD_DATA_ALGO_FFT 2
// Pre-V3 stub: the R2 backward-data path takes no workspace, so the
// required size is always reported as zero, regardless of the descriptors
// or the requested algorithm.
static cudnnStatus_t cudnnGetConvolutionBackwardDataWorkspaceSize(
    cudnnHandle_t handle,
    const cudnnFilterDescriptor_t filterDesc,
    const cudnnTensorDescriptor_t diffDesc,
    const cudnnConvolutionDescriptor_t convDesc,
    const cudnnTensorDescriptor_t gradDesc,
    cudnnConvolutionBwdDataAlgo_t algo,
    size_t *sizeInBytes) {
  // None of the inputs influence the answer in R2; silence unused warnings.
  (void)handle; (void)filterDesc; (void)diffDesc;
  (void)convDesc; (void)gradDesc; (void)algo;
  *sizeInBytes = 0;
  return CUDNN_STATUS_SUCCESS;
}
// Pre-V3 alias for cudnnConvolutionBackwardData: the V3 entry point adds
// an algorithm selector and a workspace buffer, neither of which exists in
// the R2 API, so both are accepted here and dropped before forwarding.
static cudnnStatus_t cudnnConvolutionBackwardData_v3(
    cudnnHandle_t handle,
    const void *alpha,
    const cudnnFilterDescriptor_t filterDesc,
    const void *filterData,
    const cudnnTensorDescriptor_t diffDesc,
    const void *diffData,
    const cudnnConvolutionDescriptor_t convDesc,
    cudnnConvolutionBwdDataAlgo_t algo,
    void *workspace,
    size_t workspaceSizeInBytes,
    const void *beta,
    const cudnnTensorDescriptor_t gradDesc,
    void *gradData) {
  (void)algo;                   // R2 has a single fixed implementation
  (void)workspace;              // R2 needs no scratch memory
  (void)workspaceSizeInBytes;
  return cudnnConvolutionBackwardData(handle, alpha, filterDesc, filterData,
                                      diffDesc, diffData, convDesc, beta,
                                      gradDesc, gradData);
}
// Minimal stand-in for the V3 backward-filter algorithm enum so the
// V3-style signatures below compile against an R2 header; typedef'd to int.
typedef int cudnnConvolutionBwdFilterAlgo_t;
// NOTE(review): numeric values presumably match the V3 enum — confirm
// against cudnn.h from cuDNN V3.
#define CUDNN_CONVOLUTION_BWD_FILTER_ALGO_0 0
#define CUDNN_CONVOLUTION_BWD_FILTER_ALGO_1 1
#define CUDNN_CONVOLUTION_BWD_FILTER_ALGO_FFT 2
static cudnnStatus_t cudnnGetConvolutionBackwardFilterWorkspaceSize(
cudnnHandle_t handle,
const cudnnTensorDescriptor_t filterDesc,
const cudnnTensorDescriptor_t diffDesc,
const cudnnConvolutionDescriptor_t convDesc,
const cudnnFilterDescriptor_t gradDesc,
cudnnConvolutionBwdDataAlgo_t algo,
size_t *sizeInBytes) {
*sizeInBytes = 0;
return CUDNN_STATUS_SUCCESS;
}
// Pre-V3 alias for cudnnConvolutionBackwardFilter: the V3 entry point adds
// an algorithm selector and a workspace buffer, neither of which exists in
// the R2 API, so they are accepted here and ignored when forwarding.
static cudnnStatus_t cudnnConvolutionBackwardFilter_v3(
cudnnHandle_t handle,
const void *alpha,
const cudnnTensorDescriptor_t srcDesc,
const void *srcData,
const cudnnTensorDescriptor_t diffDesc,
const void *diffData,
const cudnnConvolutionDescriptor_t convDesc,
cudnnConvolutionBwdFilterAlgo_t algo,
void *workspace,
size_t workspaceSizeInBytes,
const void *beta,
const cudnnFilterDescriptor_t gradDesc,
void *gradData) {
// algo, workspace and workspaceSizeInBytes are intentionally dropped:
// the R2 call below has no corresponding arguments.
return cudnnConvolutionBackwardFilter(
handle,
alpha,
srcDesc,
srcData,
diffDesc,
diffData,
convDesc,
beta,
gradDesc,
gradData);
}
// Starting in V3, the cudnnSetConvolutionNdDescriptor has an additional
// parameter that determines the data type in which to do the computation.
// For versions older than V3, we need to define an alias for that function
// that will take the additional parameter as input but ignore it.
static inline cudnnStatus_t cudnnSetConvolutionNdDescriptor_v3(
cudnnConvolutionDescriptor_t convDesc,
int arrayLength,
int padA[],
int filterStrideA[]
int upscaleA[],
cudnnConvolutionMode_t mode,
cudnn_dataType_t dataType) {
return cudnnSetConvolutionNdDescriptor(convDesc, arrayLength, padA,
filterStrideA, upscaleA, mode);
}
#endif
// If needed, define element of the V4 interface in terms of elements of
// previous versions
#if defined(CUDNN_VERSION) && CUDNN_VERSION < 4000
// NOTE(review): values presumably mirror the V4 enum entries missing from
// pre-V4 headers — confirm against cudnn.h from cuDNN V4.
#define CUDNN_CONVOLUTION_FWD_ALGO_FFT_TILING 5
#define CUDNN_CONVOLUTION_BWD_FILTER_ALGO_3 3
#define CUDNN_CONVOLUTION_BWD_DATA_ALGO_FFT_TILING 3
#endif
......
......@@ -136,7 +136,6 @@ APPLY_SPECIFIC(conv_fwd)(PyGpuArrayObject *input, PyGpuArrayObject *kerns,
algo == CUDNN_CONVOLUTION_FWD_ALGO_GEMM))
algo = CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_GEMM;
#if CUDNN_VERSION > 3000
// The FFT implementation does not support strides, 1x1 filters or inputs
// with a spatial dimension larger than 1024. The tiled-FFT implementation
// does not support strides.
......@@ -183,24 +182,6 @@ APPLY_SPECIFIC(conv_fwd)(PyGpuArrayObject *input, PyGpuArrayObject *kerns,
}
}
}
#endif
#if CUDNN_VERSION < 3000
/* cuDNN before v3 does not support kernels larger than input even
* if appropriate padding is selected. */
for (unsigned int i = 2; i < PyGpuArray_NDIM(input); i++) {
if (PyGpuArray_DIM(kerns, i) > PyGpuArray_DIM(input, i)) {
PyErr_SetString(PyExc_RuntimeError, "the current version "
"of CuDNN does not support kernels larger than the "
"inputs in any spatial dimension, even if the inputs "
"are padded such that the padded inputs are larger "
"than the kernels. Update your installation of CuDNN "
"to V3 or more recent to solve the issue.");
cuda_exit(c->ctx);
return 1;
}
}
#endif
{
size_t worksize;
......
......@@ -128,7 +128,6 @@ APPLY_SPECIFIC(conv_gi)(PyGpuArrayObject *kerns, PyGpuArrayObject *output,
#endif
#if CUDNN_VERSION > 3000
// The FFT implementation does not support strides, 1x1 filters or inputs
// with a spatial dimension larger than 1024. The tiled-FFT implementation
// does not support strides.
......@@ -175,7 +174,6 @@ APPLY_SPECIFIC(conv_gi)(PyGpuArrayObject *kerns, PyGpuArrayObject *output,
}
}
}
#endif
size_t worksize;
gpudata *workspace;
......
......@@ -130,7 +130,6 @@ APPLY_SPECIFIC(conv_gw)(PyGpuArrayObject *input, PyGpuArrayObject *output,
#endif
#if CUDNN_VERSION > 3000
// The FFT implementation does not support strides, 1x1 filters or inputs
// with a spatial dimension larger than 1024.
// If the chosen implementation is FFT, validate that it can
......@@ -163,7 +162,6 @@ APPLY_SPECIFIC(conv_gw)(PyGpuArrayObject *input, PyGpuArrayObject *output,
algo = CUDNN_CONVOLUTION_BWD_FILTER_ALGO_0;
}
}
#endif
size_t worksize;
gpudata *workspace;
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论