提交 17bfc491 authored 作者: --global's avatar --global

Support V2 in GpuDnnConv

上级 3fdad2b7
...@@ -549,6 +549,18 @@ class GpuDnnConv(DnnBase, COp): ...@@ -549,6 +549,18 @@ class GpuDnnConv(DnnBase, COp):
self.inplace = inplace self.inplace = inplace
if self.inplace: if self.inplace:
self.destroy_map = {0: [2]} self.destroy_map = {0: [2]}
# In CuDNN versions older than V3, the FFT implementation and the
# option to time the different implementations to get the fastest
# are both unavailable.
if version() < (3000, 3000):
if self.workmem == 'fft':
raise RuntimeError("CuDNN's FFT convolution is only available "
"starting at CuDNN v3")
elif self.workmem == 'time':
raise RuntimeError("CuDNN's convolution timing option is only "
"available starting at CuDNN v3")
assert self.workmem in ['none', 'small', 'large', 'fft', 'time', assert self.workmem in ['none', 'small', 'large', 'fft', 'time',
'guess'] 'guess']
...@@ -564,11 +576,12 @@ class GpuDnnConv(DnnBase, COp): ...@@ -564,11 +576,12 @@ class GpuDnnConv(DnnBase, COp):
inpl_def = [('CONV_INPLACE', '1')] inpl_def = [('CONV_INPLACE', '1')]
else: else:
inpl_def = [] inpl_def = []
choose_alg = '0'
choose_alg_time = '0'
if version() == -1: if version() == -1:
alg_def = ('CONV_ALGO', "0") alg = "0"
else: else:
choose_alg = '0'
choose_alg_time = '0'
if self.workmem == 'none': if self.workmem == 'none':
alg = 'CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_GEMM' alg = 'CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_GEMM'
elif self.workmem == 'small': elif self.workmem == 'small':
...@@ -589,9 +602,9 @@ class GpuDnnConv(DnnBase, COp): ...@@ -589,9 +602,9 @@ class GpuDnnConv(DnnBase, COp):
choose_alg = '1' choose_alg = '1'
choose_alg_time = '1' choose_alg_time = '1'
alg_def = ('CONV_ALGO', alg) alg_def = ('CONV_ALGO', alg)
alg_choose_def = ('CHOOSE_ALGO', choose_alg) alg_choose_def = ('CHOOSE_ALGO', choose_alg)
alg_choose_time_def = ('CHOOSE_ALGO_TIME', choose_alg_time) alg_choose_time_def = ('CHOOSE_ALGO_TIME', choose_alg_time)
return [alg_def, alg_choose_def, alg_choose_time_def] + inpl_def return [alg_def, alg_choose_def, alg_choose_time_def] + inpl_def
......
...@@ -6,13 +6,19 @@ cudnnFilterDescriptor_t APPLY_SPECIFIC(kerns); ...@@ -6,13 +6,19 @@ cudnnFilterDescriptor_t APPLY_SPECIFIC(kerns);
/* Keep track, from one execution to another, of the dimension of the data /* Keep track, from one execution to another, of the dimension of the data
and the algorithms, if any, that were selected according to these dimensions and the algorithms, if any, that were selected according to these dimensions
and according to the amount of memory available at that time. and according to the amount of memory available at that time.
Note: implementation selection for backward convolution only exists starting
at V3.
*/ */
int APPLY_SPECIFIC(previous_input_shape)[5]; int APPLY_SPECIFIC(previous_input_shape)[5];
int APPLY_SPECIFIC(previous_kerns_shape)[5]; int APPLY_SPECIFIC(previous_kerns_shape)[5];
int APPLY_SPECIFIC(previous_output_shape)[5]; int APPLY_SPECIFIC(previous_output_shape)[5];
cudnnConvolutionFwdAlgo_t APPLY_SPECIFIC(previous_algo); cudnnConvolutionFwdAlgo_t APPLY_SPECIFIC(previous_algo);
#if defined(CUDNN_VERSION) && CUDNN_VERSION >= 3000
cudnnConvolutionBwdFilterAlgo_t APPLY_SPECIFIC(previous_bwd_f_algo); cudnnConvolutionBwdFilterAlgo_t APPLY_SPECIFIC(previous_bwd_f_algo);
cudnnConvolutionBwdDataAlgo_t APPLY_SPECIFIC(previous_bwd_d_algo); cudnnConvolutionBwdDataAlgo_t APPLY_SPECIFIC(previous_bwd_d_algo);
#endif
#section init_code_struct #section init_code_struct
......
...@@ -73,6 +73,7 @@ APPLY_SPECIFIC(conv_fwd)(CudaNdarray *input, CudaNdarray *kerns, ...@@ -73,6 +73,7 @@ APPLY_SPECIFIC(conv_fwd)(CudaNdarray *input, CudaNdarray *kerns,
// CuDNN time every implementation and choose the best one. // CuDNN time every implementation and choose the best one.
if (CHOOSE_ALGO_TIME) if (CHOOSE_ALGO_TIME)
{ {
#if defined(CUDNN_VERSION) && CUDNN_VERSION >= 3000
// Time the different implementations to choose the best one // Time the different implementations to choose the best one
int requestedCount = 1; int requestedCount = 1;
int count; int count;
...@@ -93,6 +94,7 @@ APPLY_SPECIFIC(conv_fwd)(CudaNdarray *input, CudaNdarray *kerns, ...@@ -93,6 +94,7 @@ APPLY_SPECIFIC(conv_fwd)(CudaNdarray *input, CudaNdarray *kerns,
} }
chosen_algo = choosen_algo_perf.algo; chosen_algo = choosen_algo_perf.algo;
#endif
} }
else else
{ {
...@@ -138,12 +140,14 @@ APPLY_SPECIFIC(conv_fwd)(CudaNdarray *input, CudaNdarray *kerns, ...@@ -138,12 +140,14 @@ APPLY_SPECIFIC(conv_fwd)(CudaNdarray *input, CudaNdarray *kerns,
chosen_algo = CONV_ALGO; chosen_algo = CONV_ALGO;
} }
// The FFT implementation does not support strides, 1x1 filters or // The FFT implementation (only in V3 and onward) does not support strides,
// inputs with a spatial dimension larger than 1024. // 1x1 filters or inputs with a spatial dimension larger than 1024.
// If the chosen implementation is FFT, validate that it can be used // If the chosen implementation is FFT, validate that it can be used
// on the current data and default on a safe implementation if it // on the current data and default on a safe implementation if it
// can't. // can't.
// Following code is 2d-specific, but it is fine as ftt is define only for 2d-filters // Following code is 2d-specific, but it is fine as FFT is defined only for
// 2d-filters
#if defined(CUDNN_VERSION) && CUDNN_VERSION >= 3000
if (chosen_algo == CUDNN_CONVOLUTION_FWD_ALGO_FFT && nb_dim == 4) if (chosen_algo == CUDNN_CONVOLUTION_FWD_ALGO_FFT && nb_dim == 4)
{ {
...@@ -178,6 +182,7 @@ APPLY_SPECIFIC(conv_fwd)(CudaNdarray *input, CudaNdarray *kerns, ...@@ -178,6 +182,7 @@ APPLY_SPECIFIC(conv_fwd)(CudaNdarray *input, CudaNdarray *kerns,
chosen_algo = CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_GEMM; chosen_algo = CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_GEMM;
} }
} }
#endif
err = cudnnGetConvolutionForwardWorkspaceSize(_handle, err = cudnnGetConvolutionForwardWorkspaceSize(_handle,
APPLY_SPECIFIC(input), APPLY_SPECIFIC(input),
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论