提交 fc1ed897 authored 作者: --global's avatar --global

Modify GpuDnnConvGradI to support timing feature

上级 c4df0b99
......@@ -897,8 +897,8 @@ class GpuDnnConvGradI(DnnBase, COp):
def __init__(self, inplace=False, workmem=None, algo=None):
"""
:param workmem: *deprecated*, use param algo instead
:param algo: either 'none', 'deterministic', 'fft', 'guess_once' or
'guess_on_shape_change'.
:param algo: either 'none', 'deterministic', 'fft', 'guess_once',
'guess_on_shape_change', 'time_once' or 'time_on_shape_change'.
Default is the value of :attr:`config.dnn.conv.algo_bwd`.
"""
COp.__init__(self, ["dnn_base.c", "dnn_conv_base.c", "dnn_gi.c"],
......@@ -918,7 +918,8 @@ class GpuDnnConvGradI(DnnBase, COp):
if self.inplace:
self.destroy_map = {0: [2]}
assert self.algo in ['none', 'deterministic', 'fft', 'guess_once',
'guess_on_shape_change']
'guess_on_shape_change', 'time_once',
'time_on_shape_change']
def __setstate__(self, d):
self.__dict__.update(d)
......@@ -954,29 +955,42 @@ class GpuDnnConvGradI(DnnBase, COp):
else:
inplace_def = []
alg_choose_once_def = ('CHOOSE_ALGO_ONCE', '0')
choose_alg = '0'
choose_alg_once = '0'
choose_alg_time = '0'
if version() == -1 or version() < (3000, 3000):
alg_def = ('CONV_ALGO', '0')
alg_choose_def = ('CHOOSE_ALGO', '0')
alg = "0"
else:
if self.algo == 'none':
alg_def = ('CONV_ALGO', 'CUDNN_CONVOLUTION_BWD_DATA_ALGO_0')
alg_choose_def = ('CHOOSE_ALGO', '0')
alg = 'CUDNN_CONVOLUTION_BWD_DATA_ALGO_0'
elif self.algo == 'deterministic':
alg_def = ('CONV_ALGO', 'CUDNN_CONVOLUTION_BWD_DATA_ALGO_1')
alg_choose_def = ('CHOOSE_ALGO', '0')
alg = 'CUDNN_CONVOLUTION_BWD_DATA_ALGO_1'
elif self.algo == 'fft':
alg_def = ('CONV_ALGO', 'CUDNN_CONVOLUTION_BWD_DATA_ALGO_FFT')
alg_choose_def = ('CHOOSE_ALGO', '0')
alg = 'CUDNN_CONVOLUTION_BWD_DATA_ALGO_FFT'
elif self.algo in ['guess_once', 'guess_on_shape_change']:
# The convolution implementation should be choosen according
# The convolution implementation should be chosen according
# to a heuristic
alg_def = ('CONV_ALGO', 'CUDNN_CONVOLUTION_BWD_DATA_ALGO_0')
alg_choose_def = ('CHOOSE_ALGO', '1')
alg = 'CUDNN_CONVOLUTION_BWD_DATA_ALGO_0'
choose_alg = '1'
if self.algo == 'guess_once':
alg_choose_once_def = ('CHOOSE_ALGO_ONCE', '1')
choose_alg_once = '1'
            elif self.algo in ['time_once', 'time_on_shape_change']:
                # The convolution implementation should be chosen according
                # to timing
                alg = 'CUDNN_CONVOLUTION_BWD_DATA_ALGO_0'
                choose_alg = '1'
                choose_alg_time = '1'
                if self.algo == 'time_once':
                    choose_alg_once = '1'
alg_def = ('CONV_ALGO', alg)
alg_choose_def = ('CHOOSE_ALGO', choose_alg)
alg_choose_once_def = ('CHOOSE_ALGO_ONCE', choose_alg_once)
alg_choose_time_def = ('CHOOSE_ALGO_TIME', choose_alg_time)
return inplace_def + [alg_def, alg_choose_def, alg_choose_once_def]
return inplace_def + [alg_def, alg_choose_def, alg_choose_once_def,
alg_choose_time_def]
def make_node(self, kern, topgrad, output, desc, alpha=None, beta=None):
kern = as_cuda_ndarray_variable(kern)
......@@ -1031,7 +1045,8 @@ class GpuDnnConv3dGradI(GpuDnnConvGradI):
super(GpuDnnConv3dGradI, self).__init__(inplace=inplace,
algo="none")
assert self.algo in ['none', 'guess_once', 'guess_on_shape_change']
assert self.algo in ['none', 'guess_once', 'guess_on_shape_change',
'time_once', 'time_on_shape_change']
def grad(self, inp, grads):
......
......@@ -42,8 +42,8 @@ APPLY_SPECIFIC(conv_gi)(CudaNdarray *kerns, CudaNdarray *output,
if (CHOOSE_ALGO)
{
// A new convolution implementation should be selected, based on
// heuristics, if in one of the two following cases :
// A new convolution implementation should be selected, based either on
// timing or heuristics, if in one of the two following cases :
// - The implementation should only be chosen during the first execution
// of an apply node and this is the first execution of the apply node.
// - The implementation should be chosen as often as necessary and the
......@@ -75,35 +75,64 @@ APPLY_SPECIFIC(conv_gi)(CudaNdarray *kerns, CudaNdarray *output,
// new one based on the shapes of the current inputs
if (!reuse_previous_algo)
{
// Choose the convolution implementation using heuristics based on the
// shapes of the inputs and the amount of memory available.
// Get the amount of available memory
size_t free = 0, total = 0;
cudaError_t err2 = cudaMemGetInfo(&free, &total);
if (err2 != cudaSuccess){
cudaGetLastError();
fprintf(stderr,
"Error when trying to find the memory information"
" on the GPU: %s\n", cudaGetErrorString(err2));
return 1;
// Obtain a convolution algorithm appropriate for the kernel and output
// shapes. Either by choosing one according to heuristics or by making
// CuDNN time every implementation and choose the best one.
if (CHOOSE_ALGO_TIME)
{
// Time the different implementations to choose the best one
int requestedCount = 1;
int count;
    cudnnConvolutionBwdDataAlgoPerf_t chosen_algo_perf;
    err = cudnnFindConvolutionBackwardDataAlgorithm(_handle,
                                                    APPLY_SPECIFIC(kerns),
                                                    APPLY_SPECIFIC(output),
                                                    desc,
                                                    APPLY_SPECIFIC(input),
                                                    requestedCount,
                                                    &count,
                                                    &chosen_algo_perf);
    if (err != CUDNN_STATUS_SUCCESS) {
      PyErr_Format(PyExc_RuntimeError,
                   "GpuDnnConvGradI: error selecting convolution algo: "
                   "%s", cudnnGetErrorString(err));
      return 1;
    }
    chosen_algo = chosen_algo_perf.algo;
}
// Use heuristics to choose the implementation
err = cudnnGetConvolutionBackwardDataAlgorithm(_handle,
APPLY_SPECIFIC(kerns),
APPLY_SPECIFIC(output),
desc,
APPLY_SPECIFIC(input),
CUDNN_CONVOLUTION_BWD_DATA_PREFER_FASTEST,
free,
&chosen_algo);
if (err != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_RuntimeError,
"GpuDnnConvGradI: error selecting convolution algo: %s",
cudnnGetErrorString(err));
return 1;
else
{
// Choose the convolution implementation using heuristics based on the
// shapes of the inputs and the amount of memory available.
// Get the amount of available memory
size_t free = 0, total = 0;
cudaError_t err2 = cudaMemGetInfo(&free, &total);
if (err2 != cudaSuccess){
cudaGetLastError();
fprintf(stderr,
"Error when trying to find the memory information"
" on the GPU: %s\n", cudaGetErrorString(err2));
return 1;
}
// Use heuristics to choose the implementation
err = cudnnGetConvolutionBackwardDataAlgorithm(_handle,
APPLY_SPECIFIC(kerns),
APPLY_SPECIFIC(output),
desc,
APPLY_SPECIFIC(input),
CUDNN_CONVOLUTION_BWD_DATA_SPECIFY_WORKSPACE_LIMIT,
free,
&chosen_algo);
if (err != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_RuntimeError,
"GpuDnnConvGradI: error selecting convolution algo: %s",
cudnnGetErrorString(err));
return 1;
}
}
// Store the shapes of the kernels and output as well as the chosen
......@@ -129,8 +158,8 @@ APPLY_SPECIFIC(conv_gi)(CudaNdarray *kerns, CudaNdarray *output,
chosen_algo = CONV_ALGO;
}
// The FFT implementation does not support strides, 1x1 filters or
// inputs with a spatial dimension larger than 1024.
// The FFT implementation (only in v3 and onward) does not support strides,
// 1x1 filters or inputs with a spatial dimension larger than 1024.
// If the chosen implementation is FFT, validate that it can be used
// on the current data and default on a safe implementation if it
// can't.
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论