提交 2597dcd2 authored 作者: --global's avatar --global

Add support for implementation timing

上级 f9b85e1e
......@@ -350,7 +350,7 @@ class GpuDnnConvDesc(GpuOp):
AddConfigVar('dnn.conv.workmem',
"Default value for the workmem attribute of cudnn convolutions.",
EnumStr('small', 'none', 'large'),
EnumStr('small', 'none', 'large', 'guess', 'time'),
in_c_key=False)
# scalar constants
......@@ -397,7 +397,8 @@ class GpuDnnConv(DnnBase, COp):
self.inplace = inplace
if self.inplace:
self.destroy_map = {0: [2]}
assert self.workmem in ['none', 'small', 'large']
assert self.workmem in ['none', 'small', 'large', 'fft', 'time',
'guess']
def __setstate__(self, d):
self.__dict__.update(d)
......@@ -417,18 +418,37 @@ class GpuDnnConv(DnnBase, COp):
if self.workmem == 'none':
alg = 'CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_GEMM'
choose_alg = '0'
choose_alg_time = '0'
elif self.workmem == 'small':
alg = 'CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM'
choose_alg = '0'
choose_alg_time = '0'
elif self.workmem == 'large':
alg = 'CUDNN_CONVOLUTION_FWD_ALGO_GEMM'
choose_alg = '0'
choose_alg_time = '0'
elif self.workmem == 'fft':
alg = 'CUDNN_CONVOLUTION_FWD_ALGO_FFT'
choose_alg = '0'
choose_alg_time = '0'
elif self.workmem == 'guess':
# The convolution implementation should be chosen according
# to a heuristic
alg = 'CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM'
choose_alg = '1'
choose_alg_time = '0'
elif self.workmem == 'time':
alg = "0"
# The convolution implementation should be chosen by timing
# every available implementation
alg = 'CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM'
choose_alg = '1'
choose_alg_time = '1'
alg_def = ('CONV_ALGO', alg)
alg_choose_def = ('CHOOSE_ALGO', choose_alg)
return [alg_def, alg_choose_def] + inpl_def
alg_choose_time_def = ('CHOOSE_ALGO_TIME', choose_alg_time)
return [alg_def, alg_choose_def, alg_choose_time_def] + inpl_def
def make_node(self, img, kern, output, desc, alpha=None, beta=None):
img = as_cuda_ndarray_variable(img)
......
......@@ -42,6 +42,7 @@ APPLY_SPECIFIC(previous_kerns_shape)[0] = 0;
APPLY_SPECIFIC(previous_kerns_shape)[1] = 0;
APPLY_SPECIFIC(previous_kerns_shape)[2] = 0;
APPLY_SPECIFIC(previous_kerns_shape)[3] = 0;
APPLY_SPECIFIC(previous_algo) = CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM;
#section cleanup_code_struct
......
......@@ -35,7 +35,8 @@ APPLY_SPECIFIC(conv_fwd)(CudaNdarray *input, CudaNdarray *kerns,
void *workspace;
cudnnConvolutionFwdAlgo_t chosen_algo;
if (CHOOSE_ALGO){
if (CHOOSE_ALGO)
{
// Check if the input and the kernels have the same shape as they have
// last time the apply node was executed
......@@ -48,7 +49,7 @@ APPLY_SPECIFIC(conv_fwd)(CudaNdarray *input, CudaNdarray *kerns,
APPLY_SPECIFIC(previous_kerns_shape)[i]);
}
if (same_shapes)
if (!same_shapes)
{
// The shape of the inputs and/or the kernels is different from the
// last execution. Use the current shapes to infer the implementation
......@@ -62,10 +63,32 @@ APPLY_SPECIFIC(conv_fwd)(CudaNdarray *input, CudaNdarray *kerns,
fprintf(stderr,
"Error when trying to find the memory information"
" on the GPU: %s\n", cudaGetErrorString(err2));
return 1;
}
// Obtain a convolution algorithm appropriate for the input and kernel
// shapes
// shapes. Either by choosing one according to heuristics or by making
// CuDNN time every implementation and choose the best one.
if (CHOOSE_ALGO_TIME)
{
// Time the different implementations to choose the best one
int requestedCount = 2;
int count;
cudnnConvolutionFwdAlgoPerf_t choosen_algo_perf;
err = cudnnFindConvolutionForwardAlgorithm(_handle,
APPLY_SPECIFIC(input),
APPLY_SPECIFIC(kerns),
desc,
APPLY_SPECIFIC(output),
requestedCount,
&count,
&choosen_algo_perf);
chosen_algo = choosen_algo_perf.algo;
fprintf(stdout, "Choose algo %i\n", chosen_algo);
}
else
{
// Use heuristics to choose the implementation
err = cudnnGetConvolutionForwardAlgorithm(_handle,
APPLY_SPECIFIC(input),
APPLY_SPECIFIC(kerns),
......@@ -74,6 +97,7 @@ APPLY_SPECIFIC(conv_fwd)(CudaNdarray *input, CudaNdarray *kerns,
CUDNN_CONVOLUTION_FWD_SPECIFY_WORKSPACE_LIMIT,
free,
&chosen_algo);
}
if (err != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_RuntimeError,
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论