提交 8c7ab092 authored 作者: --global's avatar --global

Add 'guess_once' option for DnnConv3dGrad algo selection

上级 89199807
...@@ -348,7 +348,7 @@ AddConfigVar('dnn.conv.workmem', ...@@ -348,7 +348,7 @@ AddConfigVar('dnn.conv.workmem',
AddConfigVar('dnn.conv.workmem_bwd', AddConfigVar('dnn.conv.workmem_bwd',
"Default value for the workmem attribute of cudnn gradient " "Default value for the workmem attribute of cudnn gradient "
"convolutions.", "convolutions.",
EnumStr('none', 'deterministic', 'fft', 'guess'), EnumStr('none', 'deterministic', 'fft', 'guess', 'guess_once'),
in_c_key=False) in_c_key=False)
...@@ -665,7 +665,8 @@ class GpuDnnConvGradW(DnnBase, COp): ...@@ -665,7 +665,8 @@ class GpuDnnConvGradW(DnnBase, COp):
self.inplace = inplace self.inplace = inplace
if self.inplace: if self.inplace:
self.destroy_map = {0: [2]} self.destroy_map = {0: [2]}
assert self.workmem in ['none', 'deterministic', 'fft', 'guess'] assert self.workmem in ['none', 'deterministic', 'fft', 'guess',
'guess_once']
def __setstate__(self, d): def __setstate__(self, d):
self.__dict__.update(d) self.__dict__.update(d)
...@@ -698,6 +699,7 @@ class GpuDnnConvGradW(DnnBase, COp): ...@@ -698,6 +699,7 @@ class GpuDnnConvGradW(DnnBase, COp):
else: else:
inplace_def = [] inplace_def = []
alg_choose_once_def = ('CHOOSE_ALGO_ONCE', '0')
if version() == -1 or version() < (3000, 3000): if version() == -1 or version() < (3000, 3000):
alg_def = ('CONV_ALGO', '0') alg_def = ('CONV_ALGO', '0')
alg_choose_def = ('CHOOSE_ALGO', '0') alg_choose_def = ('CHOOSE_ALGO', '0')
...@@ -711,13 +713,15 @@ class GpuDnnConvGradW(DnnBase, COp): ...@@ -711,13 +713,15 @@ class GpuDnnConvGradW(DnnBase, COp):
elif self.workmem == 'fft': elif self.workmem == 'fft':
alg_def = ('CONV_ALGO', 'CUDNN_CONVOLUTION_BWD_FILTER_ALGO_FFT') alg_def = ('CONV_ALGO', 'CUDNN_CONVOLUTION_BWD_FILTER_ALGO_FFT')
alg_choose_def = ('CHOOSE_ALGO', '0') alg_choose_def = ('CHOOSE_ALGO', '0')
elif self.workmem == 'guess': elif self.workmem in ['guess', 'guess_once']:
# The convolution implementation should be chosen according # The convolution implementation should be chosen according
# to a heuristic # to a heuristic
alg_def = ('CONV_ALGO', 'CUDNN_CONVOLUTION_BWD_FILTER_ALGO_0') alg_def = ('CONV_ALGO', 'CUDNN_CONVOLUTION_BWD_FILTER_ALGO_0')
alg_choose_def = ('CHOOSE_ALGO', '1') alg_choose_def = ('CHOOSE_ALGO', '1')
if self.workmem == 'guess_once':
alg_choose_once_def = ('CHOOSE_ALGO_ONCE', '1')
return inplace_def + [alg_def, alg_choose_def] return inplace_def + [alg_def, alg_choose_def, alg_choose_once_def]
def make_node(self, img, topgrad, output, desc, alpha=None, beta=None): def make_node(self, img, topgrad, output, desc, alpha=None, beta=None):
img = as_cuda_ndarray_variable(img) img = as_cuda_ndarray_variable(img)
...@@ -757,7 +761,7 @@ class GpuDnnConv3dGradW(GpuDnnConvGradW): ...@@ -757,7 +761,7 @@ class GpuDnnConv3dGradW(GpuDnnConvGradW):
def __init__(self, inplace=False, workmem=None): def __init__(self, inplace=False, workmem=None):
super(GpuDnnConv3dGradW, self).__init__(inplace=inplace, workmem='none') super(GpuDnnConv3dGradW, self).__init__(inplace=inplace, workmem='none')
assert self.workmem in ['none', 'time','guess'] assert self.workmem in ['none', 'time','guess', 'guess_once']
def grad(self, inp, grads): def grad(self, inp, grads):
img, top, output, desc, alpha, beta = inp img, top, output, desc, alpha, beta = inp
...@@ -818,7 +822,8 @@ class GpuDnnConvGradI(DnnBase, COp): ...@@ -818,7 +822,8 @@ class GpuDnnConvGradI(DnnBase, COp):
self.inplace = inplace self.inplace = inplace
if self.inplace: if self.inplace:
self.destroy_map = {0: [2]} self.destroy_map = {0: [2]}
assert self.workmem in ['none', 'deterministic', 'fft', 'guess'] assert self.workmem in ['none', 'deterministic', 'fft', 'guess',
'guess_once']
def __setstate__(self, d): def __setstate__(self, d):
self.__dict__.update(d) self.__dict__.update(d)
...@@ -849,6 +854,7 @@ class GpuDnnConvGradI(DnnBase, COp): ...@@ -849,6 +854,7 @@ class GpuDnnConvGradI(DnnBase, COp):
else: else:
inplace_def = [] inplace_def = []
alg_choose_once_def = ('CHOOSE_ALGO_ONCE', '0')
if version() == -1 or version() < (3000, 3000): if version() == -1 or version() < (3000, 3000):
alg_def = ('CONV_ALGO', '0') alg_def = ('CONV_ALGO', '0')
alg_choose_def = ('CHOOSE_ALGO', '0') alg_choose_def = ('CHOOSE_ALGO', '0')
...@@ -862,13 +868,15 @@ class GpuDnnConvGradI(DnnBase, COp): ...@@ -862,13 +868,15 @@ class GpuDnnConvGradI(DnnBase, COp):
elif self.workmem == 'fft': elif self.workmem == 'fft':
alg_def = ('CONV_ALGO', 'CUDNN_CONVOLUTION_BWD_DATA_ALGO_FFT') alg_def = ('CONV_ALGO', 'CUDNN_CONVOLUTION_BWD_DATA_ALGO_FFT')
alg_choose_def = ('CHOOSE_ALGO', '0') alg_choose_def = ('CHOOSE_ALGO', '0')
elif self.workmem == 'guess': elif self.workmem in ['guess', 'guess_once']:
# The convolution implementation should be chosen according # The convolution implementation should be chosen according
# to a heuristic # to a heuristic
alg_def = ('CONV_ALGO', 'CUDNN_CONVOLUTION_BWD_DATA_ALGO_0') alg_def = ('CONV_ALGO', 'CUDNN_CONVOLUTION_BWD_DATA_ALGO_0')
alg_choose_def = ('CHOOSE_ALGO', '1') alg_choose_def = ('CHOOSE_ALGO', '1')
if self.workmem == 'guess_once':
alg_choose_once_def = ('CHOOSE_ALGO_ONCE', '1')
return inplace_def + [alg_def, alg_choose_def] return inplace_def + [alg_def, alg_choose_def, alg_choose_once_def]
def make_node(self, kern, topgrad, output, desc, alpha=None, beta=None): def make_node(self, kern, topgrad, output, desc, alpha=None, beta=None):
kern = as_cuda_ndarray_variable(kern) kern = as_cuda_ndarray_variable(kern)
...@@ -913,7 +921,7 @@ class GpuDnnConv3dGradI(GpuDnnConvGradI): ...@@ -913,7 +921,7 @@ class GpuDnnConv3dGradI(GpuDnnConvGradI):
if workmem == None: if workmem == None:
workmem = 'none' workmem = 'none'
super(GpuDnnConv3dGradI, self).__init__(inplace, workmem) super(GpuDnnConv3dGradI, self).__init__(inplace, workmem)
assert self.workmem in ['none', 'time','guess'] assert self.workmem in ['none', 'time', 'guess', 'guess_once']
def grad(self, inp, grads): def grad(self, inp, grads):
......
...@@ -41,22 +41,42 @@ APPLY_SPECIFIC(conv_gi)(CudaNdarray *kerns, CudaNdarray *output, ...@@ -41,22 +41,42 @@ APPLY_SPECIFIC(conv_gi)(CudaNdarray *kerns, CudaNdarray *output,
if (CHOOSE_ALGO) if (CHOOSE_ALGO)
{ {
// Check if the kernels and the output have the same shape as they have
// last time the apply node was executed // A new convolution implementation should be selected, based on
bool same_shapes = true; // heuristics, if in one of the two following cases :
for (int i = 0; (i < nb_dim) && same_shapes; i++) // - The implementation should only be chosen during the first execution
// of an apply node and this is the first execution of the apply node.
// - The implementation should be chosen as often as necessary and the
// shapes of the inputs differ from the last time an implementation
// was chosen.
bool reuse_previous_algo;
if (CHOOSE_ALGO_ONCE)
{
// Only choose a new implementation if none has been chosen before.
reuse_previous_algo = APPLY_SPECIFIC(previous_algo_set);
}
else
{ {
same_shapes &= (CudaNdarray_HOST_DIMS(kerns)[i] == // Reuse the previous implementation if the kernels and the outputs
APPLY_SPECIFIC(previous_kerns_shape)[i]); // have the same shapes as they had when the previous implementation
same_shapes &= (CudaNdarray_HOST_DIMS(output)[i] == // was selected
APPLY_SPECIFIC(previous_output_shape)[i]); bool same_shapes = true;
for (int i = 0; (i < nb_dim) && same_shapes; i++)
{
same_shapes &= (CudaNdarray_HOST_DIMS(kerns)[i] ==
APPLY_SPECIFIC(previous_kerns_shape)[i]);
same_shapes &= (CudaNdarray_HOST_DIMS(output)[i] ==
APPLY_SPECIFIC(previous_output_shape)[i]);
}
reuse_previous_algo = same_shapes;
} }
if (!same_shapes) // If the previously chosen implementation can't be reused, select a
// new one based on the shapes of the current inputs
if (!reuse_previous_algo)
{ {
// The shape of the kernels and/or the output is different from the // Choose the convolution implementation using heuristics based on the
// last execution. Use the current shapes to infer the implementation // shapes of the inputs and the amount of memory available.
// to use from now on.
// Get the amount of available memory // Get the amount of available memory
size_t free = 0, total = 0; size_t free = 0, total = 0;
...@@ -100,9 +120,7 @@ APPLY_SPECIFIC(conv_gi)(CudaNdarray *kerns, CudaNdarray *output, ...@@ -100,9 +120,7 @@ APPLY_SPECIFIC(conv_gi)(CudaNdarray *kerns, CudaNdarray *output,
} }
else else
{ {
// The shapes of the kernels and the output are the same as for the // Reuse the previously chosen convlution implementation
// last execution. The convolution algorithm used last time can also
// be used here
chosen_algo = APPLY_SPECIFIC(previous_bwd_d_algo); chosen_algo = APPLY_SPECIFIC(previous_bwd_d_algo);
} }
} }
......
...@@ -41,22 +41,42 @@ APPLY_SPECIFIC(conv_gw)(CudaNdarray *input, CudaNdarray *output, ...@@ -41,22 +41,42 @@ APPLY_SPECIFIC(conv_gw)(CudaNdarray *input, CudaNdarray *output,
if (CHOOSE_ALGO) if (CHOOSE_ALGO)
{ {
// Check if the input and the output have the same shape as they have
// last time the apply node was executed // A new convolution implementation should be selected, based on
bool same_shapes = true; // heuristics, if in one of the two following cases :
for (int i = 0; (i < nb_dim) && same_shapes; i++) // - The implementation should only be chosen during the first execution
// of an apply node and this is the first execution of the apply node.
// - The implementation should be chosen as often as necessary and the
// shapes of the inputs differ from the last time an implementation
// was chosen.
bool reuse_previous_algo;
if (CHOOSE_ALGO_ONCE)
{
// Only choose a new implementation if none has been chosen before.
reuse_previous_algo = APPLY_SPECIFIC(previous_algo_set);
}
else
{ {
same_shapes &= (CudaNdarray_HOST_DIMS(input)[i] == // Reuse the previous implementation if the kernels and the outputs
APPLY_SPECIFIC(previous_input_shape)[i]); // have the same shapes as they had when the previous implementation
same_shapes &= (CudaNdarray_HOST_DIMS(output)[i] == // was selected
APPLY_SPECIFIC(previous_output_shape)[i]); bool same_shapes = true;
for (int i = 0; (i < nb_dim) && same_shapes; i++)
{
same_shapes &= (CudaNdarray_HOST_DIMS(input)[i] ==
APPLY_SPECIFIC(previous_input_shape)[i]);
same_shapes &= (CudaNdarray_HOST_DIMS(output)[i] ==
APPLY_SPECIFIC(previous_output_shape)[i]);
}
reuse_previous_algo = same_shapes;
} }
if (!same_shapes) // If the previously chosen implementation can't be reused, select a
// new one based on the shapes of the current inputs
if (!reuse_previous_algo)
{ {
// The shape of the inputs and/or the output is different from the // Choose the convolution implementation using heuristics based on the
// last execution. Use the current shapes to infer the implementation // shapes of the inputs and the amount of memory available.
// to use from now on.
// Get the amount of available memory // Get the amount of available memory
size_t free = 0, total = 0; size_t free = 0, total = 0;
...@@ -100,9 +120,7 @@ APPLY_SPECIFIC(conv_gw)(CudaNdarray *input, CudaNdarray *output, ...@@ -100,9 +120,7 @@ APPLY_SPECIFIC(conv_gw)(CudaNdarray *input, CudaNdarray *output,
} }
else else
{ {
// The shapes of the input and the output are the same as for the // Reuse the previously chosen convolution implementation
// last execution. The convolution algorithm used last time can also
// be used here
chosen_algo = APPLY_SPECIFIC(previous_bwd_f_algo); chosen_algo = APPLY_SPECIFIC(previous_bwd_f_algo);
} }
} }
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论