提交 8c7ab092 authored 作者: --global's avatar --global

Add 'guess_once' option for DnnConv3dGrad algo selection

上级 89199807
......@@ -348,7 +348,7 @@ AddConfigVar('dnn.conv.workmem',
AddConfigVar('dnn.conv.workmem_bwd',
"Default value for the workmem attribute of cudnn gradient "
"convolutions.",
EnumStr('none', 'deterministic', 'fft', 'guess'),
EnumStr('none', 'deterministic', 'fft', 'guess', 'guess_once'),
in_c_key=False)
......@@ -665,7 +665,8 @@ class GpuDnnConvGradW(DnnBase, COp):
self.inplace = inplace
if self.inplace:
self.destroy_map = {0: [2]}
assert self.workmem in ['none', 'deterministic', 'fft', 'guess']
assert self.workmem in ['none', 'deterministic', 'fft', 'guess',
'guess_once']
def __setstate__(self, d):
self.__dict__.update(d)
......@@ -698,6 +699,7 @@ class GpuDnnConvGradW(DnnBase, COp):
else:
inplace_def = []
alg_choose_once_def = ('CHOOSE_ALGO_ONCE', '0')
if version() == -1 or version() < (3000, 3000):
alg_def = ('CONV_ALGO', '0')
alg_choose_def = ('CHOOSE_ALGO', '0')
......@@ -711,13 +713,15 @@ class GpuDnnConvGradW(DnnBase, COp):
elif self.workmem == 'fft':
alg_def = ('CONV_ALGO', 'CUDNN_CONVOLUTION_BWD_FILTER_ALGO_FFT')
alg_choose_def = ('CHOOSE_ALGO', '0')
elif self.workmem == 'guess':
elif self.workmem in ['guess', 'guess_once']:
# The convolution implementation should be chosen according
# to a heuristic
alg_def = ('CONV_ALGO', 'CUDNN_CONVOLUTION_BWD_FILTER_ALGO_0')
alg_choose_def = ('CHOOSE_ALGO', '1')
if self.workmem == 'guess_once':
alg_choose_once_def = ('CHOOSE_ALGO_ONCE', '1')
return inplace_def + [alg_def, alg_choose_def]
return inplace_def + [alg_def, alg_choose_def, alg_choose_once_def]
def make_node(self, img, topgrad, output, desc, alpha=None, beta=None):
img = as_cuda_ndarray_variable(img)
......@@ -757,7 +761,7 @@ class GpuDnnConv3dGradW(GpuDnnConvGradW):
def __init__(self, inplace=False, workmem=None):
super(GpuDnnConv3dGradW, self).__init__(inplace=inplace, workmem='none')
assert self.workmem in ['none', 'time','guess']
assert self.workmem in ['none', 'time','guess', 'guess_once']
def grad(self, inp, grads):
img, top, output, desc, alpha, beta = inp
......@@ -818,7 +822,8 @@ class GpuDnnConvGradI(DnnBase, COp):
self.inplace = inplace
if self.inplace:
self.destroy_map = {0: [2]}
assert self.workmem in ['none', 'deterministic', 'fft', 'guess']
assert self.workmem in ['none', 'deterministic', 'fft', 'guess',
'guess_once']
def __setstate__(self, d):
self.__dict__.update(d)
......@@ -849,6 +854,7 @@ class GpuDnnConvGradI(DnnBase, COp):
else:
inplace_def = []
alg_choose_once_def = ('CHOOSE_ALGO_ONCE', '0')
if version() == -1 or version() < (3000, 3000):
alg_def = ('CONV_ALGO', '0')
alg_choose_def = ('CHOOSE_ALGO', '0')
......@@ -862,13 +868,15 @@ class GpuDnnConvGradI(DnnBase, COp):
elif self.workmem == 'fft':
alg_def = ('CONV_ALGO', 'CUDNN_CONVOLUTION_BWD_DATA_ALGO_FFT')
alg_choose_def = ('CHOOSE_ALGO', '0')
elif self.workmem == 'guess':
elif self.workmem in ['guess', 'guess_once']:
# The convolution implementation should be chosen according
# to a heuristic
alg_def = ('CONV_ALGO', 'CUDNN_CONVOLUTION_BWD_DATA_ALGO_0')
alg_choose_def = ('CHOOSE_ALGO', '1')
if self.workmem == 'guess_once':
alg_choose_once_def = ('CHOOSE_ALGO_ONCE', '1')
return inplace_def + [alg_def, alg_choose_def]
return inplace_def + [alg_def, alg_choose_def, alg_choose_once_def]
def make_node(self, kern, topgrad, output, desc, alpha=None, beta=None):
kern = as_cuda_ndarray_variable(kern)
......@@ -913,7 +921,7 @@ class GpuDnnConv3dGradI(GpuDnnConvGradI):
if workmem == None:
workmem = 'none'
super(GpuDnnConv3dGradI, self).__init__(inplace, workmem)
assert self.workmem in ['none', 'time','guess']
assert self.workmem in ['none', 'time', 'guess', 'guess_once']
def grad(self, inp, grads):
......
......@@ -41,22 +41,42 @@ APPLY_SPECIFIC(conv_gi)(CudaNdarray *kerns, CudaNdarray *output,
if (CHOOSE_ALGO)
{
// Check if the kernels and the output have the same shape as they have
// last time the apply node was executed
bool same_shapes = true;
for (int i = 0; (i < nb_dim) && same_shapes; i++)
// A new convolution implementation should be selected, based on
// heuristics, in one of the two following cases:
// - The implementation should only be chosen during the first execution
// of an apply node and this is the first execution of the apply node.
// - The implementation should be chosen as often as necessary and the
// shapes of the inputs differ from the last time an implementation
// was chosen.
bool reuse_previous_algo;
if (CHOOSE_ALGO_ONCE)
{
// Only choose a new implementation if none has been chosen before.
reuse_previous_algo = APPLY_SPECIFIC(previous_algo_set);
}
else
{
same_shapes &= (CudaNdarray_HOST_DIMS(kerns)[i] ==
APPLY_SPECIFIC(previous_kerns_shape)[i]);
same_shapes &= (CudaNdarray_HOST_DIMS(output)[i] ==
APPLY_SPECIFIC(previous_output_shape)[i]);
// Reuse the previous implementation if the kernels and the outputs
// have the same shapes as they had when the previous implementation
// was selected
bool same_shapes = true;
for (int i = 0; (i < nb_dim) && same_shapes; i++)
{
same_shapes &= (CudaNdarray_HOST_DIMS(kerns)[i] ==
APPLY_SPECIFIC(previous_kerns_shape)[i]);
same_shapes &= (CudaNdarray_HOST_DIMS(output)[i] ==
APPLY_SPECIFIC(previous_output_shape)[i]);
}
reuse_previous_algo = same_shapes;
}
if (!same_shapes)
// If the previously chosen implementation can't be reused, select a
// new one based on the shapes of the current inputs
if (!reuse_previous_algo)
{
// The shape of the kernels and/or the output is different from the
// last execution. Use the current shapes to infer the implementation
// to use from now on.
// Choose the convolution implementation using heuristics based on the
// shapes of the inputs and the amount of memory available.
// Get the amount of available memory
size_t free = 0, total = 0;
......@@ -100,9 +120,7 @@ APPLY_SPECIFIC(conv_gi)(CudaNdarray *kerns, CudaNdarray *output,
}
else
{
// The shapes of the kernels and the output are the same as for the
// last execution. The convolution algorithm used last time can also
// be used here
// Reuse the previously chosen convolution implementation
chosen_algo = APPLY_SPECIFIC(previous_bwd_d_algo);
}
}
......
......@@ -41,22 +41,42 @@ APPLY_SPECIFIC(conv_gw)(CudaNdarray *input, CudaNdarray *output,
if (CHOOSE_ALGO)
{
// Check if the input and the output have the same shape as they have
// last time the apply node was executed
bool same_shapes = true;
for (int i = 0; (i < nb_dim) && same_shapes; i++)
// A new convolution implementation should be selected, based on
// heuristics, in one of the two following cases:
// - The implementation should only be chosen during the first execution
// of an apply node and this is the first execution of the apply node.
// - The implementation should be chosen as often as necessary and the
// shapes of the inputs differ from the last time an implementation
// was chosen.
bool reuse_previous_algo;
if (CHOOSE_ALGO_ONCE)
{
// Only choose a new implementation if none has been chosen before.
reuse_previous_algo = APPLY_SPECIFIC(previous_algo_set);
}
else
{
same_shapes &= (CudaNdarray_HOST_DIMS(input)[i] ==
APPLY_SPECIFIC(previous_input_shape)[i]);
same_shapes &= (CudaNdarray_HOST_DIMS(output)[i] ==
APPLY_SPECIFIC(previous_output_shape)[i]);
// Reuse the previous implementation if the inputs and the outputs
// have the same shapes as they had when the previous implementation
// was selected
bool same_shapes = true;
for (int i = 0; (i < nb_dim) && same_shapes; i++)
{
same_shapes &= (CudaNdarray_HOST_DIMS(input)[i] ==
APPLY_SPECIFIC(previous_input_shape)[i]);
same_shapes &= (CudaNdarray_HOST_DIMS(output)[i] ==
APPLY_SPECIFIC(previous_output_shape)[i]);
}
reuse_previous_algo = same_shapes;
}
if (!same_shapes)
// If the previously chosen implementation can't be reused, select a
// new one based on the shapes of the current inputs
if (!reuse_previous_algo)
{
// The shape of the inputs and/or the output is different from the
// last execution. Use the current shapes to infer the implementation
// to use from now on.
// Choose the convolution implementation using heuristics based on the
// shapes of the inputs and the amount of memory available.
// Get the amount of available memory
size_t free = 0, total = 0;
......@@ -100,9 +120,7 @@ APPLY_SPECIFIC(conv_gw)(CudaNdarray *input, CudaNdarray *output,
}
else
{
// The shapes of the input and the output are the same as for the
// last execution. The convolution algorithm used last time can also
// be used here
// Reuse the previously chosen convolution implementation
chosen_algo = APPLY_SPECIFIC(previous_bwd_f_algo);
}
}
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论