提交 221fb064 authored 作者: --global's avatar --global

Integrate v3 in GpuDnnConvGradW

上级 813bc1e9
......@@ -539,18 +539,24 @@ class GpuDnnConvGradW(DnnBase, COp):
:param descr: the convolution descriptor
"""
# Hashable properties of the op; `workmem` participates so ops built with
# different workspace strategies compare (and cache) as distinct.
__props__ = ('workmem', 'inplace',)
__input_name__ = ('image', 'grad', 'output', 'descriptor', 'alpha', 'beta')
def __init__(self, inplace=False, workmem=None):
    """Gradient of a cuDNN convolution with respect to the weights.

    :param inplace: if True, the output buffer (input 2) is overwritten
        with the result (declared via ``destroy_map``).
    :param workmem: one of 'none', 'deterministic', 'fft' or 'guess',
        selecting the cuDNN backward-filter algorithm. Defaults to
        ``config.dnn.conv.workmem_bwd``.
    """
    COp.__init__(self, ["dnn_base.c", "dnn_conv_base.c", "dnn_gw.c"],
                 "APPLY_SPECIFIC(conv_gw)")
    if workmem is None:
        workmem = config.dnn.conv.workmem_bwd
    # Validate before storing so an invalid value never leaves a
    # half-initialized op behind.
    assert workmem in ['none', 'deterministic', 'fft', 'guess']
    self.workmem = workmem
    self.inplace = inplace
    if self.inplace:
        # Output 0 reuses (destroys) input 2, the preallocated buffer.
        self.destroy_map = {0: [2]}
def __setstate__(self, d):
    """Restore pickled state, filling in attributes that did not exist
    when older pickles of this op were created."""
    self.__dict__.update(d)
    # Defaults for attributes added after old pickles were written.
    for attr, default in (('workmem', 'none'), ('inplace', False)):
        if not hasattr(self, attr):
            setattr(self, attr, default)
......@@ -574,9 +580,29 @@ class GpuDnnConvGradW(DnnBase, COp):
def get_op_params(self):
    """Return the C ``#define`` pairs that configure ``dnn_gw.c``.

    Emits ``CONV_INPLACE`` when operating in place, plus ``CONV_ALGO``
    (the cuDNN backward-filter algorithm constant) and ``CHOOSE_ALGO``
    (1 means the C code picks an algorithm at runtime via cuDNN's
    heuristic, 0 means use ``CONV_ALGO`` as given).
    """
    if self.inplace:
        inplace_def = [('CONV_INPLACE', '1')]
    else:
        inplace_def = []

    if version() == -1:
        # Old cuDNN without algorithm selection support.
        alg_def = ('CONV_ALGO', '0')
        # BUG FIX: alg_choose_def was left unbound on this path, which
        # raised NameError at the return below.
        alg_choose_def = ('CHOOSE_ALGO', '0')
    elif self.workmem == 'none':
        alg_def = ('CONV_ALGO', 'CUDNN_CONVOLUTION_BWD_FILTER_ALGO_0')
        alg_choose_def = ('CHOOSE_ALGO', '0')
    elif self.workmem == 'deterministic':
        alg_def = ('CONV_ALGO', 'CUDNN_CONVOLUTION_BWD_FILTER_ALGO_1')
        alg_choose_def = ('CHOOSE_ALGO', '0')
    elif self.workmem == 'fft':
        alg_def = ('CONV_ALGO', 'CUDNN_CONVOLUTION_BWD_FILTER_ALGO_FFT')
        alg_choose_def = ('CHOOSE_ALGO', '0')
    else:
        # 'guess': the convolution implementation should be chosen at
        # runtime according to cuDNN's heuristic.
        assert self.workmem == 'guess'
        alg_def = ('CONV_ALGO', 'CUDNN_CONVOLUTION_BWD_FILTER_ALGO_0')
        alg_choose_def = ('CHOOSE_ALGO', '1')

    return inplace_def + [alg_def, alg_choose_def]
def make_node(self, img, topgrad, output, desc, alpha=None, beta=None):
img = as_cuda_ndarray_variable(img)
......
#section support_code_struct
int
int
APPLY_SPECIFIC(conv_gw)(CudaNdarray *input, CudaNdarray *output,
CudaNdarray *km, cudnnConvolutionDescriptor_t desc,
float alpha, float beta, CudaNdarray **kerns) {
......@@ -8,7 +8,7 @@ APPLY_SPECIFIC(conv_gw)(CudaNdarray *input, CudaNdarray *output,
if (CudaNdarray_HOST_DIMS(input)[1] != CudaNdarray_HOST_DIMS(km)[1]) {
PyErr_SetString(PyExc_ValueError,
"GpuDnnConv images and kernel must have the same stack size\n");
"GpuDnnConv images and kernel must have the same stack size\n");
return 1;
}
......@@ -31,14 +31,110 @@ APPLY_SPECIFIC(conv_gw)(CudaNdarray *input, CudaNdarray *output,
if (c_set_filter(*kerns, APPLY_SPECIFIC(kerns)) == -1)
return 1;
/* v3 path: select an algorithm, size and obtain a workspace, then run the
 * backward-filter convolution. (The superseded pre-v3
 * cudnnConvolutionBackwardFilter call that used to precede this block is
 * removed — with it present the convolution ran twice.) */
{
  size_t worksize;
  void *workspace;
  cudnnConvolutionBwdFilterAlgo_t chosen_algo;

  if (CHOOSE_ALGO)
  {
    // Runtime selection: re-run the cuDNN heuristic whenever the input or
    // output shape differs from the previous execution of this apply node.
    bool same_shapes = true;
    for (int i = 0; (i < 4) && same_shapes; i++)
    {
      // BUG FIX: these comparisons must test equality; the original used
      // `!=`, which reused a stale cached algorithm exactly when the
      // shapes had changed.
      same_shapes &= (CudaNdarray_HOST_DIMS(input)[i] ==
                      APPLY_SPECIFIC(previous_input_shape)[i]);
      same_shapes &= (CudaNdarray_HOST_DIMS(output)[i] ==
                      APPLY_SPECIFIC(previous_output_shape)[i]);
    }

    if (!same_shapes)
    {
      // The shapes differ from the last execution. Query the free GPU
      // memory so the heuristic can pick the fastest algorithm that fits,
      // then cache the choice together with the new shapes.
      size_t free = 0, total = 0;
      cudaError_t err2 = cudaMemGetInfo(&free, &total);
      if (err2 != cudaSuccess){
        cudaGetLastError();
        fprintf(stderr,
                "Error when trying to find the memory information"
                " on the GPU: %s\n", cudaGetErrorString(err2));
        return 1;
      }

      // Use heuristics to choose the implementation.
      err = cudnnGetConvolutionBackwardFilterAlgorithm(_handle,
                                          APPLY_SPECIFIC(input),
                                          APPLY_SPECIFIC(output),
                                          desc,
                                          APPLY_SPECIFIC(kerns),
                                          CUDNN_CONVOLUTION_BWD_FILTER_PREFER_FASTEST,
                                          free,
                                          &chosen_algo);
      if (err != CUDNN_STATUS_SUCCESS) {
        PyErr_Format(PyExc_RuntimeError,
                     "GpuDnnConvGradW: error selecting convolution algo: %s",
                     cudnnGetErrorString(err));
        return 1;
      }

      // Store the shapes of the inputs and outputs as well as the chosen
      // algorithm for future use.
      APPLY_SPECIFIC(previous_bwd_f_algo) = chosen_algo;
      for (int i = 0; i < 4; i++)
      {
        APPLY_SPECIFIC(previous_input_shape)[i] =
                                        CudaNdarray_HOST_DIMS(input)[i];
        APPLY_SPECIFIC(previous_output_shape)[i] =
                                        CudaNdarray_HOST_DIMS(output)[i];
      }
    }
    else
    {
      // BUG FIX: same shapes as last run — reuse the cached algorithm.
      // The original attached the CONV_ALGO assignment to this branch,
      // leaving `chosen_algo` uninitialized when CHOOSE_ALGO was 0.
      chosen_algo = APPLY_SPECIFIC(previous_bwd_f_algo);
    }
  }
  else
  {
    // Static selection made on the Python side via the `workmem` param.
    chosen_algo = CONV_ALGO;
  }

  // Infer the required workspace size from the chosen implementation.
  err = cudnnGetConvolutionBackwardFilterWorkspaceSize(_handle,
                                                       APPLY_SPECIFIC(input),
                                                       APPLY_SPECIFIC(output),
                                                       desc,
                                                       APPLY_SPECIFIC(kerns),
                                                       chosen_algo,
                                                       &worksize);
  if (err != CUDNN_STATUS_SUCCESS) {
    PyErr_Format(PyExc_RuntimeError,
                 "GpuDnnConvGradW: error getting worksize: %s",
                 cudnnGetErrorString(err));
    return 1;
  }

  // Allocate workspace for the convolution.
  workspace = get_work_mem(worksize);
  if (workspace == NULL && worksize != 0)
    return 1;

  // Perform the convolution.
  // BUG FIX: pass `workspace` (void *), not `&workspace` (void **) — the
  // v3 API expects the device buffer itself, not the address of the local.
  err = cudnnConvolutionBackwardFilter_v3(
    _handle,
    (void *)&alpha,
    APPLY_SPECIFIC(input), CudaNdarray_DEV_DATA(input),
    APPLY_SPECIFIC(output), CudaNdarray_DEV_DATA(output),
    desc,
    chosen_algo,
    workspace, worksize,
    (void *)&beta,
    APPLY_SPECIFIC(kerns), CudaNdarray_DEV_DATA(*kerns));
}
if (err != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_RuntimeError, "GpuDnnConvGradW: error doing operation: %s",
cudnnGetErrorString(err));
......
Markdown 格式
0%
您将 0 人添加到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论