提交 926dec89 authored 作者: Frédéric Bastien's avatar Frédéric Bastien 提交者: GitHub

Merge pull request #6368 from notoraptor/fix-cudnn-conv-timed-algos-with-beta

Fix error related to cuDNN runtime timed algos when beta is non-zero.
...@@ -234,14 +234,24 @@ APPLY_SPECIFIC(conv_fwd)(PyGpuArrayObject *input, PyGpuArrayObject *kerns, ...@@ -234,14 +234,24 @@ APPLY_SPECIFIC(conv_fwd)(PyGpuArrayObject *input, PyGpuArrayObject *kerns,
if (input->ga.typecode == GA_HALF) if (input->ga.typecode == GA_HALF)
c_set_math_type_for_conv(desc, CUDNN_TENSOR_OP_MATH); c_set_math_type_for_conv(desc, CUDNN_TENSOR_OP_MATH);
/* cudnnFindConvolutionForwardAlgorithmEx() may write to the output buffer.
We don't want that if the existing output values are used in the computation (i.e. if beta != 0). */
PyGpuArrayObject* o = *output;
if (beta != 0) {
o = pygpu_empty(PyGpuArray_NDIM(*output), PyGpuArray_DIMS(*output), (*output)->ga.typecode, GA_C_ORDER, c, Py_None);
}
// We don't sync the buffer as we don't care about the values. // We don't sync the buffer as we don't care about the values.
err = cudnnFindConvolutionForwardAlgorithmEx( err = cudnnFindConvolutionForwardAlgorithmEx(
params->handle, APPLY_SPECIFIC(input), PyGpuArray_DEV_DATA(input), params->handle, APPLY_SPECIFIC(input), PyGpuArray_DEV_DATA(input),
APPLY_SPECIFIC(kerns), PyGpuArray_DEV_DATA(kerns), APPLY_SPECIFIC(kerns), PyGpuArray_DEV_DATA(kerns),
desc, APPLY_SPECIFIC(output), PyGpuArray_DEV_DATA(*output), desc, APPLY_SPECIFIC(output), PyGpuArray_DEV_DATA(o),
1, &count, &choice, *(void **)tmpmem, 1, &count, &choice, *(void **)tmpmem,
maxfree); maxfree);
gpudata_release(tmpmem); gpudata_release(tmpmem);
if (beta != 0) {
Py_XDECREF(o);
}
if (err != CUDNN_STATUS_SUCCESS) { if (err != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_RuntimeError, PyErr_Format(PyExc_RuntimeError,
......
...@@ -204,12 +204,22 @@ APPLY_SPECIFIC(conv_gi)(PyGpuArrayObject *kerns, PyGpuArrayObject *output, ...@@ -204,12 +204,22 @@ APPLY_SPECIFIC(conv_gi)(PyGpuArrayObject *kerns, PyGpuArrayObject *output,
return -1; return -1;
} }
/* cudnnFindConvolutionBackwardDataAlgorithmEx() may write to its output buffer (here, *input).
We don't want that if the existing values of that buffer are used in the computation (i.e. if beta != 0). */
PyGpuArrayObject* ip = *input;
if (beta != 0) {
ip = pygpu_empty(PyGpuArray_NDIM(*input), PyGpuArray_DIMS(*input), (*input)->ga.typecode, GA_C_ORDER, c, Py_None);
}
err = cudnnFindConvolutionBackwardDataAlgorithmEx( err = cudnnFindConvolutionBackwardDataAlgorithmEx(
params->handle, APPLY_SPECIFIC(kerns), PyGpuArray_DEV_DATA(kerns), params->handle, APPLY_SPECIFIC(kerns), PyGpuArray_DEV_DATA(kerns),
APPLY_SPECIFIC(output), PyGpuArray_DEV_DATA(output), desc, APPLY_SPECIFIC(output), PyGpuArray_DEV_DATA(output), desc,
APPLY_SPECIFIC(input), PyGpuArray_DEV_DATA(*input), APPLY_SPECIFIC(input), PyGpuArray_DEV_DATA(ip),
1, &count, &choice, *(void **)tmpmem, maxfree); 1, &count, &choice, *(void **)tmpmem, maxfree);
gpudata_release(tmpmem); gpudata_release(tmpmem);
if (beta != 0) {
Py_XDECREF(ip);
}
if (err != CUDNN_STATUS_SUCCESS) { if (err != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_RuntimeError, "error selecting convolution algo: %s", PyErr_Format(PyExc_RuntimeError, "error selecting convolution algo: %s",
......
...@@ -191,12 +191,22 @@ APPLY_SPECIFIC(conv_gw)(PyGpuArrayObject *input, PyGpuArrayObject *output, ...@@ -191,12 +191,22 @@ APPLY_SPECIFIC(conv_gw)(PyGpuArrayObject *input, PyGpuArrayObject *output,
return -1; return -1;
} }
/* cudnnFindConvolutionBackwardFilterAlgorithmEx() may write to its output buffer (here, *kerns).
We don't want that if the existing values of that buffer are used in the computation (i.e. if beta != 0). */
PyGpuArrayObject* k = *kerns;
if (beta != 0) {
k = pygpu_empty(PyGpuArray_NDIM(*kerns), PyGpuArray_DIMS(*kerns), (*kerns)->ga.typecode, GA_C_ORDER, c, Py_None);
}
err = cudnnFindConvolutionBackwardFilterAlgorithmEx( err = cudnnFindConvolutionBackwardFilterAlgorithmEx(
params->handle, APPLY_SPECIFIC(input), PyGpuArray_DEV_DATA(input), params->handle, APPLY_SPECIFIC(input), PyGpuArray_DEV_DATA(input),
APPLY_SPECIFIC(output), PyGpuArray_DEV_DATA(output), desc, APPLY_SPECIFIC(output), PyGpuArray_DEV_DATA(output), desc,
APPLY_SPECIFIC(kerns), PyGpuArray_DEV_DATA(*kerns), APPLY_SPECIFIC(kerns), PyGpuArray_DEV_DATA(k),
1, &count, &choice, *(void **)tmpmem, maxfree); 1, &count, &choice, *(void **)tmpmem, maxfree);
gpudata_release(tmpmem); gpudata_release(tmpmem);
if (beta != 0) {
Py_XDECREF(k);
}
if (err != CUDNN_STATUS_SUCCESS) { if (err != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_RuntimeError, PyErr_Format(PyExc_RuntimeError,
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论