提交 c251a77d authored 作者: Arnaud Bergeron's avatar Arnaud Bergeron 提交者: Frederic Bastien

Use the real amount of memory that is available to select algorithm in cudnn.

上级 59f671e2
......@@ -98,12 +98,35 @@ APPLY_SPECIFIC(conv_fwd)(PyGpuArrayObject *input, PyGpuArrayObject *kerns,
#endif
if (!reuse_algo) {
size_t free;
int err2 = gpucontext_property(ctx, GA_CTX_PROP_LARGEST_MEMBLOCK, &free);
if (err2 != GA_NO_ERROR) {
PyErr_Format(PyExc_RuntimeError, "Error when trying to find the "
"memory information on the GPU");
cuda_exit(c->ctx);
return 1;
}
// Guess 4Mb if the info is not available
if (free == 0) free = 4 * 1024 * 1024;
#ifdef CHOOSE_TIME
int count;
cudnnConvolutionFwdAlgoPerf_t choice;
err = cudnnFindConvolutionForwardAlgorithm(
gpudata *tmpmem;
tmpmem = gpudata_alloc(ctx, free, NULL, 0, NULL);
if (tmpmem == NULL) {
PyErr_SetString(PyExc_MemoryError, "Could not allocate working GPU memory");
return -1;
}
err = cudnnFindConvolutionForwardAlgorithmEx(
_handle, APPLY_SPECIFIC(input), APPLY_SPECIFIC(kerns),
desc, APPLY_SPECIFIC(output), 1, &count, &choice);
desc, APPLY_SPECIFIC(output), 1, &count, &choice, *(void **)tmpmem,
free);
gpudata_release(tmpmem);
if (err != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_RuntimeError,
......@@ -114,16 +137,6 @@ APPLY_SPECIFIC(conv_fwd)(PyGpuArrayObject *input, PyGpuArrayObject *kerns,
}
algo = choice.algo;
#else
size_t free;
int err2 = gpucontext_property(c->ctx, GA_CTX_PROP_FREE_GMEM, &free);
if (err2 != GA_NO_ERROR) {
PyErr_Format(PyExc_RuntimeError, "Error when trying to find the "
"memory information on the GPU");
cuda_exit(c->ctx);
return 1;
}
err = cudnnGetConvolutionForwardAlgorithm(
_handle, APPLY_SPECIFIC(input), APPLY_SPECIFIC(kerns),
desc, APPLY_SPECIFIC(output),
......
......@@ -140,13 +140,31 @@ APPLY_SPECIFIC(conv_gi)(PyGpuArrayObject *kerns, PyGpuArrayObject *output,
#endif
if (!reuse_algo) {
size_t free;
int err2 = gpucontext_property(c->ctx, GA_CTX_PROP_LARGEST_MEMBLOCK, &free);
if (err2 != GA_NO_ERROR) {
PyErr_Format(PyExc_RuntimeError, "Error when trying to find the "
"memory information on the GPU");
cuda_exit(c->ctx);
return 1;
}
#ifdef CHOOSE_TIME
int count;
cudnnConvolutionBwdDataAlgoPerf_t choice;
gpudata *tmpmem;
tmpmem = gpudata_alloc(ctx, mem_sz, NULL, 0, NULL);
if (tmpmem == NULL) {
PyErr_SetString(PyExc_MemoryError, "Could not allocate working GPU memory");
return -1;
}
err = cudnnFindConvolutionBackwardDataAlgorithm(
err = cudnnFindConvolutionBackwardDataAlgorithmEx(
_handle, APPLY_SPECIFIC(kerns), APPLY_SPECIFIC(output), desc,
APPLY_SPECIFIC(input), 1, &count, &choice);
APPLY_SPECIFIC(input), 1, &count, &choice, *(void **)tmpmem, free);
gpudata_release(tmpmem);
if (err != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_RuntimeError, "error selecting convolution algo: %s",
......@@ -157,16 +175,6 @@ APPLY_SPECIFIC(conv_gi)(PyGpuArrayObject *kerns, PyGpuArrayObject *output,
algo = choice.algo;
#else
size_t free;
int err2 = gpucontext_property(c->ctx, GA_CTX_PROP_FREE_GMEM, &free);
if (err2 != GA_NO_ERROR) {
PyErr_Format(PyExc_RuntimeError, "Error when trying to find the "
"memory information on the GPU");
cuda_exit(c->ctx);
return 1;
}
err = cudnnGetConvolutionBackwardDataAlgorithm(
_handle, APPLY_SPECIFIC(kerns), APPLY_SPECIFIC(output),
desc, APPLY_SPECIFIC(input),
......
......@@ -140,13 +140,34 @@ APPLY_SPECIFIC(conv_gw)(PyGpuArrayObject *input, PyGpuArrayObject *output,
#endif
if (!reuse_algo) {
size_t free;
int err2 = gpucontext_property(ctx, GA_CTX_PROP_LARGEST_MEMBLOCK, &free);
if (err2 != GA_NO_ERROR) {
PyErr_Format(PyExc_RuntimeError, "Error when trying to find the "
"memory information on the GPU");
cuda_exit(c->ctx);
return 1;
}
// Guess 4Mb if the info is not available
if (free == 0) free = 4 * 1024 * 1024;
#ifdef CHOOSE_TIME
int count;
cudnnConvolutionBwdFilterAlgoPerf_t choice;
gpudata *tmpmem;
err = cudnnFindConvolutionBackwardFilterAlgorithm(
tmpmem = gpudata_alloc(ctx, free, NULL, 0, NULL);
if (tmpmem == NULL) {
PyErr_SetString(PyExc_MemoryError, "Could not allocate working GPU memory");
return -1;
}
err = cudnnFindConvolutionBackwardFilterAlgorithmEx(
_handle, APPLY_SPECIFIC(input), APPLY_SPECIFIC(output), desc,
APPLY_SPECIFIC(kerns), 1, &count, &choice);
APPLY_SPECIFIC(kerns), 1, &count, &choice, *(void **)tmpmem, free);
gpudata_release(tmpmem);
if (err != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_RuntimeError,
......@@ -158,16 +179,6 @@ APPLY_SPECIFIC(conv_gw)(PyGpuArrayObject *input, PyGpuArrayObject *output,
algo = choice.algo;
#else
size_t free;
int err2 = gpucontext_property(c->ctx, GA_CTX_PROP_FREE_GMEM, &free);
if (err2 != GA_NO_ERROR) {
PyErr_Format(PyExc_RuntimeError, "Error when trying to find the "
"memory information on the GPU");
cuda_exit(c->ctx);
return 1;
}
err = cudnnGetConvolutionBackwardFilterAlgorithm(
_handle, APPLY_SPECIFIC(input), APPLY_SPECIFIC(output),
desc, APPLY_SPECIFIC(kerns),
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论