提交 5ac884f6，作者：Alexander Matyasko

Fix magma gpu memory leak

上级 55a78fe6
#section support_code_struct
float *APPLY_SPECIFIC(dwork);
magma_int_t *APPLY_SPECIFIC(piv);
#section init_code #section init_code
setup_ext_cuda(); setup_ext_cuda();
#section init_code_struct
APPLY_SPECIFIC(dwork) = NULL;
APPLY_SPECIFIC(piv) = NULL;
#section cleanup_code_struct
if (APPLY_SPECIFIC(dwork) != NULL) {magma_free(APPLY_SPECIFIC(dwork));}
if (APPLY_SPECIFIC(piv) != NULL) {magma_free(APPLY_SPECIFIC(piv));}
#section support_code_struct #section support_code_struct
int APPLY_SPECIFIC(magma_inv)(PyGpuArrayObject *A, PyGpuArrayObject **_A_inv, int APPLY_SPECIFIC(magma_inv)(PyGpuArrayObject *A, PyGpuArrayObject **_A_inv,
PyGpuContextObject *c) { PyGpuContextObject *c) {
PyGpuArrayObject *A_inv = *_A_inv; PyGpuArrayObject *A_inv = *_A_inv;
const size_t *dims;
magma_int_t N, ldwork, info;
magma_int_t *piv = NULL;
gpudata *dwork = NULL;
int res = -1;
if (A->ga.typecode != GA_FLOAT) {
PyErr_SetString(PyExc_TypeError,
"GpuMagmaMatrixInverse: Unsupported data type");
return -1;
}
// This is early to match the exit() in the fail label.
cuda_enter(c->ctx);
magma_init();
if (!GpuArray_IS_C_CONTIGUOUS(&A->ga)) { if (!GpuArray_IS_C_CONTIGUOUS(&A->ga)) {
PyErr_SetString(PyExc_ValueError, PyErr_SetString(PyExc_ValueError,
...@@ -31,13 +31,13 @@ int APPLY_SPECIFIC(magma_inv)(PyGpuArrayObject *A, PyGpuArrayObject **_A_inv, ...@@ -31,13 +31,13 @@ int APPLY_SPECIFIC(magma_inv)(PyGpuArrayObject *A, PyGpuArrayObject **_A_inv,
if (PyGpuArray_NDIM(A) != 2) { if (PyGpuArray_NDIM(A) != 2) {
PyErr_SetString(PyExc_ValueError, PyErr_SetString(PyExc_ValueError,
"GpuMagmaMatrixInverse: matrix rank error"); "GpuMagmaMatrixInverse: matrix rank error");
return 1; goto fail;
} }
const size_t *x_dims = PyGpuArray_DIMS(A); dims = PyGpuArray_DIMS(A);
if (x_dims[0] != x_dims[1]) { if (dims[0] != dims[1]) {
PyErr_SetString(PyExc_ValueError, PyErr_SetString(PyExc_ValueError,
"GpuMagmaMatrixInverse: matrix is not square"); "GpuMagmaMatrixInverse: matrix is not square");
return 1; goto fail;
} }
#ifdef INPLACE #ifdef INPLACE
Py_XDECREF(out); Py_XDECREF(out);
...@@ -46,63 +46,56 @@ int APPLY_SPECIFIC(magma_inv)(PyGpuArrayObject *A, PyGpuArrayObject **_A_inv, ...@@ -46,63 +46,56 @@ int APPLY_SPECIFIC(magma_inv)(PyGpuArrayObject *A, PyGpuArrayObject **_A_inv,
#else #else
A_inv = theano_try_copy(A_inv, A); A_inv = theano_try_copy(A_inv, A);
if (A_inv == NULL) { if (A_inv == NULL) {
PyErr_SetString(PyExc_RuntimeError, PyErr_SetString(
"GpuMagmaMatrixInverse: failed to allocate memory"); PyExc_RuntimeError,
return 1; "GpuMagmaMatrixInverse: failed to allocate memory for the output");
goto fail;
} }
#endif #endif
{ // magma matrix inverse
// magma matrix inverse
cuda_enter(c->ctx);
magma_init();
magma_int_t N, ldwork, info; N = dims[0];
N = x_dims[0];
ldwork = N * magma_get_sgetri_nb(N); ldwork = N * magma_get_sgetri_nb(N);
if (magma_smalloc(&APPLY_SPECIFIC(dwork), ldwork)) { dwork = gpudata_alloc(c->ctx, ldwork * sizeof(float), NULL, 0, NULL);
PyErr_SetString( if (dwork == NULL) {
PyExc_RuntimeError, PyErr_SetString(PyExc_RuntimeError,
"GpuMagmaMatrixInverse: failed to allocate magma working memory"); "GpuMagmaMatrixInverse: failed to allocate working memory");
magma_finalize(); goto fail;
cuda_exit(c->ctx); }
return 1;
}
if (magma_imalloc_cpu(&APPLY_SPECIFIC(piv), N)) { if (magma_imalloc_cpu(&piv, N)) {
PyErr_SetString( PyErr_SetString(
PyExc_RuntimeError, PyExc_RuntimeError,
"GpuMagmaMatrixInverse: failed to allocate memory for pivot array"); "GpuMagmaMatrixInverse: failed to allocate memory for the pivot array");
magma_finalize(); goto fail;
cuda_exit(c->ctx); }
return 1;
}
float *A_ptr = (float *)PyGpuArray_DEV_DATA(A_inv); magma_sgetrf_gpu(N, N, (float *)PyGpuArray_DEV_DATA(A_inv), N, piv, &info);
magma_sgetrf_gpu(N, N, A_ptr, N, APPLY_SPECIFIC(piv), &info); if (info != 0) {
if (info != 0) { PyErr_Format(
PyErr_Format( PyExc_RuntimeError,
PyExc_RuntimeError, "GpuMagmaMatrixInverse: magma_sgetrf_gpu returned error %d: %s.", info,
"GpuMagmaMatrixInverse: magma_sgetrf_gpu returned error %d: %s.", magma_strerror(info));
info, magma_strerror(info)); goto fail;
magma_finalize(); }
cuda_exit(c->ctx); magma_sgetri_gpu(N, (float *)PyGpuArray_DEV_DATA(A_inv), N, piv,
return 1; *(float **)dwork, ldwork, &info);
} if (info != 0) {
magma_sgetri_gpu(N, A_ptr, N, APPLY_SPECIFIC(piv), APPLY_SPECIFIC(dwork), PyErr_Format(
ldwork, &info); PyExc_RuntimeError,
if (info != 0) { "GpuMagmaMatrixInverse: magma_sgetri_gpu returned error %d: %s.", info,
PyErr_Format( magma_strerror(info));
PyExc_RuntimeError, goto fail;
"GpuMagmaMatrixInverse: magma_sgetri_gpu returned error %d: %s.",
info, magma_strerror(info));
magma_finalize();
cuda_exit(c->ctx);
return 1;
}
magma_finalize();
cuda_exit(c->ctx);
} }
res = 0;
*_A_inv = A_inv; *_A_inv = A_inv;
return 0; fail:
if (piv != NULL)
magma_free(piv);
if (dwork != NULL)
gpudata_release(dwork);
magma_finalize();
cuda_exit(c->ctx);
return res;
} }
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
请 注册 或者 登录 后发表评论