提交 5ac884f6 作者: Alexander Matyasko

Fix magma gpu memory leak

上级 55a78fe6
#section support_code_struct
// Struct-level scratch pointers shared across calls: `dwork` is the MAGMA
// workspace (filled by magma_smalloc in this listing) and `piv` is the pivot
// array (filled by magma_imalloc_cpu in this listing).
float *APPLY_SPECIFIC(dwork);
magma_int_t *APPLY_SPECIFIC(piv);
#section init_code
setup_ext_cuda();
#section init_code_struct
// Start from NULL so the cleanup section can tell whether anything was
// ever allocated.
APPLY_SPECIFIC(dwork) = NULL;
APPLY_SPECIFIC(piv) = NULL;
#section cleanup_code_struct
if (APPLY_SPECIFIC(dwork) != NULL) {magma_free(APPLY_SPECIFIC(dwork));}
// piv is obtained from magma_imalloc_cpu (host memory), so it must be
// released with magma_free_cpu; magma_free is the device-memory release call.
if (APPLY_SPECIFIC(piv) != NULL) {magma_free_cpu(APPLY_SPECIFIC(piv));}
#section support_code_struct
/*
 * APPLY_SPECIFIC(magma_inv): GpuMagmaMatrixInverse implementation. Factorizes
 * a copy of A with magma_sgetrf_gpu and inverts it with magma_sgetri_gpu.
 *
 * NOTE(review): this listing is a commit diff ("Fix magma gpu memory leak")
 * whose +/- markers were stripped, so removed and added lines appear
 * back-to-back, and the "......@@" lines mark elided context. Where two
 * adjacent statements do the same job (e.g. `return 1;` directly followed by
 * `goto fail;`), the first is presumably the removed line and the second its
 * replacement -- confirm against the repository before compiling.
 *
 * A       input matrix; rejected unless its typecode is GA_FLOAT and it is
 *         C-contiguous, 2-dimensional and square.
 * _A_inv  in/out: the previous output is handed to theano_try_copy(); it is
 *         overwritten with the result on success.
 * c       GPU context, entered with cuda_enter() and left with cuda_exit().
 *
 * Returns 0 on success. On failure a Python exception is set and the `fail`
 * label returns -1 after releasing the temporaries (the presumably-removed
 * lines return 1 instead).
 */
int APPLY_SPECIFIC(magma_inv)(PyGpuArrayObject *A, PyGpuArrayObject **_A_inv,
PyGpuContextObject *c) {
PyGpuArrayObject *A_inv = *_A_inv;
const size_t *dims;
magma_int_t N, ldwork, info;
// Temporaries start as NULL so the `fail` label can release only what was
// actually allocated.
magma_int_t *piv = NULL;
gpudata *dwork = NULL;
int res = -1;
if (A->ga.typecode != GA_FLOAT) {
PyErr_SetString(PyExc_TypeError,
"GpuMagmaMatrixInverse: Unsupported data type");
// Plain return: the context has not been entered yet, so there is nothing
// for the `fail` label to undo.
return -1;
}
// This is early to match the exit() in the fail label.
cuda_enter(c->ctx);
magma_init();
if (!GpuArray_IS_C_CONTIGUOUS(&A->ga)) {
PyErr_SetString(PyExc_ValueError,
......@@ -31,13 +31,13 @@ int APPLY_SPECIFIC(magma_inv)(PyGpuArrayObject *A, PyGpuArrayObject **_A_inv,
if (PyGpuArray_NDIM(A) != 2) {
PyErr_SetString(PyExc_ValueError,
"GpuMagmaMatrixInverse: matrix rank error");
return 1;
goto fail;
}
const size_t *x_dims = PyGpuArray_DIMS(A);
if (x_dims[0] != x_dims[1]) {
dims = PyGpuArray_DIMS(A);
if (dims[0] != dims[1]) {
PyErr_SetString(PyExc_ValueError,
"GpuMagmaMatrixInverse: matrix is not square");
return 1;
goto fail;
}
#ifdef INPLACE
Py_XDECREF(out);
......@@ -46,63 +46,56 @@ int APPLY_SPECIFIC(magma_inv)(PyGpuArrayObject *A, PyGpuArrayObject **_A_inv,
#else
A_inv = theano_try_copy(A_inv, A);
if (A_inv == NULL) {
PyErr_SetString(PyExc_RuntimeError,
"GpuMagmaMatrixInverse: failed to allocate memory");
return 1;
// Fixed typo: the CPython API function is PyErr_SetString.
PyErr_SetString(
PyExc_RuntimeError,
"GpuMagmaMatrixInverse: failed to allocate memory for the output");
goto fail;
}
#endif
{
// magma matrix inverse
cuda_enter(c->ctx);
magma_init();
// magma matrix inverse
magma_int_t N, ldwork, info;
N = x_dims[0];
N = dims[0];
ldwork = N * magma_get_sgetri_nb(N);
if (magma_smalloc(&APPLY_SPECIFIC(dwork), ldwork)) {
PyErr_SetString(
PyExc_RuntimeError,
"GpuMagmaMatrixInverse: failed to allocate magma working memory");
magma_finalize();
cuda_exit(c->ctx);
return 1;
}
ldwork = N * magma_get_sgetri_nb(N);
// Workspace of ldwork floats, allocated through the context so it can be
// released with gpudata_release() at the `fail` label.
dwork = gpudata_alloc(c->ctx, ldwork * sizeof(float), NULL, 0, NULL);
if (dwork == NULL) {
PyErr_SetString(PyExc_RuntimeError,
"GpuMagmaMatrixInverse: failed to allocate working memory");
goto fail;
}
if (magma_imalloc_cpu(&APPLY_SPECIFIC(piv), N)) {
PyErr_SetString(
PyExc_RuntimeError,
"GpuMagmaMatrixInverse: failed to allocate memory for pivot array");
magma_finalize();
cuda_exit(c->ctx);
return 1;
}
if (magma_imalloc_cpu(&piv, N)) {
PyErr_SetString(
PyExc_RuntimeError,
"GpuMagmaMatrixInverse: failed to allocate memory for the pivot array");
goto fail;
}
float *A_ptr = (float *)PyGpuArray_DEV_DATA(A_inv);
magma_sgetrf_gpu(N, N, A_ptr, N, APPLY_SPECIFIC(piv), &info);
if (info != 0) {
PyErr_Format(
PyExc_RuntimeError,
"GpuMagmaMatrixInverse: magma_sgetrf_gpu returned error %d: %s.",
info, magma_strerror(info));
magma_finalize();
cuda_exit(c->ctx);
return 1;
}
magma_sgetri_gpu(N, A_ptr, N, APPLY_SPECIFIC(piv), APPLY_SPECIFIC(dwork),
ldwork, &info);
if (info != 0) {
PyErr_Format(
PyExc_RuntimeError,
"GpuMagmaMatrixInverse: magma_sgetri_gpu returned error %d: %s.",
info, magma_strerror(info));
magma_finalize();
cuda_exit(c->ctx);
return 1;
}
magma_finalize();
cuda_exit(c->ctx);
// Factorization step, in place on the data of A_inv; pivots go to piv.
magma_sgetrf_gpu(N, N, (float *)PyGpuArray_DEV_DATA(A_inv), N, piv, &info);
if (info != 0) {
PyErr_Format(
PyExc_RuntimeError,
"GpuMagmaMatrixInverse: magma_sgetrf_gpu returned error %d: %s.", info,
magma_strerror(info));
goto fail;
}
// Inversion step, using the pivots from above and the dwork workspace.
// NOTE(review): `*(float **)dwork` presumably reads the raw device pointer
// stored at the start of the gpudata object -- confirm against libgpuarray.
magma_sgetri_gpu(N, (float *)PyGpuArray_DEV_DATA(A_inv), N, piv,
*(float **)dwork, ldwork, &info);
if (info != 0) {
PyErr_Format(
PyExc_RuntimeError,
"GpuMagmaMatrixInverse: magma_sgetri_gpu returned error %d: %s.", info,
magma_strerror(info));
goto fail;
}
res = 0;
*_A_inv = A_inv;
// NOTE(review): this `return 0;` is presumably the removed line; if it were
// kept, the success path would skip the cleanup below.
return 0;
fail:
// piv is obtained from magma_imalloc_cpu (host memory), so the matching
// release call is magma_free_cpu; magma_free is for device memory.
if (piv != NULL)
magma_free_cpu(piv);
if (dwork != NULL)
gpudata_release(dwork);
magma_finalize();
cuda_exit(c->ctx);
return res;
}
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论