提交 9d413f18 authored 作者: Alexander Matyasko's avatar Alexander Matyasko

Use divide and conquer algorithm to compute svd (faster)

上级 5f1e372d
...@@ -13,8 +13,9 @@ int APPLY_SPECIFIC(magma_svd)(PyGpuArrayObject *A, ...@@ -13,8 +13,9 @@ int APPLY_SPECIFIC(magma_svd)(PyGpuArrayObject *A,
PyGpuArrayObject **VT, PyGpuArrayObject **VT,
#endif #endif
PyGpuContextObject *c) { PyGpuContextObject *c) {
magma_int_t *iwork = NULL, iunused[1];
magma_int_t M, N, K, ldu, ldv, M_U, N_VT, info; magma_int_t M, N, K, ldu, ldv, M_U, N_VT, info;
magma_vec_t jobu, jobv; magma_vec_t jobz;
size_t s_dims[1], u_dims[2], vt_dims[2]; size_t s_dims[1], u_dims[2], vt_dims[2];
float *a_data = NULL, *s_data = NULL, *u_data = NULL, *vt_data = NULL, float *a_data = NULL, *s_data = NULL, *u_data = NULL, *vt_data = NULL,
*work = NULL; *work = NULL;
...@@ -64,14 +65,12 @@ int APPLY_SPECIFIC(magma_svd)(PyGpuArrayObject *A, ...@@ -64,14 +65,12 @@ int APPLY_SPECIFIC(magma_svd)(PyGpuArrayObject *A,
#ifdef COMPUTE_UV #ifdef COMPUTE_UV
#ifdef FULL_MATRICES #ifdef FULL_MATRICES
jobu = MagmaAllVec; jobz = MagmaAllVec;
jobv = MagmaAllVec;
#else #else
jobu = MagmaSomeVec; jobz = MagmaSomeVec;
jobv = MagmaSomeVec;
#endif #endif
M_U = (jobu == MagmaAllVec ? M : K); M_U = (jobz == MagmaAllVec ? M : K);
N_VT = (jobv == MagmaAllVec ? N : K); N_VT = (jobz == MagmaAllVec ? N : K);
ldu = M; ldu = M;
ldv = N_VT; ldv = N_VT;
...@@ -86,15 +85,14 @@ int APPLY_SPECIFIC(magma_svd)(PyGpuArrayObject *A, ...@@ -86,15 +85,14 @@ int APPLY_SPECIFIC(magma_svd)(PyGpuArrayObject *A,
goto fail; goto fail;
} }
#else #else
jobu = MagmaNoVec; jobz = MagmaNoVec;
jobv = MagmaNoVec;
ldu = M; ldu = M;
ldv = N; ldv = N;
#endif #endif
// query for workspace size // query for workspace size
magma_sgesvd(jobu, jobv, M, N, NULL, M, NULL, NULL, ldu, NULL, ldv, magma_sgesdd(jobz, M, N, NULL, M, NULL, NULL, ldu, NULL, ldv,
dummy, -1, &info); dummy, -1, iunused, &info);
lwork = (magma_int_t) MAGMA_S_REAL(dummy[0]); lwork = (magma_int_t) MAGMA_S_REAL(dummy[0]);
if (MAGMA_SUCCESS != magma_smalloc_pinned(&work, lwork)) { if (MAGMA_SUCCESS != magma_smalloc_pinned(&work, lwork)) {
...@@ -103,13 +101,19 @@ int APPLY_SPECIFIC(magma_svd)(PyGpuArrayObject *A, ...@@ -103,13 +101,19 @@ int APPLY_SPECIFIC(magma_svd)(PyGpuArrayObject *A,
goto fail; goto fail;
} }
if (MAGMA_SUCCESS != magma_imalloc_cpu(&iwork, 8*K)) {
PyErr_SetString(PyExc_RuntimeError,
"GpuMagmaSVD: failed to allocate working memory");
goto fail;
}
// compute svd // compute svd
magma_sgesvd(jobu, jobv, M, N, a_data, M, s_data, magma_sgesdd(jobz, M, N, a_data, M, s_data,
u_data, ldu, vt_data, ldv, work, lwork, &info); u_data, ldu, vt_data, ldv, work, lwork, iwork, &info);
if (info > 0) { if (info > 0) {
PyErr_Format( PyErr_Format(
PyExc_RuntimeError, PyExc_RuntimeError,
"GpuMagmaSVD: magma_sgesvd_gpu %d superdiagonals failed to converge", "GpuMagmaSVD: the updating process of SBDSDC did not converge (error: %s).",
info); info);
goto fail; goto fail;
} else if (info < 0) { } else if (info < 0) {
...@@ -163,6 +167,8 @@ fail: ...@@ -163,6 +167,8 @@ fail:
magma_free_pinned(vt_data); magma_free_pinned(vt_data);
if (work != NULL) if (work != NULL)
magma_free_pinned(work); magma_free_pinned(work);
if (iwork != NULL)
magma_free_cpu(iwork);
magma_finalize(); magma_finalize();
cuda_exit(c->ctx); cuda_exit(c->ctx);
return res; return res;
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论