提交 da1374e8 authored 作者: Alexander Matyasko's avatar Alexander Matyasko

Add magma gpu eigh (eigen decompositon for a symmetric matrix)

上级 deedf6d5
......@@ -593,7 +593,7 @@ class GpuMagmaQR(CGpuKernelBase):
"""
params_type = gpu_context_type
__props__ = ('complete',)
__props__ = ('complete', )
def __init__(self, complete=True):
self.complete = complete
......@@ -636,3 +636,68 @@ class GpuMagmaQR(CGpuKernelBase):
if self.complete:
params.append(('COMPLETE', '1'))
return params
class GpuMagmaEigh(COp):
"""Computes the eigen decomposition of a symmetric matrix :math:`A` using magma
library.
Parameters
----------
UPLO : Specifies whether the calculation is done with the lower triangular
part of matrix (`L`, default) or the upper triangular part (`U`).
compute_v : If `True`, computes eigenvalues and eigenvectors (`True`,
default). If `False`, computes only eigenvalues of matrix.
"""
__props__ = ('lower', )
params_type = gpu_context_type
def __init__(self, UPLO='L', compute_v=True):
assert UPLO in ['L', 'U']
self.lower = UPLO == 'L'
self.compute_v = compute_v
COp.__init__(self, ['magma_eigh.c'], 'APPLY_SPECIFIC(magma_eigh)')
def c_headers(self):
return ['gpuarray/types.h', 'gpuarray/array.h', 'gpuarray/ext_cuda.h',
'gpuarray_helper.h', 'magma.h']
def c_header_dirs(self):
dirs = [os.path.dirname(__file__), pygpu.get_include()]
if config.magma.include_path:
dirs.append(config.magma.include_path)
return dirs
def c_libraries(self):
return ['magma']
def c_lib_dirs(self):
if config.magma.library_path:
return [config.magma.library_path]
return []
def make_node(self, A):
ctx_name = infer_context_name(A)
A = as_gpuarray_variable(A, ctx_name)
if A.ndim != 2:
raise LinAlgError("Matrix rank error")
if self.compute_v:
return theano.Apply(self, [A],
[GpuArrayType(A.dtype, broadcastable=[False],
context_name=ctx_name)(),
A.type()])
else:
return theano.Apply(self, [A],
[GpuArrayType(A.dtype, broadcastable=[False],
context_name=ctx_name)()])
def get_params(self, node):
return node.inputs[0].type.context
def get_op_params(self):
params = []
if self.lower:
params.append(('LOWER', '1'))
if self.compute_v:
params.append(('COMPUTE_V', '1'))
return params
#section init_code
setup_ext_cuda();
#section support_code_struct
int APPLY_SPECIFIC(magma_eigh)(PyGpuArrayObject *A_,
PyGpuArrayObject **D,
#ifdef COMPUTE_V
PyGpuArrayObject **V,
#endif
PyGpuContextObject *c) {
PyGpuArrayObject *A = NULL;
magma_int_t N, liwork, *iwork_data = NULL;
size_t d_dims[1], v_dims[2];
magma_uplo_t uplo;
magma_vec_t jobz;
float *w_data = NULL, *wA_data = NULL, *work_data = NULL, lwork;
int res = -1, info;
if (A_->ga.typecode != GA_FLOAT) {
PyErr_SetString(PyExc_TypeError,
"GpuMagmaEigh: Unsupported data type");
return -1;
}
if (!GpuArray_IS_C_CONTIGUOUS(&A_->ga)) {
PyErr_SetString(PyExc_ValueError,
"GpuMagmaEigh: requires data to be C-contiguous");
return -1;
}
if (PyGpuArray_NDIM(A_) != 2) {
PyErr_SetString(PyExc_ValueError,
"GpuMagmaEigh: matrix rank error");
return -1;
}
if (PyGpuArray_DIM(A_, 0) != PyGpuArray_DIM(A_, 1)) {
PyErr_SetString(PyExc_ValueError,
"GpuMagmaEigh: matrix is not square");
return -1;
}
A = pygpu_copy(A_, GA_F_ORDER);
if (A == NULL) {
PyErr_SetString(PyExc_RuntimeError,
"GpuMagmaQR: failed to change to column-major order");
return -1;
}
// This is early to match the exit() in the fail label.
cuda_enter(c->ctx);
magma_init();
// magma matrix eigen decomposition of a symmetric matrix
N = PyGpuArray_DIM(A, 0);
#ifdef LOWER
uplo = MagmaLower;
#else
uplo = MagmaUpper;
#endif
#ifdef COMPUTE_V
jobz = MagmaVec;
#else
jobz = MagmaNoVec;
#endif
if (MAGMA_SUCCESS != magma_smalloc_pinned(&w_data, N)) {
PyErr_SetString(PyExc_RuntimeError,
"GpuMagmaSVD: failed to allocate working memory");
goto fail;
}
if (MAGMA_SUCCESS != magma_smalloc_pinned(&wA_data, N * N)) {
PyErr_SetString(PyExc_RuntimeError,
"GpuMagmaSVD: failed to allocate working memory");
goto fail;
}
// query for workspace size
magma_ssyevd_gpu(jobz, uplo, N, NULL, N, NULL, NULL, N, &lwork,
-1, &liwork, -1, &info);
if (MAGMA_SUCCESS != magma_smalloc_pinned(&work_data, (size_t)lwork)) {
PyErr_SetString(PyExc_RuntimeError,
"GpuMagmaEigh: failed to allocate working memory");
goto fail;
}
if (MAGMA_SUCCESS != magma_imalloc_cpu(&iwork_data, liwork)) {
PyErr_SetString(PyExc_RuntimeError,
"GpuMagmaEigh: failed to allocate working memory");
goto fail;
}
magma_ssyevd_gpu(jobz, uplo, N, (float *)PyGpuArray_DEV_DATA(A), N, w_data,
wA_data, N, work_data, (size_t)lwork, iwork_data, liwork,
&info);
if (info > 0) {
PyErr_Format(
PyExc_RuntimeError,
"GpuMagmaEigh: %d off-diagonal elements of an didn't converge to zero",
info);
goto fail;
} else if (info < 0) {
PyErr_Format(
PyExc_RuntimeError,
"GpuMagmaEigh: magma_ssyevd_gpu argument %d has an illegal value", -info);
goto fail;
}
d_dims[0] = N;
if (theano_prep_output(D, 1, d_dims, A->ga.typecode, GA_C_ORDER, c) != 0){
PyErr_SetString(PyExc_RuntimeError,
"GpuMagmaEigh: failed to allocate memory for the output");
goto fail;
}
cudaMemcpy(PyGpuArray_DEV_DATA(*D), w_data, N * sizeof(float),
cudaMemcpyDeviceToDevice);
#ifdef COMPUTE_V
*V = theano_try_copy(*V, A);
if (*V == NULL) {
PyErr_SetString(PyExc_RuntimeError,
"GpuMagmaEigh: failed to allocate memory for the output");
goto fail;
}
#endif
res = 0;
fail:
if (w_data != NULL)
magma_free_pinned(w_data);
if (wA_data != NULL)
magma_free_pinned(wA_data);
if (work_data != NULL)
magma_free_pinned(work_data);
if (iwork_data != NULL)
magma_free_cpu(iwork_data);
magma_finalize();
cuda_exit(c->ctx);
return res;
}
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论