提交 118c35e8 authored 作者: Alexander Matyasko's avatar Alexander Matyasko

Add magma cholesky decomposition

上级 550b2d41
...@@ -13,7 +13,8 @@ from theano.scalar import bool as bool_t ...@@ -13,7 +13,8 @@ from theano.scalar import bool as bool_t
from theano.gof import COp, ParamsType from theano.gof import COp, ParamsType
from theano.gpuarray import GpuArrayType from theano.gpuarray import GpuArrayType
from .basic_ops import as_gpuarray_variable, gpu_contiguous, infer_context_name from .basic_ops import (CGpuKernelBase, as_gpuarray_variable, gpu_contiguous,
infer_context_name)
from .type import gpu_context_type from .type import gpu_context_type
try: try:
...@@ -518,3 +519,59 @@ def gpu_matrix_inverse(a): ...@@ -518,3 +519,59 @@ def gpu_matrix_inverse(a):
""" """
return GpuMagmaMatrixInverse()(a) return GpuMagmaMatrixInverse()(a)
class GpuMagmaCholesky(CGpuKernelBase):
"""Computes the cholesky decomposition of a matrix :math:`A` using magma
library.
"""
params_type = gpu_context_type
__props__ = ('lower', 'inplace')
def __init__(self, lower=True, inplace=False):
self.lower = lower
COp.__init__(self, ['magma_cholesky.c'], 'APPLY_SPECIFIC(magma_cholesky)')
self.inplace = inplace
if self.inplace:
self.destroy_map = {0: [0]}
def c_headers(self):
return ['gpuarray/types.h', 'gpuarray/array.h', 'gpuarray/ext_cuda.h',
'gpuarray_helper.h', 'magma.h']
def c_header_dirs(self):
dirs = [os.path.dirname(__file__), pygpu.get_include()]
if config.magma.include_path:
dirs.append(config.magma.include_path)
return dirs
def c_libraries(self):
return ['magma']
def c_lib_dirs(self):
if config.magma.library_path:
return [config.magma.library_path]
return []
def make_node(self, A):
ctx_name = infer_context_name(A)
A = as_gpuarray_variable(A, ctx_name)
A = gpu_contiguous(A)
if A.ndim != 2:
raise LinAlgError("Matrix rank error")
return theano.Apply(self, [A], [A.type()])
def get_params(self, node):
return node.inputs[0].type.context
def get_op_params(self):
params = []
if self.lower:
params.append(('LOWER', '1'))
if self.inplace:
params.append(('INPLACE', '1'))
return params
def infer_shape(self, node, shapes):
return [shapes[0]]
#section kernels
#kernel tril_kernel : size, size, *:
KERNEL void tril_kernel(const ga_size nthreads, const ga_size ncols,
GLOBAL_MEM DTYPE_INPUT_0 *a) {
// grid stride looping
for (ga_size index = GID_0 * LDIM_0 + LID_0; index < nthreads;
index += LDIM_0 * GDIM_0) {
unsigned int ix = index / ncols;
unsigned int iy = index % ncols;
if (index < nthreads) {
if (ix < iy) {
a[index] = 0.0;
}
}
}
}
#kernel triu_kernel : size, size, *:
KERNEL void triu_kernel(const ga_size nthreads, const ga_size ncols,
GLOBAL_MEM DTYPE_INPUT_0 *a) {
// grid stride looping
for (ga_size index = GID_0 * LDIM_0 + LID_0; index < nthreads;
index += LDIM_0 * GDIM_0) {
unsigned int ix = index / ncols;
unsigned int iy = index % ncols;
if (index < nthreads) {
if (ix > iy) {
a[index] = 0.0;
}
}
}
}
#section init_code
setup_ext_cuda();
#section support_code_struct
int APPLY_SPECIFIC(magma_cholesky)(PyGpuArrayObject *A, PyGpuArrayObject **L,
PyGpuContextObject *c) {
const size_t *dims;
size_t N, n2;
magma_uplo_t ul;
int res = -1, info;
if (A->ga.typecode != GA_FLOAT) {
PyErr_SetString(PyExc_TypeError,
"GpuMagmaCholesky: unsupported data type");
return -1;
}
// This is early to match the exit() in the fail label.
cuda_enter(c->ctx);
magma_init();
if (!GpuArray_IS_C_CONTIGUOUS(&A->ga)) {
PyErr_SetString(PyExc_ValueError,
"GpuMagmaCholesky: requires data to be C-contiguous");
goto fail;
}
if (PyGpuArray_NDIM(A) != 2) {
PyErr_SetString(PyExc_ValueError, "GpuMagmaCholesky: matrix rank error");
goto fail;
}
dims = PyGpuArray_DIMS(A);
if (dims[0] != dims[1]) {
PyErr_SetString(PyExc_ValueError, "GpuMagmaCholesky: matrix is not square");
goto fail;
}
#ifdef INPLACE
Py_XDECREF(*L);
*L = A;
Py_INCREF(*L);
#else
*L = theano_try_copy(*L, A);
if (*L == NULL) {
PyErr_SetString(
PyExc_RuntimeError,
"GpuMagmaCholesky: failed to allocate memory for the output");
goto fail;
}
#endif
// magma matrix cholesky
N = dims[0];
n2 = N * N;
// Magma requires column-major order for the matrix A. Instead of changing
// matrix order which requires copying data, we can compute cholesky
// decomposition where we change parameters lower to upper and upper to
// lower.
#ifdef LOWER
ul = MagmaUpper;
#else
ul = MagmaLower;
#endif
magma_spotrf_gpu(ul, N, (float *)PyGpuArray_DEV_DATA(*L), N, &info);
if (info > 0) {
PyErr_Format(PyExc_RuntimeError, "GpuMagmaCholesky: the leading minor of "
"order %d is not positive definite",
info);
goto fail;
} else if (info < 0) {
PyErr_Format(
PyExc_RuntimeError,
"GpuMagmaCholesky: magma_spotrf_gpu argument %d has an illegal value",
-info);
goto fail;
}
#ifdef LOWER
res = tril_kernel_scall(1, &n2, 0, n2, N, (*L)->ga.data);
if (res != GA_NO_ERROR) {
PyErr_Format(PyExc_RuntimeError, "GpuMagmaCholesky: triu_kernel %s.",
GpuKernel_error(&k_triu_kernel, res));
goto fail;
}
#else
res = triu_kernel_scall(1, &n2, 0, n2, N, (*L)->ga.data);
if (res != GA_NO_ERROR) {
PyErr_Format(PyExc_RuntimeError, "GpuMagmaCholesky: triu_kernel %s.",
GpuKernel_error(&k_triu_kernel, res));
goto fail;
}
#endif
res = 0;
fail:
magma_finalize();
cuda_exit(c->ctx);
return res;
}
...@@ -559,8 +559,7 @@ class QRIncomplete(Op): ...@@ -559,8 +559,7 @@ class QRIncomplete(Op):
(x,) = inputs (x,) = inputs
(q,) = outputs (q,) = outputs
assert x.ndim == 2, "The input of qr function should be a matrix." assert x.ndim == 2, "The input of qr function should be a matrix."
q[0] = self._numop(x, q[0] = self._numop(x, self.mode)
self.mode)
def qr(a, mode="reduced"): def qr(a, mode="reduced"):
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论