Commit b728a250 authored by Alexander Matyasko

Update magma cholesky to use params type

Parent commit: e44a468b
......@@ -384,9 +384,6 @@ class GpuMagmaBase(COp):
magma_init()
ctx.is_magma_initialized = True
def get_params(self, node):
    """Return the GPU context taken from the type of the node's first input."""
    first_input = node.inputs[0]
    return first_input.type.context
class GpuMagmaSVD(GpuMagmaBase):
"""Computes the svd of a matrix :math:`A` using magma library.
......@@ -534,6 +531,8 @@ class GpuMagmaCholesky(GpuMagmaBase, CGpuKernelBase):
"""
__props__ = ('lower', 'inplace')
check_input = False
params_type = ParamsType(lower=bool_t, inplace=bool_t, context=gpu_context_type)
def __init__(self, lower=True, inplace=False):
self.lower = lower
......@@ -555,13 +554,8 @@ class GpuMagmaCholesky(GpuMagmaBase, CGpuKernelBase):
raise TypeError("only `float32` is supported for now")
return theano.Apply(self, [A], [A.type()])
def get_op_params(self):
    """Return C compile-time ``#define`` flags for this op's settings.

    Yields ``('LOWER', '1')`` when ``self.lower`` is set and
    ``('INPLACE', '1')`` when ``self.inplace`` is set, in that order.
    """
    candidates = [('LOWER', '1'), ('INPLACE', '1')]
    switches = (self.lower, self.inplace)
    return [flag for flag, enabled in zip(candidates, switches) if enabled]
def get_params(self, node):
    """Build this op's ParamsType value, binding the first input's GPU context."""
    gpu_context = node.inputs[0].type.context
    return self.params_type.get_params(self, context=gpu_context)
def infer_shape(self, node, shapes):
    """The output has the same symbolic shape as the first input."""
    return list(shapes[:1])
......
......@@ -39,7 +39,7 @@ setup_ext_cuda();
#section support_code_struct
int APPLY_SPECIFIC(magma_cholesky)(PyGpuArrayObject *A, PyGpuArrayObject **L,
PyGpuContextObject *c) {
PARAMS_TYPE* params) {
const size_t *dims;
size_t N, n2;
magma_uplo_t ul;
......@@ -50,37 +50,38 @@ int APPLY_SPECIFIC(magma_cholesky)(PyGpuArrayObject *A, PyGpuArrayObject **L,
"GpuMagmaCholesky: unsupported data type");
return -1;
}
// This is early to match the exit() in the fail label.
cuda_enter(params->context->ctx);
if (!GpuArray_IS_C_CONTIGUOUS(&A->ga)) {
PyErr_SetString(PyExc_ValueError,
"GpuMagmaCholesky: requires data to be C-contiguous");
return -1;
goto fail;
}
if (PyGpuArray_NDIM(A) != 2) {
PyErr_SetString(PyExc_ValueError, "GpuMagmaCholesky: matrix rank error");
return -1;
goto fail;
}
dims = PyGpuArray_DIMS(A);
if (dims[0] != dims[1]) {
PyErr_SetString(PyExc_ValueError, "GpuMagmaCholesky: matrix is not square");
return -1;
goto fail;
}
// This is early to match the exit() in the fail label.
cuda_enter(c->ctx);
#ifdef INPLACE
Py_XDECREF(*L);
*L = A;
Py_INCREF(*L);
#else
*L = theano_try_copy(*L, A);
if (*L == NULL) {
PyErr_SetString(
PyExc_RuntimeError,
"GpuMagmaCholesky: failed to allocate memory for the output");
goto fail;
if (params->inplace) {
Py_XDECREF(*L);
*L = A;
Py_INCREF(*L);
} else {
*L = theano_try_copy(*L, A);
if (*L == NULL) {
PyErr_SetString(
PyExc_RuntimeError,
"GpuMagmaCholesky: failed to allocate memory for the output");
goto fail;
}
}
#endif
// magma matrix cholesky
N = dims[0];
......@@ -90,11 +91,12 @@ int APPLY_SPECIFIC(magma_cholesky)(PyGpuArrayObject *A, PyGpuArrayObject **L,
// matrix order which requires copying data, we can compute cholesky
// decomposition where we change parameters lower to upper and upper to
// lower.
#ifdef LOWER
ul = MagmaUpper;
#else
ul = MagmaLower;
#endif
if (params->lower) {
ul = MagmaUpper;
}
else {
ul = MagmaLower;
}
magma_spotrf_gpu(ul, N, (float *)PyGpuArray_DEV_DATA(*L), N, &info);
if (info > 0) {
PyErr_Format(PyExc_RuntimeError, "GpuMagmaCholesky: the leading minor of "
......@@ -109,23 +111,23 @@ int APPLY_SPECIFIC(magma_cholesky)(PyGpuArrayObject *A, PyGpuArrayObject **L,
goto fail;
}
#ifdef LOWER
res = tril_kernel_scall(1, &n2, 0, n2, N, (*L)->ga.offset, (*L)->ga.data);
if (res != GA_NO_ERROR) {
PyErr_Format(PyExc_RuntimeError, "GpuMagmaCholesky: tril_kernel %s.",
GpuKernel_error(&k_tril_kernel, res));
goto fail;
}
#else
res = triu_kernel_scall(1, &n2, 0, n2, N, (*L)->ga.offset, (*L)->ga.data);
if (res != GA_NO_ERROR) {
PyErr_Format(PyExc_RuntimeError, "GpuMagmaCholesky: triu_kernel %s.",
GpuKernel_error(&k_triu_kernel, res));
goto fail;
if (params->lower) {
res = tril_kernel_scall(1, &n2, 0, n2, N, (*L)->ga.offset, (*L)->ga.data);
if (res != GA_NO_ERROR) {
PyErr_Format(PyExc_RuntimeError, "GpuMagmaCholesky: tril_kernel %s.",
GpuKernel_error(&k_tril_kernel, res));
goto fail;
}
} else {
res = triu_kernel_scall(1, &n2, 0, n2, N, (*L)->ga.offset, (*L)->ga.data);
if (res != GA_NO_ERROR) {
PyErr_Format(PyExc_RuntimeError, "GpuMagmaCholesky: triu_kernel %s.",
GpuKernel_error(&k_triu_kernel, res));
goto fail;
}
}
#endif
res = 0;
fail:
cuda_exit(c->ctx);
cuda_exit(params->context->ctx);
return res;
}
......@@ -20,7 +20,6 @@ int APPLY_SPECIFIC(magma_inv)(PyGpuArrayObject *A, PyGpuArrayObject **A_inv,
// This is early to match the exit() in the fail label.
cuda_enter(params->context->ctx);
magma_init();
if (!GpuArray_IS_C_CONTIGUOUS(&A->ga)) {
PyErr_SetString(PyExc_ValueError,
......
......@@ -27,7 +27,6 @@ int APPLY_SPECIFIC(magma_svd)(PyGpuArrayObject *A,
// This is early to match the exit() in the fail label.
cuda_enter(params->context->ctx);
magma_init();
if (!GpuArray_IS_C_CONTIGUOUS(&A->ga)) {
PyErr_SetString(PyExc_ValueError,
......
......@@ -74,7 +74,7 @@ from .subtensor import (GpuIncSubtensor, GpuSubtensor,
from .opt_util import alpha_merge, output_merge, pad_dims, unpad_dims
from .reduction import GpuMaxAndArgmax
from .linalg import (GpuCusolverSolve, MATRIX_STRUCTURES_SOLVE, GpuCholesky,
cusolver_available, GpuMagmaMatrixInverse, GpuMagmaSVD,
cusolver_available, GpuMagmaMatrixInverse, gpu_svd,
GpuMagmaCholesky, GpuMagmaQR, GpuMagmaEigh)
_logger = logging.getLogger("theano.gpuarray.opt")
......
Markdown is supported
0%
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Register or sign in to comment