提交 b728a250 authored 作者: Alexander Matyasko's avatar Alexander Matyasko

Update magma cholesky to use params type

上级 e44a468b
...@@ -384,9 +384,6 @@ class GpuMagmaBase(COp): ...@@ -384,9 +384,6 @@ class GpuMagmaBase(COp):
magma_init() magma_init()
ctx.is_magma_initialized = True ctx.is_magma_initialized = True
def get_params(self, node):
return node.inputs[0].type.context
class GpuMagmaSVD(GpuMagmaBase): class GpuMagmaSVD(GpuMagmaBase):
"""Computes the svd of a matrix :math:`A` using magma library. """Computes the svd of a matrix :math:`A` using magma library.
...@@ -534,6 +531,8 @@ class GpuMagmaCholesky(GpuMagmaBase, CGpuKernelBase): ...@@ -534,6 +531,8 @@ class GpuMagmaCholesky(GpuMagmaBase, CGpuKernelBase):
""" """
__props__ = ('lower', 'inplace') __props__ = ('lower', 'inplace')
check_input = False
params_type = ParamsType(lower=bool_t, inplace=bool_t, context=gpu_context_type)
def __init__(self, lower=True, inplace=False): def __init__(self, lower=True, inplace=False):
self.lower = lower self.lower = lower
...@@ -555,13 +554,8 @@ class GpuMagmaCholesky(GpuMagmaBase, CGpuKernelBase): ...@@ -555,13 +554,8 @@ class GpuMagmaCholesky(GpuMagmaBase, CGpuKernelBase):
raise TypeError("only `float32` is supported for now") raise TypeError("only `float32` is supported for now")
return theano.Apply(self, [A], [A.type()]) return theano.Apply(self, [A], [A.type()])
def get_op_params(self): def get_params(self, node):
params = [] return self.params_type.get_params(self, context=node.inputs[0].type.context)
if self.lower:
params.append(('LOWER', '1'))
if self.inplace:
params.append(('INPLACE', '1'))
return params
def infer_shape(self, node, shapes): def infer_shape(self, node, shapes):
return [shapes[0]] return [shapes[0]]
......
...@@ -39,7 +39,7 @@ setup_ext_cuda(); ...@@ -39,7 +39,7 @@ setup_ext_cuda();
#section support_code_struct #section support_code_struct
int APPLY_SPECIFIC(magma_cholesky)(PyGpuArrayObject *A, PyGpuArrayObject **L, int APPLY_SPECIFIC(magma_cholesky)(PyGpuArrayObject *A, PyGpuArrayObject **L,
PyGpuContextObject *c) { PARAMS_TYPE* params) {
const size_t *dims; const size_t *dims;
size_t N, n2; size_t N, n2;
magma_uplo_t ul; magma_uplo_t ul;
...@@ -50,37 +50,38 @@ int APPLY_SPECIFIC(magma_cholesky)(PyGpuArrayObject *A, PyGpuArrayObject **L, ...@@ -50,37 +50,38 @@ int APPLY_SPECIFIC(magma_cholesky)(PyGpuArrayObject *A, PyGpuArrayObject **L,
"GpuMagmaCholesky: unsupported data type"); "GpuMagmaCholesky: unsupported data type");
return -1; return -1;
} }
// This is early to match the exit() in the fail label.
cuda_enter(params->context->ctx);
if (!GpuArray_IS_C_CONTIGUOUS(&A->ga)) { if (!GpuArray_IS_C_CONTIGUOUS(&A->ga)) {
PyErr_SetString(PyExc_ValueError, PyErr_SetString(PyExc_ValueError,
"GpuMagmaCholesky: requires data to be C-contiguous"); "GpuMagmaCholesky: requires data to be C-contiguous");
return -1; goto fail;
} }
if (PyGpuArray_NDIM(A) != 2) { if (PyGpuArray_NDIM(A) != 2) {
PyErr_SetString(PyExc_ValueError, "GpuMagmaCholesky: matrix rank error"); PyErr_SetString(PyExc_ValueError, "GpuMagmaCholesky: matrix rank error");
return -1; goto fail;
} }
dims = PyGpuArray_DIMS(A); dims = PyGpuArray_DIMS(A);
if (dims[0] != dims[1]) { if (dims[0] != dims[1]) {
PyErr_SetString(PyExc_ValueError, "GpuMagmaCholesky: matrix is not square"); PyErr_SetString(PyExc_ValueError, "GpuMagmaCholesky: matrix is not square");
return -1; goto fail;
} }
// This is early to match the exit() in the fail label. if (params->inplace) {
cuda_enter(c->ctx); Py_XDECREF(*L);
*L = A;
#ifdef INPLACE Py_INCREF(*L);
Py_XDECREF(*L); } else {
*L = A; *L = theano_try_copy(*L, A);
Py_INCREF(*L); if (*L == NULL) {
#else PyErr_SetString(
*L = theano_try_copy(*L, A); PyExc_RuntimeError,
if (*L == NULL) { "GpuMagmaCholesky: failed to allocate memory for the output");
PyErr_SetString( goto fail;
PyExc_RuntimeError, }
"GpuMagmaCholesky: failed to allocate memory for the output");
goto fail;
} }
#endif
// magma matrix cholesky // magma matrix cholesky
N = dims[0]; N = dims[0];
...@@ -90,11 +91,12 @@ int APPLY_SPECIFIC(magma_cholesky)(PyGpuArrayObject *A, PyGpuArrayObject **L, ...@@ -90,11 +91,12 @@ int APPLY_SPECIFIC(magma_cholesky)(PyGpuArrayObject *A, PyGpuArrayObject **L,
// matrix order which requires copying data, we can compute cholesky // matrix order which requires copying data, we can compute cholesky
// decomposition where we change parameters lower to upper and upper to // decomposition where we change parameters lower to upper and upper to
// lower. // lower.
#ifdef LOWER if (params->lower) {
ul = MagmaUpper; ul = MagmaUpper;
#else }
ul = MagmaLower; else {
#endif ul = MagmaLower;
}
magma_spotrf_gpu(ul, N, (float *)PyGpuArray_DEV_DATA(*L), N, &info); magma_spotrf_gpu(ul, N, (float *)PyGpuArray_DEV_DATA(*L), N, &info);
if (info > 0) { if (info > 0) {
PyErr_Format(PyExc_RuntimeError, "GpuMagmaCholesky: the leading minor of " PyErr_Format(PyExc_RuntimeError, "GpuMagmaCholesky: the leading minor of "
...@@ -109,23 +111,23 @@ int APPLY_SPECIFIC(magma_cholesky)(PyGpuArrayObject *A, PyGpuArrayObject **L, ...@@ -109,23 +111,23 @@ int APPLY_SPECIFIC(magma_cholesky)(PyGpuArrayObject *A, PyGpuArrayObject **L,
goto fail; goto fail;
} }
#ifdef LOWER if (params->lower) {
res = tril_kernel_scall(1, &n2, 0, n2, N, (*L)->ga.offset, (*L)->ga.data); res = tril_kernel_scall(1, &n2, 0, n2, N, (*L)->ga.offset, (*L)->ga.data);
if (res != GA_NO_ERROR) { if (res != GA_NO_ERROR) {
PyErr_Format(PyExc_RuntimeError, "GpuMagmaCholesky: tril_kernel %s.", PyErr_Format(PyExc_RuntimeError, "GpuMagmaCholesky: tril_kernel %s.",
GpuKernel_error(&k_tril_kernel, res)); GpuKernel_error(&k_tril_kernel, res));
goto fail; goto fail;
} }
#else } else {
res = triu_kernel_scall(1, &n2, 0, n2, N, (*L)->ga.offset, (*L)->ga.data); res = triu_kernel_scall(1, &n2, 0, n2, N, (*L)->ga.offset, (*L)->ga.data);
if (res != GA_NO_ERROR) { if (res != GA_NO_ERROR) {
PyErr_Format(PyExc_RuntimeError, "GpuMagmaCholesky: triu_kernel %s.", PyErr_Format(PyExc_RuntimeError, "GpuMagmaCholesky: triu_kernel %s.",
GpuKernel_error(&k_triu_kernel, res)); GpuKernel_error(&k_triu_kernel, res));
goto fail; goto fail;
}
} }
#endif
res = 0; res = 0;
fail: fail:
cuda_exit(c->ctx); cuda_exit(params->context->ctx);
return res; return res;
} }
...@@ -20,7 +20,6 @@ int APPLY_SPECIFIC(magma_inv)(PyGpuArrayObject *A, PyGpuArrayObject **A_inv, ...@@ -20,7 +20,6 @@ int APPLY_SPECIFIC(magma_inv)(PyGpuArrayObject *A, PyGpuArrayObject **A_inv,
// This is early to match the exit() in the fail label. // This is early to match the exit() in the fail label.
cuda_enter(params->context->ctx); cuda_enter(params->context->ctx);
magma_init();
if (!GpuArray_IS_C_CONTIGUOUS(&A->ga)) { if (!GpuArray_IS_C_CONTIGUOUS(&A->ga)) {
PyErr_SetString(PyExc_ValueError, PyErr_SetString(PyExc_ValueError,
......
...@@ -27,7 +27,6 @@ int APPLY_SPECIFIC(magma_svd)(PyGpuArrayObject *A, ...@@ -27,7 +27,6 @@ int APPLY_SPECIFIC(magma_svd)(PyGpuArrayObject *A,
// This is early to match the exit() in the fail label. // This is early to match the exit() in the fail label.
cuda_enter(params->context->ctx); cuda_enter(params->context->ctx);
magma_init();
if (!GpuArray_IS_C_CONTIGUOUS(&A->ga)) { if (!GpuArray_IS_C_CONTIGUOUS(&A->ga)) {
PyErr_SetString(PyExc_ValueError, PyErr_SetString(PyExc_ValueError,
......
...@@ -74,7 +74,7 @@ from .subtensor import (GpuIncSubtensor, GpuSubtensor, ...@@ -74,7 +74,7 @@ from .subtensor import (GpuIncSubtensor, GpuSubtensor,
from .opt_util import alpha_merge, output_merge, pad_dims, unpad_dims from .opt_util import alpha_merge, output_merge, pad_dims, unpad_dims
from .reduction import GpuMaxAndArgmax from .reduction import GpuMaxAndArgmax
from .linalg import (GpuCusolverSolve, MATRIX_STRUCTURES_SOLVE, GpuCholesky, from .linalg import (GpuCusolverSolve, MATRIX_STRUCTURES_SOLVE, GpuCholesky,
cusolver_available, GpuMagmaMatrixInverse, GpuMagmaSVD, cusolver_available, GpuMagmaMatrixInverse, gpu_svd,
GpuMagmaCholesky, GpuMagmaQR, GpuMagmaEigh) GpuMagmaCholesky, GpuMagmaQR, GpuMagmaEigh)
_logger = logging.getLogger("theano.gpuarray.opt") _logger = logging.getLogger("theano.gpuarray.opt")
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论