提交 55a78fe6 作者: Alexander Matyasko

Correctly init cuda context and fix magma errors

上级 4cfa236d
...@@ -355,12 +355,13 @@ class GpuMagmaMatrixInverse(COp): ...@@ -355,12 +355,13 @@ class GpuMagmaMatrixInverse(COp):
params_type = gpu_context_type params_type = gpu_context_type
def __init__(self, inplace=False): def __init__(self, inplace=False):
COp.__init__(self, ['magma_linalg.c'], COp.__init__(self, ['magma_inv.c'],
'APPLY_SPECIFIC(magma_matrix_inv)') 'APPLY_SPECIFIC(magma_inv)')
self.inplace = inplace self.inplace = inplace
def c_headers(self): def c_headers(self):
return ['gpuarray/array.h', 'gpuarray/blas.h', 'gpuarray_helper.h', 'magma.h'] return ['gpuarray/types.h', 'gpuarray/array.h', 'gpuarray/ext_cuda.h',
'gpuarray_helper.h', 'magma.h']
def c_header_dirs(self): def c_header_dirs(self):
return [os.path.dirname(__file__), pygpu.get_include()] return [os.path.dirname(__file__), pygpu.get_include()]
...@@ -371,8 +372,8 @@ class GpuMagmaMatrixInverse(COp): ...@@ -371,8 +372,8 @@ class GpuMagmaMatrixInverse(COp):
def make_node(self, x): def make_node(self, x):
if x.ndim != 2: if x.ndim != 2:
raise LinAlgError("Matrix rank error") raise LinAlgError("Matrix rank error")
context_name = infer_context_name(x) ctx_name = infer_context_name(x)
x = as_gpuarray_variable(x, context_name) x = as_gpuarray_variable(x, ctx_name)
return theano.Apply(self, [x], [x.type()]) return theano.Apply(self, [x], [x.type()])
def get_params(self, node): def get_params(self, node):
......
...@@ -3,6 +3,10 @@ ...@@ -3,6 +3,10 @@
float *APPLY_SPECIFIC(dwork); float *APPLY_SPECIFIC(dwork);
magma_int_t *APPLY_SPECIFIC(piv); magma_int_t *APPLY_SPECIFIC(piv);
#section init_code
setup_ext_cuda();
#section init_code_struct #section init_code_struct
APPLY_SPECIFIC(dwork) = NULL; APPLY_SPECIFIC(dwork) = NULL;
...@@ -15,9 +19,8 @@ if (APPLY_SPECIFIC(piv) != NULL) {magma_free(APPLY_SPECIFIC(piv));} ...@@ -15,9 +19,8 @@ if (APPLY_SPECIFIC(piv) != NULL) {magma_free(APPLY_SPECIFIC(piv));}
#section support_code_struct #section support_code_struct
int APPLY_SPECIFIC(magma_matrix_inv)(PyGpuArrayObject *A, int APPLY_SPECIFIC(magma_inv)(PyGpuArrayObject *A, PyGpuArrayObject **_A_inv,
PyGpuArrayObject **_A_inv, PyGpuContextObject *c) {
PyGpuContextObject *ctx) {
PyGpuArrayObject *A_inv = *_A_inv; PyGpuArrayObject *A_inv = *_A_inv;
if (!GpuArray_IS_C_CONTIGUOUS(&A->ga)) { if (!GpuArray_IS_C_CONTIGUOUS(&A->ga)) {
...@@ -50,29 +53,55 @@ int APPLY_SPECIFIC(magma_matrix_inv)(PyGpuArrayObject *A, ...@@ -50,29 +53,55 @@ int APPLY_SPECIFIC(magma_matrix_inv)(PyGpuArrayObject *A,
#endif #endif
{ {
// magma matrix inverse // magma matrix inverse
cuda_enter(c->ctx);
magma_init(); magma_init();
magma_int_t ldwork, info; magma_int_t N, ldwork, info;
magma_int_t N = x_dims[0]; N = x_dims[0];
ldwork = N * magma_get_sgetri_nb(N); ldwork = N * magma_get_sgetri_nb(N);
if (magma_smalloc(&APPLY_SPECIFIC(dwork), ldwork)) { if (magma_smalloc(&APPLY_SPECIFIC(dwork), ldwork)) {
PyErr_SetString( PyErr_SetString(
PyExc_RuntimeError, PyExc_RuntimeError,
"GpuMagmaMatrixInverse: failed to allocate magma working memory"); "GpuMagmaMatrixInverse: failed to allocate magma working memory");
magma_finalize();
cuda_exit(c->ctx);
return 1;
}
if (magma_imalloc_cpu(&APPLY_SPECIFIC(piv), N)) {
PyErr_SetString(
PyExc_RuntimeError,
"GpuMagmaMatrixInverse: failed to allocate memory for pivot array");
magma_finalize();
cuda_exit(c->ctx);
return 1; return 1;
} }
APPLY_SPECIFIC(piv) = (magma_int_t *)malloc(N * sizeof(magma_int_t)); float *A_ptr = (float *)PyGpuArray_DEV_DATA(A_inv);
float *A_ptr = (float *) ((void **)A_inv->ga.data)[0]; magma_sgetrf_gpu(N, N, A_ptr, N, APPLY_SPECIFIC(piv), &info);
magma_sgetri_gpu(N, A_ptr, N, APPLY_SPECIFIC(piv), APPLY_SPECIFIC(dwork), ldwork, &info); if (info != 0) {
PyErr_Format(
PyExc_RuntimeError,
"GpuMagmaMatrixInverse: magma_sgetrf_gpu returned error %d: %s.",
info, magma_strerror(info));
magma_finalize();
cuda_exit(c->ctx);
return 1;
}
magma_sgetri_gpu(N, A_ptr, N, APPLY_SPECIFIC(piv), APPLY_SPECIFIC(dwork),
ldwork, &info);
if (info != 0) { if (info != 0) {
PyErr_Format( PyErr_Format(
PyExc_RuntimeError, PyExc_RuntimeError,
"GpuMagmaMatrixInverse: magma_sgetri_gpu returned error %d", info); "GpuMagmaMatrixInverse: magma_sgetri_gpu returned error %d: %s.",
info, magma_strerror(info));
magma_finalize();
cuda_exit(c->ctx);
return 1; return 1;
} }
magma_finalize(); magma_finalize();
cuda_exit(c->ctx);
} }
*_A_inv = A_inv; *_A_inv = A_inv;
return 0; return 0;
......
...@@ -2006,13 +2006,10 @@ def local_inplace_cholesky(node): ...@@ -2006,13 +2006,10 @@ def local_inplace_cholesky(node):
return [GpuCholesky(lower=node.op.lower, inplace=True)(*node.inputs)] return [GpuCholesky(lower=node.op.lower, inplace=True)(*node.inputs)]
@register_opt('fast_compile') @register_opt('magma', 'fast_compile')
@op_lifter([nlinalg.MatrixInverse]) @op_lifter([nlinalg.MatrixInverse])
@register_opt2([theano.tensor.nlinalg.MatrixInverse], 'fast_compile') @register_opt2([theano.tensor.nlinalg.MatrixInverse], 'magma', 'fast_compile')
def local_gpu_matrix_inverse(op, context_name, inputs, outputs): def local_gpu_matrix_inverse(op, context_name, inputs, outputs):
magma_available = True
if not magma_available:
return
return GpuMagmaMatrixInverse() return GpuMagmaMatrixInverse()
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论