提交 55a78fe6 作者: Alexander Matyasko

Correctly init cuda context and fix magma errors

上级 4cfa236d
......@@ -355,12 +355,13 @@ class GpuMagmaMatrixInverse(COp):
params_type = gpu_context_type
def __init__(self, inplace=False):
COp.__init__(self, ['magma_linalg.c'],
'APPLY_SPECIFIC(magma_matrix_inv)')
COp.__init__(self, ['magma_inv.c'],
'APPLY_SPECIFIC(magma_inv)')
self.inplace = inplace
def c_headers(self):
return ['gpuarray/array.h', 'gpuarray/blas.h', 'gpuarray_helper.h', 'magma.h']
return ['gpuarray/types.h', 'gpuarray/array.h', 'gpuarray/ext_cuda.h',
'gpuarray_helper.h', 'magma.h']
def c_header_dirs(self):
return [os.path.dirname(__file__), pygpu.get_include()]
......@@ -371,8 +372,8 @@ class GpuMagmaMatrixInverse(COp):
def make_node(self, x):
if x.ndim != 2:
raise LinAlgError("Matrix rank error")
context_name = infer_context_name(x)
x = as_gpuarray_variable(x, context_name)
ctx_name = infer_context_name(x)
x = as_gpuarray_variable(x, ctx_name)
return theano.Apply(self, [x], [x.type()])
def get_params(self, node):
......
......@@ -3,6 +3,10 @@
float *APPLY_SPECIFIC(dwork);
magma_int_t *APPLY_SPECIFIC(piv);
#section init_code
setup_ext_cuda();
#section init_code_struct
APPLY_SPECIFIC(dwork) = NULL;
......@@ -15,9 +19,8 @@ if (APPLY_SPECIFIC(piv) != NULL) {magma_free(APPLY_SPECIFIC(piv));}
#section support_code_struct
int APPLY_SPECIFIC(magma_matrix_inv)(PyGpuArrayObject *A,
PyGpuArrayObject **_A_inv,
PyGpuContextObject *ctx) {
int APPLY_SPECIFIC(magma_inv)(PyGpuArrayObject *A, PyGpuArrayObject **_A_inv,
PyGpuContextObject *c) {
PyGpuArrayObject *A_inv = *_A_inv;
if (!GpuArray_IS_C_CONTIGUOUS(&A->ga)) {
......@@ -50,29 +53,55 @@ int APPLY_SPECIFIC(magma_matrix_inv)(PyGpuArrayObject *A,
#endif
{
// magma matrix inverse
cuda_enter(c->ctx);
magma_init();
magma_int_t ldwork, info;
magma_int_t N = x_dims[0];
magma_int_t N, ldwork, info;
N = x_dims[0];
ldwork = N * magma_get_sgetri_nb(N);
if (magma_smalloc(&APPLY_SPECIFIC(dwork), ldwork)) {
PyErr_SetString(
PyExc_RuntimeError,
"GpuMagmaMatrixInverse: failed to allocate magma working memory");
magma_finalize();
cuda_exit(c->ctx);
return 1;
}
if (magma_imalloc_cpu(&APPLY_SPECIFIC(piv), N)) {
PyErr_SetString(
PyExc_RuntimeError,
"GpuMagmaMatrixInverse: failed to allocate memory for pivot array");
magma_finalize();
cuda_exit(c->ctx);
return 1;
}
APPLY_SPECIFIC(piv) = (magma_int_t *)malloc(N * sizeof(magma_int_t));
float *A_ptr = (float *) ((void **)A_inv->ga.data)[0];
magma_sgetri_gpu(N, A_ptr, N, APPLY_SPECIFIC(piv), APPLY_SPECIFIC(dwork), ldwork, &info);
float *A_ptr = (float *)PyGpuArray_DEV_DATA(A_inv);
magma_sgetrf_gpu(N, N, A_ptr, N, APPLY_SPECIFIC(piv), &info);
if (info != 0) {
PyErr_Format(
PyExc_RuntimeError,
"GpuMagmaMatrixInverse: magma_sgetrf_gpu returned error %d: %s.",
info, magma_strerror(info));
magma_finalize();
cuda_exit(c->ctx);
return 1;
}
magma_sgetri_gpu(N, A_ptr, N, APPLY_SPECIFIC(piv), APPLY_SPECIFIC(dwork),
ldwork, &info);
if (info != 0) {
PyErr_Format(
PyExc_RuntimeError,
"GpuMagmaMatrixInverse: magma_sgetri_gpu returned error %d", info);
"GpuMagmaMatrixInverse: magma_sgetri_gpu returned error %d: %s.",
info, magma_strerror(info));
magma_finalize();
cuda_exit(c->ctx);
return 1;
}
magma_finalize();
cuda_exit(c->ctx);
}
*_A_inv = A_inv;
return 0;
......
......@@ -2006,13 +2006,10 @@ def local_inplace_cholesky(node):
return [GpuCholesky(lower=node.op.lower, inplace=True)(*node.inputs)]
@register_opt('fast_compile')
@register_opt('magma', 'fast_compile')
@op_lifter([nlinalg.MatrixInverse])
@register_opt2([theano.tensor.nlinalg.MatrixInverse], 'fast_compile')
@register_opt2([theano.tensor.nlinalg.MatrixInverse], 'magma', 'fast_compile')
def local_gpu_matrix_inverse(op, context_name, inputs, outputs):
magma_available = True
if not magma_available:
return
return GpuMagmaMatrixInverse()
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论