提交 020fd553 作者:Alexander Matyasko

Update magma eigh to use params

上级 e05a3ea2
...@@ -638,7 +638,12 @@ class GpuMagmaEigh(GpuMagmaBase): ...@@ -638,7 +638,12 @@ class GpuMagmaEigh(GpuMagmaBase):
compute_v : If `True`, computes eigenvalues and eigenvectors (`True`, compute_v : If `True`, computes eigenvalues and eigenvectors (`True`,
default). If `False`, computes only eigenvalues of matrix. default). If `False`, computes only eigenvalues of matrix.
""" """
__props__ = ('lower', ) __props__ = ('lower', 'compute_v')
_cop_num_inputs = 1
_cop_num_outputs = 2
check_input = False
params_type = ParamsType(lower=bool_t, compute_v=bool_t,
context=gpu_context_type)
def __init__(self, UPLO='L', compute_v=True): def __init__(self, UPLO='L', compute_v=True):
assert UPLO in ['L', 'U'] assert UPLO in ['L', 'U']
...@@ -656,18 +661,15 @@ class GpuMagmaEigh(GpuMagmaBase): ...@@ -656,18 +661,15 @@ class GpuMagmaEigh(GpuMagmaBase):
raise TypeError("only `float32` is supported for now") raise TypeError("only `float32` is supported for now")
if self.compute_v: if self.compute_v:
return theano.Apply(self, [A], return theano.Apply(self, [A],
# return D, V
[GpuArrayType(A.dtype, broadcastable=[False], [GpuArrayType(A.dtype, broadcastable=[False],
context_name=ctx_name)(), context_name=ctx_name)(),
A.type()]) A.type()])
else: else:
return theano.Apply(self, [A], return theano.Apply(self, [A],
# return D
[GpuArrayType(A.dtype, broadcastable=[False], [GpuArrayType(A.dtype, broadcastable=[False],
context_name=ctx_name)()]) context_name=ctx_name)()])
def get_op_params(self): def get_params(self, node):
params = [] return self.params_type.get_params(self, context=node.inputs[0].type.context)
if self.lower:
params.append(('LOWER', '1'))
if self.compute_v:
params.append(('COMPUTE_V', '1'))
return params
...@@ -39,7 +39,7 @@ setup_ext_cuda(); ...@@ -39,7 +39,7 @@ setup_ext_cuda();
#section support_code_struct #section support_code_struct
int APPLY_SPECIFIC(magma_cholesky)(PyGpuArrayObject *A, PyGpuArrayObject **L, int APPLY_SPECIFIC(magma_cholesky)(PyGpuArrayObject *A, PyGpuArrayObject **L,
PARAMS_TYPE* params) { PARAMS_TYPE *params) {
const size_t *dims; const size_t *dims;
size_t N, n2; size_t N, n2;
magma_uplo_t ul; magma_uplo_t ul;
......
...@@ -6,10 +6,8 @@ setup_ext_cuda(); ...@@ -6,10 +6,8 @@ setup_ext_cuda();
int APPLY_SPECIFIC(magma_eigh)(PyGpuArrayObject *A_, int APPLY_SPECIFIC(magma_eigh)(PyGpuArrayObject *A_,
PyGpuArrayObject **D, PyGpuArrayObject **D,
#ifdef COMPUTE_V PyGpuArrayObject **V, // may be NULL
PyGpuArrayObject **V, PARAMS_TYPE *params) {
#endif
PyGpuContextObject *c) {
PyGpuArrayObject *A = NULL; PyGpuArrayObject *A = NULL;
magma_int_t N, liwork, *iwork_data = NULL; magma_int_t N, liwork, *iwork_data = NULL;
size_t d_dims[1], v_dims[2]; size_t d_dims[1], v_dims[2];
...@@ -23,21 +21,26 @@ int APPLY_SPECIFIC(magma_eigh)(PyGpuArrayObject *A_, ...@@ -23,21 +21,26 @@ int APPLY_SPECIFIC(magma_eigh)(PyGpuArrayObject *A_,
"GpuMagmaEigh: Unsupported data type"); "GpuMagmaEigh: Unsupported data type");
return -1; return -1;
} }
// This is early to match the exit() in the fail label.
cuda_enter(params->context->ctx);
if (!GpuArray_IS_C_CONTIGUOUS(&A_->ga)) { if (!GpuArray_IS_C_CONTIGUOUS(&A_->ga)) {
PyErr_SetString(PyExc_ValueError, PyErr_SetString(PyExc_ValueError,
"GpuMagmaEigh: requires data to be C-contiguous"); "GpuMagmaEigh: requires data to be C-contiguous");
return -1; goto fail;
} }
if (PyGpuArray_NDIM(A_) != 2) { if (PyGpuArray_NDIM(A_) != 2) {
PyErr_SetString(PyExc_ValueError, PyErr_SetString(PyExc_ValueError,
"GpuMagmaEigh: matrix rank error"); "GpuMagmaEigh: matrix rank error");
return -1; goto fail;
} }
if (PyGpuArray_DIM(A_, 0) != PyGpuArray_DIM(A_, 1)) { if (PyGpuArray_DIM(A_, 0) != PyGpuArray_DIM(A_, 1)) {
PyErr_SetString(PyExc_ValueError, PyErr_SetString(PyExc_ValueError,
"GpuMagmaEigh: matrix is not square"); "GpuMagmaEigh: matrix is not square");
return -1; goto fail;
} }
A = pygpu_copy(A_, GA_F_ORDER); A = pygpu_copy(A_, GA_F_ORDER);
if (A == NULL) { if (A == NULL) {
PyErr_SetString(PyExc_RuntimeError, PyErr_SetString(PyExc_RuntimeError,
...@@ -45,22 +48,19 @@ int APPLY_SPECIFIC(magma_eigh)(PyGpuArrayObject *A_, ...@@ -45,22 +48,19 @@ int APPLY_SPECIFIC(magma_eigh)(PyGpuArrayObject *A_,
return -1; return -1;
} }
// This is early to match the exit() in the fail label.
cuda_enter(c->ctx);
// magma matrix eigen decomposition of a symmetric matrix // magma matrix eigen decomposition of a symmetric matrix
N = PyGpuArray_DIM(A, 0); N = PyGpuArray_DIM(A, 0);
#ifdef LOWER if (params->lower) {
uplo = MagmaLower; uplo = MagmaLower;
#else } else {
uplo = MagmaUpper; uplo = MagmaUpper;
#endif }
#ifdef COMPUTE_V if (params->compute_v) {
jobz = MagmaVec; jobz = MagmaVec;
#else } else {
jobz = MagmaNoVec; jobz = MagmaNoVec;
#endif }
if (MAGMA_SUCCESS != magma_smalloc_pinned(&w_data, N)) { if (MAGMA_SUCCESS != magma_smalloc_pinned(&w_data, N)) {
PyErr_SetString(PyExc_RuntimeError, PyErr_SetString(PyExc_RuntimeError,
...@@ -105,7 +105,7 @@ int APPLY_SPECIFIC(magma_eigh)(PyGpuArrayObject *A_, ...@@ -105,7 +105,7 @@ int APPLY_SPECIFIC(magma_eigh)(PyGpuArrayObject *A_,
} }
d_dims[0] = N; d_dims[0] = N;
if (theano_prep_output(D, 1, d_dims, A->ga.typecode, GA_C_ORDER, c) != 0){ if (theano_prep_output(D, 1, d_dims, A->ga.typecode, GA_C_ORDER, params->context) != 0){
PyErr_SetString(PyExc_RuntimeError, PyErr_SetString(PyExc_RuntimeError,
"GpuMagmaEigh: failed to allocate memory for the output"); "GpuMagmaEigh: failed to allocate memory for the output");
goto fail; goto fail;
...@@ -113,15 +113,14 @@ int APPLY_SPECIFIC(magma_eigh)(PyGpuArrayObject *A_, ...@@ -113,15 +113,14 @@ int APPLY_SPECIFIC(magma_eigh)(PyGpuArrayObject *A_,
cudaMemcpy(PyGpuArray_DEV_DATA(*D), w_data, N * sizeof(float), cudaMemcpy(PyGpuArray_DEV_DATA(*D), w_data, N * sizeof(float),
cudaMemcpyDeviceToDevice); cudaMemcpyDeviceToDevice);
#ifdef COMPUTE_V if (params->compute_v) {
*V = theano_try_copy(*V, A); *V = theano_try_copy(*V, A);
if (*V == NULL) { if (*V == NULL) {
PyErr_SetString(PyExc_RuntimeError, PyErr_SetString(PyExc_RuntimeError,
"GpuMagmaEigh: failed to allocate memory for the output"); "GpuMagmaEigh: failed to allocate memory for the output");
goto fail; goto fail;
} }
#endif }
res = 0; res = 0;
fail: fail:
if (w_data != NULL) if (w_data != NULL)
...@@ -133,6 +132,6 @@ fail: ...@@ -133,6 +132,6 @@ fail:
if (iwork_data != NULL) if (iwork_data != NULL)
magma_free_cpu(iwork_data); magma_free_cpu(iwork_data);
Py_XDECREF(A); Py_XDECREF(A);
cuda_exit(c->ctx); cuda_exit(params->context->ctx);
return res; return res;
} }
...@@ -5,7 +5,7 @@ setup_ext_cuda(); ...@@ -5,7 +5,7 @@ setup_ext_cuda();
#section support_code_struct #section support_code_struct
int APPLY_SPECIFIC(magma_inv)(PyGpuArrayObject *A, PyGpuArrayObject **A_inv, int APPLY_SPECIFIC(magma_inv)(PyGpuArrayObject *A, PyGpuArrayObject **A_inv,
PARAMS_TYPE* params) { PARAMS_TYPE *params) {
const size_t *dims; const size_t *dims;
magma_int_t N, ldwork, info; magma_int_t N, ldwork, info;
magma_int_t *piv = NULL; magma_int_t *piv = NULL;
......
...@@ -49,11 +49,6 @@ int APPLY_SPECIFIC(magma_qr)(PyGpuArrayObject *A_, ...@@ -49,11 +49,6 @@ int APPLY_SPECIFIC(magma_qr)(PyGpuArrayObject *A_,
"GpuMagmaQR: Unsupported data type"); "GpuMagmaQR: Unsupported data type");
return -1; return -1;
} }
if (!GpuArray_IS_C_CONTIGUOUS(&A->ga)) {
PyErr_SetString(PyExc_ValueError,
"GpuMagmaQR: requires data to be C-contiguous");
return -1;
}
// This is early to match the exit() in the fail label. // This is early to match the exit() in the fail label.
cuda_enter(params->context->ctx); cuda_enter(params->context->ctx);
......
...@@ -8,7 +8,7 @@ int APPLY_SPECIFIC(magma_svd)(PyGpuArrayObject *A, ...@@ -8,7 +8,7 @@ int APPLY_SPECIFIC(magma_svd)(PyGpuArrayObject *A,
PyGpuArrayObject **S, PyGpuArrayObject **S,
PyGpuArrayObject **U, // may be NULL PyGpuArrayObject **U, // may be NULL
PyGpuArrayObject **VT, // may be NULL PyGpuArrayObject **VT, // may be NULL
PARAMS_TYPE* params) { PARAMS_TYPE *params) {
bool compute_uv = (U != NULL); bool compute_uv = (U != NULL);
magma_int_t *iwork = NULL, iunused[1]; magma_int_t *iwork = NULL, iunused[1];
magma_int_t M, N, K, ldu, ldv, M_U, N_VT, info; magma_int_t M, N, K, ldu, ldv, M_U, N_VT, info;
......
Markdown 格式
0%
您即将添加 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论