Commit e05a3ea2 authored by Alexander Matyasko

Update magma qr to use params type

Parent b728a250
...@@ -569,8 +569,17 @@ class GpuMagmaQR(GpuMagmaBase, CGpuKernelBase): ...@@ -569,8 +569,17 @@ class GpuMagmaQR(GpuMagmaBase, CGpuKernelBase):
---------- ----------
complete : If `False`, returns only r. complete : If `False`, returns only r.
.. warning::
Because of implementation constraints, this Op returns outputs
in order ``R, Q``. Use :func:`theano.gpuarray.linalg.gpu_qr`
to get them in expected order ``Q, R``.
""" """
__props__ = ('complete', ) __props__ = ('complete', )
_cop_num_inputs = 1
_cop_num_outputs = 2
check_input = False
params_type = ParamsType(complete=bool_t, context=gpu_context_type)
def __init__(self, complete=True): def __init__(self, complete=True):
self.complete = complete self.complete = complete
...@@ -585,15 +594,37 @@ class GpuMagmaQR(GpuMagmaBase, CGpuKernelBase): ...@@ -585,15 +594,37 @@ class GpuMagmaQR(GpuMagmaBase, CGpuKernelBase):
if A.dtype != 'float32': if A.dtype != 'float32':
raise TypeError("only `float32` is supported for now") raise TypeError("only `float32` is supported for now")
if self.complete: if self.complete:
return theano.Apply(self, [A], [A.type(), A.type()]) return theano.Apply(self, [A],
# return R, Q
[A.type(), A.type()])
else: else:
return theano.Apply(self, [A], [A.type()]) return theano.Apply(self, [A],
# return R
[A.type()])
def get_op_params(self): def get_params(self, node):
params = [] return self.params_type.get_params(self, context=node.inputs[0].type.context)
if self.complete:
params.append(('COMPLETE', '1'))
return params def gpu_qr(a, complete=True):
"""
This function performs the QR on GPU.
Parameters
----------
complete : bool, optional
If `False`, returns only r.
Returns
-------
Q, R : matrices
"""
out = GpuMagmaQR(complete)(a)
if complete:
R, Q = out
out = [Q, R]
return out
class GpuMagmaEigh(GpuMagmaBase): class GpuMagmaEigh(GpuMagmaBase):
......
...@@ -33,11 +33,9 @@ static PyGpuArrayObject *pygpu_narrow(PyGpuArrayObject *src, size_t dim, ...@@ -33,11 +33,9 @@ static PyGpuArrayObject *pygpu_narrow(PyGpuArrayObject *src, size_t dim,
#section support_code_struct #section support_code_struct
int APPLY_SPECIFIC(magma_qr)(PyGpuArrayObject *A_, int APPLY_SPECIFIC(magma_qr)(PyGpuArrayObject *A_,
#ifdef COMPLETE
PyGpuArrayObject **Q,
#endif
PyGpuArrayObject **R, PyGpuArrayObject **R,
PyGpuContextObject *c) { PyGpuArrayObject **Q, // may be NULL
PARAMS_TYPE* params) {
PyGpuArrayObject *A = NULL; PyGpuArrayObject *A = NULL;
magma_int_t M, N, K, nb, ldwork; magma_int_t M, N, K, nb, ldwork;
size_t n2; size_t n2;
...@@ -56,20 +54,27 @@ int APPLY_SPECIFIC(magma_qr)(PyGpuArrayObject *A_, ...@@ -56,20 +54,27 @@ int APPLY_SPECIFIC(magma_qr)(PyGpuArrayObject *A_,
"GpuMagmaQR: requires data to be C-contiguous"); "GpuMagmaQR: requires data to be C-contiguous");
return -1; return -1;
} }
// This is early to match the exit() in the fail label.
cuda_enter(params->context->ctx);
if (!GpuArray_IS_C_CONTIGUOUS(&A->ga)) {
PyErr_SetString(PyExc_ValueError,
"GpuMagmaQR: requires data to be C-contiguous");
goto fail;
}
if (PyGpuArray_NDIM(A) != 2) { if (PyGpuArray_NDIM(A) != 2) {
PyErr_SetString(PyExc_ValueError, "GpuMagmaQR: matrix rank error"); PyErr_SetString(PyExc_ValueError, "GpuMagmaQR: matrix rank error");
return -1; goto fail;
} }
A = pygpu_copy(A_, GA_F_ORDER); A = pygpu_copy(A_, GA_F_ORDER);
if (A == NULL) { if (A == NULL) {
PyErr_SetString(PyExc_RuntimeError, PyErr_SetString(PyExc_RuntimeError,
"GpuMagmaQR: failed to change to column-major order"); "GpuMagmaQR: failed to change to column-major order");
return -1; goto fail;
} }
// This is early to match the exit() in the fail label.
cuda_enter(c->ctx);
// magma matrix qr // magma matrix qr
M = PyGpuArray_DIM(A, 0); M = PyGpuArray_DIM(A, 0);
N = PyGpuArray_DIM(A, 1); N = PyGpuArray_DIM(A, 1);
...@@ -83,7 +88,7 @@ int APPLY_SPECIFIC(magma_qr)(PyGpuArrayObject *A_, ...@@ -83,7 +88,7 @@ int APPLY_SPECIFIC(magma_qr)(PyGpuArrayObject *A_,
nb = magma_get_sgeqrf_nb(M, N); nb = magma_get_sgeqrf_nb(M, N);
ldwork = (2 * K + magma_roundup(N, 32)) * nb; ldwork = (2 * K + magma_roundup(N, 32)) * nb;
work_data = gpudata_alloc(c->ctx, ldwork * sizeof(float), NULL, 0, NULL); work_data = gpudata_alloc(params->context->ctx, ldwork * sizeof(float), NULL, 0, NULL);
if (work_data == NULL) { if (work_data == NULL) {
PyErr_SetString(PyExc_RuntimeError, PyErr_SetString(PyExc_RuntimeError,
"GpuMagmaQR: failed to allocate working memory"); "GpuMagmaQR: failed to allocate working memory");
...@@ -111,38 +116,38 @@ int APPLY_SPECIFIC(magma_qr)(PyGpuArrayObject *A_, ...@@ -111,38 +116,38 @@ int APPLY_SPECIFIC(magma_qr)(PyGpuArrayObject *A_,
goto fail; goto fail;
} }
#ifdef COMPLETE if (params->complete) {
// compute Q // compute Q
Py_XDECREF(A); Py_XDECREF(A);
A = pygpu_copy(A_, GA_F_ORDER); A = pygpu_copy(A_, GA_F_ORDER);
if (A == NULL) { if (A == NULL) {
PyErr_SetString(PyExc_RuntimeError, PyErr_SetString(PyExc_RuntimeError,
"GpuMagmaQR: failed to change to column-major order"); "GpuMagmaQR: failed to change to column-major order");
return -1; return -1;
} }
magma_sgeqrf_gpu(M, N, (float *)PyGpuArray_DEV_DATA(A), M, tau_data, magma_sgeqrf_gpu(M, N, (float *)PyGpuArray_DEV_DATA(A), M, tau_data,
*(float **)work_data, &info); *(float **)work_data, &info);
if (info != 0) { if (info != 0) {
PyErr_Format( PyErr_Format(
PyExc_RuntimeError, PyExc_RuntimeError,
"GpuMagmaQR: magma_sgeqrf_gpu argument %d has an illegal value", -info); "GpuMagmaQR: magma_sgeqrf_gpu argument %d has an illegal value", -info);
goto fail; goto fail;
} }
magma_sorgqr_gpu(M, K, K, (float *)PyGpuArray_DEV_DATA(A), M, tau_data, magma_sorgqr_gpu(M, K, K, (float *)PyGpuArray_DEV_DATA(A), M, tau_data,
*(float **)work_data, nb, &info); *(float **)work_data, nb, &info);
if (info != 0) { if (info != 0) {
PyErr_Format( PyErr_Format(
PyExc_RuntimeError, PyExc_RuntimeError,
"GpuMagmaQR: magma_sorgqr_gpu argument %d has an illegal value", -info); "GpuMagmaQR: magma_sorgqr_gpu argument %d has an illegal value", -info);
goto fail; goto fail;
} }
*Q = pygpu_narrow(A, 1, K); *Q = pygpu_narrow(A, 1, K);
if (*Q == NULL) { if (*Q == NULL) {
PyErr_SetString(PyExc_RuntimeError, "GpuMagmaQR: failed to narrow array"); PyErr_SetString(PyExc_RuntimeError, "GpuMagmaQR: failed to narrow array");
goto fail; goto fail;
}
} }
#endif
res = 0; res = 0;
fail: fail:
if (tau_data != NULL) if (tau_data != NULL)
...@@ -150,6 +155,6 @@ fail: ...@@ -150,6 +155,6 @@ fail:
if (work_data != NULL) if (work_data != NULL)
gpudata_release(work_data); gpudata_release(work_data);
Py_XDECREF(A); Py_XDECREF(A);
cuda_exit(c->ctx); cuda_exit(params->context->ctx);
return res; return res;
} }
...@@ -75,7 +75,7 @@ from .opt_util import alpha_merge, output_merge, pad_dims, unpad_dims ...@@ -75,7 +75,7 @@ from .opt_util import alpha_merge, output_merge, pad_dims, unpad_dims
from .reduction import GpuMaxAndArgmax from .reduction import GpuMaxAndArgmax
from .linalg import (GpuCusolverSolve, MATRIX_STRUCTURES_SOLVE, GpuCholesky, from .linalg import (GpuCusolverSolve, MATRIX_STRUCTURES_SOLVE, GpuCholesky,
cusolver_available, GpuMagmaMatrixInverse, gpu_svd, cusolver_available, GpuMagmaMatrixInverse, gpu_svd,
GpuMagmaCholesky, GpuMagmaQR, GpuMagmaEigh) GpuMagmaCholesky, gpu_qr, GpuMagmaEigh)
_logger = logging.getLogger("theano.gpuarray.opt") _logger = logging.getLogger("theano.gpuarray.opt")
...@@ -2181,11 +2181,13 @@ def local_gpu_magma_qr(op, context_name, inputs, outputs): ...@@ -2181,11 +2181,13 @@ def local_gpu_magma_qr(op, context_name, inputs, outputs):
return return
if inputs[0].dtype not in ['float16', 'float32']: if inputs[0].dtype not in ['float16', 'float32']:
return return
op = GpuMagmaQR(complete=True) x = inputs[0]
if inputs[0].dtype == 'float16': if inputs[0].dtype == 'float16':
outputs = op(inputs[0].astype('float32')) x = inputs[0].astype('float32')
return [o.astype('float16') for o in outputs] out = gpu_qr(x, complete=True)
return op if inputs[0].dtype == 'float16':
return [o.astype('float16') for o in out]
return out
@register_opt('magma', 'fast_compile') @register_opt('magma', 'fast_compile')
...@@ -2196,10 +2198,13 @@ def local_gpu_magma_qr_incomplete(op, context_name, inputs, outputs): ...@@ -2196,10 +2198,13 @@ def local_gpu_magma_qr_incomplete(op, context_name, inputs, outputs):
return return
if inputs[0].dtype not in ['float16', 'float32']: if inputs[0].dtype not in ['float16', 'float32']:
return return
op = GpuMagmaQR(complete=False) x = inputs[0]
if inputs[0].dtype == 'float16': if inputs[0].dtype == 'float16':
return op(inputs[0].astype('float32')).astype('float16') x = inputs[0].astype('float32')
return op out = gpu_qr(x, complete=False)
if inputs[0].dtype == 'float16':
return [out.astype('float16')]
return out
# Matrix inverse # Matrix inverse
......
...@@ -11,7 +11,7 @@ from theano.gpuarray.linalg import (GpuCholesky, GpuMagmaCholesky, ...@@ -11,7 +11,7 @@ from theano.gpuarray.linalg import (GpuCholesky, GpuMagmaCholesky,
GpuMagmaEigh, GpuMagmaMatrixInverse, GpuMagmaEigh, GpuMagmaMatrixInverse,
GpuMagmaQR, GpuMagmaSVD, GpuMagmaQR, GpuMagmaSVD,
cusolver_available, gpu_matrix_inverse, cusolver_available, gpu_matrix_inverse,
gpu_solve, gpu_svd) gpu_solve, gpu_svd, gpu_qr)
from theano.tensor.nlinalg import (SVD, MatrixInverse, QRFull, from theano.tensor.nlinalg import (SVD, MatrixInverse, QRFull,
QRIncomplete, eigh, matrix_inverse, qr) QRIncomplete, eigh, matrix_inverse, qr)
from theano.tensor.slinalg import Cholesky, cholesky from theano.tensor.slinalg import Cholesky, cholesky
...@@ -376,7 +376,7 @@ class TestMagma(unittest.TestCase): ...@@ -376,7 +376,7 @@ class TestMagma(unittest.TestCase):
def run_gpu_qr(self, A_val, complete=True): def run_gpu_qr(self, A_val, complete=True):
A = theano.tensor.fmatrix("A") A = theano.tensor.fmatrix("A")
fn = theano.function([A], GpuMagmaQR(complete=complete)(A), fn = theano.function([A], gpu_qr(A, complete=complete),
mode=mode_with_gpu) mode=mode_with_gpu)
return fn(A_val) return fn(A_val)
......
Markdown is supported
0%
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment