提交 e05a3ea2 authored 作者: Alexander Matyasko's avatar Alexander Matyasko

Update magma qr to use params type

上级 b728a250
......@@ -569,8 +569,17 @@ class GpuMagmaQR(GpuMagmaBase, CGpuKernelBase):
----------
complete : If `False`, returns only R (no Q output).
.. warning::
Because of implementation constraints, this Op returns outputs
in order ``R, Q``. Use :func:`theano.gpuarray.linalg.gpu_qr`
to get them in expected order ``Q, R``.
"""
__props__ = ('complete', )
_cop_num_inputs = 1
_cop_num_outputs = 2
check_input = False
params_type = ParamsType(complete=bool_t, context=gpu_context_type)
def __init__(self, complete=True):
self.complete = complete
......@@ -585,15 +594,37 @@ class GpuMagmaQR(GpuMagmaBase, CGpuKernelBase):
if A.dtype != 'float32':
raise TypeError("only `float32` is supported for now")
if self.complete:
return theano.Apply(self, [A], [A.type(), A.type()])
return theano.Apply(self, [A],
# return R, Q
[A.type(), A.type()])
else:
return theano.Apply(self, [A], [A.type()])
return theano.Apply(self, [A],
# return R
[A.type()])
def get_op_params(self):
    """
    Return the list of ``(name, value)`` op parameters for this Op.

    The list holds the single pair ``('COMPLETE', '1')`` when
    ``self.complete`` is truthy and is empty otherwise.

    NOTE(review): presumably these pairs become C preprocessor
    definitions (the C source tests ``#ifdef COMPLETE``) -- confirm
    against the base class.
    """
    if not self.complete:
        return []
    return [('COMPLETE', '1')]
def get_params(self, node):
    """
    Build the params object handed to the C implementation for `node`.

    The object is produced by ``self.params_type.get_params``, called
    with this Op and with ``context`` set to the context of the type of
    the node's first input.
    """
    build_params = self.params_type.get_params
    gpu_context = node.inputs[0].type.context
    return build_params(self, context=gpu_context)
def gpu_qr(a, complete=True):
    """
    Perform the QR decomposition on GPU.

    This is a thin wrapper around :class:`GpuMagmaQR` that puts the
    outputs back in the usual ``Q, R`` order (the Op itself yields
    ``R, Q``).

    Parameters
    ----------
    a : matrix
        Input passed unchanged to :class:`GpuMagmaQR`.
    complete : bool, optional
        If `False`, returns only R.

    Returns
    -------
    Q, R : matrices
        A list ``[Q, R]`` when `complete` is true; otherwise whatever the
        Op returns for the incomplete case (only R).

    """
    qr_op = GpuMagmaQR(complete)
    result = qr_op(a)
    if not complete:
        # Only R was produced; nothing to reorder.
        return result
    # The Op returns (R, Q); callers expect [Q, R].
    r_out, q_out = result
    return [q_out, r_out]
class GpuMagmaEigh(GpuMagmaBase):
......
......@@ -33,11 +33,9 @@ static PyGpuArrayObject *pygpu_narrow(PyGpuArrayObject *src, size_t dim,
#section support_code_struct
int APPLY_SPECIFIC(magma_qr)(PyGpuArrayObject *A_,
#ifdef COMPLETE
PyGpuArrayObject **Q,
#endif
PyGpuArrayObject **R,
PyGpuContextObject *c) {
PyGpuArrayObject **Q, // may be NULL
PARAMS_TYPE* params) {
PyGpuArrayObject *A = NULL;
magma_int_t M, N, K, nb, ldwork;
size_t n2;
......@@ -56,20 +54,27 @@ int APPLY_SPECIFIC(magma_qr)(PyGpuArrayObject *A_,
"GpuMagmaQR: requires data to be C-contiguous");
return -1;
}
// Enter the context early so that every path through the fail label is matched by its cuda_exit().
cuda_enter(params->context->ctx);
if (!GpuArray_IS_C_CONTIGUOUS(&A->ga)) {
PyErr_SetString(PyExc_ValueError,
"GpuMagmaQR: requires data to be C-contiguous");
goto fail;
}
if (PyGpuArray_NDIM(A) != 2) {
PyErr_SetString(PyExc_ValueError, "GpuMagmaQR: matrix rank error");
return -1;
goto fail;
}
A = pygpu_copy(A_, GA_F_ORDER);
if (A == NULL) {
PyErr_SetString(PyExc_RuntimeError,
"GpuMagmaQR: failed to change to column-major order");
return -1;
goto fail;
}
// This is early to match the exit() in the fail label.
cuda_enter(c->ctx);
// magma matrix qr
M = PyGpuArray_DIM(A, 0);
N = PyGpuArray_DIM(A, 1);
......@@ -83,7 +88,7 @@ int APPLY_SPECIFIC(magma_qr)(PyGpuArrayObject *A_,
nb = magma_get_sgeqrf_nb(M, N);
ldwork = (2 * K + magma_roundup(N, 32)) * nb;
work_data = gpudata_alloc(c->ctx, ldwork * sizeof(float), NULL, 0, NULL);
work_data = gpudata_alloc(params->context->ctx, ldwork * sizeof(float), NULL, 0, NULL);
if (work_data == NULL) {
PyErr_SetString(PyExc_RuntimeError,
"GpuMagmaQR: failed to allocate working memory");
......@@ -111,38 +116,38 @@ int APPLY_SPECIFIC(magma_qr)(PyGpuArrayObject *A_,
goto fail;
}
#ifdef COMPLETE
// compute Q
Py_XDECREF(A);
A = pygpu_copy(A_, GA_F_ORDER);
if (A == NULL) {
PyErr_SetString(PyExc_RuntimeError,
"GpuMagmaQR: failed to change to column-major order");
return -1;
}
magma_sgeqrf_gpu(M, N, (float *)PyGpuArray_DEV_DATA(A), M, tau_data,
*(float **)work_data, &info);
if (info != 0) {
PyErr_Format(
PyExc_RuntimeError,
"GpuMagmaQR: magma_sgeqrf_gpu argument %d has an illegal value", -info);
goto fail;
}
if (params->complete) {
  // compute Q
  // Release the current working copy and restart from a fresh
  // column-major copy of the input A_.
  Py_XDECREF(A);
  A = pygpu_copy(A_, GA_F_ORDER);
  if (A == NULL) {
    PyErr_SetString(PyExc_RuntimeError,
                    "GpuMagmaQR: failed to change to column-major order");
    // Must leave through `fail`, not `return -1`: cuda_enter() has
    // already been called and tau_data / work_data may be allocated.
    // The fail label releases them and calls cuda_exit(); its
    // Py_XDECREF(A) is safe with A == NULL.
    goto fail;
  }
  // Factorize the fresh copy in place (tau_data receives the scalar
  // factors consumed by magma_sorgqr_gpu below).
  magma_sgeqrf_gpu(M, N, (float *)PyGpuArray_DEV_DATA(A), M, tau_data,
                   *(float **)work_data, &info);
  if (info != 0) {
    PyErr_Format(
        PyExc_RuntimeError,
        "GpuMagmaQR: magma_sgeqrf_gpu argument %d has an illegal value", -info);
    goto fail;
  }

  // Overwrite A with the explicit Q factor built from the factorization.
  magma_sorgqr_gpu(M, K, K, (float *)PyGpuArray_DEV_DATA(A), M, tau_data,
                   *(float **)work_data, nb, &info);
  if (info != 0) {
    PyErr_Format(
        PyExc_RuntimeError,
        "GpuMagmaQR: magma_sorgqr_gpu argument %d has an illegal value", -info);
    goto fail;
  }
  // Q is A narrowed along dimension 1 to K
  // (presumably the first K columns -- confirm against pygpu_narrow).
  *Q = pygpu_narrow(A, 1, K);
  if (*Q == NULL) {
    PyErr_SetString(PyExc_RuntimeError, "GpuMagmaQR: failed to narrow array");
    goto fail;
  }
}
#endif
res = 0;
fail:
if (tau_data != NULL)
......@@ -150,6 +155,6 @@ fail:
if (work_data != NULL)
gpudata_release(work_data);
Py_XDECREF(A);
cuda_exit(c->ctx);
cuda_exit(params->context->ctx);
return res;
}
......@@ -75,7 +75,7 @@ from .opt_util import alpha_merge, output_merge, pad_dims, unpad_dims
from .reduction import GpuMaxAndArgmax
from .linalg import (GpuCusolverSolve, MATRIX_STRUCTURES_SOLVE, GpuCholesky,
cusolver_available, GpuMagmaMatrixInverse, gpu_svd,
GpuMagmaCholesky, GpuMagmaQR, GpuMagmaEigh)
GpuMagmaCholesky, gpu_qr, GpuMagmaEigh)
_logger = logging.getLogger("theano.gpuarray.opt")
......@@ -2181,11 +2181,13 @@ def local_gpu_magma_qr(op, context_name, inputs, outputs):
return
if inputs[0].dtype not in ['float16', 'float32']:
return
op = GpuMagmaQR(complete=True)
x = inputs[0]
if inputs[0].dtype == 'float16':
outputs = op(inputs[0].astype('float32'))
return [o.astype('float16') for o in outputs]
return op
x = inputs[0].astype('float32')
out = gpu_qr(x, complete=True)
if inputs[0].dtype == 'float16':
return [o.astype('float16') for o in out]
return out
@register_opt('magma', 'fast_compile')
......@@ -2196,10 +2198,13 @@ def local_gpu_magma_qr_incomplete(op, context_name, inputs, outputs):
return
if inputs[0].dtype not in ['float16', 'float32']:
return
op = GpuMagmaQR(complete=False)
x = inputs[0]
if inputs[0].dtype == 'float16':
return op(inputs[0].astype('float32')).astype('float16')
return op
x = inputs[0].astype('float32')
out = gpu_qr(x, complete=False)
if inputs[0].dtype == 'float16':
return [out.astype('float16')]
return out
# Matrix inverse
......
......@@ -11,7 +11,7 @@ from theano.gpuarray.linalg import (GpuCholesky, GpuMagmaCholesky,
GpuMagmaEigh, GpuMagmaMatrixInverse,
GpuMagmaQR, GpuMagmaSVD,
cusolver_available, gpu_matrix_inverse,
gpu_solve, gpu_svd)
gpu_solve, gpu_svd, gpu_qr)
from theano.tensor.nlinalg import (SVD, MatrixInverse, QRFull,
QRIncomplete, eigh, matrix_inverse, qr)
from theano.tensor.slinalg import Cholesky, cholesky
......@@ -376,7 +376,7 @@ class TestMagma(unittest.TestCase):
def run_gpu_qr(self, A_val, complete=True):
A = theano.tensor.fmatrix("A")
fn = theano.function([A], GpuMagmaQR(complete=complete)(A),
fn = theano.function([A], gpu_qr(A, complete=complete),
mode=mode_with_gpu)
return fn(A_val)
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论