Add magma gpu eigh (eigen decompositon for a symmetric matrix)

da1374e8 · Alexander Matyasko · deedf6d5 · da1374e8 · da1374e8
--- a/theano/gpuarray/linalg.py
+++ b/theano/gpuarray/linalg.py
@@ -593,7 +593,7 @@ class GpuMagmaQR(CGpuKernelBase):

    """
    params_type = gpu_context_type
-    __props__ = ('complete',)
+    __props__ = ('complete', )

    def __init__(self, complete=True):
        self.complete = complete
@@ -636,3 +636,68 @@ class GpuMagmaQR(CGpuKernelBase):
        if self.complete:
            params.append(('COMPLETE', '1'))
        return params
+
+
+class GpuMagmaEigh(COp):
+    """Computes the eigen decomposition of a symmetric matrix :math:`A` using magma
+    library.
+
+    Parameters
+    ----------
+    UPLO : Specifies whether the calculation is done with the lower triangular
+           part of matrix (`L`, default) or the upper triangular part (`U`).
+    compute_v : If `True`, computes eigenvalues and eigenvectors (`True`,
+                default). If `False`, computes only eigenvalues of matrix.
+    """
+    __props__ = ('lower', )
+    params_type = gpu_context_type
+
+    def __init__(self, UPLO='L', compute_v=True):
+        assert UPLO in ['L', 'U']
+        self.lower = UPLO == 'L'
+        self.compute_v = compute_v
+        COp.__init__(self, ['magma_eigh.c'], 'APPLY_SPECIFIC(magma_eigh)')
+
+    def c_headers(self):
+        return ['gpuarray/types.h', 'gpuarray/array.h', 'gpuarray/ext_cuda.h',
+                'gpuarray_helper.h', 'magma.h']
+
+    def c_header_dirs(self):
+        dirs = [os.path.dirname(__file__), pygpu.get_include()]
+        if config.magma.include_path:
+            dirs.append(config.magma.include_path)
+        return dirs
+
+    def c_libraries(self):
+        return ['magma']
+
+    def c_lib_dirs(self):
+        if config.magma.library_path:
+            return [config.magma.library_path]
+        return []
+
+    def make_node(self, A):
+        ctx_name = infer_context_name(A)
+        A = as_gpuarray_variable(A, ctx_name)
+        if A.ndim != 2:
+            raise LinAlgError("Matrix rank error")
+        if self.compute_v:
+            return theano.Apply(self, [A],
+                                [GpuArrayType(A.dtype, broadcastable=[False],
+                                              context_name=ctx_name)(),
+                                A.type()])
+        else:
+            return theano.Apply(self, [A],
+                                [GpuArrayType(A.dtype, broadcastable=[False],
+                                              context_name=ctx_name)()])
+
+    def get_params(self, node):
+        return node.inputs[0].type.context
+
+    def get_op_params(self):
+        params = []
+        if self.lower:
+            params.append(('LOWER', '1'))
+        if self.compute_v:
+            params.append(('COMPUTE_V', '1'))
+        return params
--- a/theano/gpuarray/magma_eigh.c
+++ b/theano/gpuarray/magma_eigh.c
+#section init_code
+
+setup_ext_cuda();
+
+#section support_code_struct
+
+int APPLY_SPECIFIC(magma_eigh)(PyGpuArrayObject *A_,
+                               PyGpuArrayObject **D,
+#ifdef COMPUTE_V
+                               PyGpuArrayObject **V,
+#endif
+                               PyGpuContextObject *c) {
+  PyGpuArrayObject *A = NULL;
+  magma_int_t N, liwork, *iwork_data = NULL;
+  size_t d_dims[1], v_dims[2];
+  magma_uplo_t uplo;
+  magma_vec_t jobz;
+  float *w_data = NULL, *wA_data = NULL, *work_data = NULL, lwork;
+  int res = -1, info;
+
+  if (A_->ga.typecode != GA_FLOAT) {
+    PyErr_SetString(PyExc_TypeError,
+                    "GpuMagmaEigh: Unsupported data type");
+    return -1;
+  }
+  if (!GpuArray_IS_C_CONTIGUOUS(&A_->ga)) {
+    PyErr_SetString(PyExc_ValueError,
+                    "GpuMagmaEigh: requires data to be C-contiguous");
+    return -1;
+  }
+  if (PyGpuArray_NDIM(A_) != 2) {
+    PyErr_SetString(PyExc_ValueError,
+                    "GpuMagmaEigh: matrix rank error");
+    return -1;
+  }
+  if (PyGpuArray_DIM(A_, 0) != PyGpuArray_DIM(A_, 1)) {
+    PyErr_SetString(PyExc_ValueError,
+                    "GpuMagmaEigh: matrix is not square");
+    return -1;
+  }
+  A = pygpu_copy(A_, GA_F_ORDER);
+  if (A == NULL) {
+    PyErr_SetString(PyExc_RuntimeError,
+                    "GpuMagmaQR: failed to change to column-major order");
+    return -1;
+  }
+
+  // This is early to match the exit() in the fail label.
+  cuda_enter(c->ctx);
+  magma_init();
+
+  // magma matrix eigen decomposition of a symmetric matrix
+  N = PyGpuArray_DIM(A, 0);
+
+#ifdef LOWER
+  uplo = MagmaLower;
+#else
+  uplo = MagmaUpper;
+#endif
+#ifdef COMPUTE_V
+  jobz = MagmaVec;
+#else
+  jobz = MagmaNoVec;
+#endif
+
+  if (MAGMA_SUCCESS != magma_smalloc_pinned(&w_data, N)) {
+    PyErr_SetString(PyExc_RuntimeError,
+                    "GpuMagmaSVD: failed to allocate working memory");
+    goto fail;
+  }
+  if (MAGMA_SUCCESS != magma_smalloc_pinned(&wA_data, N * N)) {
+    PyErr_SetString(PyExc_RuntimeError,
+                    "GpuMagmaSVD: failed to allocate working memory");
+    goto fail;
+  }
+
+  // query for workspace size
+  magma_ssyevd_gpu(jobz, uplo, N, NULL, N, NULL, NULL, N, &lwork,
+                   -1, &liwork, -1, &info);
+
+  if (MAGMA_SUCCESS != magma_smalloc_pinned(&work_data, (size_t)lwork)) {
+    PyErr_SetString(PyExc_RuntimeError,
+                    "GpuMagmaEigh: failed to allocate working memory");
+    goto fail;
+  }
+  if (MAGMA_SUCCESS != magma_imalloc_cpu(&iwork_data, liwork)) {
+    PyErr_SetString(PyExc_RuntimeError,
+                    "GpuMagmaEigh: failed to allocate working memory");
+    goto fail;
+  }
+
+  magma_ssyevd_gpu(jobz, uplo, N, (float *)PyGpuArray_DEV_DATA(A), N, w_data,
+                   wA_data, N, work_data, (size_t)lwork, iwork_data, liwork,
+                   &info);
+  if (info > 0) {
+    PyErr_Format(
+        PyExc_RuntimeError,
+        "GpuMagmaEigh:  %d off-diagonal elements of an didn't converge to zero",
+        info);
+    goto fail;
+  } else if (info < 0) {
+    PyErr_Format(
+        PyExc_RuntimeError,
+        "GpuMagmaEigh: magma_ssyevd_gpu argument %d has an illegal value", -info);
+    goto fail;
+  }
+
+  d_dims[0] = N;
+  if (theano_prep_output(D, 1, d_dims, A->ga.typecode, GA_C_ORDER, c) != 0){
+    PyErr_SetString(PyExc_RuntimeError,
+                    "GpuMagmaEigh: failed to allocate memory for the output");
+    goto fail;
+  }
+  cudaMemcpy(PyGpuArray_DEV_DATA(*D), w_data, N * sizeof(float),
+             cudaMemcpyDeviceToDevice);
+
+#ifdef COMPUTE_V
+  *V = theano_try_copy(*V, A);
+  if (*V == NULL) {
+    PyErr_SetString(PyExc_RuntimeError,
+                    "GpuMagmaEigh: failed to allocate memory for the output");
+    goto fail;
+  }
+#endif
+
+  res = 0;
+fail:
+  if (w_data != NULL)
+    magma_free_pinned(w_data);
+  if (wA_data != NULL)
+    magma_free_pinned(wA_data);
+  if (work_data != NULL)
+    magma_free_pinned(work_data);
+  if (iwork_data != NULL)
+    magma_free_cpu(iwork_data);
+  magma_finalize();
+  cuda_exit(c->ctx);
+  return res;
+}