提交 5f1e372d authored 作者: Alexander Matyasko's avatar Alexander Matyasko

Fix make node for gpu svd and add infer shape

上级 625e75cd
......@@ -8,7 +8,7 @@ import numpy as np
from numpy.linalg.linalg import LinAlgError
import theano
from theano import Op, config
from theano import Op, config, tensor
from theano.gof import COp
from theano.gpuarray import GpuArrayType
......@@ -382,20 +382,39 @@ class GpuMagmaSVD(COp):
A = as_gpuarray_variable(A, ctx_name)
if A.ndim != 2:
raise LinAlgError("Matrix rank error")
return theano.Apply(self, [A],
[A.type(),
GpuArrayType(A.dtype, broadcastable=[False],
context_name=ctx_name)(),
A.type()])
if self.compute_uv:
return theano.Apply(self, [A],
[A.type(),
GpuArrayType(A.dtype, broadcastable=[False],
context_name=ctx_name)(),
A.type()])
else:
return theano.Apply(self, [A],
[GpuArrayType(A.dtype, broadcastable=[False],
context_name=ctx_name)()])
def get_params(self, node):
    """Return the GPU context of the node's first input.

    The context object is handed to the COp at runtime as its params.
    """
    first_input = node.inputs[0]
    return first_input.type.context
def get_op_params(self):
    """Return the compile-time ``#define`` parameters for the C code.

    ``COMPUTE_UV`` and ``FULL_MATRICES`` are defined only when the
    corresponding flag is set on the op, so the C implementation can
    select code paths with ``#ifdef`` rather than testing a value
    (the C section of this op uses ``#ifdef COMPUTE_UV``).

    Returns
    -------
    list of (str, str)
        Macro name/value pairs; empty when neither flag is set.
    """
    # NOTE(review): the diff hunk contained both the old body
    # (unconditional int-valued defines) and this new conditional
    # body; the conditional form is the one the C code expects.
    params = []
    if self.compute_uv:
        params.append(('COMPUTE_UV', '1'))
    if self.full_matrices:
        params.append(('FULL_MATRICES', '1'))
    return params
def infer_shape(self, node, shapes):
    """Symbolically infer the output shapes from the input shape.

    For an (M, N) input the singular values S have shape (K,) with
    K = min(M, N).  When ``compute_uv`` is set, U is (M, M) and VT is
    (N, N) for ``full_matrices``, otherwise they are truncated to
    (M, K) and (K, N) respectively.
    """
    input_shape, = shapes
    rows, cols = input_shape
    k = tensor.minimum(rows, cols)
    if not self.compute_uv:
        return [(k,)]
    u_shape = (rows, rows) if self.full_matrices else (rows, k)
    vt_shape = (cols, cols) if self.full_matrices else (k, cols)
    return [u_shape, (k,), vt_shape]
def gpu_svd(a, full_matrices=1, compute_uv=1):
......
......@@ -4,8 +4,14 @@ setup_ext_cuda();
#section support_code_struct
int APPLY_SPECIFIC(magma_svd)(PyGpuArrayObject *A, PyGpuArrayObject **U,
PyGpuArrayObject **S, PyGpuArrayObject **VT,
int APPLY_SPECIFIC(magma_svd)(PyGpuArrayObject *A,
#ifdef COMPUTE_UV
PyGpuArrayObject **U,
#endif
PyGpuArrayObject **S,
#ifdef COMPUTE_UV
PyGpuArrayObject **VT,
#endif
PyGpuContextObject *c) {
magma_int_t M, N, K, ldu, ldv, M_U, N_VT, info;
magma_vec_t jobu, jobv;
......@@ -56,37 +62,35 @@ int APPLY_SPECIFIC(magma_svd)(PyGpuArrayObject *A, PyGpuArrayObject **U,
goto fail;
}
if (COMPUTE_UV) {
if (FULL_MATRICES) {
jobu = MagmaAllVec;
jobv = MagmaAllVec;
}
else {
jobu = MagmaSomeVec;
jobv = MagmaSomeVec;
}
M_U = (jobu == MagmaAllVec ? M : K);
N_VT = (jobv == MagmaAllVec ? N : K);
ldu = M;
ldv = N_VT;
if (MAGMA_SUCCESS != magma_smalloc_pinned(&u_data, M_U * M)) {
PyErr_SetString(PyExc_RuntimeError,
"GpuMagmaSVD: failed to allocate memory");
goto fail;
}
if (MAGMA_SUCCESS != magma_smalloc_pinned(&vt_data, N * N_VT)) {
PyErr_SetString(PyExc_RuntimeError,
"GpuMagmaSVD: failed to allocate memory");
goto fail;
}
#ifdef COMPUTE_UV
#ifdef FULL_MATRICES
jobu = MagmaAllVec;
jobv = MagmaAllVec;
#else
jobu = MagmaSomeVec;
jobv = MagmaSomeVec;
#endif
M_U = (jobu == MagmaAllVec ? M : K);
N_VT = (jobv == MagmaAllVec ? N : K);
ldu = M;
ldv = N_VT;
if (MAGMA_SUCCESS != magma_smalloc_pinned(&u_data, M_U * M)) {
PyErr_SetString(PyExc_RuntimeError,
"GpuMagmaSVD: failed to allocate memory");
goto fail;
}
else {
jobu = MagmaNoVec;
jobv = MagmaNoVec;
ldu = M;
ldv = N;
if (MAGMA_SUCCESS != magma_smalloc_pinned(&vt_data, N * N_VT)) {
PyErr_SetString(PyExc_RuntimeError,
"GpuMagmaSVD: failed to allocate memory");
goto fail;
}
#else
jobu = MagmaNoVec;
jobv = MagmaNoVec;
ldu = M;
ldv = N;
#endif
// query for workspace size
magma_sgesvd(jobu, jobv, M, N, NULL, M, NULL, NULL, ldu, NULL, ldv,
......@@ -124,6 +128,7 @@ int APPLY_SPECIFIC(magma_svd)(PyGpuArrayObject *A, PyGpuArrayObject **U,
cudaMemcpy(PyGpuArray_DEV_DATA(*S), s_data, K * sizeof(float),
cudaMemcpyDeviceToDevice);
#ifdef COMPUTE_UV
u_dims[0] = N; u_dims[1] = N_VT;
if (theano_prep_output(U, 2, u_dims, A->ga.typecode, GA_C_ORDER, c) != 0){
PyErr_SetString(PyExc_RuntimeError,
......@@ -145,7 +150,7 @@ int APPLY_SPECIFIC(magma_svd)(PyGpuArrayObject *A, PyGpuArrayObject **U,
// to match numpy.linalg.svd output
cudaMemcpy(PyGpuArray_DEV_DATA(*VT), u_data, M_U * M * sizeof(float),
cudaMemcpyDeviceToDevice);
#endif
res = 0;
fail:
if (a_data != NULL)
......
......@@ -292,7 +292,7 @@ class TestMagma(unittest.TestCase):
mode=mode_with_gpu.including('magma'))
A_val = rand(50, 100)
utt.assert_allclose(f_cpu(A_val), f_gpu(A_val)[1])
utt.assert_allclose(f_cpu(A_val), f_gpu(A_val))
A_val = rand(100, 50)
utt.assert_allclose(f_cpu(A_val), f_gpu(A_val)[1])
utt.assert_allclose(f_cpu(A_val), f_gpu(A_val))
Markdown 格式
0%
您将 0 人添加到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论