提交 5f1e372d，作者：Alexander Matyasko

Fix make_node for GPU SVD and add infer_shape

上级 625e75cd
...@@ -8,7 +8,7 @@ import numpy as np ...@@ -8,7 +8,7 @@ import numpy as np
from numpy.linalg.linalg import LinAlgError from numpy.linalg.linalg import LinAlgError
import theano import theano
from theano import Op, config from theano import Op, config, tensor
from theano.gof import COp from theano.gof import COp
from theano.gpuarray import GpuArrayType from theano.gpuarray import GpuArrayType
...@@ -382,20 +382,39 @@ class GpuMagmaSVD(COp): ...@@ -382,20 +382,39 @@ class GpuMagmaSVD(COp):
A = as_gpuarray_variable(A, ctx_name) A = as_gpuarray_variable(A, ctx_name)
if A.ndim != 2: if A.ndim != 2:
raise LinAlgError("Matrix rank error") raise LinAlgError("Matrix rank error")
return theano.Apply(self, [A], if self.compute_uv:
[A.type(), return theano.Apply(self, [A],
GpuArrayType(A.dtype, broadcastable=[False], [A.type(),
context_name=ctx_name)(), GpuArrayType(A.dtype, broadcastable=[False],
A.type()]) context_name=ctx_name)(),
A.type()])
else:
return theano.Apply(self, [A],
[GpuArrayType(A.dtype, broadcastable=[False],
context_name=ctx_name)()])
def get_params(self, node): def get_params(self, node):
return node.inputs[0].type.context return node.inputs[0].type.context
def get_op_params(self): def get_op_params(self):
compute_uv = int(self.compute_uv) params = []
full_matrices = int(self.full_matrices) if self.compute_uv:
return [('COMPUTE_UV', compute_uv), params.append(('COMPUTE_UV', '1'))
('FULL_MATRICES', full_matrices)] if self.full_matrices:
params.append(('FULL_MATRICES', '1'))
return params
def infer_shape(self, node, shapes):
    """Derive the output shapes of the SVD from the shape of its input.

    ``shapes`` holds exactly one entry, the two-element shape ``(M, N)``
    of the input; ``K`` below is ``minimum(M, N)``.

    Returns ``[s_shape]`` with ``s_shape == (K,)`` when ``compute_uv`` is
    false. Otherwise returns ``[u_shape, s_shape, vt_shape]``, where
    ``full_matrices`` selects ``(M, M)`` / ``(N, N)`` and the reduced
    form is ``(M, K)`` / ``(K, N)``.
    """
    (rows, cols), = shapes
    rank = tensor.minimum(rows, cols)
    singular_shape = (rank, )

    # Only the singular values are produced when U and VT are not requested.
    if not self.compute_uv:
        return [singular_shape]

    if self.full_matrices:
        left_shape, right_shape = (rows, rows), (cols, cols)
    else:
        left_shape, right_shape = (rows, rank), (rank, cols)
    return [left_shape, singular_shape, right_shape]
def gpu_svd(a, full_matrices=1, compute_uv=1): def gpu_svd(a, full_matrices=1, compute_uv=1):
......
...@@ -4,8 +4,14 @@ setup_ext_cuda(); ...@@ -4,8 +4,14 @@ setup_ext_cuda();
#section support_code_struct #section support_code_struct
int APPLY_SPECIFIC(magma_svd)(PyGpuArrayObject *A, PyGpuArrayObject **U, int APPLY_SPECIFIC(magma_svd)(PyGpuArrayObject *A,
PyGpuArrayObject **S, PyGpuArrayObject **VT, #ifdef COMPUTE_UV
PyGpuArrayObject **U,
#endif
PyGpuArrayObject **S,
#ifdef COMPUTE_UV
PyGpuArrayObject **VT,
#endif
PyGpuContextObject *c) { PyGpuContextObject *c) {
magma_int_t M, N, K, ldu, ldv, M_U, N_VT, info; magma_int_t M, N, K, ldu, ldv, M_U, N_VT, info;
magma_vec_t jobu, jobv; magma_vec_t jobu, jobv;
...@@ -56,37 +62,35 @@ int APPLY_SPECIFIC(magma_svd)(PyGpuArrayObject *A, PyGpuArrayObject **U, ...@@ -56,37 +62,35 @@ int APPLY_SPECIFIC(magma_svd)(PyGpuArrayObject *A, PyGpuArrayObject **U,
goto fail; goto fail;
} }
if (COMPUTE_UV) { #ifdef COMPUTE_UV
if (FULL_MATRICES) { #ifdef FULL_MATRICES
jobu = MagmaAllVec; jobu = MagmaAllVec;
jobv = MagmaAllVec; jobv = MagmaAllVec;
} #else
else { jobu = MagmaSomeVec;
jobu = MagmaSomeVec; jobv = MagmaSomeVec;
jobv = MagmaSomeVec; #endif
} M_U = (jobu == MagmaAllVec ? M : K);
M_U = (jobu == MagmaAllVec ? M : K); N_VT = (jobv == MagmaAllVec ? N : K);
N_VT = (jobv == MagmaAllVec ? N : K); ldu = M;
ldu = M; ldv = N_VT;
ldv = N_VT;
if (MAGMA_SUCCESS != magma_smalloc_pinned(&u_data, M_U * M)) {
if (MAGMA_SUCCESS != magma_smalloc_pinned(&u_data, M_U * M)) { PyErr_SetString(PyExc_RuntimeError,
PyErr_SetString(PyExc_RuntimeError, "GpuMagmaSVD: failed to allocate memory");
"GpuMagmaSVD: failed to allocate memory"); goto fail;
goto fail;
}
if (MAGMA_SUCCESS != magma_smalloc_pinned(&vt_data, N * N_VT)) {
PyErr_SetString(PyExc_RuntimeError,
"GpuMagmaSVD: failed to allocate memory");
goto fail;
}
} }
else { if (MAGMA_SUCCESS != magma_smalloc_pinned(&vt_data, N * N_VT)) {
jobu = MagmaNoVec; PyErr_SetString(PyExc_RuntimeError,
jobv = MagmaNoVec; "GpuMagmaSVD: failed to allocate memory");
ldu = M; goto fail;
ldv = N;
} }
#else
jobu = MagmaNoVec;
jobv = MagmaNoVec;
ldu = M;
ldv = N;
#endif
// query for workspace size // query for workspace size
magma_sgesvd(jobu, jobv, M, N, NULL, M, NULL, NULL, ldu, NULL, ldv, magma_sgesvd(jobu, jobv, M, N, NULL, M, NULL, NULL, ldu, NULL, ldv,
...@@ -124,6 +128,7 @@ int APPLY_SPECIFIC(magma_svd)(PyGpuArrayObject *A, PyGpuArrayObject **U, ...@@ -124,6 +128,7 @@ int APPLY_SPECIFIC(magma_svd)(PyGpuArrayObject *A, PyGpuArrayObject **U,
cudaMemcpy(PyGpuArray_DEV_DATA(*S), s_data, K * sizeof(float), cudaMemcpy(PyGpuArray_DEV_DATA(*S), s_data, K * sizeof(float),
cudaMemcpyDeviceToDevice); cudaMemcpyDeviceToDevice);
#ifdef COMPUTE_UV
u_dims[0] = N; u_dims[1] = N_VT; u_dims[0] = N; u_dims[1] = N_VT;
if (theano_prep_output(U, 2, u_dims, A->ga.typecode, GA_C_ORDER, c) != 0){ if (theano_prep_output(U, 2, u_dims, A->ga.typecode, GA_C_ORDER, c) != 0){
PyErr_SetString(PyExc_RuntimeError, PyErr_SetString(PyExc_RuntimeError,
...@@ -145,7 +150,7 @@ int APPLY_SPECIFIC(magma_svd)(PyGpuArrayObject *A, PyGpuArrayObject **U, ...@@ -145,7 +150,7 @@ int APPLY_SPECIFIC(magma_svd)(PyGpuArrayObject *A, PyGpuArrayObject **U,
// to match numpy.linalg.svd output // to match numpy.linalg.svd output
cudaMemcpy(PyGpuArray_DEV_DATA(*VT), u_data, M_U * M * sizeof(float), cudaMemcpy(PyGpuArray_DEV_DATA(*VT), u_data, M_U * M * sizeof(float),
cudaMemcpyDeviceToDevice); cudaMemcpyDeviceToDevice);
#endif
res = 0; res = 0;
fail: fail:
if (a_data != NULL) if (a_data != NULL)
......
...@@ -292,7 +292,7 @@ class TestMagma(unittest.TestCase): ...@@ -292,7 +292,7 @@ class TestMagma(unittest.TestCase):
mode=mode_with_gpu.including('magma')) mode=mode_with_gpu.including('magma'))
A_val = rand(50, 100) A_val = rand(50, 100)
utt.assert_allclose(f_cpu(A_val), f_gpu(A_val)[1]) utt.assert_allclose(f_cpu(A_val), f_gpu(A_val))
A_val = rand(100, 50) A_val = rand(100, 50)
utt.assert_allclose(f_cpu(A_val), f_gpu(A_val)[1]) utt.assert_allclose(f_cpu(A_val), f_gpu(A_val))
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论