提交 9ebd5b09 作者: Alexander Matyasko

Fix gpu cholesky inplace optimization

上级 f0a8c9ba
...@@ -41,7 +41,7 @@ int APPLY_SPECIFIC(magma_eigh)(PyGpuArrayObject *A_, ...@@ -41,7 +41,7 @@ int APPLY_SPECIFIC(magma_eigh)(PyGpuArrayObject *A_,
A = pygpu_copy(A_, GA_F_ORDER); A = pygpu_copy(A_, GA_F_ORDER);
if (A == NULL) { if (A == NULL) {
PyErr_SetString(PyExc_RuntimeError, PyErr_SetString(PyExc_RuntimeError,
"GpuMagmaQR: failed to change to column-major order"); "GpuMagmaEigh: failed to change to column-major order");
return -1; return -1;
} }
......
...@@ -312,15 +312,18 @@ class TestMagma(unittest.TestCase): ...@@ -312,15 +312,18 @@ class TestMagma(unittest.TestCase):
mode=mode_with_gpu.excluding('cusolver')) mode=mode_with_gpu.excluding('cusolver'))
return f(A_val) return f(A_val)
def check_cholesky(self, N, lower=True, rtol=None, atol=None): def rand_symmetric(self, N):
A = rand(N, N).astype('float32') A = rand(N, N).astype('float32')
# ensure that eigenvalues are not too small which sometimes results in # ensure that eigenvalues are not too small which sometimes results in
# magma failure due to gpu limited numerical precision # magma cholesky failure due to gpu limited numerical precision
D, W = np.linalg.eigh(A) D, W = np.linalg.eigh(A)
D[D < 1] = 1 D[D < 1] = 1
V_m = np.zeros_like(A) V_m = np.zeros_like(A)
np.fill_diagonal(V_m, D) np.fill_diagonal(V_m, D)
A = np.dot(np.dot(W.T, V_m), W) return np.dot(np.dot(W.T, V_m), W)
def check_cholesky(self, N, lower=True, rtol=None, atol=None):
A = self.rand_symmetric(N)
L = self.run_gpu_cholesky(A, lower=lower) L = self.run_gpu_cholesky(A, lower=lower)
if not lower: if not lower:
L = L.T L = L.T
...@@ -337,9 +340,7 @@ class TestMagma(unittest.TestCase): ...@@ -337,9 +340,7 @@ class TestMagma(unittest.TestCase):
for node in fn.maker.fgraph.toposort()]) for node in fn.maker.fgraph.toposort()])
def test_gpu_cholesky_inplace(self): def test_gpu_cholesky_inplace(self):
N = 1000 A = self.rand_symmetric(1000)
A = rand(N, N).astype('float32')
A = np.dot(A.T, A)
A_gpu = gpuarray_shared_constructor(A) A_gpu = gpuarray_shared_constructor(A)
A_copy = A_gpu.get_value() A_copy = A_gpu.get_value()
fn = theano.function([], GpuMagmaCholesky(inplace=True)(A_gpu), fn = theano.function([], GpuMagmaCholesky(inplace=True)(A_gpu),
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论