提交 a2fe5c5d authored 作者: Caglar's avatar Caglar

used ls again.

上级 c4bcd45e
...@@ -19,6 +19,7 @@ if cula is not None: ...@@ -19,6 +19,7 @@ if cula is not None:
import numpy import numpy
class GpuSolve(GpuOp): class GpuSolve(GpuOp):
""" """
CULA GPU solver OP. CULA GPU solver OP.
...@@ -55,53 +56,58 @@ class GpuSolve(GpuOp): ...@@ -55,53 +56,58 @@ class GpuSolve(GpuOp):
def thunk(): def thunk():
input_shape = inputs[1][0].shape input_shape = inputs[1][0].shape
#size of the matrices to invert #size of the matrices to invert
z = outputs[0] z = outputs[0]
#Matrix #Matrix
A = inputs[0][0] A = inputs[0][0]
#Solution vectors #Solution vectors
b = inputs[1][0] b = inputs[1][0]
A_cpy = A.copy()
b_cpy = b.copy()
A_pycuda = to_gpuarray(A_cpy) #A_cpy = A.copy()
b_pycuda = to_gpuarray(b_cpy) #b_cpy = b.copy()
A_pycuda = to_gpuarray(A)
b_pycuda = to_gpuarray(b)
def cula_gpu_solve(A, b): def cula_gpu_solve(A, b, trans='N'):
A_shape = A.shape A_shape = A.shape
b_shape = b.shape b_shape = b.shape
assert(len(A_shape) == 2) assert(len(A_shape) == 2)
assert(len(b_shape) == 2) assert(len(b_shape) == 2)
import string
if A_shape[0] != A_shape[1]: if trans in ['T', 'C']:
raise ValueError('Coefficient matrix should be a square matrix.') l, n = A_shape
k, m = b_shape
elif trans in ['N']:
n, l = A_shape
k, m = b_shape
else:
raise ValueError('Invalid value for trans')
n = A_shape[0] if n != k:
nrhs = b_shape[1] raise ValueError('A and b must be aligned.')
#Create the integer pivot vector to store the indices for
#permutation matrix.
ipiv = CudaNdarray.zeros((n,))
ipiv = to_gpuarray(ipiv)
import string if trans == 'N':
lda = max(1, n) lda = max(1, n)
ldb = max(1, n) else:
lda = max(1, l)
ldb = max(1, k)
# construct pointer arrays needed for culaDeviceSgels # construct pointer arrays needed for culaDeviceSgels
# Cula requires you to pass a pointer for A and b. # Cula requires you to pass a pointer for A and b.
A_ptr = A_cpy.gpudata A_ptr = A.gpudata
b_ptr = b_cpy.gpudata b_ptr = b.gpudata
ipiv_ptr = ipiv.gpudata
cula.culaDeviceSgesv(n, nrhs, A_ptr, lda, ipiv_ptr, b_ptr, ldb) cula.culaDeviceSgels(trans, n, l, m, A_ptr, lda, b_ptr, ldb)
return A, b return A, b
A_pycuda, b_pycuda = cula_gpu_solve(A_pycuda, b_pycuda) A_pycuda, b_pycuda = cula_gpu_solve(A_pycuda, b_pycuda, self.trans)
z[0] = b z[0] = b
thunk.inputs = inputs thunk.inputs = inputs
...@@ -110,4 +116,4 @@ class GpuSolve(GpuOp): ...@@ -110,4 +116,4 @@ class GpuSolve(GpuOp):
return thunk return thunk
gpu_solve = GpuSolve() gpu_solve = GpuSolve(trans="T")
...@@ -52,7 +52,8 @@ class TestCula(unittest.TestCase): ...@@ -52,7 +52,8 @@ class TestCula(unittest.TestCase):
def test_orth_solve(self): def test_orth_solve(self):
A_val = numpy.random.uniform(-0.4, 0.4, (5, 5)).astype("float32") A_val = numpy.random.uniform(-0.4, 0.4, (5, 5)).astype("float32")
A_orth = numpy.linalg.svd(A_val)[0] A_orth = numpy.linalg.svd(A_val)[0]
x_val = numpy.random.uniform(-0.4, 0.4, (A_orth.shape[1], A_orth.shape[0])).astype("float32") #import ipdb; ipdb.set_trace()
x_val = numpy.random.uniform(-0.4, 0.4, (A_orth.shape[1], 1)).astype("float32")
self.run_gpu_solve(A_orth, x_val) self.run_gpu_solve(A_orth, x_val)
def test_uni_rand_solve(self): def test_uni_rand_solve(self):
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论