提交 a8996904 authored 作者: Thomas George's avatar Thomas George

gpu cholesky now using tril/triu from pygpu.basic

上级 8289fc6b
...@@ -16,8 +16,10 @@ from numpy.linalg.linalg import LinAlgError ...@@ -16,8 +16,10 @@ from numpy.linalg.linalg import LinAlgError
try: try:
import pygpu import pygpu
from pygpu.basic import triu, tril
pygpu_available = True
except ImportError: except ImportError:
pass pygpu_available = False
cusolver_available = False cusolver_available = False
try: try:
...@@ -63,44 +65,6 @@ def attach_cusolver_handle_to_context(ctx): ...@@ -63,44 +65,6 @@ def attach_cusolver_handle_to_context(ctx):
ctx.cusolver_handle = cusolver.cusolverDnCreate() ctx.cusolver_handle = cusolver.cusolverDnCreate()
def tril(A, ctx):
tmpl = Template("""
KERNEL void tril(GLOBAL_MEM ga_float *a, ga_uint N) {
unsigned int idx = blockIdx.y*blockDim.x*gridDim.x+
blockIdx.x*blockDim.x+threadIdx.x;
unsigned int ix = idx/${cols};
unsigned int iy = idx%${cols};
if (idx < N) {
if (ix < iy)
a[idx] = 0.0;
}
}
""")
src = tmpl.substitute(cols=A.shape[0])
spec = [GpuArray, 'uint32']
k = GpuKernel(src, "tril", spec, context=ctx)
return k(A, A.shape[0] * A.shape[1], n=A.shape[0])
def triu(A, ctx):
tmpl = Template("""
KERNEL void triu(GLOBAL_MEM ga_float *a, ga_uint N) {
unsigned int idx = blockIdx.y*blockDim.x*gridDim.x+
blockIdx.x*blockDim.x+threadIdx.x;
unsigned int ix = idx/${cols};
unsigned int iy = idx%${cols};
if (idx < N) {
if (ix > iy)
a[idx] = 0.0;
}
}
""")
src = tmpl.substitute(cols=A.shape[0])
spec = [GpuArray, 'uint32']
k = GpuKernel(src, "triu", spec, context=ctx)
return k(A, A.shape[0] * A.shape[1], n=A.shape[0])
class GpuCusolverSolve(Op): class GpuCusolverSolve(Op):
""" """
CUSOLVER GPU solver OP. CUSOLVER GPU solver OP.
...@@ -288,9 +252,12 @@ class GpuCholesky(Op): ...@@ -288,9 +252,12 @@ class GpuCholesky(Op):
def make_node(self, inp): def make_node(self, inp):
if not cusolver_available: if not cusolver_available:
raise RuntimeError('CUSOLVER is not available and ' raise RuntimeError('CUSOLVER is not available and '
'GpuCusolverSolve Op can not be constructed.') 'GpuCholesky Op can not be constructed.')
if skcuda.__version__ <= '0.5.1': if skcuda.__version__ <= '0.5.1':
warnings.warn('The GpuSolve op requires scikit-cuda > 0.5.1 to work with CUDA 8') warnings.warn('The GpuSolve op requires scikit-cuda > 0.5.1 to work with CUDA 8')
if not pygpu_available:
raise RuntimeError('Missing pygpu or triu/tril functions.'
'Try updating libgpuarray?')
context_name = basic_ops.infer_context_name(inp) context_name = basic_ops.infer_context_name(inp)
inp = basic_ops.as_gpuarray_variable(inp, context_name) inp = basic_ops.as_gpuarray_variable(inp, context_name)
...@@ -367,9 +334,9 @@ class GpuCholesky(Op): ...@@ -367,9 +334,9 @@ class GpuCholesky(Op):
# upper or lower triangle unchanged, so we need to put zeros outside # upper or lower triangle unchanged, so we need to put zeros outside
# the triangle # the triangle
# Note : we should probably check for c or f order in triu instead of here # Note : we should probably check for c or f order in triu instead of here
if self.lower and L.flags['C_CONTIGUOUS'] or (not self.lower and L.flags['F_CONTIGUOUS']): if self.lower:
tril(L, context) tril(L)
else: else:
triu(L, context) triu(L)
outputs[0][0] = L outputs[0][0] = L
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论