提交 6d7e4eab authored 作者: Thomas George's avatar Thomas George

cholesky work in progress

上级 d49b5368
......@@ -19,6 +19,7 @@ cusolver_available = False
try:
import skcuda
from skcuda import cusolver
from skcuda import linalg
cusolver_available = True
except (ImportError, OSError, RuntimeError, pkg_resources.DistributionNotFound):
pass
......@@ -52,6 +53,13 @@ if cusolver_available:
cusolver.cusolverCheckStatus(status)
def attach_handle_to_context(ctx):
handle = getattr(ctx, 'cusolver_handle', None)
if handle is None:
with ctx:
ctx.cusolver_handle = cusolver.cusolverDnCreate()
class GpuCusolverSolve(Op):
"""
CUSOLVER GPU solver OP.
......@@ -101,10 +109,7 @@ class GpuCusolverSolve(Op):
def prepare_node(self, node, storage_map, compute_map, impl):
ctx = node.inputs[0].type.context
handle = getattr(ctx, 'cusolver_handle', None)
if handle is None:
with ctx:
ctx.cusolver_handle = cusolver.cusolverDnCreate()
attach_handle_to_context(ctx)
def check_dev_info(self, dev_info):
val = np.asarray(dev_info)[0]
......@@ -212,3 +217,114 @@ class GpuCusolverSolve(Op):
def gpu_solve(A, b, A_structure='general', trans='N'):
return GpuCusolverSolve(A_structure, trans)(A, b)
class GpuCholesky(Op):
"""
CUSOLVER GPU Cholesky Op.
Given a real positive definite matrix `A` returns either a lower
triangular matrix `L` such that `A == dot(L, L.T)` if `lower == True`
else returns an upper triangular matrix `U` such that `A == dot(U.T, U)`
if `lower == False`.
Parameters
----------
lower
Whether to return a lower rather than upper triangular decomposition.
"""
__props__ = ('lower', 'inplace')
def __init__(self, lower=True, inplace=False):
self.lower = lower
self.inplace = inplace
if self.inplace:
self.destroy_map = {0: [0, 1]}
super(GpuCholesky, self).__init__()
def make_node(self, inp):
if not cusolver_available:
raise RuntimeError('CUSOLVER is not available and '
'GpuCusolverSolve Op can not be constructed.')
if skcuda.__version__ <= '0.5.1':
warnings.warn('The GpuSolve op requires scikit-cuda > 0.5.1 to work with CUDA 8')
context_name = basic_ops.infer_context_name(inp)
inp = basic_ops.as_gpuarray_variable(inp, context_name)
inp = basic_ops.gpu_contiguous(inp)
# this op can only operate on float32 matrices
assert inp.ndim == 2
assert inp.dtype == 'float32'
return theano.Apply(
self, [inp],
[GpuArrayType('float32',
broadcastable=inp.broadcastable,
context_name=context_name)()])
def prepare_node(self, node, storage_map, compute_map, impl):
ctx = node.inputs[0].type.context
attach_handle_to_context(ctx)
def perform(self, node, inputs, outputs):
context = inputs[0][0].context
# Input matrix.
A = inputs[0]
assert(len(A.shape) == 2)
l, n = A.shape
if l != n:
raise ValueError('A must be a square matrix')
lda = max(1, n)
# cusolver operates on F ordered matrices, but A is expected
# to be symmetric so it does not matter.
# We copy A if needed
if self.inplace:
L = A
else:
L = pygpu.array(A, copy=True)
# The output matrix will contain only the upper or lower
# triangular factorization of A. If L is C ordered (it
# probably is as it is the default in Theano) we just switch
# the fill mode parameter of cusolver
l_parameter = 0 if self.lower else 1
if L.flags['C_CONTIGUOUS']:
l_parameter = 1 - l_parameter
L_ptr = L.gpudata
with context:
workspace_size = cusolver.cusolverDnSpotrf_bufferSize(
context.cusolver_handle, l_parameter, n, L_ptr, lda)
workspace = pygpu.zeros(workspace_size, dtype='float32',
context=context)
dev_info = pygpu.zeros((1,), dtype='int32', context=context)
workspace_ptr = workspace.gpudata
dev_info_ptr = dev_info.gpudata
with context:
cusolver.cusolverDnSpotrf(
context.cusolver_handle, l_parameter, n, L_ptr, lda, workspace_ptr,
workspace_size, dev_info_ptr)
# cusolver leaves the elements in the matrix outside the considered
# upper or lower triangle unchanged, so we need to put zeros outside
# the triangle
if self.lower:
linalg.tril(L, overwrite=True)
else:
linalg.triu(L, overwrite=True)
outputs[0][0] = L
......@@ -11,7 +11,7 @@ from numpy.linalg.linalg import LinAlgError
# Skip tests if cuda_ndarray is not available.
from nose.plugins.skip import SkipTest
from theano.gpuarray.linalg import (cusolver_available, gpu_solve)
from theano.gpuarray.linalg import (cusolver_available, gpu_solve, GpuCholesky)
if not cusolver_available:
raise SkipTest('Optional package scikits.cuda.cusolver not available')
......@@ -112,3 +112,105 @@ class TestCusolver(unittest.TestCase):
fn = theano.function([A, b], [solver], mode=mode_with_gpu)
self.assertRaises(LinAlgError, fn, A_val, x_val)
class TestGpuCholesky(unittest.TestCase):
def setUp(self):
utt.seed_rng()
def get_gpu_cholesky_func(self, lower=True, inplace=False):
""" Helper function to compile function from GPU Cholesky op. """
A = theano.tensor.matrix("A", dtype="float32")
cholesky_op = GpuCholesky(lower=lower, inplace=inplace)
chol_A = cholesky_op(A)
return theano.function([A], chol_A, accept_inplace=inplace)
def compare_gpu_cholesky_to_numpy(self, A_val, lower=True, inplace=False):
""" Helper function to compare op output to numpy.cholesky output. """
chol_A_val = numpy.linalg.cholesky(A_val)
if not lower:
chol_A_val = chol_A_val.T
fn = self.get_gpu_cholesky_func(lower, inplace)
res = fn(A_val)
chol_A_res = numpy.array(res)
utt.assert_allclose(chol_A_res, chol_A_val)
def test_invalid_input_fail_non_square(self):
""" Invalid Cholesky input test with non-square matrix as input. """
A_val = numpy.random.normal(size=(3, 2)).astype("float32")
fn = self.get_gpu_cholesky_func(True, False)
self.assertRaises(ValueError, fn, A_val)
def test_invalid_input_fail_non_symmetric(self):
pass
""" Invalid Cholesky input test with non-symmetric input.
(Non-symmetric real input must also be non-positive definite). """
A_val = numpy.random.normal(size=(3, 3)).astype("float32")
# double-check random A_val is asymmetric - the probability of this
# not being the case even with finite precision should be negligible
assert not numpy.allclose(A_val, A_val.T)
fn = self.get_gpu_cholesky_func(True, False)
self.assertRaises(cula.cula.culaError, fn, A_val)
def test_invalid_input_fail_negative_definite(self):
""" Invalid Cholesky input test with negative-definite input. """
M_val = numpy.random.normal(size=(3, 3)).astype("float32")
# A = -M.dot(M) will be negative definite for all non-singular M
A_val = -M_val.dot(M_val.T)
fn = self.get_gpu_cholesky_func(True, False)
self.assertRaises(cula.cula.culaError, fn, A_val)
def test_invalid_input_fail_vector(self):
""" Invalid Cholesky input test with vector as input. """
def invalid_input_func():
A = theano.tensor.vector("A", dtype="float32")
GpuCholesky(lower=True, inplace=False)(A)
self.assertRaises(AssertionError, invalid_input_func)
def test_invalid_input_fail_tensor3(self):
""" Invalid Cholesky input test with 3D tensor as input. """
def invalid_input_func():
A = theano.tensor.tensor3("A", dtype="float32")
GpuCholesky(lower=True, inplace=False)(A)
self.assertRaises(AssertionError, invalid_input_func)
def test_diag_chol(self):
""" Diagonal matrix input Cholesky test. """
# make sure all diagonal elements are positive so positive-definite
A_val = numpy.diag(numpy.random.uniform(size=5).astype("float32") + 1)
self.compare_gpu_cholesky_to_numpy(A_val, lower=True, inplace=False)
def test_dense_chol_lower(self):
""" Dense matrix input lower-triangular Cholesky test. """
M_val = numpy.random.normal(size=(3, 3)).astype("float32")
# A = M.dot(M) will be positive definite for all non-singular M
A_val = M_val.dot(M_val.T)
self.compare_gpu_cholesky_to_numpy(A_val, lower=True, inplace=False)
def test_dense_chol_upper(self):
""" Dense matrix input upper-triangular Cholesky test. """
M_val = numpy.random.normal(size=(3, 3)).astype("float32")
# A = M.dot(M) will be positive definite for all non-singular M
A_val = M_val.dot(M_val.T)
self.compare_gpu_cholesky_to_numpy(A_val, lower=False, inplace=False)
def test_diag_chol_inplace(self):
""" Diagonal matrix input inplace Cholesky test. """
# make sure all diagonal elements are positive so positive-definite
A_val = numpy.diag(numpy.random.uniform(size=5).astype("float32") + 1)
self.compare_gpu_cholesky_to_numpy(A_val, lower=True, inplace=True)
def test_dense_chol_lower_inplace(self):
""" Dense matrix input lower-triangular inplace Cholesky test. """
M_val = numpy.random.normal(size=(3, 3)).astype("float32")
# A = M.dot(M) will be positive definite for all non-singular M
A_val = M_val.dot(M_val.T)
self.compare_gpu_cholesky_to_numpy(A_val, lower=True, inplace=True)
def test_dense_chol_upper_inplace(self):
""" Dense matrix input upper-triangular inplace Cholesky test. """
M_val = numpy.random.normal(size=(3, 3)).astype("float32")
# A = M.dot(M) will be positive definite for all non-singular M
A_val = M_val.dot(M_val.T)
self.compare_gpu_cholesky_to_numpy(A_val, lower=False, inplace=True)
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论