提交 b1f09ae4 authored 作者: wonghang's avatar wonghang

Add code for L_op of solve, but failed in triangular one

上级 80fc28a5
...@@ -279,6 +279,39 @@ class GpuCusolverSolve(Op): ...@@ -279,6 +279,39 @@ class GpuCusolverSolve(Op):
z[0] = b z[0] = b
def L_op(self, inputs, outputs, output_gradients):
"""
Modified from theano/tensor/slinalg.py
"""
A, b = inputs
c = outputs[0]
c_bar = output_gradients[0]
trans_map = {
'lower_triangular': 'upper_triangular',
'upper_triangular': 'lower_triangular',
}
trans_map2 = {
'N': 'T',
'T': 'N',
}
# if self.A_structure == 'lower_triangular':
# trans_solve_op = GpuCublasTriangularSolve(lower=False)
# elif self.A_structure == 'upper_triangular':
# trans_solve_op = GpuCublasTriangularSolve(lower=True)
# else:
trans_solve_op = GpuCusolverSolve(
# update A_structure and lower to account for a transpose operation
A_structure=trans_map.get(self.A_structure,self.A_structure),
# trans=trans_map2[self.trans],
)
b_bar = trans_solve_op(A.T, c_bar)
# force outer product if vector second input
A_bar = -tensor.outer(b_bar, c) if c.ndim == 1 else -b_bar.dot(c.T)
if self.A_structure == 'lower_triangular':
A_bar = tensor.tril(A_bar)
elif self.A_structure == 'upper_triangular':
A_bar = tensor.triu(A_bar)
return [A_bar, b_bar]
class GpuCublasTriangularSolve(Op): class GpuCublasTriangularSolve(Op):
""" """
...@@ -400,7 +433,6 @@ class GpuCublasTriangularSolve(Op): ...@@ -400,7 +433,6 @@ class GpuCublasTriangularSolve(Op):
x[0] = b x[0] = b
def gpu_solve(A, b, A_structure='general', trans='N'): def gpu_solve(A, b, A_structure='general', trans='N'):
if A_structure == 'lower': if A_structure == 'lower':
return GpuCublasTriangularSolve(True, trans)(A, b) return GpuCublasTriangularSolve(True, trans)(A, b)
...@@ -409,12 +441,6 @@ def gpu_solve(A, b, A_structure='general', trans='N'): ...@@ -409,12 +441,6 @@ def gpu_solve(A, b, A_structure='general', trans='N'):
return GpuCusolverSolve(A_structure, trans)(A, b) return GpuCusolverSolve(A_structure, trans)(A, b)
# added these to make the module consistent to theano/tensor/slinalg.py
def gpu_solve_lower_triangular(A,b):
return GpuCublasTriangularSolve(True,'N')(A,b)
def gpu_solve_upper_triangular(A,b):
return GpuCublasTriangularSolve(False,'N')(A,b)
class GpuCholesky(Op): class GpuCholesky(Op):
""" """
CUSOLVER GPU Cholesky Op. CUSOLVER GPU Cholesky Op.
......
...@@ -7,10 +7,12 @@ from numpy.linalg.linalg import LinAlgError ...@@ -7,10 +7,12 @@ from numpy.linalg.linalg import LinAlgError
import theano import theano
from theano import config from theano import config
from theano.gpuarray.linalg import (GpuCholesky, GpuMagmaCholesky, from theano.gpuarray.linalg import (GpuCusolverSolve,GpuCublasTriangularSolve,
GpuCholesky, GpuMagmaCholesky,
GpuMagmaEigh, GpuMagmaMatrixInverse, GpuMagmaEigh, GpuMagmaMatrixInverse,
GpuMagmaQR, GpuMagmaSVD, GpuMagmaQR, GpuMagmaSVD,
cusolver_available, gpu_matrix_inverse, cusolver_available, gpu_matrix_inverse,
gpu_cholesky,
gpu_solve, gpu_svd, gpu_qr) gpu_solve, gpu_svd, gpu_qr)
from theano.tensor.nlinalg import (SVD, MatrixInverse, QRFull, from theano.tensor.nlinalg import (SVD, MatrixInverse, QRFull,
QRIncomplete, eigh, matrix_inverse, qr) QRIncomplete, eigh, matrix_inverse, qr)
...@@ -122,6 +124,37 @@ class TestCusolver(unittest.TestCase): ...@@ -122,6 +124,37 @@ class TestCusolver(unittest.TestCase):
fn = theano.function([A, b], [solver], mode=mode_with_gpu) fn = theano.function([A, b], [solver], mode=mode_with_gpu)
self.assertRaises(LinAlgError, fn, A_val, x_val) self.assertRaises(LinAlgError, fn, A_val, x_val)
def verify_solve_grad(self, m, n, A_structure, lower, rng):
# ensure diagonal elements of A relatively large to avoid numerical
# precision issues
A_val = (rng.normal(size=(m, m)) * 0.5 +
np.eye(m)).astype(config.floatX)
if A_structure == 'lower_triangular':
A_val = np.tril(A_val)
elif A_structure == 'upper_triangular':
A_val = np.triu(A_val)
if n is None:
b_val = rng.normal(size=m).astype(config.floatX)
else:
b_val = rng.normal(size=(m, n)).astype(config.floatX)
eps = None
if config.floatX == "float64":
eps = 2e-8
solve_op = GpuCusolverSolve(A_structure=A_structure)
utt.verify_grad(solve_op, [A_val, b_val], 3, rng, eps=eps)
def test_solve_grad(self):
rng = np.random.RandomState(utt.fetch_seed())
# structures = ['general', 'lower_triangular', 'upper_triangular']
structures = ['general']
for A_structure in structures:
lower = (A_structure == 'lower_triangular')
# self.verify_solve_grad(5, None, A_structure, lower, rng)
self.verify_solve_grad(6, 1, A_structure, lower, rng)
self.verify_solve_grad(4, 3, A_structure, lower, rng)
# lower should have no effect for A_structure == 'general' so also
# check lower=True case
self.verify_solve_grad(4, 3, 'general', lower=True, rng=rng)
class TestGpuCholesky(unittest.TestCase): class TestGpuCholesky(unittest.TestCase):
...@@ -558,3 +591,34 @@ class TestMagma(unittest.TestCase): ...@@ -558,3 +591,34 @@ class TestMagma(unittest.TestCase):
isinstance(node.op, GpuMagmaEigh) isinstance(node.op, GpuMagmaEigh)
for node in fn.maker.fgraph.toposort() for node in fn.maker.fgraph.toposort()
]) ])
# copied from theano/tensor/tests/test_slinalg.py
def test_cholesky_grad():
rng = np.random.RandomState(utt.fetch_seed())
r = rng.randn(5, 5).astype(config.floatX)
# The dots are inside the graph since Cholesky needs separable matrices
# Check the default.
yield (lambda: utt.verify_grad(lambda r: gpu_cholesky(r.dot(r.T)),
[r], 3, rng))
# Explicit lower-triangular.
yield (lambda: utt.verify_grad(lambda r: GpuCholesky(lower=True)(r.dot(r.T)),
[r], 3, rng))
# Explicit upper-triangular.
yield (lambda: utt.verify_grad(lambda r: GpuCholesky(lower=False)(r.dot(r.T)),
[r], 3, rng))
def test_cholesky_grad_indef():
x = theano.tensor.matrix()
matrix = np.array([[1, 0.2], [0.2, -2]]).astype(config.floatX)
cholesky = GpuCholesky(lower=True)
chol_f = theano.function([x], theano.tensor.grad(gpu_cholesky(x).sum(), [x]))
with assert_raises(scipy.linalg.LinAlgError):
chol_f(matrix)
# cholesky = GpuCholesky(lower=True, on_error='nan')
# chol_f = function([x], grad(gpu_cholesky(x).sum(), [x]))
# assert np.all(np.isnan(chol_f(matrix)))
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论