Unverified commit 813faf6a, authored by abergeron, committed by GitHub

Merge pull request #6653 from wonghang/potrf64_and_Lop

float64 support for GpuCholesky, GpuCusolverSolve, GpuCublasTriangularSolve and their L_op
...@@ -1712,3 +1712,114 @@ KERNEL void eye(GLOBAL_MEM %(ctype)s *a, ga_size a_off, ...@@ -1712,3 +1712,114 @@ KERNEL void eye(GLOBAL_MEM %(ctype)s *a, ga_size a_off,
def c_code_cache_version(self): def c_code_cache_version(self):
return (10,) return (10,)
class GpuTri(GpuKernelBase, Op):
    """
    Tri for GPU.

    Builds an (n, m) matrix filled with ones at and below the k-th
    diagonal and zeros elsewhere (GPU counterpart of ``theano.tensor.tri``).
    """
    __props__ = ('dtype', 'context_name')
    _f16_ok = True

    def __init__(self, dtype=None, context_name=None):
        # Default to the configured float type, mirroring tensor.tri.
        if dtype is None:
            dtype = config.floatX
        self.dtype = dtype
        self.context_name = context_name

    def get_params(self, node):
        # The GPU context object is handed to c_code via sub['params'].
        return get_context(self.context_name)

    def make_node(self, n, m, k):
        # All three inputs are scalars: rows, columns, diagonal offset.
        n = tensor.as_tensor_variable(n)
        m = tensor.as_tensor_variable(m)
        k = tensor.as_tensor_variable(k)
        assert n.ndim == 0
        assert m.ndim == 0
        assert k.ndim == 0
        otype = GpuArrayType(dtype=self.dtype,
                             broadcastable=(False, False),
                             context_name=self.context_name)
        return Apply(self, [n, m, k], [otype()])

    def infer_shape(self, node, in_shapes):
        # Output shape is (n, m) -- the first two symbolic inputs.
        out_shape = [node.inputs[0], node.inputs[1]]
        return [out_shape]

    def grad(self, inp, grads):
        # The output is a constant 0/1 mask determined by integer inputs,
        # so no gradient is defined for any of them.
        return [grad_undefined(self, i, inp[i])
                for i in xrange(3)]

    def gpu_kernels(self, node, name):
        # One work-item strides over the rows (LID_0 / LDIM_0); each row i
        # writes ones in columns [0, min(i + coff, m - 1)].  coff/roff
        # translate the diagonal offset k into column/row start offsets.
        code = """#include "cluda.h"

KERNEL void tri(GLOBAL_MEM %(ctype)s *a, ga_size a_off,
                ga_size n, ga_size m, ga_ssize k) {
    a = (GLOBAL_MEM %(ctype)s *)(((GLOBAL_MEM char *)a) + a_off);
    ga_ssize coff = max(k, (ga_ssize) 0);
    ga_ssize roff = -min(k, (ga_ssize) 0);
    for (ga_size i = LID_0; i < min(n - roff,n); i += LDIM_0) {
        for (ga_size j = 0; j <= min(i + coff,m-1); j++) {
            a[(i + roff)*m + j] = %(write_a)s(1);
        }
    }
}""" % dict(ctype=pygpu.gpuarray.dtype_to_ctype(self.dtype),
            name=name, write_a=write_w(self.dtype))
        return [Kernel(
                code=code, name="tri",
                params=[gpuarray.GpuArray, gpuarray.SIZE, gpuarray.SIZE,
                        gpuarray.SIZE, gpuarray.SSIZE],
                flags=Kernel.get_flags(self.dtype),
                objvar='k_tri_' + name)]

    def c_code(self, node, name, inp, out, sub):
        # make_node always builds the Apply with exactly (n, m, k).  The
        # previous two-input fallback set the Python int 0 as `k`, which
        # substituted `dtype_0`/`PyArray_DATA(0)` into the template and
        # produced invalid C; it was unreachable and has been removed.
        n, m, k = inp
        z, = out
        fail = sub['fail']
        ctx = sub['params']
        typecode = pygpu.gpuarray.dtype_to_typecode(self.dtype)
        kname = self.gpu_kernels(node, name)[0].objvar
        s = """
        size_t dims[2] = {0, 0};
        size_t ls, gs;
        ssize_t k;
        int err;

        dims[0] = ((dtype_%(n)s*)PyArray_DATA(%(n)s))[0];
        dims[1] = ((dtype_%(m)s*)PyArray_DATA(%(m)s))[0];
        k = ((dtype_%(k)s*)PyArray_DATA(%(k)s))[0];

        Py_CLEAR(%(z)s);

        %(z)s = pygpu_zeros(2, dims,
                            %(typecode)s,
                            GA_C_ORDER,
                            %(ctx)s, Py_None);
        if (%(z)s == NULL) {
            %(fail)s
        }

        ls = 1;
        gs = 256;
        err = tri_call(1, &gs, &ls, 0, %(z)s->ga.data, %(z)s->ga.offset,
                       dims[0], dims[1], k);
        if (err != GA_NO_ERROR) {
            PyErr_Format(PyExc_RuntimeError,
                         "gpuarray error: kTri: %%s. n=%%lu, m=%%lu.",
                         GpuKernel_error(&%(kname)s, err),
                         (unsigned long)dims[0], (unsigned long)dims[1]);
            %(fail)s;
        }
        """ % locals()
        return s

    def c_code_cache_version(self):
        # Bumped 1 -> 2: the generated C changed (error-message fix and
        # removal of the broken two-input branch).
        return (2,)
(The diff for this file is collapsed.)
...@@ -52,7 +52,7 @@ from .basic_ops import (as_gpuarray_variable, infer_context_name, ...@@ -52,7 +52,7 @@ from .basic_ops import (as_gpuarray_variable, infer_context_name,
HostFromGpu, GpuFromHost, HostFromGpu, GpuFromHost,
GpuSplit, GpuContiguous, gpu_contiguous, GpuSplit, GpuContiguous, gpu_contiguous,
GpuAlloc, GpuAllocEmpty, GpuReshape, GpuAlloc, GpuAllocEmpty, GpuReshape,
GpuEye, gpu_join, GpuJoin) GpuEye, GpuTri, gpu_join, GpuJoin)
from .blas import (gpu_dot22, GpuGemm, GpuGer, GpuGemmBatch, from .blas import (gpu_dot22, GpuGemm, GpuGer, GpuGemmBatch,
gpugemm_no_inplace, gpugemm_inplace, gpugemm_no_inplace, gpugemm_inplace,
gpugemmbatch_no_inplace, gpugemmbatch_no_inplace,
...@@ -389,7 +389,8 @@ class GraphToGPU(Optimizer): ...@@ -389,7 +389,8 @@ class GraphToGPU(Optimizer):
if (not move_to_GPU and if (not move_to_GPU and
isinstance(node.op, (theano.tensor.Alloc, isinstance(node.op, (theano.tensor.Alloc,
theano.tensor.AllocEmpty, theano.tensor.AllocEmpty,
theano.tensor.basic.Eye))): theano.tensor.basic.Eye,
theano.tensor.basic.Tri))):
# If the Alloc[Empty] have a client that will be moved # If the Alloc[Empty] have a client that will be moved
# to the GPU, we should move the Alloc* on the GPU. # to the GPU, we should move the Alloc* on the GPU.
...@@ -1412,6 +1413,13 @@ def local_gpua_eye(op, context_name, inputs, outputs): ...@@ -1412,6 +1413,13 @@ def local_gpua_eye(op, context_name, inputs, outputs):
return GpuEye(dtype=op.dtype, context_name=context_name) return GpuEye(dtype=op.dtype, context_name=context_name)
@register_opt('fast_compile')
@op_lifter([tensor.basic.Tri])
@register_opt2([tensor.basic.Tri], 'fast_compile')
def local_gpua_tri(op, context_name, inputs, outputs):
    """Lift a CPU Tri op to GpuTri, preserving dtype and target context."""
    return GpuTri(dtype=op.dtype, context_name=context_name)
@register_opt('fast_compile') @register_opt('fast_compile')
@op_lifter([tensor.nnet.CrossentropySoftmaxArgmax1HotWithBias]) @op_lifter([tensor.nnet.CrossentropySoftmaxArgmax1HotWithBias])
@register_opt2([tensor.nnet.CrossentropySoftmaxArgmax1HotWithBias], 'fast_compile') @register_opt2([tensor.nnet.CrossentropySoftmaxArgmax1HotWithBias], 'fast_compile')
...@@ -2583,7 +2591,7 @@ def local_gpua_images2neibs(op, context_name, inputs, outputs): ...@@ -2583,7 +2591,7 @@ def local_gpua_images2neibs(op, context_name, inputs, outputs):
@op_lifter([slinalg.Solve]) @op_lifter([slinalg.Solve])
@register_opt2([theano.tensor.slinalg.Solve], 'fast_compile') @register_opt2([theano.tensor.slinalg.Solve], 'fast_compile')
def local_gpu_solve(op, context_name, inputs, outputs): def local_gpu_solve(op, context_name, inputs, outputs):
if inputs[0].dtype not in ['float16', 'float32']: if inputs[0].dtype not in ['float16', 'float32', 'float64']:
return return
if op.A_structure not in MATRIX_STRUCTURES_SOLVE: if op.A_structure not in MATRIX_STRUCTURES_SOLVE:
return return
...@@ -2609,7 +2617,8 @@ def local_gpu_solve(op, context_name, inputs, outputs): ...@@ -2609,7 +2617,8 @@ def local_gpu_solve(op, context_name, inputs, outputs):
def local_inplace_gpu_solve(node): def local_inplace_gpu_solve(node):
if isinstance(node.op, GpuCusolverSolve) and not node.op.inplace: if isinstance(node.op, GpuCusolverSolve) and not node.op.inplace:
with inherit_stack_trace(node.outputs): with inherit_stack_trace(node.outputs):
return [GpuCusolverSolve(A_structure=node.op.A_structure, trans=node.op.trans, return [GpuCusolverSolve(A_structure=node.op.A_structure,
trans=node.op.trans,
inplace=True)(*node.inputs)] inplace=True)(*node.inputs)]
...@@ -2617,7 +2626,7 @@ def local_inplace_gpu_solve(node): ...@@ -2617,7 +2626,7 @@ def local_inplace_gpu_solve(node):
def local_gpu_cholesky(op, context_name, inputs, outputs): def local_gpu_cholesky(op, context_name, inputs, outputs):
if not cusolver_available: if not cusolver_available:
return return
if inputs[0].dtype not in ['float16', 'float32']: if inputs[0].dtype not in ['float16', 'float32', 'float64']:
return return
op = GpuCholesky(lower=op.lower, inplace=op.destructive) op = GpuCholesky(lower=op.lower, inplace=op.destructive)
if inputs[0].dtype == 'float16': if inputs[0].dtype == 'float16':
......
...@@ -20,7 +20,8 @@ from ..type import (GpuArrayType, get_context, ...@@ -20,7 +20,8 @@ from ..type import (GpuArrayType, get_context,
from ..basic_ops import ( from ..basic_ops import (
host_from_gpu, HostFromGpu, GpuFromHost, GpuReshape, GpuToGpu, host_from_gpu, HostFromGpu, GpuFromHost, GpuReshape, GpuToGpu,
GpuAlloc, GpuAllocEmpty, GpuContiguous, GpuAlloc, GpuAllocEmpty, GpuContiguous,
gpu_join, GpuJoin, GpuSplit, GpuEye, gpu_contiguous) gpu_join, GpuJoin, GpuSplit, GpuEye, GpuTri,
gpu_contiguous)
from ..elemwise import GpuDimShuffle, GpuElemwise from ..elemwise import GpuDimShuffle, GpuElemwise
from ..subtensor import GpuSubtensor from ..subtensor import GpuSubtensor
...@@ -497,3 +498,112 @@ def test_Gpujoin_inplace(): ...@@ -497,3 +498,112 @@ def test_Gpujoin_inplace():
if not isinstance(mode_with_gpu, theano.compile.DebugMode): if not isinstance(mode_with_gpu, theano.compile.DebugMode):
assert x.get_value(borrow=True, return_internal_type=True) is f(0) assert x.get_value(borrow=True, return_internal_type=True) is f(0)
assert np.allclose(f(0), [3, 4, 5]) assert np.allclose(f(0), [3, 4, 5])
def test_gpu_tril_triu():
    """Check that T.tril/T.triu are computed via GpuTri and match numpy."""
    def _check(theano_tri, np_tri, m, k):
        # Shared body for the tril and triu checks: compile, run,
        # compare against numpy and verify GpuTri was used.
        m_symb = T.matrix(dtype=m.dtype)
        k_symb = T.iscalar()
        f = theano.function([m_symb, k_symb],
                            theano_tri(m_symb, k_symb),
                            mode=mode_with_gpu)
        result = f(m, k)
        assert np.allclose(result, np_tri(m, k))
        # Compare against the input's dtype, NOT the enclosing loop
        # variable `dtype`: nose may run yielded tests after the loop
        # has finished, when the closure would see only its last value.
        assert result.dtype == np.dtype(m.dtype)
        assert any(isinstance(node.op, GpuTri)
                   for node in f.maker.fgraph.toposort())

    def check_l(m, k=0):
        _check(T.tril, np.tril, m, k)

    def check_u(m, k=0):
        _check(T.triu, np.triu, m, k)

    utt.seed_rng()
    test_rng = np.random.RandomState(seed=utt.fetch_seed())

    for dtype in ['float64', 'float32', 'float16']:
        # try a big one
        m = np.asarray(test_rng.rand(5000, 5000) * 2 - 1, dtype=dtype)
        yield check_l, m, 0
        yield check_l, m, 1
        yield check_l, m, -1

        yield check_u, m, 0
        yield check_u, m, 1
        yield check_u, m, -1

        m = np.asarray(test_rng.rand(10, 10) * 2 - 1, dtype=dtype)
        yield check_l, m, 0
        yield check_l, m, 1
        yield check_l, m, -1

        yield check_u, m, 0
        yield check_u, m, 1
        yield check_u, m, -1

        m = np.asarray(test_rng.rand(10, 5) * 2 - 1, dtype=dtype)
        yield check_l, m, 0
        yield check_l, m, 1
        yield check_l, m, -1

        yield check_u, m, 0
        yield check_u, m, 1
        yield check_u, m, -1
def test_gputri():
    """Check T.tri against np.tri for many (N, M, k) and dtypes via GpuTri."""
    def check(dtype, N, M_=None, k=0):
        # Theano does not accept None as a tensor input, so substitute a
        # concrete value (DebugMode also rejects None inputs even where
        # they would otherwise be allowed).
        M = N if M_ is None else M_
        N_symb = T.iscalar()
        M_symb = T.iscalar()
        k_symb = T.iscalar()
        # Add/subtract 1 around the op so the graph is not a bare constant.
        out = T.tri(N_symb, M_symb, k_symb, dtype=dtype) + np.array(1).astype(dtype)
        f = theano.function([N_symb, M_symb, k_symb],
                            out,
                            mode=mode_with_gpu)
        result = np.asarray(f(N, M, k)) - np.array(1).astype(dtype)
        assert np.allclose(result, np.tri(N, M_, k, dtype=dtype))
        assert result.dtype == np.dtype(dtype)
        assert any(isinstance(node.op, GpuTri)
                   for node in f.maker.fgraph.toposort())

    # Table of (N, M_, k) cases; None for M_ exercises the default path.
    cases = [
        (1000, 1000, 0),     # try a big one
        (1000, 1000, -400),
        (1000, 1000, 400),
        (5, None, 0),
        (3, 5, 0),           # M != N, k = 0
        (5, 3, 0),
        (3, 3, 1),           # N == M, k != 0
        (3, 3, -1),
        (3, 5, 1),           # N < M, k != 0
        (3, 5, -1),
        (5, 3, 1),           # N > M, k != 0
        (5, 3, -1),
        (5, 3, 3),           # k > M, -k > N, k > M, k > N
        (3, 5, 3),
        (5, 3, -3),
        (3, 5, -3),
        (5, 3, 6),
        (3, 5, -6),
    ]
    for dtype in ['float64', 'float32', 'int32', 'float16']:
        for N, M_, k in cases:
            yield check, dtype, N, M_, k
...@@ -7,11 +7,14 @@ from numpy.linalg.linalg import LinAlgError ...@@ -7,11 +7,14 @@ from numpy.linalg.linalg import LinAlgError
import theano import theano
from theano import config from theano import config
from theano.gpuarray.linalg import (GpuCholesky, GpuMagmaCholesky, from theano.gpuarray.linalg import (GpuCusolverSolve, GpuCublasTriangularSolve,
GpuCholesky, GpuMagmaCholesky,
GpuMagmaEigh, GpuMagmaMatrixInverse, GpuMagmaEigh, GpuMagmaMatrixInverse,
GpuMagmaQR, GpuMagmaSVD, GpuMagmaQR, GpuMagmaSVD,
cusolver_available, gpu_matrix_inverse, cusolver_available, gpu_matrix_inverse,
gpu_solve, gpu_svd, gpu_qr) gpu_cholesky,
gpu_solve, gpu_solve_lower_triangular,
gpu_svd, gpu_qr)
from theano.tensor.nlinalg import (SVD, MatrixInverse, QRFull, from theano.tensor.nlinalg import (SVD, MatrixInverse, QRFull,
QRIncomplete, eigh, matrix_inverse, qr) QRIncomplete, eigh, matrix_inverse, qr)
from theano.tensor.slinalg import Cholesky, cholesky, imported_scipy from theano.tensor.slinalg import Cholesky, cholesky, imported_scipy
...@@ -20,6 +23,7 @@ from theano.tests import unittest_tools as utt ...@@ -20,6 +23,7 @@ from theano.tests import unittest_tools as utt
from .. import gpuarray_shared_constructor from .. import gpuarray_shared_constructor
from .config import mode_with_gpu, mode_without_gpu from .config import mode_with_gpu, mode_without_gpu
from .test_basic_ops import rand from .test_basic_ops import rand
from nose.tools import assert_raises
class TestCusolver(unittest.TestCase): class TestCusolver(unittest.TestCase):
...@@ -122,6 +126,41 @@ class TestCusolver(unittest.TestCase): ...@@ -122,6 +126,41 @@ class TestCusolver(unittest.TestCase):
fn = theano.function([A, b], [solver], mode=mode_with_gpu) fn = theano.function([A, b], [solver], mode=mode_with_gpu)
self.assertRaises(LinAlgError, fn, A_val, x_val) self.assertRaises(LinAlgError, fn, A_val, x_val)
def verify_solve_grad(self, m, n, A_structure, lower, rng):
    """Finite-difference gradient check for a GPU solve op.

    m: size of the (m, m) system matrix A.
    n: number of right-hand sides, or None for a 1-D right-hand side.
    A_structure: 'general', 'lower_triangular' or 'upper_triangular'.
    lower: passed to GpuCublasTriangularSolve for triangular structures.
    rng: numpy RandomState used for both data and verify_grad.
    """
    floatX = config.floatX
    # Boost the diagonal so A stays well away from singular; random
    # matrices otherwise cause numerical-precision failures.
    A_val = (0.5 * rng.normal(size=(m, m)) + np.eye(m)).astype(floatX)
    if A_structure == 'lower_triangular':
        A_val = np.tril(A_val)
    elif A_structure == 'upper_triangular':
        A_val = np.triu(A_val)
    b_shape = m if n is None else (m, n)
    b_val = rng.normal(size=b_shape).astype(floatX)
    # Tighter finite-difference step only makes sense in double precision.
    eps = 2e-8 if floatX == "float64" else None
    if A_structure in ('lower_triangular', 'upper_triangular'):
        solve_op = GpuCublasTriangularSolve(lower=lower)
    else:
        solve_op = GpuCusolverSolve(A_structure="general")
    utt.verify_grad(solve_op, [A_val, b_val], 3, rng, eps=eps)
def test_solve_grad(self):
    """Gradient checks for every supported A_structure value."""
    rng = np.random.RandomState(utt.fetch_seed())
    structures = ['general', 'lower_triangular', 'upper_triangular']
    for A_structure in structures:
        lower = (A_structure == 'lower_triangular')
        # NOTE(review): the 1-D right-hand-side case is disabled --
        # presumably unsupported or unstable; confirm before re-enabling.
        # self.verify_solve_grad(5, None, A_structure, lower, rng)
        self.verify_solve_grad(6, 1, A_structure, lower, rng)
        self.verify_solve_grad(4, 3, A_structure, lower, rng)
    # lower should have no effect for A_structure == 'general' so also
    # check lower=True case
    self.verify_solve_grad(4, 3, 'general', lower=True, rng=rng)
class TestGpuCholesky(unittest.TestCase): class TestGpuCholesky(unittest.TestCase):
...@@ -215,6 +254,98 @@ class TestGpuCholesky(unittest.TestCase): ...@@ -215,6 +254,98 @@ class TestGpuCholesky(unittest.TestCase):
self.assertRaises(LinAlgError, fn, A_val) self.assertRaises(LinAlgError, fn, A_val)
class TestGpuCholesky64(unittest.TestCase):
    """Tests of GpuCholesky with inputs hard-coded to float64.

    NOTE(review): this appears to mirror TestGpuCholesky (which presumably
    uses the default float type) with dtype="float64" everywhere -- added
    alongside float64 support; confirm against the sibling class.
    """

    def setUp(self):
        # GpuCholesky is backed by cusolver; skip all tests without it.
        if not cusolver_available:
            self.skipTest('Optional package scikits.cuda.cusolver not available')
        utt.seed_rng()

    def get_gpu_cholesky_func(self, lower=True, inplace=False):
        # Helper function to compile function from GPU Cholesky op.
        A = theano.tensor.matrix("A", dtype="float64")
        cholesky_op = GpuCholesky(lower=lower, inplace=inplace)
        chol_A = cholesky_op(A)
        return theano.function([A], chol_A, accept_inplace=inplace,
                               mode=mode_with_gpu)

    def compare_gpu_cholesky_to_np(self, A_val, lower=True, inplace=False):
        # Helper function to compare op output to np.cholesky output.
        chol_A_val = np.linalg.cholesky(A_val)
        if not lower:
            # np.linalg.cholesky returns the lower factor; transpose for
            # the upper-triangular comparison.
            chol_A_val = chol_A_val.T
        fn = self.get_gpu_cholesky_func(lower, inplace)
        res = fn(A_val)
        chol_A_res = np.array(res)
        utt.assert_allclose(chol_A_res, chol_A_val)

    def test_gpu_cholesky_opt(self):
        # The CPU cholesky graph should be rewritten to use GpuCholesky.
        if not imported_scipy:
            self.skipTest('SciPy is not enabled, skipping test')
        A = theano.tensor.matrix("A", dtype="float64")
        fn = theano.function([A], cholesky(A), mode=mode_with_gpu)
        assert any([isinstance(node.op, GpuCholesky)
                    for node in fn.maker.fgraph.toposort()])

    def test_invalid_input_fail_non_square(self):
        # Invalid Cholesky input test with non-square matrix as input.
        A_val = np.random.normal(size=(3, 2)).astype("float64")
        fn = self.get_gpu_cholesky_func(True, False)
        self.assertRaises(ValueError, fn, A_val)

    def test_invalid_input_fail_vector(self):
        # Invalid Cholesky input test with vector as input.
        def invalid_input_func():
            A = theano.tensor.vector("A", dtype="float64")
            GpuCholesky(lower=True, inplace=False)(A)
        self.assertRaises(AssertionError, invalid_input_func)

    def test_invalid_input_fail_tensor3(self):
        # Invalid Cholesky input test with 3D tensor as input.
        def invalid_input_func():
            A = theano.tensor.tensor3("A", dtype="float64")
            GpuCholesky(lower=True, inplace=False)(A)
        self.assertRaises(AssertionError, invalid_input_func)

    @utt.assertFailure_fast
    def test_diag_chol(self):
        # Diagonal matrix input Cholesky test.
        for lower in [True, False]:
            for inplace in [True, False]:
                # make sure all diagonal elements are positive so positive-definite
                A_val = np.diag(np.random.uniform(size=5).astype("float64") + 1)
                self.compare_gpu_cholesky_to_np(A_val, lower=lower, inplace=inplace)

    @utt.assertFailure_fast
    def test_dense_chol_lower(self):
        # Dense matrix input lower-triangular Cholesky test.
        for lower in [True, False]:
            for inplace in [True, False]:
                M_val = np.random.normal(size=(3, 3)).astype("float64")
                # A = M.dot(M) will be positive definite for all non-singular M
                A_val = M_val.dot(M_val.T)
                self.compare_gpu_cholesky_to_np(A_val, lower=lower, inplace=inplace)

    def test_invalid_input_fail_non_symmetric(self):
        # Invalid Cholesky input test with non-symmetric input.
        # (Non-symmetric real input must also be non-positive definite).
        A_val = None
        while True:
            # Loop until the random draw is actually non-symmetric.
            A_val = np.random.normal(size=(3, 3)).astype("float64")
            if not np.allclose(A_val, A_val.T):
                break
        fn = self.get_gpu_cholesky_func(True, False)
        self.assertRaises(LinAlgError, fn, A_val)

    def test_invalid_input_fail_negative_definite(self):
        # Invalid Cholesky input test with negative-definite input.
        M_val = np.random.normal(size=(3, 3)).astype("float64")
        # A = -M.dot(M) will be negative definite for all non-singular M
        A_val = -M_val.dot(M_val.T)
        fn = self.get_gpu_cholesky_func(True, False)
        self.assertRaises(LinAlgError, fn, A_val)
class TestMagma(unittest.TestCase): class TestMagma(unittest.TestCase):
def setUp(self): def setUp(self):
...@@ -467,3 +598,61 @@ class TestMagma(unittest.TestCase): ...@@ -467,3 +598,61 @@ class TestMagma(unittest.TestCase):
isinstance(node.op, GpuMagmaEigh) isinstance(node.op, GpuMagmaEigh)
for node in fn.maker.fgraph.toposort() for node in fn.maker.fgraph.toposort()
]) ])
# mostly copied from theano/tensor/tests/test_slinalg.py
def test_cholesky_grad():
    """Gradient checks for GpuCholesky (default, lower and upper)."""
    rng = np.random.RandomState(utt.fetch_seed())
    r = rng.randn(5, 5).astype(config.floatX)

    # r.dot(r.T) is built inside the graph so every op call sees a
    # symmetric positive-definite input.
    ops = [
        gpu_cholesky,              # the default helper
        GpuCholesky(lower=True),   # explicit lower-triangular
        GpuCholesky(lower=False),  # explicit upper-triangular
    ]
    for op in ops:
        # Bind `op` as a default argument so each yielded callable keeps
        # its own op instead of the loop's last value.
        yield (lambda op=op: utt.verify_grad(lambda x: op(x.dot(x.T)),
                                             [r], 3, rng))
def test_cholesky_grad_indef():
    """The gradient through Cholesky of an indefinite matrix must raise."""
    x = theano.tensor.matrix()
    # [[1, 0.2], [0.2, -2]] has a negative eigenvalue, so it is not
    # positive definite and the decomposition must fail.
    matrix = np.array([[1, 0.2], [0.2, -2]]).astype(config.floatX)
    # NOTE(review): this local name shadows the `cholesky` imported from
    # theano.tensor.slinalg; harmless here but easy to misread.
    cholesky = GpuCholesky(lower=True)
    chol_f = theano.function([x], theano.tensor.grad(cholesky(x).sum(), [x]))
    with assert_raises(LinAlgError):
        chol_f(matrix)
    # NOTE(review): the on_error='nan' variant is disabled -- presumably
    # GpuCholesky lacks the on_error option; confirm before re-enabling.
    # cholesky = GpuCholesky(lower=True, on_error='nan')
    # chol_f = function([x], grad(gpu_cholesky(x).sum(), [x]))
    # assert np.all(np.isnan(chol_f(matrix)))
def test_lower_triangular_and_cholesky_grad():
    """Joint gradient check through Cholesky and a lower-triangular solve."""
    # Random lower triangular system is ill-conditioned.
    #
    # Reference
    # -----------
    # Viswanath, Divakar, and L. N. Trefethen. "Condition numbers of random triangular matrices."
    # SIAM Journal on Matrix Analysis and Applications 19.2 (1998): 564-581.
    #
    # Use smaller number of N when using float32
    if config.floatX == 'float64':
        N = 100
    else:
        N = 5
    rng = np.random.RandomState(utt.fetch_seed())
    r = rng.randn(N, N).astype(config.floatX)
    y = rng.rand(N, 1).astype(config.floatX)

    def f(r, y):
        # Chain gpu_cholesky, gpu_solve_lower_triangular and a second
        # gpu_cholesky so their gradients are exercised together.
        PD = r.dot(r.T)  # symmetric positive definite by construction
        L = gpu_cholesky(PD)
        A = gpu_solve_lower_triangular(L, y)
        AAT = theano.tensor.dot(A, A.T)
        B = AAT + theano.tensor.eye(N)  # shift keeps B positive definite
        LB = gpu_cholesky(B)
        return theano.tensor.sum(theano.tensor.log(theano.tensor.diag(LB)))
    yield (lambda: utt.verify_grad(f, [r, y], 3, rng))
Markdown is supported
0%
You are adding 0 people to this discussion. Please proceed with caution.
Finish editing this comment first!
Register or sign in to comment