Commit c1e84a56, authored by wonghang

Fix FLAKE8 issue

Parent commit: c89d22ff
......@@ -1713,6 +1713,7 @@ KERNEL void eye(GLOBAL_MEM %(ctype)s *a, ga_size a_off,
def c_code_cache_version(self):
    """Return the cache version tuple for the generated C code.

    Bump this value whenever the emitted kernel source changes so that
    Theano invalidates previously compiled modules.
    """
    version = (10,)
    return version
class GpuTri(GpuKernelBase, Op):
"""
Tri for GPU.
......
......@@ -85,7 +85,7 @@ if cusolver_available:
int(A), lda, int(B),
ldb, int(devInfo))
cusolver.cusolverCheckStatus(status)
def attach_cusolver_handle_to_context(ctx):
handle = getattr(ctx, 'cusolver_handle', None)
......@@ -226,7 +226,7 @@ class GpuCusolverSolve(Op):
getrs = cusolver.cusolverDnDgetrs
else:
raise ValueError("Unsupported dtype")
if self.A_structure == 'symmetric':
with context:
workspace_size = potrf_bufferSize(
......@@ -291,6 +291,7 @@ class GpuCusolverSolve(Op):
A_bar = -tensor.outer(b_bar, c) if c.ndim == 1 else -b_bar.dot(c.T)
return [A_bar, b_bar]
class GpuCublasTriangularSolve(Op):
"""
CUBLAS GPU Triangular Solve Op.
......@@ -312,7 +313,8 @@ class GpuCublasTriangularSolve(Op):
def make_node(self, inp1, inp2):
if not cublas_available:
raise RuntimeError('CUBLAS is not available and '
'GpuCublasTriangularSolve Op can not be constructed.')
'GpuCublasTriangularSolve Op '
'can not be constructed.')
context_name = infer_context_name(inp1, inp2)
inp1 = as_gpuarray_variable(inp1, context_name)
......@@ -399,7 +401,7 @@ class GpuCublasTriangularSolve(Op):
trsm = cublas.cublasDtrsm
else:
raise ValueError("Unsupported dtype")
with ctx:
if b.ndim == 1:
# matrix vector solve
......@@ -428,6 +430,7 @@ class GpuCublasTriangularSolve(Op):
A_bar = tensor.triu(A_bar)
return [A_bar, b_bar]
def gpu_solve(A, b, A_structure='general', trans='N'):
if A_structure == 'lower':
return GpuCublasTriangularSolve(True, trans)(A, b)
......@@ -436,12 +439,15 @@ def gpu_solve(A, b, A_structure='general', trans='N'):
return GpuCusolverSolve(A_structure, trans)(A, b)
def gpu_solve_lower_triangular(A, b, trans='N'):
    """Solve ``A x = b`` on the GPU where ``A`` is lower triangular.

    Thin convenience wrapper around :class:`GpuCublasTriangularSolve`
    with ``lower=True``.
    """
    solve_op = GpuCublasTriangularSolve(True, trans)
    return solve_op(A, b)
def gpu_solve_upper_triangular(A, b, trans='N'):
    """Solve ``A x = b`` on the GPU where ``A`` is upper triangular.

    Thin convenience wrapper around :class:`GpuCublasTriangularSolve`
    with ``lower=False``.
    """
    solve_op = GpuCublasTriangularSolve(False, trans)
    return solve_op(A, b)
class GpuCholesky(Op):
"""
CUSOLVER GPU Cholesky Op.
......@@ -475,7 +481,8 @@ class GpuCholesky(Op):
raise RuntimeError('CUSOLVER is not available and '
'GpuCholesky Op can not be constructed.')
if skcuda.__version__ <= '0.5.1':
warnings.warn('The GpuCholesky op requires scikit-cuda > 0.5.1 to work with CUDA 8')
warnings.warn('The GpuCholesky op requires scikit-cuda > '
'0.5.1 to work with CUDA 8')
if not pygpu_available:
raise RuntimeError('Missing pygpu or triu/tril functions.'
'Install or update libgpuarray.')
......@@ -531,7 +538,7 @@ class GpuCholesky(Op):
potrf = cusolver.cusolverDnDpotrf
else:
raise ValueError("Unsupported dtype")
with context:
workspace_size = potrf_bufferSize(
context.cusolver_handle, l_parameter, n, L_ptr, lda)
......@@ -544,9 +551,8 @@ class GpuCholesky(Op):
workspace_ptr = workspace.gpudata
dev_info_ptr = dev_info.gpudata
potrf(
context.cusolver_handle, l_parameter, n, L_ptr, lda, workspace_ptr,
workspace_size, dev_info_ptr)
potrf(context.cusolver_handle, l_parameter, n, L_ptr,
lda, workspace_ptr, workspace_size, dev_info_ptr)
val_dev_info = np.asarray(dev_info)[0]
if val_dev_info > 0:
......@@ -598,6 +604,7 @@ class GpuCholesky(Op):
return [grad]
def gpu_cholesky(A, lower=True):
    """Compute the Cholesky factor of ``A`` on the GPU.

    Convenience wrapper around the cuSOLVER-backed :class:`GpuCholesky`
    op; returns the lower factor by default.
    """
    chol_op = GpuCholesky(lower)
    return chol_op(A)
......@@ -612,7 +619,8 @@ class GpuMagmaBase(COp):
'gpuarray_helper.h', 'magma.h']
def c_header_dirs(self):
dirs = [gpuarray_helper_inc_dir(), pygpu.get_include(), config.cuda.include_path]
dirs = [gpuarray_helper_inc_dir(), pygpu.get_include(),
config.cuda.include_path]
if config.magma.include_path:
dirs.append(config.magma.include_path)
return dirs
......
......@@ -1412,12 +1412,14 @@ def local_gpua_dot22scalar(op, context_name, inputs, outputs):
def local_gpua_eye(op, context_name, inputs, outputs):
    """Lift a host ``Eye`` op to its GPU counterpart, keeping the dtype."""
    gpu_op = GpuEye(dtype=op.dtype, context_name=context_name)
    return gpu_op
@register_opt('fast_compile')
@op_lifter([tensor.basic.Tri])
@register_opt2([tensor.basic.Tri], 'fast_compile')
def local_gpua_tri(op, context_name, inputs, outputs):
    """Lift ``tensor.basic.Tri`` to :class:`GpuTri`, keeping the dtype."""
    gpu_op = GpuTri(dtype=op.dtype, context_name=context_name)
    return gpu_op
@register_opt('fast_compile')
@op_lifter([tensor.nnet.CrossentropySoftmaxArgmax1HotWithBias])
@register_opt2([tensor.nnet.CrossentropySoftmaxArgmax1HotWithBias], 'fast_compile')
......@@ -2589,7 +2591,7 @@ def local_gpua_images2neibs(op, context_name, inputs, outputs):
@op_lifter([slinalg.Solve])
@register_opt2([theano.tensor.slinalg.Solve], 'fast_compile')
def local_gpu_solve(op, context_name, inputs, outputs):
if inputs[0].dtype not in ['float16', 'float32','float64']:
if inputs[0].dtype not in ['float16', 'float32', 'float64']:
return
if op.A_structure not in MATRIX_STRUCTURES_SOLVE:
return
......@@ -2615,7 +2617,8 @@ def local_gpu_solve(op, context_name, inputs, outputs):
def local_inplace_gpu_solve(node):
if isinstance(node.op, GpuCusolverSolve) and not node.op.inplace:
with inherit_stack_trace(node.outputs):
return [GpuCusolverSolve(A_structure=node.op.A_structure, trans=node.op.trans,
return [GpuCusolverSolve(A_structure=node.op.A_structure,
trans=node.op.trans,
inplace=True)(*node.inputs)]
......
......@@ -444,6 +444,7 @@ def test_gpueye():
yield check, dtype, 5, 3, 6
yield check, dtype, 3, 5, -6
def test_hostfromgpu_shape_i():
# Test that the shape is lifted over hostfromgpu
......@@ -498,13 +499,14 @@ def test_Gpujoin_inplace():
assert x.get_value(borrow=True, return_internal_type=True) is f(0)
assert np.allclose(f(0), [3, 4, 5])
def test_gpu_tril_triu():
def check_l(m, k=0):
m_symb = T.matrix(dtype=m.dtype)
k_symb = T.iscalar()
f = theano.function([m_symb,k_symb],
T.tril(m_symb,k_symb),
f = theano.function([m_symb, k_symb],
T.tril(m_symb, k_symb),
mode=mode_with_gpu)
result = f(m, k)
assert np.allclose(result, np.tril(m, k))
......@@ -515,8 +517,8 @@ def test_gpu_tril_triu():
def check_u(m, k=0):
m_symb = T.matrix(dtype=m.dtype)
k_symb = T.iscalar()
f = theano.function([m_symb,k_symb],
T.triu(m_symb,k_symb),
f = theano.function([m_symb, k_symb],
T.triu(m_symb, k_symb),
mode=mode_with_gpu)
result = f(m, k)
assert np.allclose(result, np.triu(m, k))
......@@ -529,7 +531,7 @@ def test_gpu_tril_triu():
for dtype in ['float64', 'float32', 'float16']:
# try a big one
m = np.asarray(test_rng.rand(5000,5000) * 2 - 1, dtype=dtype)
m = np.asarray(test_rng.rand(5000, 5000) * 2 - 1, dtype=dtype)
yield check_l, m, 0
yield check_l, m, 1
yield check_l, m, -1
......@@ -537,8 +539,8 @@ def test_gpu_tril_triu():
yield check_u, m, 0
yield check_u, m, 1
yield check_u, m, -1
m = np.asarray(test_rng.rand(10,10) * 2 - 1, dtype=dtype)
m = np.asarray(test_rng.rand(10, 10) * 2 - 1, dtype=dtype)
yield check_l, m, 0
yield check_l, m, 1
yield check_l, m, -1
......@@ -547,7 +549,7 @@ def test_gpu_tril_triu():
yield check_u, m, 1
yield check_u, m, -1
m = np.asarray(test_rng.rand(10,5) * 2 - 1, dtype=dtype)
m = np.asarray(test_rng.rand(10, 5) * 2 - 1, dtype=dtype)
yield check_l, m, 0
yield check_l, m, 1
yield check_l, m, -1
......@@ -556,6 +558,7 @@ def test_gpu_tril_triu():
yield check_u, m, 1
yield check_u, m, -1
def test_gputri():
def check(dtype, N, M_=None, k=0):
# Theano does not accept None as a tensor.
......@@ -583,7 +586,7 @@ def test_gputri():
yield check, dtype, 1000, 1000, 0
yield check, dtype, 1000, 1000, -400
yield check, dtype, 1000, 1000, 400
yield check, dtype, 5
# M != N, k = 0
yield check, dtype, 3, 5
......
......@@ -7,14 +7,13 @@ from numpy.linalg.linalg import LinAlgError
import theano
from theano import config
from theano.gpuarray.linalg import (GpuCusolverSolve,GpuCublasTriangularSolve,
from theano.gpuarray.linalg import (GpuCusolverSolve, GpuCublasTriangularSolve,
GpuCholesky, GpuMagmaCholesky,
GpuMagmaEigh, GpuMagmaMatrixInverse,
GpuMagmaQR, GpuMagmaSVD,
cusolver_available, gpu_matrix_inverse,
gpu_cholesky,
gpu_solve, gpu_solve_lower_triangular,
gpu_solve_upper_triangular,
gpu_svd, gpu_qr)
from theano.tensor.nlinalg import (SVD, MatrixInverse, QRFull,
QRIncomplete, eigh, matrix_inverse, qr)
......@@ -26,6 +25,7 @@ from .config import mode_with_gpu, mode_without_gpu
from .test_basic_ops import rand
from nose.tools import assert_raises
class TestCusolver(unittest.TestCase):
def setUp(self):
......@@ -163,6 +163,7 @@ class TestCusolver(unittest.TestCase):
# check lower=True case
self.verify_solve_grad(4, 3, 'general', lower=True, rng=rng)
class TestGpuCholesky(unittest.TestCase):
def setUp(self):
......@@ -254,6 +255,7 @@ class TestGpuCholesky(unittest.TestCase):
fn = self.get_gpu_cholesky_func(True, False)
self.assertRaises(LinAlgError, fn, A_val)
class TestGpuCholesky64(unittest.TestCase):
def setUp(self):
......@@ -599,6 +601,7 @@ class TestMagma(unittest.TestCase):
for node in fn.maker.fgraph.toposort()
])
# mostly copied from theano/tensor/tests/test_slinalg.py
def test_cholesky_grad():
rng = np.random.RandomState(utt.fetch_seed())
......@@ -628,6 +631,7 @@ def test_cholesky_grad_indef():
# chol_f = function([x], grad(gpu_cholesky(x).sum(), [x]))
# assert np.all(np.isnan(chol_f(matrix)))
def test_lower_triangular_and_cholesky_grad():
# Random lower triangular system is ill-conditioned.
#
......@@ -645,12 +649,12 @@ def test_lower_triangular_and_cholesky_grad():
r = rng.randn(N, N).astype(config.floatX)
y = rng.rand(N, 1).astype(config.floatX)
def f(r,y):
def f(r, y):
PD = r.dot(r.T)
L = gpu_cholesky(PD)
A = gpu_solve_lower_triangular(L,y)
AAT = theano.tensor.dot(A,A.T)
A = gpu_solve_lower_triangular(L, y)
AAT = theano.tensor.dot(A, A.T)
B = AAT + theano.tensor.eye(N)
LB = gpu_cholesky(B)
return theano.tensor.sum(theano.tensor.log(theano.tensor.diag(LB)))
yield (lambda: utt.verify_grad(f, [r,y], 3, rng))
yield (lambda: utt.verify_grad(f, [r, y], 3, rng))
Markdown format supported
0%
You are adding 0 people to this discussion. Please proceed with care.
Please finish editing this comment first!
Register or sign in to post a comment