提交 c1e84a56 作者: wonghang

Fix FLAKE8 issue

上级 c89d22ff
...@@ -1713,6 +1713,7 @@ KERNEL void eye(GLOBAL_MEM %(ctype)s *a, ga_size a_off, ...@@ -1713,6 +1713,7 @@ KERNEL void eye(GLOBAL_MEM %(ctype)s *a, ga_size a_off,
def c_code_cache_version(self): def c_code_cache_version(self):
return (10,) return (10,)
class GpuTri(GpuKernelBase, Op): class GpuTri(GpuKernelBase, Op):
""" """
Tri for GPU. Tri for GPU.
......
...@@ -85,7 +85,7 @@ if cusolver_available: ...@@ -85,7 +85,7 @@ if cusolver_available:
int(A), lda, int(B), int(A), lda, int(B),
ldb, int(devInfo)) ldb, int(devInfo))
cusolver.cusolverCheckStatus(status) cusolver.cusolverCheckStatus(status)
def attach_cusolver_handle_to_context(ctx): def attach_cusolver_handle_to_context(ctx):
handle = getattr(ctx, 'cusolver_handle', None) handle = getattr(ctx, 'cusolver_handle', None)
...@@ -226,7 +226,7 @@ class GpuCusolverSolve(Op): ...@@ -226,7 +226,7 @@ class GpuCusolverSolve(Op):
getrs = cusolver.cusolverDnDgetrs getrs = cusolver.cusolverDnDgetrs
else: else:
raise ValueError("Unsupported dtype") raise ValueError("Unsupported dtype")
if self.A_structure == 'symmetric': if self.A_structure == 'symmetric':
with context: with context:
workspace_size = potrf_bufferSize( workspace_size = potrf_bufferSize(
...@@ -291,6 +291,7 @@ class GpuCusolverSolve(Op): ...@@ -291,6 +291,7 @@ class GpuCusolverSolve(Op):
A_bar = -tensor.outer(b_bar, c) if c.ndim == 1 else -b_bar.dot(c.T) A_bar = -tensor.outer(b_bar, c) if c.ndim == 1 else -b_bar.dot(c.T)
return [A_bar, b_bar] return [A_bar, b_bar]
class GpuCublasTriangularSolve(Op): class GpuCublasTriangularSolve(Op):
""" """
CUBLAS GPU Triangular Solve Op. CUBLAS GPU Triangular Solve Op.
...@@ -312,7 +313,8 @@ class GpuCublasTriangularSolve(Op): ...@@ -312,7 +313,8 @@ class GpuCublasTriangularSolve(Op):
def make_node(self, inp1, inp2): def make_node(self, inp1, inp2):
if not cublas_available: if not cublas_available:
raise RuntimeError('CUBLAS is not available and ' raise RuntimeError('CUBLAS is not available and '
'GpuCublasTriangularSolve Op can not be constructed.') 'GpuCublasTriangularSolve Op '
'can not be constructed.')
context_name = infer_context_name(inp1, inp2) context_name = infer_context_name(inp1, inp2)
inp1 = as_gpuarray_variable(inp1, context_name) inp1 = as_gpuarray_variable(inp1, context_name)
...@@ -399,7 +401,7 @@ class GpuCublasTriangularSolve(Op): ...@@ -399,7 +401,7 @@ class GpuCublasTriangularSolve(Op):
trsm = cublas.cublasDtrsm trsm = cublas.cublasDtrsm
else: else:
raise ValueError("Unsupported dtype") raise ValueError("Unsupported dtype")
with ctx: with ctx:
if b.ndim == 1: if b.ndim == 1:
# matrix vector solve # matrix vector solve
...@@ -428,6 +430,7 @@ class GpuCublasTriangularSolve(Op): ...@@ -428,6 +430,7 @@ class GpuCublasTriangularSolve(Op):
A_bar = tensor.triu(A_bar) A_bar = tensor.triu(A_bar)
return [A_bar, b_bar] return [A_bar, b_bar]
def gpu_solve(A, b, A_structure='general', trans='N'): def gpu_solve(A, b, A_structure='general', trans='N'):
if A_structure == 'lower': if A_structure == 'lower':
return GpuCublasTriangularSolve(True, trans)(A, b) return GpuCublasTriangularSolve(True, trans)(A, b)
...@@ -436,12 +439,15 @@ def gpu_solve(A, b, A_structure='general', trans='N'): ...@@ -436,12 +439,15 @@ def gpu_solve(A, b, A_structure='general', trans='N'):
return GpuCusolverSolve(A_structure, trans)(A, b) return GpuCusolverSolve(A_structure, trans)(A, b)
def gpu_solve_lower_triangular(A, b, trans='N'): def gpu_solve_lower_triangular(A, b, trans='N'):
return GpuCublasTriangularSolve(True, trans)(A, b) return GpuCublasTriangularSolve(True, trans)(A, b)
def gpu_solve_upper_triangular(A, b, trans='N'): def gpu_solve_upper_triangular(A, b, trans='N'):
return GpuCublasTriangularSolve(False, trans)(A, b) return GpuCublasTriangularSolve(False, trans)(A, b)
class GpuCholesky(Op): class GpuCholesky(Op):
""" """
CUSOLVER GPU Cholesky Op. CUSOLVER GPU Cholesky Op.
...@@ -475,7 +481,8 @@ class GpuCholesky(Op): ...@@ -475,7 +481,8 @@ class GpuCholesky(Op):
raise RuntimeError('CUSOLVER is not available and ' raise RuntimeError('CUSOLVER is not available and '
'GpuCholesky Op can not be constructed.') 'GpuCholesky Op can not be constructed.')
if skcuda.__version__ <= '0.5.1': if skcuda.__version__ <= '0.5.1':
warnings.warn('The GpuCholesky op requires scikit-cuda > 0.5.1 to work with CUDA 8') warnings.warn('The GpuCholesky op requires scikit-cuda > '
'0.5.1 to work with CUDA 8')
if not pygpu_available: if not pygpu_available:
raise RuntimeError('Missing pygpu or triu/tril functions.' raise RuntimeError('Missing pygpu or triu/tril functions.'
'Install or update libgpuarray.') 'Install or update libgpuarray.')
...@@ -531,7 +538,7 @@ class GpuCholesky(Op): ...@@ -531,7 +538,7 @@ class GpuCholesky(Op):
potrf = cusolver.cusolverDnDpotrf potrf = cusolver.cusolverDnDpotrf
else: else:
raise ValueError("Unsupported dtype") raise ValueError("Unsupported dtype")
with context: with context:
workspace_size = potrf_bufferSize( workspace_size = potrf_bufferSize(
context.cusolver_handle, l_parameter, n, L_ptr, lda) context.cusolver_handle, l_parameter, n, L_ptr, lda)
...@@ -544,9 +551,8 @@ class GpuCholesky(Op): ...@@ -544,9 +551,8 @@ class GpuCholesky(Op):
workspace_ptr = workspace.gpudata workspace_ptr = workspace.gpudata
dev_info_ptr = dev_info.gpudata dev_info_ptr = dev_info.gpudata
potrf( potrf(context.cusolver_handle, l_parameter, n, L_ptr,
context.cusolver_handle, l_parameter, n, L_ptr, lda, workspace_ptr, lda, workspace_ptr, workspace_size, dev_info_ptr)
workspace_size, dev_info_ptr)
val_dev_info = np.asarray(dev_info)[0] val_dev_info = np.asarray(dev_info)[0]
if val_dev_info > 0: if val_dev_info > 0:
...@@ -598,6 +604,7 @@ class GpuCholesky(Op): ...@@ -598,6 +604,7 @@ class GpuCholesky(Op):
return [grad] return [grad]
def gpu_cholesky(A, lower=True): def gpu_cholesky(A, lower=True):
return GpuCholesky(lower)(A) return GpuCholesky(lower)(A)
...@@ -612,7 +619,8 @@ class GpuMagmaBase(COp): ...@@ -612,7 +619,8 @@ class GpuMagmaBase(COp):
'gpuarray_helper.h', 'magma.h'] 'gpuarray_helper.h', 'magma.h']
def c_header_dirs(self): def c_header_dirs(self):
dirs = [gpuarray_helper_inc_dir(), pygpu.get_include(), config.cuda.include_path] dirs = [gpuarray_helper_inc_dir(), pygpu.get_include(),
config.cuda.include_path]
if config.magma.include_path: if config.magma.include_path:
dirs.append(config.magma.include_path) dirs.append(config.magma.include_path)
return dirs return dirs
......
...@@ -1412,12 +1412,14 @@ def local_gpua_dot22scalar(op, context_name, inputs, outputs): ...@@ -1412,12 +1412,14 @@ def local_gpua_dot22scalar(op, context_name, inputs, outputs):
def local_gpua_eye(op, context_name, inputs, outputs): def local_gpua_eye(op, context_name, inputs, outputs):
return GpuEye(dtype=op.dtype, context_name=context_name) return GpuEye(dtype=op.dtype, context_name=context_name)
@register_opt('fast_compile') @register_opt('fast_compile')
@op_lifter([tensor.basic.Tri]) @op_lifter([tensor.basic.Tri])
@register_opt2([tensor.basic.Tri], 'fast_compile') @register_opt2([tensor.basic.Tri], 'fast_compile')
def local_gpua_tri(op, context_name, inputs, outputs): def local_gpua_tri(op, context_name, inputs, outputs):
return GpuTri(dtype=op.dtype, context_name=context_name) return GpuTri(dtype=op.dtype, context_name=context_name)
@register_opt('fast_compile') @register_opt('fast_compile')
@op_lifter([tensor.nnet.CrossentropySoftmaxArgmax1HotWithBias]) @op_lifter([tensor.nnet.CrossentropySoftmaxArgmax1HotWithBias])
@register_opt2([tensor.nnet.CrossentropySoftmaxArgmax1HotWithBias], 'fast_compile') @register_opt2([tensor.nnet.CrossentropySoftmaxArgmax1HotWithBias], 'fast_compile')
...@@ -2589,7 +2591,7 @@ def local_gpua_images2neibs(op, context_name, inputs, outputs): ...@@ -2589,7 +2591,7 @@ def local_gpua_images2neibs(op, context_name, inputs, outputs):
@op_lifter([slinalg.Solve]) @op_lifter([slinalg.Solve])
@register_opt2([theano.tensor.slinalg.Solve], 'fast_compile') @register_opt2([theano.tensor.slinalg.Solve], 'fast_compile')
def local_gpu_solve(op, context_name, inputs, outputs): def local_gpu_solve(op, context_name, inputs, outputs):
if inputs[0].dtype not in ['float16', 'float32','float64']: if inputs[0].dtype not in ['float16', 'float32', 'float64']:
return return
if op.A_structure not in MATRIX_STRUCTURES_SOLVE: if op.A_structure not in MATRIX_STRUCTURES_SOLVE:
return return
...@@ -2615,7 +2617,8 @@ def local_gpu_solve(op, context_name, inputs, outputs): ...@@ -2615,7 +2617,8 @@ def local_gpu_solve(op, context_name, inputs, outputs):
def local_inplace_gpu_solve(node): def local_inplace_gpu_solve(node):
if isinstance(node.op, GpuCusolverSolve) and not node.op.inplace: if isinstance(node.op, GpuCusolverSolve) and not node.op.inplace:
with inherit_stack_trace(node.outputs): with inherit_stack_trace(node.outputs):
return [GpuCusolverSolve(A_structure=node.op.A_structure, trans=node.op.trans, return [GpuCusolverSolve(A_structure=node.op.A_structure,
trans=node.op.trans,
inplace=True)(*node.inputs)] inplace=True)(*node.inputs)]
......
...@@ -444,6 +444,7 @@ def test_gpueye(): ...@@ -444,6 +444,7 @@ def test_gpueye():
yield check, dtype, 5, 3, 6 yield check, dtype, 5, 3, 6
yield check, dtype, 3, 5, -6 yield check, dtype, 3, 5, -6
def test_hostfromgpu_shape_i(): def test_hostfromgpu_shape_i():
# Test that the shape is lifted over hostfromgpu # Test that the shape is lifted over hostfromgpu
...@@ -498,13 +499,14 @@ def test_Gpujoin_inplace(): ...@@ -498,13 +499,14 @@ def test_Gpujoin_inplace():
assert x.get_value(borrow=True, return_internal_type=True) is f(0) assert x.get_value(borrow=True, return_internal_type=True) is f(0)
assert np.allclose(f(0), [3, 4, 5]) assert np.allclose(f(0), [3, 4, 5])
def test_gpu_tril_triu(): def test_gpu_tril_triu():
def check_l(m, k=0): def check_l(m, k=0):
m_symb = T.matrix(dtype=m.dtype) m_symb = T.matrix(dtype=m.dtype)
k_symb = T.iscalar() k_symb = T.iscalar()
f = theano.function([m_symb,k_symb], f = theano.function([m_symb, k_symb],
T.tril(m_symb,k_symb), T.tril(m_symb, k_symb),
mode=mode_with_gpu) mode=mode_with_gpu)
result = f(m, k) result = f(m, k)
assert np.allclose(result, np.tril(m, k)) assert np.allclose(result, np.tril(m, k))
...@@ -515,8 +517,8 @@ def test_gpu_tril_triu(): ...@@ -515,8 +517,8 @@ def test_gpu_tril_triu():
def check_u(m, k=0): def check_u(m, k=0):
m_symb = T.matrix(dtype=m.dtype) m_symb = T.matrix(dtype=m.dtype)
k_symb = T.iscalar() k_symb = T.iscalar()
f = theano.function([m_symb,k_symb], f = theano.function([m_symb, k_symb],
T.triu(m_symb,k_symb), T.triu(m_symb, k_symb),
mode=mode_with_gpu) mode=mode_with_gpu)
result = f(m, k) result = f(m, k)
assert np.allclose(result, np.triu(m, k)) assert np.allclose(result, np.triu(m, k))
...@@ -529,7 +531,7 @@ def test_gpu_tril_triu(): ...@@ -529,7 +531,7 @@ def test_gpu_tril_triu():
for dtype in ['float64', 'float32', 'float16']: for dtype in ['float64', 'float32', 'float16']:
# try a big one # try a big one
m = np.asarray(test_rng.rand(5000,5000) * 2 - 1, dtype=dtype) m = np.asarray(test_rng.rand(5000, 5000) * 2 - 1, dtype=dtype)
yield check_l, m, 0 yield check_l, m, 0
yield check_l, m, 1 yield check_l, m, 1
yield check_l, m, -1 yield check_l, m, -1
...@@ -537,8 +539,8 @@ def test_gpu_tril_triu(): ...@@ -537,8 +539,8 @@ def test_gpu_tril_triu():
yield check_u, m, 0 yield check_u, m, 0
yield check_u, m, 1 yield check_u, m, 1
yield check_u, m, -1 yield check_u, m, -1
m = np.asarray(test_rng.rand(10,10) * 2 - 1, dtype=dtype) m = np.asarray(test_rng.rand(10, 10) * 2 - 1, dtype=dtype)
yield check_l, m, 0 yield check_l, m, 0
yield check_l, m, 1 yield check_l, m, 1
yield check_l, m, -1 yield check_l, m, -1
...@@ -547,7 +549,7 @@ def test_gpu_tril_triu(): ...@@ -547,7 +549,7 @@ def test_gpu_tril_triu():
yield check_u, m, 1 yield check_u, m, 1
yield check_u, m, -1 yield check_u, m, -1
m = np.asarray(test_rng.rand(10,5) * 2 - 1, dtype=dtype) m = np.asarray(test_rng.rand(10, 5) * 2 - 1, dtype=dtype)
yield check_l, m, 0 yield check_l, m, 0
yield check_l, m, 1 yield check_l, m, 1
yield check_l, m, -1 yield check_l, m, -1
...@@ -556,6 +558,7 @@ def test_gpu_tril_triu(): ...@@ -556,6 +558,7 @@ def test_gpu_tril_triu():
yield check_u, m, 1 yield check_u, m, 1
yield check_u, m, -1 yield check_u, m, -1
def test_gputri(): def test_gputri():
def check(dtype, N, M_=None, k=0): def check(dtype, N, M_=None, k=0):
# Theano does not accept None as a tensor. # Theano does not accept None as a tensor.
...@@ -583,7 +586,7 @@ def test_gputri(): ...@@ -583,7 +586,7 @@ def test_gputri():
yield check, dtype, 1000, 1000, 0 yield check, dtype, 1000, 1000, 0
yield check, dtype, 1000, 1000, -400 yield check, dtype, 1000, 1000, -400
yield check, dtype, 1000, 1000, 400 yield check, dtype, 1000, 1000, 400
yield check, dtype, 5 yield check, dtype, 5
# M != N, k = 0 # M != N, k = 0
yield check, dtype, 3, 5 yield check, dtype, 3, 5
......
...@@ -7,14 +7,13 @@ from numpy.linalg.linalg import LinAlgError ...@@ -7,14 +7,13 @@ from numpy.linalg.linalg import LinAlgError
import theano import theano
from theano import config from theano import config
from theano.gpuarray.linalg import (GpuCusolverSolve,GpuCublasTriangularSolve, from theano.gpuarray.linalg import (GpuCusolverSolve, GpuCublasTriangularSolve,
GpuCholesky, GpuMagmaCholesky, GpuCholesky, GpuMagmaCholesky,
GpuMagmaEigh, GpuMagmaMatrixInverse, GpuMagmaEigh, GpuMagmaMatrixInverse,
GpuMagmaQR, GpuMagmaSVD, GpuMagmaQR, GpuMagmaSVD,
cusolver_available, gpu_matrix_inverse, cusolver_available, gpu_matrix_inverse,
gpu_cholesky, gpu_cholesky,
gpu_solve, gpu_solve_lower_triangular, gpu_solve, gpu_solve_lower_triangular,
gpu_solve_upper_triangular,
gpu_svd, gpu_qr) gpu_svd, gpu_qr)
from theano.tensor.nlinalg import (SVD, MatrixInverse, QRFull, from theano.tensor.nlinalg import (SVD, MatrixInverse, QRFull,
QRIncomplete, eigh, matrix_inverse, qr) QRIncomplete, eigh, matrix_inverse, qr)
...@@ -26,6 +25,7 @@ from .config import mode_with_gpu, mode_without_gpu ...@@ -26,6 +25,7 @@ from .config import mode_with_gpu, mode_without_gpu
from .test_basic_ops import rand from .test_basic_ops import rand
from nose.tools import assert_raises from nose.tools import assert_raises
class TestCusolver(unittest.TestCase): class TestCusolver(unittest.TestCase):
def setUp(self): def setUp(self):
...@@ -163,6 +163,7 @@ class TestCusolver(unittest.TestCase): ...@@ -163,6 +163,7 @@ class TestCusolver(unittest.TestCase):
# check lower=True case # check lower=True case
self.verify_solve_grad(4, 3, 'general', lower=True, rng=rng) self.verify_solve_grad(4, 3, 'general', lower=True, rng=rng)
class TestGpuCholesky(unittest.TestCase): class TestGpuCholesky(unittest.TestCase):
def setUp(self): def setUp(self):
...@@ -254,6 +255,7 @@ class TestGpuCholesky(unittest.TestCase): ...@@ -254,6 +255,7 @@ class TestGpuCholesky(unittest.TestCase):
fn = self.get_gpu_cholesky_func(True, False) fn = self.get_gpu_cholesky_func(True, False)
self.assertRaises(LinAlgError, fn, A_val) self.assertRaises(LinAlgError, fn, A_val)
class TestGpuCholesky64(unittest.TestCase): class TestGpuCholesky64(unittest.TestCase):
def setUp(self): def setUp(self):
...@@ -599,6 +601,7 @@ class TestMagma(unittest.TestCase): ...@@ -599,6 +601,7 @@ class TestMagma(unittest.TestCase):
for node in fn.maker.fgraph.toposort() for node in fn.maker.fgraph.toposort()
]) ])
# mostly copied from theano/tensor/tests/test_slinalg.py # mostly copied from theano/tensor/tests/test_slinalg.py
def test_cholesky_grad(): def test_cholesky_grad():
rng = np.random.RandomState(utt.fetch_seed()) rng = np.random.RandomState(utt.fetch_seed())
...@@ -628,6 +631,7 @@ def test_cholesky_grad_indef(): ...@@ -628,6 +631,7 @@ def test_cholesky_grad_indef():
# chol_f = function([x], grad(gpu_cholesky(x).sum(), [x])) # chol_f = function([x], grad(gpu_cholesky(x).sum(), [x]))
# assert np.all(np.isnan(chol_f(matrix))) # assert np.all(np.isnan(chol_f(matrix)))
def test_lower_triangular_and_cholesky_grad(): def test_lower_triangular_and_cholesky_grad():
# Random lower triangular system is ill-conditioned. # Random lower triangular system is ill-conditioned.
# #
...@@ -645,12 +649,12 @@ def test_lower_triangular_and_cholesky_grad(): ...@@ -645,12 +649,12 @@ def test_lower_triangular_and_cholesky_grad():
r = rng.randn(N, N).astype(config.floatX) r = rng.randn(N, N).astype(config.floatX)
y = rng.rand(N, 1).astype(config.floatX) y = rng.rand(N, 1).astype(config.floatX)
def f(r,y): def f(r, y):
PD = r.dot(r.T) PD = r.dot(r.T)
L = gpu_cholesky(PD) L = gpu_cholesky(PD)
A = gpu_solve_lower_triangular(L,y) A = gpu_solve_lower_triangular(L, y)
AAT = theano.tensor.dot(A,A.T) AAT = theano.tensor.dot(A, A.T)
B = AAT + theano.tensor.eye(N) B = AAT + theano.tensor.eye(N)
LB = gpu_cholesky(B) LB = gpu_cholesky(B)
return theano.tensor.sum(theano.tensor.log(theano.tensor.diag(LB))) return theano.tensor.sum(theano.tensor.log(theano.tensor.diag(LB)))
yield (lambda: utt.verify_grad(f, [r,y], 3, rng)) yield (lambda: utt.verify_grad(f, [r, y], 3, rng))
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册或登录后发表评论