Commit 9895e2e2 authored by Caglar

Added the flake8 changes.

Parent 9f36d45a
"""CULA-backed GPU solve support for Theano.

Importing this module is safe on machines without scikits.cuda: the
failed CULA import is reduced to a warning and the module-level flags
`cula_available` / `cula_initialized` record what is usable.
"""
import warnings

import numpy

import theano
from theano.sandbox.cuda import CudaNdarray, GpuOp, cuda_ndarray
from theano.sandbox.cuda.basic_ops import (as_cuda_ndarray_variable,
                                           gpu_contiguous)
from theano.sandbox.cuda.type import CudaNdarrayType
from theano.tensor import as_tensor_variable

# scikits.cuda may be absent on CPU-only installs, so guard the import:
# the success path must set cula_available to True (the old code set it
# to False, which made the initialization block below unreachable).
cula_available = False
try:
    from scikits.cuda import cula
    scikits_cuda_available = True
    cula_available = True
except ImportError:
    cula = None  # keep the name bound so later `cula` checks don't NameError
    scikits_cuda_available = False
    warnings.warn("CULA import failed in theano.sandbox.cuda.cula")

# Initialize CULA once per process; a failed init leaves
# cula_initialized False so callers can detect it.
cula_initialized = False
if cula_available and not cula_initialized:
    try:
        cula.culaInitialize()
        cula_initialized = True
    except Exception:  # narrow from bare except: don't swallow SystemExit
        warnings.warn("Initialization of cula failed.")
class GpuSolve(GpuOp):
"""
CULA GPU solver OP.
trans: Whether to take the transpose of the input matrix or not.
    By default, we take the transpose of the input matrix before
    feeding it into the Op, mainly because CULA requires inputs to
    be in Fortran order.
"""
def __init__(self, trans='T'):
    # trans: 'T' (default) or 'N'; it selects which A/b shape-alignment
    # check runs in make_thunk and is forwarded to the CULA solve call.
    self.trans = trans
......@@ -61,21 +65,19 @@ class GpuSolve(GpuOp):
outputs = [storage_map[v] for v in node.outputs]
def thunk():
input_shape = inputs[1][0].shape
#size of the matrices to invert
# size of the matrices to invert
z = outputs[0]
#Matrix
# Matrix
A = inputs[0][0]
#Solution vectors
# Solution vectors
b = inputs[1][0]
A_cpy = A.copy()
b_cpy = b.copy()
#Convert b to F-order from c-order.
# Convert b to F-order from c-order.
b_cpy = b_cpy.dimshuffle(1, 0).reshape((b.shape[0], b.shape[1]))
A_pycuda = to_gpuarray(A_cpy)
......@@ -93,16 +95,15 @@ class GpuSolve(GpuOp):
l, n = A_shape
k, m = b_shape
if n != k:
raise ValueError('A and b must be aligned.')
raise ValueError('A and b must be aligned.')
elif trans in ['N']:
n, l = A_shape
k, m = b_shape
if l != m:
raise ValueError('A and b must be aligned.')
raise ValueError('A and b must be aligned.')
else:
raise ValueError('Invalid value for trans')
lda = max(1, n)
ldb = max(1, n, l)
......@@ -116,7 +117,7 @@ class GpuSolve(GpuOp):
A_pycuda, b_pycuda = cula_gpu_solve(A_pycuda, b_pycuda, self.trans)
#Convert b to F-order from c-order and assign it to output:
# Convert b to F-order from c-order and assign it to output:
z[0] = b_cpy.reshape((b.shape[0], b.shape[1])).dimshuffle(1, 0)
thunk.inputs = inputs
......
......@@ -24,8 +24,8 @@ if theano.config.mode == 'FAST_COMPILE':
else:
mode_with_gpu = theano.compile.mode.get_default_mode().including('gpu')
class TestCula(unittest.TestCase):
class TestCula(unittest.TestCase):
def run_gpu_solve(self, A_val, x_val):
b_val = numpy.dot(A_val, x_val)
A = theano.tensor.matrix("A", dtype="float32")
......@@ -38,23 +38,32 @@ class TestCula(unittest.TestCase):
utt.assert_allclose(x_res, x_val)
def test_diag_solve(self):
A_val = numpy.asarray([[2, 0, 0], [0, 1, 0], [0, 0, 1]], dtype="float32")
x_val = numpy.random.uniform(-0.4, 0.4, (A_val.shape[1], 1)).astype("float32")
numpy.random.seed(1)
A_val = numpy.asarray([[2, 0, 0], [0, 1, 0], [0, 0, 1]],
dtype="float32")
x_val = numpy.random.uniform(-0.4, 0.4, (A_val.shape[1],
1)).astype("float32")
self.run_gpu_solve(A_val, x_val)
def test_sym_solve(self):
numpy.random.seed(1)
A_val = numpy.random.uniform(-0.4, 0.4, (5, 5)).astype("float32")
A_sym = (A_val + A_val.T) / 2.0
x_val = numpy.random.uniform(-0.4, 0.4, (A_val.shape[1], 1)).astype("float32")
x_val = numpy.random.uniform(-0.4, 0.4, (A_val.shape[1],
1)).astype("float32")
self.run_gpu_solve(A_sym, x_val)
def test_orth_solve(self):
numpy.random.seed(1)
A_val = numpy.random.uniform(-0.4, 0.4, (5, 5)).astype("float32")
A_orth = numpy.linalg.svd(A_val)[0]
x_val = numpy.random.uniform(-0.4, 0.4, (A_orth.shape[1], 1)).astype("float32")
x_val = numpy.random.uniform(-0.4, 0.4, (A_orth.shape[1],
1)).astype("float32")
self.run_gpu_solve(A_orth, x_val)
def test_uni_rand_solve(self):
numpy.random.seed(1)
A_val = numpy.random.uniform(-0.4, 0.4, (5, 5)).astype("float32")
x_val = numpy.random.uniform(-0.4, 0.4, (A_val.shape[1], 4)).astype("float32")
x_val = numpy.random.uniform(-0.4, 0.4,
(A_val.shape[1], 4)).astype("float32")
self.run_gpu_solve(A_val, x_val)
......@@ -537,11 +537,15 @@ def test_erfinvgpu():
def test_local_gpu_solve():
numpy.random.seed(1)
def cmp(a_shp, b_shp):
a0 = numpy.random.uniform(-0.4, 0.4, a_shp).astype('float32')
a0 = numpy.random.uniform(-0.4, 0.4,
a_shp).astype('float32')
a = cuda.shared_constructor(a0, 'a')
b0 = numpy.random.uniform(-0.4, 0.4, b_shp).astype('float32')
b0 = numpy.random.uniform(-0.4, 0.4,
b_shp).astype('float32')
b = cuda.shared_constructor(b0, 'b')
f = pfunc([], tensor.slinalg.solve(a, b), mode=mode_with_gpu)
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论