提交 9895e2e2 作者: Caglar

Added the flake8 changes.

上级 9f36d45a
import warnings
import theano import theano
from theano.sandbox.cuda.type import CudaNdarrayType from theano.sandbox.cuda.type import CudaNdarrayType
from theano.sandbox.cuda import GpuOp, CudaNdarray from theano.sandbox.cuda import GpuOp
from theano.sandbox.cuda.basic_ops import (as_cuda_ndarray_variable,
gpu_contiguous)
from theano.tensor import as_tensor_variable from theano.sandbox.cuda.basic_ops import as_cuda_ndarray_variable
from scikits.cuda import cula
from theano.sandbox.cuda import cuda_ndarray
cula_available = False
try: try:
from scikits.cuda import cula from scikits.cuda import cula
scikits_cuda_available = True cula_available = False
except ImportError: except ImportError:
scikits_cuda_available = False warnings.warn("CULA import failed in theano.sandbox.cuda.cula")
if cula is not None: cula_initialized = False
if cula_available and cula and not cula_initialized:
try:
cula.culaInitialize() cula.culaInitialize()
cula_initialized = True
except:
warnings.warn("Initialization of cula failed.")
import numpy
class GpuSolve(GpuOp): class GpuSolve(GpuOp):
""" """
CULA GPU solver OP. CULA GPU solver OP.
trans: Whether to take the transpose of the input matrix or not. By default, trans: Whether to take the transpose of the input matrix
we will take the transpose of the input matrix, before feeding it into the Op. or not. By default, we will take the transpose of the
That is mainly, because that CULA requires inputs to be in Fortran order. input matrix, before feeding it into the Op. That is
mainly, because that CULA requires inputs to be in Fortran
order.
""" """
def __init__(self, trans='T'): def __init__(self, trans='T'):
self.trans = trans self.trans = trans
...@@ -61,21 +65,19 @@ class GpuSolve(GpuOp): ...@@ -61,21 +65,19 @@ class GpuSolve(GpuOp):
outputs = [storage_map[v] for v in node.outputs] outputs = [storage_map[v] for v in node.outputs]
def thunk(): def thunk():
input_shape = inputs[1][0].shape # size of the matrices to invert
#size of the matrices to invert
z = outputs[0] z = outputs[0]
#Matrix # Matrix
A = inputs[0][0] A = inputs[0][0]
#Solution vectors # Solution vectors
b = inputs[1][0] b = inputs[1][0]
A_cpy = A.copy() A_cpy = A.copy()
b_cpy = b.copy() b_cpy = b.copy()
#Convert b to F-order from c-order. # Convert b to F-order from c-order.
b_cpy = b_cpy.dimshuffle(1, 0).reshape((b.shape[0], b.shape[1])) b_cpy = b_cpy.dimshuffle(1, 0).reshape((b.shape[0], b.shape[1]))
A_pycuda = to_gpuarray(A_cpy) A_pycuda = to_gpuarray(A_cpy)
...@@ -102,7 +104,6 @@ class GpuSolve(GpuOp): ...@@ -102,7 +104,6 @@ class GpuSolve(GpuOp):
else: else:
raise ValueError('Invalid value for trans') raise ValueError('Invalid value for trans')
lda = max(1, n) lda = max(1, n)
ldb = max(1, n, l) ldb = max(1, n, l)
...@@ -116,7 +117,7 @@ class GpuSolve(GpuOp): ...@@ -116,7 +117,7 @@ class GpuSolve(GpuOp):
A_pycuda, b_pycuda = cula_gpu_solve(A_pycuda, b_pycuda, self.trans) A_pycuda, b_pycuda = cula_gpu_solve(A_pycuda, b_pycuda, self.trans)
#Convert b to F-order from c-order and assign it to output: # Convert b to F-order from c-order and assign it to output:
z[0] = b_cpy.reshape((b.shape[0], b.shape[1])).dimshuffle(1, 0) z[0] = b_cpy.reshape((b.shape[0], b.shape[1])).dimshuffle(1, 0)
thunk.inputs = inputs thunk.inputs = inputs
......
...@@ -24,8 +24,8 @@ if theano.config.mode == 'FAST_COMPILE': ...@@ -24,8 +24,8 @@ if theano.config.mode == 'FAST_COMPILE':
else: else:
mode_with_gpu = theano.compile.mode.get_default_mode().including('gpu') mode_with_gpu = theano.compile.mode.get_default_mode().including('gpu')
class TestCula(unittest.TestCase):
class TestCula(unittest.TestCase):
def run_gpu_solve(self, A_val, x_val): def run_gpu_solve(self, A_val, x_val):
b_val = numpy.dot(A_val, x_val) b_val = numpy.dot(A_val, x_val)
A = theano.tensor.matrix("A", dtype="float32") A = theano.tensor.matrix("A", dtype="float32")
...@@ -38,23 +38,32 @@ class TestCula(unittest.TestCase): ...@@ -38,23 +38,32 @@ class TestCula(unittest.TestCase):
utt.assert_allclose(x_res, x_val) utt.assert_allclose(x_res, x_val)
def test_diag_solve(self): def test_diag_solve(self):
A_val = numpy.asarray([[2, 0, 0], [0, 1, 0], [0, 0, 1]], dtype="float32") numpy.random.seed(1)
x_val = numpy.random.uniform(-0.4, 0.4, (A_val.shape[1], 1)).astype("float32") A_val = numpy.asarray([[2, 0, 0], [0, 1, 0], [0, 0, 1]],
dtype="float32")
x_val = numpy.random.uniform(-0.4, 0.4, (A_val.shape[1],
1)).astype("float32")
self.run_gpu_solve(A_val, x_val) self.run_gpu_solve(A_val, x_val)
def test_sym_solve(self): def test_sym_solve(self):
numpy.random.seed(1)
A_val = numpy.random.uniform(-0.4, 0.4, (5, 5)).astype("float32") A_val = numpy.random.uniform(-0.4, 0.4, (5, 5)).astype("float32")
A_sym = (A_val + A_val.T) / 2.0 A_sym = (A_val + A_val.T) / 2.0
x_val = numpy.random.uniform(-0.4, 0.4, (A_val.shape[1], 1)).astype("float32") x_val = numpy.random.uniform(-0.4, 0.4, (A_val.shape[1],
1)).astype("float32")
self.run_gpu_solve(A_sym, x_val) self.run_gpu_solve(A_sym, x_val)
def test_orth_solve(self): def test_orth_solve(self):
numpy.random.seed(1)
A_val = numpy.random.uniform(-0.4, 0.4, (5, 5)).astype("float32") A_val = numpy.random.uniform(-0.4, 0.4, (5, 5)).astype("float32")
A_orth = numpy.linalg.svd(A_val)[0] A_orth = numpy.linalg.svd(A_val)[0]
x_val = numpy.random.uniform(-0.4, 0.4, (A_orth.shape[1], 1)).astype("float32") x_val = numpy.random.uniform(-0.4, 0.4, (A_orth.shape[1],
1)).astype("float32")
self.run_gpu_solve(A_orth, x_val) self.run_gpu_solve(A_orth, x_val)
def test_uni_rand_solve(self): def test_uni_rand_solve(self):
numpy.random.seed(1)
A_val = numpy.random.uniform(-0.4, 0.4, (5, 5)).astype("float32") A_val = numpy.random.uniform(-0.4, 0.4, (5, 5)).astype("float32")
x_val = numpy.random.uniform(-0.4, 0.4, (A_val.shape[1], 4)).astype("float32") x_val = numpy.random.uniform(-0.4, 0.4,
(A_val.shape[1], 4)).astype("float32")
self.run_gpu_solve(A_val, x_val) self.run_gpu_solve(A_val, x_val)
...@@ -537,11 +537,15 @@ def test_erfinvgpu(): ...@@ -537,11 +537,15 @@ def test_erfinvgpu():
def test_local_gpu_solve(): def test_local_gpu_solve():
numpy.random.seed(1)
def cmp(a_shp, b_shp): def cmp(a_shp, b_shp):
a0 = numpy.random.uniform(-0.4, 0.4, a_shp).astype('float32') a0 = numpy.random.uniform(-0.4, 0.4,
a_shp).astype('float32')
a = cuda.shared_constructor(a0, 'a') a = cuda.shared_constructor(a0, 'a')
b0 = numpy.random.uniform(-0.4, 0.4, b_shp).astype('float32') b0 = numpy.random.uniform(-0.4, 0.4,
b_shp).astype('float32')
b = cuda.shared_constructor(b0, 'b') b = cuda.shared_constructor(b0, 'b')
f = pfunc([], tensor.slinalg.solve(a, b), mode=mode_with_gpu) f = pfunc([], tensor.slinalg.solve(a, b), mode=mode_with_gpu)
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论