Commit 9895e2e2 authored by Caglar

Added the flake8 changes.

Parent 9f36d45a
"""CULA-backed GPU solve support for Theano.

Importing this module is safe on machines without scikits.cuda: the
failed CULA import is reduced to a warning and the module-level flags
`cula_available` / `cula_initialized` record what is usable.
"""
import warnings

import numpy

import theano
from theano.sandbox.cuda import CudaNdarray, GpuOp, cuda_ndarray
from theano.sandbox.cuda.basic_ops import (as_cuda_ndarray_variable,
                                           gpu_contiguous)
from theano.sandbox.cuda.type import CudaNdarrayType
from theano.tensor import as_tensor_variable

# scikits.cuda may be absent on CPU-only installs, so guard the import:
# the success path must set cula_available to True (the old code set it
# to False, which made the initialization block below unreachable).
cula_available = False
try:
    from scikits.cuda import cula
    scikits_cuda_available = True
    cula_available = True
except ImportError:
    cula = None  # keep the name bound so later `cula` checks don't NameError
    scikits_cuda_available = False
    warnings.warn("CULA import failed in theano.sandbox.cuda.cula")

# Initialize CULA once per process; a failed init leaves
# cula_initialized False so callers can detect it.
cula_initialized = False
if cula_available and not cula_initialized:
    try:
        cula.culaInitialize()
        cula_initialized = True
    except Exception:  # narrow from bare except: don't swallow SystemExit
        warnings.warn("Initialization of cula failed.")
class GpuSolve(GpuOp):
"""
CULA GPU solver OP.
trans: Whether to take the transpose of the input matrix or not.
    By default, we take the transpose of the input matrix before
    feeding it into the Op, mainly because CULA requires inputs to
    be in Fortran order.
"""
def __init__(self, trans='T'):
    # trans: 'T' (default) or 'N'; it selects which A/b shape-alignment
    # check runs in make_thunk and is forwarded to the CULA solve call.
    self.trans = trans
......@@ -61,21 +65,19 @@ class GpuSolve(GpuOp):
outputs = [storage_map[v] for v in node.outputs]
def thunk():
input_shape = inputs[1][0].shape
#size of the matrices to invert
# size of the matrices to invert
z = outputs[0]
#Matrix
# Matrix
A = inputs[0][0]
#Solution vectors
# Solution vectors
b = inputs[1][0]
A_cpy = A.copy()
b_cpy = b.copy()
#Convert b to F-order from c-order.
# Convert b to F-order from c-order.
b_cpy = b_cpy.dimshuffle(1, 0).reshape((b.shape[0], b.shape[1]))
A_pycuda = to_gpuarray(A_cpy)
......@@ -93,16 +95,15 @@ class GpuSolve(GpuOp):
l, n = A_shape
k, m = b_shape
if n != k:
raise ValueError('A and b must be aligned.')
raise ValueError('A and b must be aligned.')
elif trans in ['N']:
n, l = A_shape
k, m = b_shape
if l != m:
raise ValueError('A and b must be aligned.')
raise ValueError('A and b must be aligned.')
else:
raise ValueError('Invalid value for trans')
lda = max(1, n)
ldb = max(1, n, l)
......@@ -116,7 +117,7 @@ class GpuSolve(GpuOp):
A_pycuda, b_pycuda = cula_gpu_solve(A_pycuda, b_pycuda, self.trans)
#Convert b to F-order from c-order and assign it to output:
# Convert b to F-order from c-order and assign it to output:
z[0] = b_cpy.reshape((b.shape[0], b.shape[1])).dimshuffle(1, 0)
thunk.inputs = inputs
......
......@@ -24,8 +24,8 @@ if theano.config.mode == 'FAST_COMPILE':
else:
mode_with_gpu = theano.compile.mode.get_default_mode().including('gpu')
class TestCula(unittest.TestCase):
class TestCula(unittest.TestCase):
def run_gpu_solve(self, A_val, x_val):
b_val = numpy.dot(A_val, x_val)
A = theano.tensor.matrix("A", dtype="float32")
......@@ -38,23 +38,32 @@ class TestCula(unittest.TestCase):
utt.assert_allclose(x_res, x_val)
def test_diag_solve(self):
A_val = numpy.asarray([[2, 0, 0], [0, 1, 0], [0, 0, 1]], dtype="float32")
x_val = numpy.random.uniform(-0.4, 0.4, (A_val.shape[1], 1)).astype("float32")
numpy.random.seed(1)
A_val = numpy.asarray([[2, 0, 0], [0, 1, 0], [0, 0, 1]],
dtype="float32")
x_val = numpy.random.uniform(-0.4, 0.4, (A_val.shape[1],
1)).astype("float32")
self.run_gpu_solve(A_val, x_val)
def test_sym_solve(self):
numpy.random.seed(1)
A_val = numpy.random.uniform(-0.4, 0.4, (5, 5)).astype("float32")
A_sym = (A_val + A_val.T) / 2.0
x_val = numpy.random.uniform(-0.4, 0.4, (A_val.shape[1], 1)).astype("float32")
x_val = numpy.random.uniform(-0.4, 0.4, (A_val.shape[1],
1)).astype("float32")
self.run_gpu_solve(A_sym, x_val)
def test_orth_solve(self):
numpy.random.seed(1)
A_val = numpy.random.uniform(-0.4, 0.4, (5, 5)).astype("float32")
A_orth = numpy.linalg.svd(A_val)[0]
x_val = numpy.random.uniform(-0.4, 0.4, (A_orth.shape[1], 1)).astype("float32")
x_val = numpy.random.uniform(-0.4, 0.4, (A_orth.shape[1],
1)).astype("float32")
self.run_gpu_solve(A_orth, x_val)
def test_uni_rand_solve(self):
numpy.random.seed(1)
A_val = numpy.random.uniform(-0.4, 0.4, (5, 5)).astype("float32")
x_val = numpy.random.uniform(-0.4, 0.4, (A_val.shape[1], 4)).astype("float32")
x_val = numpy.random.uniform(-0.4, 0.4,
(A_val.shape[1], 4)).astype("float32")
self.run_gpu_solve(A_val, x_val)
......@@ -537,11 +537,15 @@ def test_erfinvgpu():
def test_local_gpu_solve():
numpy.random.seed(1)
def cmp(a_shp, b_shp):
a0 = numpy.random.uniform(-0.4, 0.4, a_shp).astype('float32')
a0 = numpy.random.uniform(-0.4, 0.4,
a_shp).astype('float32')
a = cuda.shared_constructor(a0, 'a')
b0 = numpy.random.uniform(-0.4, 0.4, b_shp).astype('float32')
b0 = numpy.random.uniform(-0.4, 0.4,
b_shp).astype('float32')
b = cuda.shared_constructor(b0, 'b')
f = pfunc([], tensor.slinalg.solve(a, b), mode=mode_with_gpu)
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论