Added the flake8 changes.

9895e2e2 · Caglar · 9f36d45a · 9895e2e2 · 9895e2e2 · 9895e2e2
--- a/theano/sandbox/cuda/cula.py
+++ b/theano/sandbox/cuda/cula.py
+import warnings
 import theano
 from theano.sandbox.cuda.type import CudaNdarrayType
-from theano.sandbox.cuda import GpuOp, CudaNdarray
+from theano.sandbox.cuda import GpuOp
-from theano.sandbox.cuda.basic_ops import (as_cuda_ndarray_variable,
-                                           gpu_contiguous)
-from theano.tensor import as_tensor_variable
+from theano.sandbox.cuda.basic_ops import as_cuda_ndarray_variable
-from scikits.cuda import cula
-from theano.sandbox.cuda import cuda_ndarray
+# CULA is optional: the flag only becomes True once the import succeeds.
+cula_available = False
 try:
     from scikits.cuda import cula
-    scikits_cuda_available = True
+    cula_available = True
 except ImportError:
-    scikits_cuda_available = False
+    warnings.warn("CULA import failed in theano.sandbox.cuda.cula")
-if cula is not None:
+cula_initialized = False
+if cula_available and cula and not cula_initialized:
+    try:
        cula.culaInitialize()
+        cula_initialized = True
+    except Exception:
+        # Best effort: report the failure, but let KeyboardInterrupt and
+        # SystemExit propagate instead of swallowing them.
+        warnings.warn("Initialization of cula failed.")
-import numpy
 class GpuSolve(GpuOp):
    """
    CULA GPU solver OP.
-    trans: Whether to take the transpose of the input matrix or not. By default,
+    trans: Whether to take the transpose of the input matrix
-    we will take the transpose of the input matrix, before feeding it into the Op.
+    or not. By default, we will take the transpose of the
-    That is mainly, because that CULA requires inputs to be in Fortran order.
+    input matrix, before feeding it into the Op. That is
+    mainly because CULA requires inputs to be in Fortran
+    order.
    """
    def __init__(self, trans='T'):
        self.trans = trans
@@ -61,21 +65,19 @@ class GpuSolve(GpuOp):
        outputs = [storage_map[v] for v in node.outputs]
        def thunk():
-            input_shape = inputs[1][0].shape
+            # size of the matrices to invert
-            #size of the matrices to invert
            z = outputs[0]
-            #Matrix
+            # Matrix
            A = inputs[0][0]
-            #Solution vectors
+            # Solution vectors
            b = inputs[1][0]
            A_cpy = A.copy()
            b_cpy = b.copy()
-            #Convert b to F-order from c-order.
+            # Convert b to F-order from c-order.
            b_cpy = b_cpy.dimshuffle(1, 0).reshape((b.shape[0], b.shape[1]))
            A_pycuda = to_gpuarray(A_cpy)
@@ -102,7 +104,6 @@ class GpuSolve(GpuOp):
                else:
                    raise ValueError('Invalid value for trans')
                lda = max(1, n)
                ldb = max(1, n, l)
@@ -116,7 +117,7 @@ class GpuSolve(GpuOp):
            A_pycuda, b_pycuda = cula_gpu_solve(A_pycuda, b_pycuda, self.trans)
-            #Convert b to F-order from c-order and assign it to output:
+            # Convert b to F-order from c-order and assign it to output:
            z[0] = b_cpy.reshape((b.shape[0], b.shape[1])).dimshuffle(1, 0)
        thunk.inputs = inputs

--- a/theano/sandbox/cuda/tests/test_cula.py
+++ b/theano/sandbox/cuda/tests/test_cula.py
@@ -24,8 +24,8 @@ if theano.config.mode == 'FAST_COMPILE':
 else:
    mode_with_gpu = theano.compile.mode.get_default_mode().including('gpu')
-class TestCula(unittest.TestCase):
+class TestCula(unittest.TestCase):
    def run_gpu_solve(self, A_val, x_val):
        b_val = numpy.dot(A_val, x_val)
        A = theano.tensor.matrix("A", dtype="float32")
@@ -38,23 +38,32 @@ class TestCula(unittest.TestCase):
        utt.assert_allclose(x_res, x_val)
    def test_diag_solve(self):
-        A_val = numpy.asarray([[2, 0, 0], [0, 1, 0], [0, 0, 1]], dtype="float32")
+        numpy.random.seed(1)
-        x_val = numpy.random.uniform(-0.4, 0.4, (A_val.shape[1], 1)).astype("float32")
+        A_val = numpy.asarray([[2, 0, 0], [0, 1, 0], [0, 0, 1]],
+                              dtype="float32")
+        x_val = numpy.random.uniform(-0.4, 0.4, (A_val.shape[1],
+                                     1)).astype("float32")
        self.run_gpu_solve(A_val, x_val)
    def test_sym_solve(self):
+        numpy.random.seed(1)
        A_val = numpy.random.uniform(-0.4, 0.4, (5, 5)).astype("float32")
        A_sym = (A_val + A_val.T) / 2.0
-        x_val = numpy.random.uniform(-0.4, 0.4, (A_val.shape[1], 1)).astype("float32")
+        x_val = numpy.random.uniform(-0.4, 0.4, (A_val.shape[1],
+                                     1)).astype("float32")
        self.run_gpu_solve(A_sym, x_val)
    def test_orth_solve(self):
+        numpy.random.seed(1)
        A_val = numpy.random.uniform(-0.4, 0.4, (5, 5)).astype("float32")
        A_orth = numpy.linalg.svd(A_val)[0]
-        x_val = numpy.random.uniform(-0.4, 0.4, (A_orth.shape[1], 1)).astype("float32")
+        x_val = numpy.random.uniform(-0.4, 0.4, (A_orth.shape[1],
+                                     1)).astype("float32")
        self.run_gpu_solve(A_orth, x_val)
    def test_uni_rand_solve(self):
+        numpy.random.seed(1)
        A_val = numpy.random.uniform(-0.4, 0.4, (5, 5)).astype("float32")
-        x_val = numpy.random.uniform(-0.4, 0.4, (A_val.shape[1], 4)).astype("float32")
+        x_val = numpy.random.uniform(-0.4, 0.4,
+                                     (A_val.shape[1], 4)).astype("float32")
        self.run_gpu_solve(A_val, x_val)
--- a/theano/sandbox/cuda/tests/test_opt.py
+++ b/theano/sandbox/cuda/tests/test_opt.py
@@ -537,11 +537,15 @@ def test_erfinvgpu():
 def test_local_gpu_solve():
+    numpy.random.seed(1)
    def cmp(a_shp, b_shp):
-        a0 = numpy.random.uniform(-0.4, 0.4, a_shp).astype('float32')
+        a0 = numpy.random.uniform(-0.4, 0.4,
+                                  a_shp).astype('float32')
        a = cuda.shared_constructor(a0, 'a')
-        b0 = numpy.random.uniform(-0.4, 0.4, b_shp).astype('float32')
+        b0 = numpy.random.uniform(-0.4, 0.4,
+                                  b_shp).astype('float32')
        b = cuda.shared_constructor(b0, 'b')
        f = pfunc([], tensor.slinalg.solve(a, b), mode=mode_with_gpu)