提交 ed7759fb authored 作者: Chiheb Trabelsi

basic_ops.py has been modified in order to respect the flake8 style

上级 200babca
......@@ -2,7 +2,7 @@ from __future__ import absolute_import, print_function, division
import copy
import logging
import sys
import warnings
import numpy
from six import iteritems
from six.moves import StringIO, xrange
......@@ -12,6 +12,9 @@ from theano import gof, Type, Apply
from theano import tensor, scalar, config
from theano.gradient import grad_undefined
from theano.scalar import Scalar
from theano.sandbox.cuda import GpuOp
from theano.sandbox.cuda.type import CudaNdarrayType
from theano.sandbox.cuda.elemwise import NaiveAlgo
scal = scalar # somewhere scalar gets reassigned to be a function
......@@ -24,10 +27,6 @@ try:
except ImportError:
pass
from theano.sandbox.cuda import GpuOp
from theano.sandbox.cuda.type import CudaNdarrayType
from theano.sandbox.cuda.elemwise import NaiveAlgo
_logger_name = 'theano.sandbox.cuda.basic_ops'
_logger = logging.getLogger(_logger_name)
......@@ -596,10 +595,8 @@ class GpuCAReduce(GpuOp):
if self.pre_scalar_op:
pre = "pre=%s,red=" % str(self.pre_scalar_op)
return "GpuCAReduce{%s%s}{%s}" % (
pre,
str(self.scalar_op),
','.join(str(i) for i in self.reduce_mask)
)
pre, str(self.scalar_op),
','.join(str(i) for i in self.reduce_mask))
def __setstate__(self, d):
self.__dict__.update(d)
......@@ -775,15 +772,18 @@ class GpuCAReduce(GpuOp):
# check if the tensor is ccontiguous, if true, use the c_code_reduce_ccontig code.
# TODO: check if we are ccontiguous when we un-dimshuffle
# TODO: if only some dims are ccontiguous, call version with less dims.
print('if(CudaNdarray_is_c_contiguous(%(x)s)){'%locals(), file=sio)
print('if(CudaNdarray_is_c_contiguous( %(x)s)){' % locals(),
file=sio)
self.c_code_reduce_ccontig(sio, node, name, x, z, fail)
print("}else{", file=sio)
getattr(self, 'c_code_reduce_%s'%(''.join(
str(i) for i in self.reduce_mask)))(sio, node, name, x, z, fail)
getattr(self, 'c_code_reduce_%s' % (''.join(
str(i) for i in self.reduce_mask)))(
sio, node, name, x, z, fail)
print("}", file=sio)
else:
getattr(self, 'c_code_reduce_%s'%(''.join(
str(i) for i in self.reduce_mask)))(sio, node, name, x, z, fail)
getattr(self, 'c_code_reduce_%s' % (''.join(
str(i) for i in self.reduce_mask)))(
sio, node, name, x, z, fail)
# \end bracket the reduction ...
print("""
......@@ -976,7 +976,7 @@ class GpuCAReduce(GpuOp):
assert isinstance(self.scalar_op, (scal.Maximum,
scal.Minimum))
if self.pre_scalar_op:
#dtype = node.inputs[0].dtype
# dtype = node.inputs[0].dtype
dtype = 'float32'
dummy_var = scal.Scalar(dtype=dtype)()
......@@ -1834,12 +1834,15 @@ class GpuCAReduce(GpuOp):
version = [15] # the version corresponding to the c code in this Op
# now we insert versions for the ops on which we depend...
scalar_node = Apply(self.scalar_op,
[Scalar(dtype=input.type.dtype)() for input in node.inputs],
[Scalar(dtype=output.type.dtype)() for output in node.outputs])
Apply(self.scalar_op,
[Scalar(
dtype=input.type.dtype)() for input in node.inputs],
[Scalar(
dtype=output.type.dtype)() for output in node.outputs])
version.extend(self.scalar_op.c_code_cache_version())
for i in node.inputs + node.outputs:
version.extend(Scalar(dtype=i.type.dtype).c_code_cache_version())
version.extend(
Scalar(dtype=i.type.dtype).c_code_cache_version())
if all(version):
return tuple(version)
else:
......@@ -1946,10 +1949,11 @@ class GpuCAReduce(GpuOp):
%(reducebuf)s
}
""" % locals(), file=sio)
#01, 011, 0111
# 01, 011, 0111
if (0 == self.reduce_mask[0] and
all(self.reduce_mask[1:]) and
nd_in in[2, 3, 4]):
# this kernel uses one block for each row.
# threads per block for each element per row.
......@@ -2117,10 +2121,10 @@ class GpuCAReduce(GpuOp):
# this kernel uses one block for multiple column(up to 32TODO),
# threads per block for each element per column.
# thread.x = dim 2 contiguous
# thread.y = dim 1
# block.x = dim 0
# block.y = dim 1 rest
# thread.x = dim 2 contiguous
# thread.y = dim 1
# block.x = dim 0
# block.y = dim 1 rest
init = self._k_init(node, nodename)
decl = self._k_decl(node, nodename, pattern="010_inner")
reducebuf = self._k_reduce_buf_multiple('Z[i0 * sZ0 + i2*sZ1]',
......@@ -2470,7 +2474,7 @@ class GpuReshape(tensor.Reshape, GpuOp):
if (x.size % ss) != 0:
raise ValueError("When using -1 in new shape, the computed new shape must be an multiple of the original shape.")
shp_new = numpy.copy(shp)
shp_new[m1_idx] = x.size/ss
shp_new[m1_idx] = x.size / ss
shp = shp_new
else:
......@@ -2721,7 +2725,7 @@ class GpuAdvancedSubtensor1(tensor.AdvancedSubtensor1, GpuOp):
def perform(self, node, inp, out_):
# This don't work as CudaNdarray_Subscript() don't support it.
#super(GpuAdvancedSubtensor1, self).perform(node, inp, out_)
# super(GpuAdvancedSubtensor1, self).perform(node, inp, out_)
x, idx = inp
out, = out_
x_orig = x
......@@ -2733,7 +2737,7 @@ class GpuAdvancedSubtensor1(tensor.AdvancedSubtensor1, GpuOp):
if x.ndim <= 3:
# CudaNdarray.take only supports ndim <= 3
if self.perform_using_take is not None:
assert self.perform_using_take == True, (
assert self.perform_using_take is True, (
"GpuAdvancedSubtensor1 used the fast version")
if idx.dtype != numpy.int64:
if idx.dtype in [numpy.int8, numpy.int16, numpy.int32,
......@@ -2762,7 +2766,7 @@ class GpuAdvancedSubtensor1(tensor.AdvancedSubtensor1, GpuOp):
out[0] = o
else:
if self.perform_using_take is not None:
assert self.perform_using_take == False, (
assert self.perform_using_take is False, (
"GpuAdvancedSubtensor1 didn't use the fast version")
if out_[0][0] is None or out_[0][0].shape != out_shape:
o = cuda_ndarray.cuda_ndarray.CudaNdarray.zeros(out_shape)
......@@ -3006,8 +3010,7 @@ class GpuAdvancedIncSubtensor1_dev20(GpuAdvancedIncSubtensor1):
convert_map = {8: tensor.basic._convert_to_int8,
16: tensor.basic._convert_to_int16,
32: tensor.basic._convert_to_int32,
64: tensor.basic._convert_to_int64
}
64: tensor.basic._convert_to_int64}
intwidth = theano.configdefaults.python_int_bitwidth()
ilist_ = convert_map[intwidth](ilist_)
......@@ -3354,7 +3357,6 @@ class GpuFlatten(gof.HideC, tensor.Flatten, GpuOp):
return Apply(self, [x], [out_type()])
def gpu_flatten(x, outdim=1):
"""
Implement flatten on the gpu.
......@@ -3378,9 +3380,9 @@ def gpu_flatten(x, outdim=1):
"""
x = as_cuda_ndarray_variable(x)
if outdim > 1:
dims = tuple(x.shape[:outdim-1])+(-1,)
dims = tuple(x.shape[:outdim - 1]) + (-1, )
else:
dims = (-1,)
dims = (-1, )
return GpuReshape(outdim)(x, dims)
......@@ -3408,12 +3410,11 @@ class GpuJoin(tensor.Join, GpuOp):
as_tensor_variable_args = [as_cuda_ndarray_variable(x)
for x in tensors]
output_maker = \
lambda bcast: CudaNdarrayType(broadcastable=bcast)()
def output_maker(bcast):
return(CudaNdarrayType(broadcastable=bcast)())
return tensor.Join._make_node_internal(self,
axis, tensors,
as_tensor_variable_args, output_maker)
return tensor.Join._make_node_internal(
self, axis, tensors, as_tensor_variable_args, output_maker)
def perform(self, node, axis_and_tensors, out_):
out, = out_
......@@ -3464,7 +3465,7 @@ class GpuJoin(tensor.Join, GpuOp):
# except for 'axis'
def construct_slices(curlen):
slices = [slice(None, None, None) for i in \
slices = [slice(None, None, None) for i in
xrange(len(template_shape))]
slices[axis] = slice(curpos, curpos + curlen, None)
return tuple(slices)
......@@ -3829,8 +3830,8 @@ class GpuAlloc(GpuAllocEmpty):
# If the output is a constant, it will have to be deepcopied
# each time the function is called. So we do not fold.
return False
elif ( # The following ops work inplace of their input id 0.
client[1] == 0 and
# Else if the following ops work inplace of their input id 0.
elif(client[1] == 0 and
isinstance(client[0].op, (
# Ops that will work inplace on the Alloc. So if they
# get constant_folded, they would copy the
......@@ -3844,8 +3845,7 @@ class GpuAlloc(GpuAllocEmpty):
GpuAdvancedIncSubtensor1,
theano.sandbox.cuda.blas.GpuGemm,
theano.sandbox.cuda.blas.GpuGemv,
theano.sandbox.cuda.blas.GpuGer,
))):
theano.sandbox.cuda.blas.GpuGer,))):
return False
# If the clients is a transfer, we don't want to fold. We
# let the moving opt finish before deciding what to do.
......
Markdown 格式
0%
您将添加 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论