Flake8 for elemwise.py

e4ae792a · Arnaud Bergeron · 2e3d841e · e4ae792a · e4ae792a
--- a/theano/sandbox/gpuarray/elemwise.py
+++ b/theano/sandbox/gpuarray/elemwise.py
@@ -4,7 +4,6 @@ import os
 from theano.compat import izip
 import numpy

-import theano
 from theano import Apply, scalar, config
 from theano import scalar as scal
 from six.moves import StringIO, xrange
@@ -94,7 +93,7 @@ class GpuElemwise(GpuKernelBase, HideC, Elemwise):
        try:
            support_code = self.scalar_op.c_support_code()
            if (support_code.strip() != "#define THEANO_MACRO_MOD(x,y) (x % y)" and
-                support_code.strip() != ""):
+                    support_code.strip() != ""):
                # The macro is fine, the C++ struct is not.
                raise SupportCodeError(support_code)
        except MethodNotDefined:
@@ -108,7 +107,7 @@ class GpuElemwise(GpuKernelBase, HideC, Elemwise):
        scal_v_ins = [scalar.get_scalar_type(i.dtype) for i in node.inputs]

        outs = [make_argument(o, 'o%d' % (n,)) for n, o in
-                enumerate(node.outputs) if not n in self.inplace_pattern]
+                enumerate(node.outputs) if n not in self.inplace_pattern]
        scal_v_outs = [scalar.get_scalar_type(o.dtype) for o in node.outputs]

        fake_node = Apply(self.scalar_op, [i() for i in scal_v_ins],
@@ -132,7 +131,7 @@ class GpuElemwise(GpuKernelBase, HideC, Elemwise):
            else:
                scal_out.append(arg.name + '[i]')

-        kop = self.scalar_op.c_code(fake_node, nodename+'_scalar',
+        kop = self.scalar_op.c_code(fake_node, nodename + '_scalar',
                                    scal_in, scal_out,
                                    dict(fail='return;'))

@@ -169,9 +168,9 @@ class GpuElemwise(GpuKernelBase, HideC, Elemwise):
                        ("npy_float16", "ga_half"),
                        ("npy_float32", "ga_float"),
                        ("npy_float64", "ga_double"),
-            ]:
+                        ]:
            kop = kop.replace(npy, ga)
-        return ElemwiseKernel(None, inps+outs, kop, preamble=support_code)
+        return ElemwiseKernel(None, inps + outs, kop, preamble=support_code)

    def c_header_dirs(self):
        if pygpu.get_default_context().kind == 'opencl':
@@ -399,7 +398,7 @@ class GpuElemwise(GpuKernelBase, HideC, Elemwise):
            param.append("(void *)&%(z)s->ga.dimensions[%(i)d]" % dict(z=outputs[0],
                                                                       i=i))
        for n, (name, var) in enumerate(zip(inputs + outputs,
-                                       node.inputs + node.outputs)):
+                                            node.inputs + node.outputs)):
            if (n - len(inputs)) in self.inplace_pattern:
                continue
            dtype = dtype_to_ctype(var.dtype)
@@ -417,7 +416,7 @@ class GpuElemwise(GpuKernelBase, HideC, Elemwise):
                         GpuKernel_error(&%(kname)s, err));
            %(fail)s;
        }
-        """ % dict(kname=kname,fail=fail)
+        """ % dict(kname=kname, fail=fail)
        if config.gpuarray.sync:
            code += """
            err = GpuArray_sync(&%(z)s->ga);
@@ -495,7 +494,7 @@ class GpuDimShuffle(HideC, DimShuffle):

        res = input

-        res = res.transpose(self.shuffle+self.drop)
+        res = res.transpose(self.shuffle + self.drop)

        shape = list(res.shape[:len(self.shuffle)])
        for augm in self.augment:
@@ -533,7 +532,7 @@ class GpuDimShuffle(HideC, DimShuffle):
            Py_DECREF(tmp);
            return res;
        }
-        """ % dict(shuffle=', '.join(str(a) for a in (self.shuffle+self.drop)),
+        """ % dict(shuffle=', '.join(str(a) for a in (self.shuffle + self.drop)),
                   name=name, nd_out=len(self.new_order),
                   copy_shape=copy_shape(len(self.new_order)))

@@ -581,7 +580,7 @@ class GpuCAReduceCuda(GpuKernelBase, HideC, CAReduceDtype):
    pre_scalar_op
        If present, must be a scalar op with only 1 input. We will execute it
        on the input value before reduction.
-    
+
    Examples
    --------
    When scalar_op is a theano.scalar.basic.Add instance:
@@ -671,8 +670,6 @@ class GpuCAReduceCuda(GpuKernelBase, HideC, CAReduceDtype):
        if self.pre_scalar_op:
            # Currently we only tested pre_scalar_op that don't cause
            # upcast.
-            d1 = self.__class__(scalar_op=self.scalar_op)(Elemwise(self.pre_scalar_op)(x))
-            assert d1.dtype == ret.outputs[0].dtype
            assert Elemwise(self.pre_scalar_op)(x).dtype == x.dtype
        if self.reduce_mask is None:
            if self.axis is None:
@@ -687,8 +684,8 @@ class GpuCAReduceCuda(GpuKernelBase, HideC, CAReduceDtype):
        if (x.type.ndim != len(self.reduce_mask)):
            raise TypeError("x must have rank %i" % len(self.reduce_mask))
        if ("complex" in x.dtype or
-            "complex" in ret.outputs[0].dtype or
-            "complex" in self._acc_dtype(x.dtype)):
+                "complex" in ret.outputs[0].dtype or
+                "complex" in self._acc_dtype(x.dtype)):
            raise NotImplementedError("We don't support complex in gpu reduction")
        return Apply(self, [x], [GpuArrayType(ret.outputs[0].dtype,
                                              ret.outputs[0].type.broadcastable)()])
@@ -863,14 +860,16 @@ class GpuCAReduceCuda(GpuKernelBase, HideC, CAReduceDtype):
            # check if the tensor is ccontiguous, if true, use the c_code_reduce_ccontig code.
            # TODO: check if we are ccontiguous when we un-dimshuffle
            # TODO: if only some dims are ccontiguous, call version with less dims.
-            print('if(%(x)s->ga.flags & GA_C_CONTIGUOUS){'%locals(), file=sio)
+            print('if(%(x)s->ga.flags & GA_C_CONTIGUOUS){' % locals(),
+                  file=sio)
            self.c_code_reduce_ccontig(sio, node, name, x, z, fail)
            print("}else{", file=sio)
-            getattr(self, 'c_code_reduce_%s'%(''.join(
-                str(i) for i in self.reduce_mask)))(sio, node, name, x, z, fail)
+            getattr(self, 'c_code_reduce_%s' %
+                    (''.join(str(i) for i in self.reduce_mask)))(
+                sio, node, name, x, z, fail)
            print("}", file=sio)
        else:
-            getattr(self, 'c_code_reduce_%s'%(''.join(
+            getattr(self, 'c_code_reduce_%s' % (''.join(
                str(i) for i in self.reduce_mask)))(sio, node, name, x, z, fail)

        # \end bracket the reduction ...
@@ -1094,8 +1093,8 @@ class GpuCAReduceCuda(GpuKernelBase, HideC, CAReduceDtype):
        else:
            assert isinstance(self.scalar_op, (scal.Maximum,
                                               scal.Minimum))
-            if self.pre_scalar_op:  # TODO, multi_dtype!
-                #dtype = node.inputs[0].dtype
+            if self.pre_scalar_op:  # TODO: multiple dtypes
+                # dtype = node.inputs[0].dtype
                dtype = 'float32'

                dummy_var = scal.Scalar(dtype=dtype)()
@@ -1171,7 +1170,7 @@ class GpuCAReduceCuda(GpuKernelBase, HideC, CAReduceDtype):

        Parameters
        ----------
-        node, name, sub 
+        node, name, sub
            These should be passed through from the original call to c_code.

        """
@@ -1411,7 +1410,7 @@ class GpuCAReduceCuda(GpuKernelBase, HideC, CAReduceDtype):

    def c_code_reduce_01X(self, sio, node, name, x, z, fail, N):
        """
-        
+
        Parameters
        ----------
        N
@@ -1946,9 +1945,6 @@ class GpuCAReduceCuda(GpuKernelBase, HideC, CAReduceDtype):
        version = [16]  # the version corresponding to the c code in this Op

        # now we insert versions for the ops on which we depend...
-        scalar_node = Apply(self.scalar_op,
-                [Scalar(dtype=input.type.dtype)() for input in node.inputs],
-                [Scalar(dtype=output.type.dtype)() for output in node.outputs])
        version.extend(self.scalar_op.c_code_cache_version())
        for i in node.inputs + node.outputs:
            version.extend(Scalar(dtype=i.type.dtype).c_code_cache_version())
@@ -1962,7 +1958,7 @@ class GpuCAReduceCuda(GpuKernelBase, HideC, CAReduceDtype):
        in_dtype = node.inputs[0].dtype
        out_dtype = node.outputs[0].dtype
        acc_dtype = self._acc_dtype(node.inputs[0].dtype)
-        flags=Kernel.get_flags(in_dtype, acc_dtype, out_dtype)
+        flags = Kernel.get_flags(in_dtype, acc_dtype, out_dtype)
        in_type = gpuarray.dtype_to_ctype(in_dtype)
        out_type = gpuarray.dtype_to_ctype(out_dtype)
        acc_type = gpuarray.dtype_to_ctype(acc_dtype)
@@ -2106,10 +2102,10 @@ class GpuCAReduceCuda(GpuKernelBase, HideC, CAReduceDtype):
                ]
            kernels.append(Kernel(code=sio.getvalue(), name=kname,
                                  params=params, flags=flags, objvar=k_var))
-        #01, 011, 0111
+        # 01, 011, 0111
        if (0 == self.reduce_mask[0] and
-            all(self.reduce_mask[1:]) and
-            nd_in in[2, 3, 4]):
+                all(self.reduce_mask[1:]) and
+                nd_in in[2, 3, 4]):
            # this kernel uses one block for each row.
            # threads per block for each element per row.

@@ -2303,10 +2299,10 @@ class GpuCAReduceCuda(GpuKernelBase, HideC, CAReduceDtype):
            # this kernel uses one block for multiple column(up to 32TODO),
            # threads per block for each element per column.

-# thread.x = dim 2 contiguous
-# thread.y = dim 1
-# block.x = dim 0
-# block.y = dim 1 rest
+            # thread.x = dim 2 contiguous
+            # thread.y = dim 1
+            # block.x = dim 0
+            # block.y = dim 1 rest
            init = self._k_init(node, nodename)
            decl, kname, params, k_var = self._k_decl(node, nodename, pattern="010_inner")
            reducebuf = self._k_reduce_buf_multiple('Z[i0 * sZ0 + i2*sZ1]',
@@ -2515,7 +2511,7 @@ class GpuCAReduceCuda(GpuKernelBase, HideC, CAReduceDtype):
            kernels.append(Kernel(code=sio.getvalue(), name=kname,
                                  params=params, flags=flags, objvar=k_var))
        if self.reduce_mask == (0, 0, 1, 1):
-             # this kernel uses one block for each row,
+            # this kernel uses one block for each row,
            # threads per block for each element per row.
            reducebuf = self._k_reduce_buf('Z[i0 * sZ0 + i1 * sZ1]',
                                           node, nodename, sub={})
@@ -2625,7 +2621,7 @@ class GpuCAReduceCuda(GpuKernelBase, HideC, CAReduceDtype):
                                             {}, True)
            reduce_init = self._assign_init(load_in + "(A[blockIdx.x * sA1])")
            kname = "kernel_reduce_1011"
-            k_var= "kernel_reduce_1011_" + nodename
+            k_var = "kernel_reduce_1011_" + nodename
            sio = StringIO()
            print("""
            KERNEL void %(kname)s(
@@ -2712,7 +2708,7 @@ class GpuCAReduceCPY(GpuKernelBase, HideC, CAReduceDtype):
        # cache the kernel object
        self.get_kernel_cache(node)
        return super(GpuCAReduceCPY, self).make_thunk(node, storage_map,
-                                                   compute_map, no_recycling)
+                                                      compute_map, no_recycling)

    def get_kernel_cache(self, node):
        attr = '@cache_reduction_k'
@@ -2753,7 +2749,7 @@ class GpuCAReduceCPY(GpuKernelBase, HideC, CAReduceDtype):
                       flags=Kernel.get_flags(node.inputs[0].type.dtype,
                                              acc_dtype,
                                              node.outputs[0].type.dtype),
-                       objvar='k_reduk_'+name)]
+                       objvar='k_reduk_' + name)]

    def c_code(self, node, name, inp, out, sub):
        if not any(getattr(self, 'redux', [node.inputs[0].ndim != 0])):
@@ -2768,8 +2764,8 @@ class GpuCAReduceCPY(GpuKernelBase, HideC, CAReduceDtype):

        if (%(sync)d)
            GpuArray_sync(&%(out)s->ga);
-""" % dict(out=out[0], inp=inp[0], fail=sub['fail'],
-           sync=bool(config.gpuarray.sync))
+        """ % dict(out=out[0], inp=inp[0], fail=sub['fail'],
+                   sync=bool(config.gpuarray.sync))
        k = self.get_kernel_cache(node)
        _, src, _, ls = k._get_basic_kernel(k.init_local_size,
                                            node.inputs[0].ndim)
@@ -2816,8 +2812,8 @@ class GpuCAReduceCPY(GpuKernelBase, HideC, CAReduceDtype):
                 %(fail)s
             }
         }
-""" % dict(output=output, nd_out=nd_out, fail=sub['fail'],
-           out_type=dtype_to_typecode(node.outputs[0].type.dtype))
+        """ % dict(output=output, nd_out=nd_out, fail=sub['fail'],
+                   out_type=dtype_to_typecode(node.outputs[0].type.dtype))
        else:
            code += """
        if (%(output)s == NULL || %(output)s->ga.nd != 0) {
@@ -2828,8 +2824,8 @@ class GpuCAReduceCPY(GpuKernelBase, HideC, CAReduceDtype):
                %(fail)s
            }
        }
-""" % dict(output=output, fail=sub['fail'],
-           out_type=dtype_to_typecode(node.outputs[0].type.dtype))
+        """ % dict(output=output, fail=sub['fail'],
+                   out_type=dtype_to_typecode(node.outputs[0].type.dtype))

        if acc_dtype != node.outputs[0].type.dtype:
            code += """
@@ -2837,12 +2833,13 @@ class GpuCAReduceCPY(GpuKernelBase, HideC, CAReduceDtype):
                          %(acc_type)s, GA_C_ORDER, pygpu_default_context(),
                          Py_None);
        if (!tmp) %(fail)s
-""" % dict(output=output, fail=sub['fail'], acc_type=dtype_to_typecode(acc_dtype))
+        """ % dict(output=output, fail=sub['fail'],
+                   acc_type=dtype_to_typecode(acc_dtype))
        else:
            code += """
        tmp = %(output)s;
        Py_INCREF(tmp);
-""" % dict(output=output)
+        """ % dict(output=output)

        # We need the proxies since we are passing a pointer to the
        # data into the call and therefore we need a real copy of the
@@ -2850,7 +2847,7 @@ class GpuCAReduceCPY(GpuKernelBase, HideC, CAReduceDtype):
        code += """
        args[0] = &n;
        args[1] = tmp->ga.data;
-""" % dict(output=output)
+        """ % dict(output=output)

        p = 2
        for i in range(node.inputs[0].ndim):
@@ -2858,7 +2855,7 @@ class GpuCAReduceCPY(GpuKernelBase, HideC, CAReduceDtype):
        proxy_dim[%(i)s] = %(input)s->ga.dimensions[%(i)s];
        args[%(p)s] = &proxy_dim[%(i)s];
        n *= %(input)s->ga.dimensions[%(i)s];
-""" % dict(i=i, p=p, input=input)
+        """ % dict(i=i, p=p, input=input)
            p += 1
            if not redux[i]:
                code += "gs *= %(input)s->ga.dimensions[%(i)s];" % dict(input=input, i=i)
@@ -2867,14 +2864,14 @@ class GpuCAReduceCPY(GpuKernelBase, HideC, CAReduceDtype):
        args[%(p)s] = %(input)s->ga.data;
        proxy_off = %(input)s->ga.offset;
        args[%(p)s+1] = &proxy_off;
-""" % dict(p=p, input=input)
+        """ % dict(p=p, input=input)
        p += 2

        for i in range(node.inputs[0].ndim):
            code += """
        proxy_str[%(i)s] = %(input)s->ga.strides[%(i)s];
        args[%(p)s] = &proxy_str[%(i)s];
-""" % dict(p=p, i=i, input=input)
+        """ % dict(p=p, i=i, input=input)
            p += 1

        code += """
@@ -2911,9 +2908,9 @@ class GpuCAReduceCPY(GpuKernelBase, HideC, CAReduceDtype):
                %(fail)s
            }
        }
-""" % dict(k_var='k_reduk_'+name, sync=bool(config.gpuarray.sync),
-           ls=ls, fail=sub['fail'], output=output, input=input,
-           cast_out=bool(acc_dtype != node.outputs[0].type.dtype))
+        """ % dict(k_var='k_reduk_' + name, sync=bool(config.gpuarray.sync),
+                   ls=ls, fail=sub['fail'], output=output, input=input,
+                   cast_out=bool(acc_dtype != node.outputs[0].type.dtype))

        return code

@@ -2942,8 +2939,8 @@ class GpuCAReduceCPY(GpuKernelBase, HideC, CAReduceDtype):
            redux = self.redux

        if any(redux):
-            output[0] = self.get_kernel_cache(node)(input).astype(copy=False,
-                                             dtype=node.outputs[0].type.dtype)
+            output[0] = self.get_kernel_cache(node)(input).astype(
+                copy=False, dtype=node.outputs[0].type.dtype)
        else:
            output[0] = pygpu.gpuarray.array(input, copy=True,
                                             dtype=node.outputs[0].type.dtype)

--- a/theano/tests/test_flake8.py
+++ b/theano/tests/test_flake8.py
@@ -157,7 +157,6 @@ whitelist_flake8 = [
    "sandbox/linalg/ops.py",
    "sandbox/linalg/__init__.py",
    "sandbox/linalg/tests/test_linalg.py",
-    "sandbox/gpuarray/elemwise.py",
    "sandbox/gpuarray/type.py",
    "sandbox/gpuarray/__init__.py",
    "sandbox/gpuarray/kernel_codegen.py",