提交 54fe4a7f authored 作者: Chiheb Trabelsi's avatar Chiheb Trabelsi

opt.py has been modified in order to respect the flake8 style.

上级 1a3948cc
......@@ -10,22 +10,32 @@ import warnings
import numpy
from six.moves import reduce, xrange
from . import dnn
import theano
from theano import scalar as scal
from theano import config, tensor, gof
import theano.ifelse
import theano.tensor.signal.pool
import theano.tensor.nnet
import theano.tensor.nnet.neighbours
# Convolution
from theano.tensor.nnet import conv
from theano.tensor.nnet.ConvGrad3D import ConvGrad3D
from theano.tensor.nnet.ConvTransp3D import ConvTransp3D
# Pooling
import theano.tensor.signal.pool as pool
from theano.compile import optdb
from theano.gof import (local_optimizer, EquilibriumDB, ProxyDB,
Optimizer, TopoOptimizer, toolbox)
from theano.gof.opt import LocalMetaOptimizer
from theano.sandbox.cuda.basic_ops import gpu_join, GpuJoin
from theano.sandbox.cuda import as_cuda_ndarray_variable
from theano.sandbox.cuda.basic_ops import (
gpu_eye, gpu_contiguous,
gpu_from_host, host_from_gpu, GpuFromHost, HostFromGpu,
GpuContiguous,
GpuElemwise, GpuDimShuffle, GpuReshape, GpuCAReduce,
GpuFlatten, gpu_flatten,
gpu_flatten,
GpuSubtensor, GpuAdvancedSubtensor1,
GpuAdvancedIncSubtensor1, GpuAdvancedIncSubtensor1_dev20,
GpuIncSubtensor, gpu_alloc, GpuAlloc, gpu_shape, GpuSplit, GpuAllocEmpty)
......@@ -137,8 +147,6 @@ register_opt(name='local_gpu_reshape_chain')(
# This is a partial list of CPU ops that can be in some circonstance
# moved to the GPU. This list is used by an optimization.
# Hopefully, we can keep this list up to date.
import theano.tensor.signal.pool
import theano.tensor.nnet.neighbours
cpu_ops_moved_to_gpu = [
tensor.blas.Dot22, tensor.blas.Dot22Scalar, tensor.blas.Gemm,
tensor.blas.Gemv, tensor.blas.Ger, tensor.nnet.conv.ConvOp,
......@@ -850,8 +858,8 @@ def local_gpu_careduce(node):
if x.type == node.outputs[0].type:
return [x]
elif (all([c != "output" and isinstance(c.op, GpuFromHost)
for c, i in node.outputs[0].clients])
and x.owner and x.owner.op.__class__ in
for c, i in node.outputs[0].clients]) and
x.owner and x.owner.op.__class__ in
cpu_ops_moved_to_gpu):
# It is not always good to transfer the reduction to
# the GPU when the clients are on the GPU but not the
......@@ -1023,7 +1031,8 @@ def local_gpu_flatten(node):
return [gpu_flatten(host_input.owner.inputs[0], outdim)(
as_cuda_ndarray_variable(host_input.owner.inputs[0]))]
if isinstance(node.op, tensor.Flatten):
x, = node.inputs
x, shp = node.inputs
outdim = node.op.outdim
if x.owner and isinstance(x.owner.op, HostFromGpu):
outdim = node.op.outdim
gpu_x, = x.owner.inputs
......@@ -1050,15 +1059,13 @@ def local_gpu_subtensor(node):
*coords)]
if isinstance(node.op, tensor.Subtensor):
x = node.inputs[0]
if (x.owner and
isinstance(x.owner.op, HostFromGpu) and
x.dtype == "float32"):
if (x.owner and x.dtype == "float32" and
isinstance(x.owner.op, HostFromGpu)):
gpu_x = x.owner.inputs[0]
if (gpu_x.owner and
isinstance(gpu_x.owner.op, GpuFromHost) and
# And it is a shared var or an input of the graph.
not gpu_x.owner.inputs[0].owner):
if (gpu_x.owner and # And it is a shared var or an input of the graph.
not(gpu_x.owner.inputs[0].owner) and
isinstance(gpu_x.owner.op, GpuFromHost)):
if len(x.clients) == 1:
if any([n == 'output' or isinstance(n.op, GpuOp)
......@@ -1119,9 +1126,7 @@ def local_gpu_advanced_incsubtensor1(node):
'least \'0.6\'.', stacklevel=1)
active_device_no = theano.sandbox.cuda.active_device_number()
compute_capability = device_properties(active_device_no)['major']
if (compute_capability < 2 or
x.ndim != 2 or
y.ndim != 2):
if (compute_capability < 2 or y.ndim != 2 or x.ndim != 2):
gpu_op = GpuAdvancedIncSubtensor1(
set_instead_of_inc=set_instead_of_inc)
......@@ -1162,9 +1167,7 @@ def local_gpu_advanced_incsubtensor1(node):
active_device_no = theano.sandbox.cuda.active_device_number()
compute_capability = device_properties(active_device_no)['major']
if (compute_capability < 2 or
x.ndim != 2 or
y.ndim != 2):
if (compute_capability < 2 or y.ndim != 2 or x.ndim != 2):
gpu_op = GpuAdvancedIncSubtensor1(
set_instead_of_inc=set_instead_of_inc)
else:
......@@ -1203,8 +1206,8 @@ def local_gpu_incsubtensor(node):
# Incrementing a float32 x results in a float32
# output even if y is float64, so we can downcast
# y to put it on GPU
elif type(node.op) == tensor.IncSubtensor and \
node.inputs[0].dtype == "float32":
elif (type(node.op) == tensor.IncSubtensor and
node.inputs[0].dtype == "float32"):
x, y = node.inputs[0:2]
assert isinstance(x.type, tensor.TensorType)
assert isinstance(y.type, tensor.TensorType)
......@@ -1346,8 +1349,6 @@ def cast(x, dtype):
cast_op = theano.tensor.Elemwise(scal.Identity(scal.specific_out(stype)))
return cast_op(x)
import theano.tensor.nnet
@register_opt()
@local_optimizer([tensor.nnet.CrossentropySoftmaxArgmax1HotWithBias])
......@@ -1419,18 +1420,13 @@ def local_gpu_softmax_with_bias(node):
return False
# Convolution
from theano.tensor.nnet import conv
def _gpu_conv_to_fftconv(node):
# shared helper function for local_conv_fft_valid and local_conv_fft_full.
# we import conv2d_fft locally to avoid pycuda warnings
from theano.sandbox.cuda.fftconv import conv2d_fft
kwargs = {'border_mode': node.op.border_mode}
if (node.op.imshp is not None and
node.op.imshp[-1] is not None and
node.op.imshp[-1] % 2 == 1):
if (node.op.imshp is not None and node.op.imshp[-1] % 2 == 1 and
node.op.imshp[-1] is not None):
kwargs['pad_last_dim'] = True
# If the user supplied the full nonsymbolic image_shape and
......@@ -1459,9 +1455,8 @@ def _gpu_conv_to_fftconv(node):
@local_optimizer([GpuConv])
def local_conv_fft_valid(node):
    """Replace a GpuConv in 'valid' mode by the FFT-based convolution.

    Only applies when the op allows the FFT optimization (``fft_opt``)
    and uses unit subsampling, which is all conv2d_fft supports.
    Returns a one-element list with the replacement output, or False.
    """
    if isinstance(node.op, GpuConv):
        # conv2d_fft only handles border_mode='valid' with subsample (1, 1);
        # fft_opt lets the user opt out of this rewrite per op.
        if (node.op.border_mode == 'valid' and node.op.fft_opt and
                node.op.subsample == (1, 1)):
            return [_gpu_conv_to_fftconv(node)]
    return False
......@@ -1470,9 +1465,8 @@ def local_conv_fft_valid(node):
@local_optimizer([GpuConv])
def local_conv_fft_full(node):
    """Replace a GpuConv in 'full' mode by the FFT-based convolution.

    Only applies when the op allows the FFT optimization (``fft_opt``)
    and uses unit subsampling, which is all conv2d_fft supports.
    Returns a one-element list with the replacement output, or False.
    """
    if isinstance(node.op, GpuConv):
        # conv2d_fft only handles border_mode='full' with subsample (1, 1);
        # fft_opt lets the user opt out of this rewrite per op.
        if (node.op.border_mode == 'full' and node.op.fft_opt and
                node.op.subsample == (1, 1)):
            return [_gpu_conv_to_fftconv(node)]
    # Return False (not bare None) for consistency with local_conv_fft_valid;
    # both are falsy, so the optimizer framework treats them identically.
    return False
......@@ -1659,7 +1653,6 @@ conv_groupopt.register('conv_fft_full', local_conv_fft_full, 10,
'conv_fft')
# cuDNN is the second, but only registered if cuDNN is available.
# It can be disabled by excluding 'conv_dnn' or 'cudnn'.
from . import dnn
# We can't check at import if dnn is available, so we must always
# register it. This do not cause problem as if it is not avail, the
# opt will do nothing.
......@@ -1708,8 +1701,7 @@ class ConvMetaOptimizer(LocalCudaMetaOptimizer):
shapes = ((node.op.bsize,) + node.op.imshp,
(node.op.nkern, nchannels) + node.op.kshp)
for (var, shape) in zip(vars, shapes):
if ((var in inputs) and
(shape is not None) and
if ((var in inputs) and (shape is not None) and
not any(s is None for s in shape)):
result[var] = theano.shared(
......@@ -1763,8 +1755,6 @@ def local_conv3d_fft(node):
gpu_optimizer.register("conv3d_fft", local_conv3d_fft)
from theano.tensor.nnet.ConvGrad3D import ConvGrad3D
@local_optimizer([ConvGrad3D])
def local_convgrad3d_fft(node):
......@@ -1794,8 +1784,6 @@ def local_convgrad3d_fft(node):
gpu_optimizer.register("convgrad3d_fft", local_convgrad3d_fft)
from theano.tensor.nnet.ConvTransp3D import ConvTransp3D
@local_optimizer([ConvTransp3D])
def local_convtransp3d_fft(node):
......@@ -1894,15 +1882,11 @@ def local_convtransp3d_gemm(node):
gpu_optimizer.register("convtransp3d_gemm", local_convtransp3d_gemm)
# Pooling
import theano.tensor.signal.pool as pool
@register_opt()
@local_optimizer([pool.Pool])
def local_gpu_downsample_factor_max(node):
if (isinstance(node.op, pool.Pool)
and node.op.ds == node.op.st):
if (isinstance(node.op, pool.Pool) and
node.op.ds == node.op.st):
assert node.op.__props__ == ('ds', 'ignore_border', 'st', 'padding',
'mode')
......@@ -1917,9 +1901,7 @@ def local_gpu_downsample_factor_max(node):
@register_opt()
@local_optimizer([pool.MaxPoolGrad])
def local_gpu_downsample_factor_max_grad(node):
if (isinstance(node.op, pool.MaxPoolGrad) and
node.op.ds == node.op.st):
if (isinstance(node.op, pool.MaxPoolGrad) and node.op.ds == node.op.st):
assert node.op.__props__ == ('ds', 'ignore_border', 'st', 'padding',
'mode')
if (node.op.padding != (0, 0) or
......@@ -1955,9 +1937,6 @@ def local_gpu_downsample_factor_max_grad_grad(node):
as_cuda_ndarray_variable(gx)))]
from theano.sandbox.cuda.basic_ops import gpu_join, GpuJoin
@register_opt()
@local_optimizer([tensor.Join])
def local_gpu_join(node):
......@@ -2310,6 +2289,7 @@ def local_gpu_eye(node):
if (host_input.owner and
isinstance(host_input.owner.op, tensor.Eye) and
host_input.owner.op.dtype == "float32"):
if tensor.extract_constant(host_input.owner.inputs[2]) != 0:
return
return [gpu_eye(*host_input.owner.inputs)]
......@@ -2492,8 +2472,8 @@ def gpuScanOptimization(node):
return _outputs
# scan(host_from_gpu) -> host_from_gpu(GPUscan)
if (type(node.op) == scan_op.Scan
and not node.op.info['gpu']):
if (type(node.op) == scan_op.Scan and
not node.op.info['gpu']):
if any([(i.owner and isinstance(i.owner.op, HostFromGpu))
for i in node.inputs]):
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论