提交 54fe4a7f authored 作者: Chiheb Trabelsi's avatar Chiheb Trabelsi

opt.py has been modified in order to respect the flake8 style.

上级 1a3948cc
......@@ -10,22 +10,32 @@ import warnings
import numpy
from six.moves import reduce, xrange
from . import dnn
import theano
from theano import scalar as scal
from theano import config, tensor, gof
import theano.ifelse
import theano.tensor.signal.pool
import theano.tensor.nnet
import theano.tensor.nnet.neighbours
# Convolution
from theano.tensor.nnet import conv
from theano.tensor.nnet.ConvGrad3D import ConvGrad3D
from theano.tensor.nnet.ConvTransp3D import ConvTransp3D
# Pooling
import theano.tensor.signal.pool as pool
from theano.compile import optdb
from theano.gof import (local_optimizer, EquilibriumDB, ProxyDB,
Optimizer, TopoOptimizer, toolbox)
from theano.gof.opt import LocalMetaOptimizer
from theano.sandbox.cuda.basic_ops import gpu_join, GpuJoin
from theano.sandbox.cuda import as_cuda_ndarray_variable
from theano.sandbox.cuda.basic_ops import (
gpu_eye, gpu_contiguous,
gpu_from_host, host_from_gpu, GpuFromHost, HostFromGpu,
GpuContiguous,
GpuElemwise, GpuDimShuffle, GpuReshape, GpuCAReduce,
GpuFlatten, gpu_flatten,
gpu_flatten,
GpuSubtensor, GpuAdvancedSubtensor1,
GpuAdvancedIncSubtensor1, GpuAdvancedIncSubtensor1_dev20,
GpuIncSubtensor, gpu_alloc, GpuAlloc, gpu_shape, GpuSplit, GpuAllocEmpty)
......@@ -137,8 +147,6 @@ register_opt(name='local_gpu_reshape_chain')(
# This is a partial list of CPU ops that can be in some circonstance
# moved to the GPU. This list is used by an optimization.
# Hopefully, we can keep this list up to date.
import theano.tensor.signal.pool
import theano.tensor.nnet.neighbours
cpu_ops_moved_to_gpu = [
tensor.blas.Dot22, tensor.blas.Dot22Scalar, tensor.blas.Gemm,
tensor.blas.Gemv, tensor.blas.Ger, tensor.nnet.conv.ConvOp,
......@@ -850,8 +858,8 @@ def local_gpu_careduce(node):
if x.type == node.outputs[0].type:
return [x]
elif (all([c != "output" and isinstance(c.op, GpuFromHost)
for c, i in node.outputs[0].clients])
and x.owner and x.owner.op.__class__ in
for c, i in node.outputs[0].clients]) and
x.owner and x.owner.op.__class__ in
cpu_ops_moved_to_gpu):
# It is not always good to transfer the reduction to
# the GPU when the clients are on the GPU but not the
......@@ -1023,7 +1031,8 @@ def local_gpu_flatten(node):
return [gpu_flatten(host_input.owner.inputs[0], outdim)(
as_cuda_ndarray_variable(host_input.owner.inputs[0]))]
if isinstance(node.op, tensor.Flatten):
x, = node.inputs
x, shp = node.inputs
outdim = node.op.outdim
if x.owner and isinstance(x.owner.op, HostFromGpu):
outdim = node.op.outdim
gpu_x, = x.owner.inputs
......@@ -1050,15 +1059,13 @@ def local_gpu_subtensor(node):
*coords)]
if isinstance(node.op, tensor.Subtensor):
x = node.inputs[0]
if (x.owner and
isinstance(x.owner.op, HostFromGpu) and
x.dtype == "float32"):
if (x.owner and x.dtype == "float32" and
isinstance(x.owner.op, HostFromGpu)):
gpu_x = x.owner.inputs[0]
if (gpu_x.owner and
isinstance(gpu_x.owner.op, GpuFromHost) and
# And it is a shared var or an input of the graph.
not gpu_x.owner.inputs[0].owner):
if (gpu_x.owner and # And it is a shared var or an input of the graph.
not(gpu_x.owner.inputs[0].owner) and
isinstance(gpu_x.owner.op, GpuFromHost)):
if len(x.clients) == 1:
if any([n == 'output' or isinstance(n.op, GpuOp)
......@@ -1119,9 +1126,7 @@ def local_gpu_advanced_incsubtensor1(node):
'least \'0.6\'.', stacklevel=1)
active_device_no = theano.sandbox.cuda.active_device_number()
compute_capability = device_properties(active_device_no)['major']
if (compute_capability < 2 or
x.ndim != 2 or
y.ndim != 2):
if (compute_capability < 2 or y.ndim != 2 or x.ndim != 2):
gpu_op = GpuAdvancedIncSubtensor1(
set_instead_of_inc=set_instead_of_inc)
......@@ -1162,9 +1167,7 @@ def local_gpu_advanced_incsubtensor1(node):
active_device_no = theano.sandbox.cuda.active_device_number()
compute_capability = device_properties(active_device_no)['major']
if (compute_capability < 2 or
x.ndim != 2 or
y.ndim != 2):
if (compute_capability < 2 or y.ndim != 2 or x.ndim != 2):
gpu_op = GpuAdvancedIncSubtensor1(
set_instead_of_inc=set_instead_of_inc)
else:
......@@ -1203,8 +1206,8 @@ def local_gpu_incsubtensor(node):
# Incrementing a float32 x results in a float32
# output even if y is float64, so we can downcast
# y to put it on GPU
elif type(node.op) == tensor.IncSubtensor and \
node.inputs[0].dtype == "float32":
elif (type(node.op) == tensor.IncSubtensor and
node.inputs[0].dtype == "float32"):
x, y = node.inputs[0:2]
assert isinstance(x.type, tensor.TensorType)
assert isinstance(y.type, tensor.TensorType)
......@@ -1346,8 +1349,6 @@ def cast(x, dtype):
cast_op = theano.tensor.Elemwise(scal.Identity(scal.specific_out(stype)))
return cast_op(x)
import theano.tensor.nnet
@register_opt()
@local_optimizer([tensor.nnet.CrossentropySoftmaxArgmax1HotWithBias])
......@@ -1419,18 +1420,13 @@ def local_gpu_softmax_with_bias(node):
return False
# Convolution
from theano.tensor.nnet import conv
def _gpu_conv_to_fftconv(node):
# shared helper function for local_conv_fft_valid and local_conv_fft_full.
# we import conv2d_fft locally to avoid pycuda warnings
from theano.sandbox.cuda.fftconv import conv2d_fft
kwargs = {'border_mode': node.op.border_mode}
if (node.op.imshp is not None and
node.op.imshp[-1] is not None and
node.op.imshp[-1] % 2 == 1):
if (node.op.imshp is not None and node.op.imshp[-1] % 2 == 1 and
node.op.imshp[-1] is not None):
kwargs['pad_last_dim'] = True
# If the user supplied the full nonsymbolic image_shape and
......@@ -1459,9 +1455,8 @@ def _gpu_conv_to_fftconv(node):
@local_optimizer([GpuConv])
def local_conv_fft_valid(node):
    """Replace a GpuConv in 'valid' mode by the FFT-based convolution.

    Only applies when the op allows the FFT optimization (``fft_opt``)
    and uses unit subsampling, which is all conv2d_fft supports.
    Returns a one-element list with the replacement output, or False.
    """
    if isinstance(node.op, GpuConv):
        # conv2d_fft only handles border_mode='valid' with subsample (1, 1);
        # fft_opt lets the user opt out of this rewrite per op.
        if (node.op.border_mode == 'valid' and node.op.fft_opt and
                node.op.subsample == (1, 1)):
            return [_gpu_conv_to_fftconv(node)]
    return False
......@@ -1470,9 +1465,8 @@ def local_conv_fft_valid(node):
@local_optimizer([GpuConv])
def local_conv_fft_full(node):
    """Replace a GpuConv in 'full' mode by the FFT-based convolution.

    Only applies when the op allows the FFT optimization (``fft_opt``)
    and uses unit subsampling, which is all conv2d_fft supports.
    Returns a one-element list with the replacement output, or False.
    """
    if isinstance(node.op, GpuConv):
        # conv2d_fft only handles border_mode='full' with subsample (1, 1);
        # fft_opt lets the user opt out of this rewrite per op.
        if (node.op.border_mode == 'full' and node.op.fft_opt and
                node.op.subsample == (1, 1)):
            return [_gpu_conv_to_fftconv(node)]
    # Return False (not bare None) for consistency with local_conv_fft_valid;
    # both are falsy, so the optimizer framework treats them identically.
    return False
......@@ -1659,7 +1653,6 @@ conv_groupopt.register('conv_fft_full', local_conv_fft_full, 10,
'conv_fft')
# cuDNN is the second, but only registered if cuDNN is available.
# It can be disabled by excluding 'conv_dnn' or 'cudnn'.
from . import dnn
# We can't check at import if dnn is available, so we must always
# register it. This do not cause problem as if it is not avail, the
# opt will do nothing.
......@@ -1708,8 +1701,7 @@ class ConvMetaOptimizer(LocalCudaMetaOptimizer):
shapes = ((node.op.bsize,) + node.op.imshp,
(node.op.nkern, nchannels) + node.op.kshp)
for (var, shape) in zip(vars, shapes):
if ((var in inputs) and
(shape is not None) and
if ((var in inputs) and (shape is not None) and
not any(s is None for s in shape)):
result[var] = theano.shared(
......@@ -1763,8 +1755,6 @@ def local_conv3d_fft(node):
gpu_optimizer.register("conv3d_fft", local_conv3d_fft)
from theano.tensor.nnet.ConvGrad3D import ConvGrad3D
@local_optimizer([ConvGrad3D])
def local_convgrad3d_fft(node):
......@@ -1794,8 +1784,6 @@ def local_convgrad3d_fft(node):
gpu_optimizer.register("convgrad3d_fft", local_convgrad3d_fft)
from theano.tensor.nnet.ConvTransp3D import ConvTransp3D
@local_optimizer([ConvTransp3D])
def local_convtransp3d_fft(node):
......@@ -1894,15 +1882,11 @@ def local_convtransp3d_gemm(node):
gpu_optimizer.register("convtransp3d_gemm", local_convtransp3d_gemm)
# Pooling
import theano.tensor.signal.pool as pool
@register_opt()
@local_optimizer([pool.Pool])
def local_gpu_downsample_factor_max(node):
if (isinstance(node.op, pool.Pool)
and node.op.ds == node.op.st):
if (isinstance(node.op, pool.Pool) and
node.op.ds == node.op.st):
assert node.op.__props__ == ('ds', 'ignore_border', 'st', 'padding',
'mode')
......@@ -1917,9 +1901,7 @@ def local_gpu_downsample_factor_max(node):
@register_opt()
@local_optimizer([pool.MaxPoolGrad])
def local_gpu_downsample_factor_max_grad(node):
if (isinstance(node.op, pool.MaxPoolGrad) and
node.op.ds == node.op.st):
if (isinstance(node.op, pool.MaxPoolGrad) and node.op.ds == node.op.st):
assert node.op.__props__ == ('ds', 'ignore_border', 'st', 'padding',
'mode')
if (node.op.padding != (0, 0) or
......@@ -1955,9 +1937,6 @@ def local_gpu_downsample_factor_max_grad_grad(node):
as_cuda_ndarray_variable(gx)))]
from theano.sandbox.cuda.basic_ops import gpu_join, GpuJoin
@register_opt()
@local_optimizer([tensor.Join])
def local_gpu_join(node):
......@@ -2310,6 +2289,7 @@ def local_gpu_eye(node):
if (host_input.owner and
isinstance(host_input.owner.op, tensor.Eye) and
host_input.owner.op.dtype == "float32"):
if tensor.extract_constant(host_input.owner.inputs[2]) != 0:
return
return [gpu_eye(*host_input.owner.inputs)]
......@@ -2492,8 +2472,8 @@ def gpuScanOptimization(node):
return _outputs
# scan(host_from_gpu) -> host_from_gpu(GPUscan)
if (type(node.op) == scan_op.Scan
and not node.op.info['gpu']):
if (type(node.op) == scan_op.Scan and
not node.op.info['gpu']):
if any([(i.owner and isinstance(i.owner.op, HostFromGpu))
for i in node.inputs]):
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论