提交 ed7759fb authored 作者: Chiheb Trabelsi

basic_ops.py has been modified in order to respect the flake8 style

上级 200babca
......@@ -2,7 +2,7 @@ from __future__ import absolute_import, print_function, division
import copy
import logging
import sys
import warnings
import numpy
from six import iteritems
from six.moves import StringIO, xrange
......@@ -12,6 +12,9 @@ from theano import gof, Type, Apply
from theano import tensor, scalar, config
from theano.gradient import grad_undefined
from theano.scalar import Scalar
from theano.sandbox.cuda import GpuOp
from theano.sandbox.cuda.type import CudaNdarrayType
from theano.sandbox.cuda.elemwise import NaiveAlgo
scal = scalar # somewhere scalar gets reassigned to be a function
......@@ -24,10 +27,6 @@ try:
except ImportError:
pass
from theano.sandbox.cuda import GpuOp
from theano.sandbox.cuda.type import CudaNdarrayType
from theano.sandbox.cuda.elemwise import NaiveAlgo
_logger_name = 'theano.sandbox.cuda.basic_ops'
_logger = logging.getLogger(_logger_name)
......@@ -596,10 +595,8 @@ class GpuCAReduce(GpuOp):
if self.pre_scalar_op:
pre = "pre=%s,red=" % str(self.pre_scalar_op)
return "GpuCAReduce{%s%s}{%s}" % (
pre,
str(self.scalar_op),
','.join(str(i) for i in self.reduce_mask)
)
pre, str(self.scalar_op),
','.join(str(i) for i in self.reduce_mask))
def __setstate__(self, d):
self.__dict__.update(d)
......@@ -775,15 +772,18 @@ class GpuCAReduce(GpuOp):
# check if the tensor is ccontiguous, if true, use the c_code_reduce_ccontig code.
# TODO: check if we are ccontiguous when we un-dimshuffle
# TODO: if only some dims are ccontiguous, call version with less dims.
print('if(CudaNdarray_is_c_contiguous(%(x)s)){'%locals(), file=sio)
print('if(CudaNdarray_is_c_contiguous( %(x)s)){' % locals(),
file=sio)
self.c_code_reduce_ccontig(sio, node, name, x, z, fail)
print("}else{", file=sio)
getattr(self, 'c_code_reduce_%s'%(''.join(
str(i) for i in self.reduce_mask)))(sio, node, name, x, z, fail)
getattr(self, 'c_code_reduce_%s' % (''.join(
str(i) for i in self.reduce_mask)))(
sio, node, name, x, z, fail)
print("}", file=sio)
else:
getattr(self, 'c_code_reduce_%s'%(''.join(
str(i) for i in self.reduce_mask)))(sio, node, name, x, z, fail)
getattr(self, 'c_code_reduce_%s' % (''.join(
str(i) for i in self.reduce_mask)))(
sio, node, name, x, z, fail)
# \end bracket the reduction ...
print("""
......@@ -976,7 +976,7 @@ class GpuCAReduce(GpuOp):
assert isinstance(self.scalar_op, (scal.Maximum,
scal.Minimum))
if self.pre_scalar_op:
#dtype = node.inputs[0].dtype
# dtype = node.inputs[0].dtype
dtype = 'float32'
dummy_var = scal.Scalar(dtype=dtype)()
......@@ -1834,12 +1834,15 @@ class GpuCAReduce(GpuOp):
version = [15] # the version corresponding to the c code in this Op
# now we insert versions for the ops on which we depend...
scalar_node = Apply(self.scalar_op,
[Scalar(dtype=input.type.dtype)() for input in node.inputs],
[Scalar(dtype=output.type.dtype)() for output in node.outputs])
Apply(self.scalar_op,
[Scalar(
dtype=input.type.dtype)() for input in node.inputs],
[Scalar(
dtype=output.type.dtype)() for output in node.outputs])
version.extend(self.scalar_op.c_code_cache_version())
for i in node.inputs + node.outputs:
version.extend(Scalar(dtype=i.type.dtype).c_code_cache_version())
version.extend(
Scalar(dtype=i.type.dtype).c_code_cache_version())
if all(version):
return tuple(version)
else:
......@@ -1946,10 +1949,11 @@ class GpuCAReduce(GpuOp):
%(reducebuf)s
}
""" % locals(), file=sio)
#01, 011, 0111
# 01, 011, 0111
if (0 == self.reduce_mask[0] and
all(self.reduce_mask[1:]) and
nd_in in[2, 3, 4]):
# this kernel uses one block for each row.
# threads per block for each element per row.
......@@ -2117,10 +2121,10 @@ class GpuCAReduce(GpuOp):
# this kernel uses one block for multiple column(up to 32TODO),
# threads per block for each element per column.
# thread.x = dim 2 contiguous
# thread.y = dim 1
# block.x = dim 0
# block.y = dim 1 rest
# thread.x = dim 2 contiguous
# thread.y = dim 1
# block.x = dim 0
# block.y = dim 1 rest
init = self._k_init(node, nodename)
decl = self._k_decl(node, nodename, pattern="010_inner")
reducebuf = self._k_reduce_buf_multiple('Z[i0 * sZ0 + i2*sZ1]',
......@@ -2470,7 +2474,7 @@ class GpuReshape(tensor.Reshape, GpuOp):
if (x.size % ss) != 0:
raise ValueError("When using -1 in new shape, the computed new shape must be an multiple of the original shape.")
shp_new = numpy.copy(shp)
shp_new[m1_idx] = x.size/ss
shp_new[m1_idx] = x.size / ss
shp = shp_new
else:
......@@ -2721,7 +2725,7 @@ class GpuAdvancedSubtensor1(tensor.AdvancedSubtensor1, GpuOp):
def perform(self, node, inp, out_):
# This don't work as CudaNdarray_Subscript() don't support it.
#super(GpuAdvancedSubtensor1, self).perform(node, inp, out_)
# super(GpuAdvancedSubtensor1, self).perform(node, inp, out_)
x, idx = inp
out, = out_
x_orig = x
......@@ -2733,7 +2737,7 @@ class GpuAdvancedSubtensor1(tensor.AdvancedSubtensor1, GpuOp):
if x.ndim <= 3:
# CudaNdarray.take only supports ndim <= 3
if self.perform_using_take is not None:
assert self.perform_using_take == True, (
assert self.perform_using_take is True, (
"GpuAdvancedSubtensor1 used the fast version")
if idx.dtype != numpy.int64:
if idx.dtype in [numpy.int8, numpy.int16, numpy.int32,
......@@ -2762,7 +2766,7 @@ class GpuAdvancedSubtensor1(tensor.AdvancedSubtensor1, GpuOp):
out[0] = o
else:
if self.perform_using_take is not None:
assert self.perform_using_take == False, (
assert self.perform_using_take is False, (
"GpuAdvancedSubtensor1 didn't use the fast version")
if out_[0][0] is None or out_[0][0].shape != out_shape:
o = cuda_ndarray.cuda_ndarray.CudaNdarray.zeros(out_shape)
......@@ -3006,8 +3010,7 @@ class GpuAdvancedIncSubtensor1_dev20(GpuAdvancedIncSubtensor1):
convert_map = {8: tensor.basic._convert_to_int8,
16: tensor.basic._convert_to_int16,
32: tensor.basic._convert_to_int32,
64: tensor.basic._convert_to_int64
}
64: tensor.basic._convert_to_int64}
intwidth = theano.configdefaults.python_int_bitwidth()
ilist_ = convert_map[intwidth](ilist_)
......@@ -3354,7 +3357,6 @@ class GpuFlatten(gof.HideC, tensor.Flatten, GpuOp):
return Apply(self, [x], [out_type()])
def gpu_flatten(x, outdim=1):
"""
Implement flatten on the gpu.
......@@ -3378,9 +3380,9 @@ def gpu_flatten(x, outdim=1):
"""
x = as_cuda_ndarray_variable(x)
if outdim > 1:
dims = tuple(x.shape[:outdim-1])+(-1,)
dims = tuple(x.shape[:outdim - 1]) + (-1, )
else:
dims = (-1,)
dims = (-1, )
return GpuReshape(outdim)(x, dims)
......@@ -3408,12 +3410,11 @@ class GpuJoin(tensor.Join, GpuOp):
as_tensor_variable_args = [as_cuda_ndarray_variable(x)
for x in tensors]
output_maker = \
lambda bcast: CudaNdarrayType(broadcastable=bcast)()
def output_maker(bcast):
return(CudaNdarrayType(broadcastable=bcast)())
return tensor.Join._make_node_internal(self,
axis, tensors,
as_tensor_variable_args, output_maker)
return tensor.Join._make_node_internal(
self, axis, tensors, as_tensor_variable_args, output_maker)
def perform(self, node, axis_and_tensors, out_):
out, = out_
......@@ -3464,7 +3465,7 @@ class GpuJoin(tensor.Join, GpuOp):
# except for 'axis'
def construct_slices(curlen):
slices = [slice(None, None, None) for i in \
slices = [slice(None, None, None) for i in
xrange(len(template_shape))]
slices[axis] = slice(curpos, curpos + curlen, None)
return tuple(slices)
......@@ -3829,8 +3830,8 @@ class GpuAlloc(GpuAllocEmpty):
# If the output is a constant, it will have to be deepcopied
# each time the function is called. So we do not fold.
return False
elif ( # The following ops work inplace of their input id 0.
client[1] == 0 and
# Else if the following ops work inplace of their input id 0.
elif(client[1] == 0 and
isinstance(client[0].op, (
# Ops that will work inplace on the Alloc. So if they
# get constant_folded, they would copy the
......@@ -3844,8 +3845,7 @@ class GpuAlloc(GpuAllocEmpty):
GpuAdvancedIncSubtensor1,
theano.sandbox.cuda.blas.GpuGemm,
theano.sandbox.cuda.blas.GpuGemv,
theano.sandbox.cuda.blas.GpuGer,
))):
theano.sandbox.cuda.blas.GpuGer,))):
return False
# If the clients is a transfer, we don't want to fold. We
# let the moving opt finish before deciding what to do.
......
Markdown 格式
0%
您将添加 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论