提交 ed7759fb 作者: Chiheb Trabelsi

Modify basic_ops.py to comply with the flake8 style checks

上级 200babca
...@@ -2,7 +2,7 @@ from __future__ import absolute_import, print_function, division ...@@ -2,7 +2,7 @@ from __future__ import absolute_import, print_function, division
import copy import copy
import logging import logging
import sys import sys
import warnings
import numpy import numpy
from six import iteritems from six import iteritems
from six.moves import StringIO, xrange from six.moves import StringIO, xrange
...@@ -12,6 +12,9 @@ from theano import gof, Type, Apply ...@@ -12,6 +12,9 @@ from theano import gof, Type, Apply
from theano import tensor, scalar, config from theano import tensor, scalar, config
from theano.gradient import grad_undefined from theano.gradient import grad_undefined
from theano.scalar import Scalar from theano.scalar import Scalar
from theano.sandbox.cuda import GpuOp
from theano.sandbox.cuda.type import CudaNdarrayType
from theano.sandbox.cuda.elemwise import NaiveAlgo
scal = scalar # somewhere scalar gets reassigned to be a function scal = scalar # somewhere scalar gets reassigned to be a function
...@@ -24,10 +27,6 @@ try: ...@@ -24,10 +27,6 @@ try:
except ImportError: except ImportError:
pass pass
from theano.sandbox.cuda import GpuOp
from theano.sandbox.cuda.type import CudaNdarrayType
from theano.sandbox.cuda.elemwise import NaiveAlgo
_logger_name = 'theano.sandbox.cuda.basic_ops' _logger_name = 'theano.sandbox.cuda.basic_ops'
_logger = logging.getLogger(_logger_name) _logger = logging.getLogger(_logger_name)
...@@ -596,10 +595,8 @@ class GpuCAReduce(GpuOp): ...@@ -596,10 +595,8 @@ class GpuCAReduce(GpuOp):
if self.pre_scalar_op: if self.pre_scalar_op:
pre = "pre=%s,red=" % str(self.pre_scalar_op) pre = "pre=%s,red=" % str(self.pre_scalar_op)
return "GpuCAReduce{%s%s}{%s}" % ( return "GpuCAReduce{%s%s}{%s}" % (
pre, pre, str(self.scalar_op),
str(self.scalar_op), ','.join(str(i) for i in self.reduce_mask))
','.join(str(i) for i in self.reduce_mask)
)
def __setstate__(self, d): def __setstate__(self, d):
self.__dict__.update(d) self.__dict__.update(d)
...@@ -775,15 +772,18 @@ class GpuCAReduce(GpuOp): ...@@ -775,15 +772,18 @@ class GpuCAReduce(GpuOp):
# check if the tensor is ccontiguous, if true, use the c_code_reduce_ccontig code. # check if the tensor is ccontiguous, if true, use the c_code_reduce_ccontig code.
# TODO: check if we are ccontiguous when we un-dimshuffle # TODO: check if we are ccontiguous when we un-dimshuffle
# TODO: if only some dims are ccontiguous, call version with less dims. # TODO: if only some dims are ccontiguous, call version with less dims.
print('if(CudaNdarray_is_c_contiguous(%(x)s)){'%locals(), file=sio) print('if(CudaNdarray_is_c_contiguous( %(x)s)){' % locals(),
file=sio)
self.c_code_reduce_ccontig(sio, node, name, x, z, fail) self.c_code_reduce_ccontig(sio, node, name, x, z, fail)
print("}else{", file=sio) print("}else{", file=sio)
getattr(self, 'c_code_reduce_%s'%(''.join( getattr(self, 'c_code_reduce_%s' % (''.join(
str(i) for i in self.reduce_mask)))(sio, node, name, x, z, fail) str(i) for i in self.reduce_mask)))(
sio, node, name, x, z, fail)
print("}", file=sio) print("}", file=sio)
else: else:
getattr(self, 'c_code_reduce_%s'%(''.join( getattr(self, 'c_code_reduce_%s' % (''.join(
str(i) for i in self.reduce_mask)))(sio, node, name, x, z, fail) str(i) for i in self.reduce_mask)))(
sio, node, name, x, z, fail)
# \end bracket the reduction ... # \end bracket the reduction ...
print(""" print("""
...@@ -976,7 +976,7 @@ class GpuCAReduce(GpuOp): ...@@ -976,7 +976,7 @@ class GpuCAReduce(GpuOp):
assert isinstance(self.scalar_op, (scal.Maximum, assert isinstance(self.scalar_op, (scal.Maximum,
scal.Minimum)) scal.Minimum))
if self.pre_scalar_op: if self.pre_scalar_op:
#dtype = node.inputs[0].dtype # dtype = node.inputs[0].dtype
dtype = 'float32' dtype = 'float32'
dummy_var = scal.Scalar(dtype=dtype)() dummy_var = scal.Scalar(dtype=dtype)()
...@@ -1834,12 +1834,15 @@ class GpuCAReduce(GpuOp): ...@@ -1834,12 +1834,15 @@ class GpuCAReduce(GpuOp):
version = [15] # the version corresponding to the c code in this Op version = [15] # the version corresponding to the c code in this Op
# now we insert versions for the ops on which we depend... # now we insert versions for the ops on which we depend...
scalar_node = Apply(self.scalar_op, Apply(self.scalar_op,
[Scalar(dtype=input.type.dtype)() for input in node.inputs], [Scalar(
[Scalar(dtype=output.type.dtype)() for output in node.outputs]) dtype=input.type.dtype)() for input in node.inputs],
[Scalar(
dtype=output.type.dtype)() for output in node.outputs])
version.extend(self.scalar_op.c_code_cache_version()) version.extend(self.scalar_op.c_code_cache_version())
for i in node.inputs + node.outputs: for i in node.inputs + node.outputs:
version.extend(Scalar(dtype=i.type.dtype).c_code_cache_version()) version.extend(
Scalar(dtype=i.type.dtype).c_code_cache_version())
if all(version): if all(version):
return tuple(version) return tuple(version)
else: else:
...@@ -1946,10 +1949,11 @@ class GpuCAReduce(GpuOp): ...@@ -1946,10 +1949,11 @@ class GpuCAReduce(GpuOp):
%(reducebuf)s %(reducebuf)s
} }
""" % locals(), file=sio) """ % locals(), file=sio)
#01, 011, 0111 # 01, 011, 0111
if (0 == self.reduce_mask[0] and if (0 == self.reduce_mask[0] and
all(self.reduce_mask[1:]) and all(self.reduce_mask[1:]) and
nd_in in[2, 3, 4]): nd_in in[2, 3, 4]):
# this kernel uses one block for each row. # this kernel uses one block for each row.
# threads per block for each element per row. # threads per block for each element per row.
...@@ -2117,10 +2121,10 @@ class GpuCAReduce(GpuOp): ...@@ -2117,10 +2121,10 @@ class GpuCAReduce(GpuOp):
# this kernel uses one block for multiple column(up to 32TODO), # this kernel uses one block for multiple column(up to 32TODO),
# threads per block for each element per column. # threads per block for each element per column.
# thread.x = dim 2 contiguous # thread.x = dim 2 contiguous
# thread.y = dim 1 # thread.y = dim 1
# block.x = dim 0 # block.x = dim 0
# block.y = dim 1 rest # block.y = dim 1 rest
init = self._k_init(node, nodename) init = self._k_init(node, nodename)
decl = self._k_decl(node, nodename, pattern="010_inner") decl = self._k_decl(node, nodename, pattern="010_inner")
reducebuf = self._k_reduce_buf_multiple('Z[i0 * sZ0 + i2*sZ1]', reducebuf = self._k_reduce_buf_multiple('Z[i0 * sZ0 + i2*sZ1]',
...@@ -2470,7 +2474,7 @@ class GpuReshape(tensor.Reshape, GpuOp): ...@@ -2470,7 +2474,7 @@ class GpuReshape(tensor.Reshape, GpuOp):
if (x.size % ss) != 0: if (x.size % ss) != 0:
raise ValueError("When using -1 in new shape, the computed new shape must be an multiple of the original shape.") raise ValueError("When using -1 in new shape, the computed new shape must be an multiple of the original shape.")
shp_new = numpy.copy(shp) shp_new = numpy.copy(shp)
shp_new[m1_idx] = x.size/ss shp_new[m1_idx] = x.size / ss
shp = shp_new shp = shp_new
else: else:
...@@ -2721,7 +2725,7 @@ class GpuAdvancedSubtensor1(tensor.AdvancedSubtensor1, GpuOp): ...@@ -2721,7 +2725,7 @@ class GpuAdvancedSubtensor1(tensor.AdvancedSubtensor1, GpuOp):
def perform(self, node, inp, out_): def perform(self, node, inp, out_):
# This don't work as CudaNdarray_Subscript() don't support it. # This don't work as CudaNdarray_Subscript() don't support it.
#super(GpuAdvancedSubtensor1, self).perform(node, inp, out_) # super(GpuAdvancedSubtensor1, self).perform(node, inp, out_)
x, idx = inp x, idx = inp
out, = out_ out, = out_
x_orig = x x_orig = x
...@@ -2733,7 +2737,7 @@ class GpuAdvancedSubtensor1(tensor.AdvancedSubtensor1, GpuOp): ...@@ -2733,7 +2737,7 @@ class GpuAdvancedSubtensor1(tensor.AdvancedSubtensor1, GpuOp):
if x.ndim <= 3: if x.ndim <= 3:
# CudaNdarray.take only supports ndim <= 3 # CudaNdarray.take only supports ndim <= 3
if self.perform_using_take is not None: if self.perform_using_take is not None:
assert self.perform_using_take == True, ( assert self.perform_using_take is True, (
"GpuAdvancedSubtensor1 used the fast version") "GpuAdvancedSubtensor1 used the fast version")
if idx.dtype != numpy.int64: if idx.dtype != numpy.int64:
if idx.dtype in [numpy.int8, numpy.int16, numpy.int32, if idx.dtype in [numpy.int8, numpy.int16, numpy.int32,
...@@ -2762,7 +2766,7 @@ class GpuAdvancedSubtensor1(tensor.AdvancedSubtensor1, GpuOp): ...@@ -2762,7 +2766,7 @@ class GpuAdvancedSubtensor1(tensor.AdvancedSubtensor1, GpuOp):
out[0] = o out[0] = o
else: else:
if self.perform_using_take is not None: if self.perform_using_take is not None:
assert self.perform_using_take == False, ( assert self.perform_using_take is False, (
"GpuAdvancedSubtensor1 didn't use the fast version") "GpuAdvancedSubtensor1 didn't use the fast version")
if out_[0][0] is None or out_[0][0].shape != out_shape: if out_[0][0] is None or out_[0][0].shape != out_shape:
o = cuda_ndarray.cuda_ndarray.CudaNdarray.zeros(out_shape) o = cuda_ndarray.cuda_ndarray.CudaNdarray.zeros(out_shape)
...@@ -3006,8 +3010,7 @@ class GpuAdvancedIncSubtensor1_dev20(GpuAdvancedIncSubtensor1): ...@@ -3006,8 +3010,7 @@ class GpuAdvancedIncSubtensor1_dev20(GpuAdvancedIncSubtensor1):
convert_map = {8: tensor.basic._convert_to_int8, convert_map = {8: tensor.basic._convert_to_int8,
16: tensor.basic._convert_to_int16, 16: tensor.basic._convert_to_int16,
32: tensor.basic._convert_to_int32, 32: tensor.basic._convert_to_int32,
64: tensor.basic._convert_to_int64 64: tensor.basic._convert_to_int64}
}
intwidth = theano.configdefaults.python_int_bitwidth() intwidth = theano.configdefaults.python_int_bitwidth()
ilist_ = convert_map[intwidth](ilist_) ilist_ = convert_map[intwidth](ilist_)
...@@ -3354,7 +3357,6 @@ class GpuFlatten(gof.HideC, tensor.Flatten, GpuOp): ...@@ -3354,7 +3357,6 @@ class GpuFlatten(gof.HideC, tensor.Flatten, GpuOp):
return Apply(self, [x], [out_type()]) return Apply(self, [x], [out_type()])
def gpu_flatten(x, outdim=1): def gpu_flatten(x, outdim=1):
""" """
Implement flatten on the gpu. Implement flatten on the gpu.
...@@ -3378,9 +3380,9 @@ def gpu_flatten(x, outdim=1): ...@@ -3378,9 +3380,9 @@ def gpu_flatten(x, outdim=1):
""" """
x = as_cuda_ndarray_variable(x) x = as_cuda_ndarray_variable(x)
if outdim > 1: if outdim > 1:
dims = tuple(x.shape[:outdim-1])+(-1,) dims = tuple(x.shape[:outdim - 1]) + (-1, )
else: else:
dims = (-1,) dims = (-1, )
return GpuReshape(outdim)(x, dims) return GpuReshape(outdim)(x, dims)
...@@ -3408,12 +3410,11 @@ class GpuJoin(tensor.Join, GpuOp): ...@@ -3408,12 +3410,11 @@ class GpuJoin(tensor.Join, GpuOp):
as_tensor_variable_args = [as_cuda_ndarray_variable(x) as_tensor_variable_args = [as_cuda_ndarray_variable(x)
for x in tensors] for x in tensors]
output_maker = \ def output_maker(bcast):
lambda bcast: CudaNdarrayType(broadcastable=bcast)() return(CudaNdarrayType(broadcastable=bcast)())
return tensor.Join._make_node_internal(self, return tensor.Join._make_node_internal(
axis, tensors, self, axis, tensors, as_tensor_variable_args, output_maker)
as_tensor_variable_args, output_maker)
def perform(self, node, axis_and_tensors, out_): def perform(self, node, axis_and_tensors, out_):
out, = out_ out, = out_
...@@ -3464,7 +3465,7 @@ class GpuJoin(tensor.Join, GpuOp): ...@@ -3464,7 +3465,7 @@ class GpuJoin(tensor.Join, GpuOp):
# except for 'axis' # except for 'axis'
def construct_slices(curlen): def construct_slices(curlen):
slices = [slice(None, None, None) for i in \ slices = [slice(None, None, None) for i in
xrange(len(template_shape))] xrange(len(template_shape))]
slices[axis] = slice(curpos, curpos + curlen, None) slices[axis] = slice(curpos, curpos + curlen, None)
return tuple(slices) return tuple(slices)
...@@ -3829,8 +3830,8 @@ class GpuAlloc(GpuAllocEmpty): ...@@ -3829,8 +3830,8 @@ class GpuAlloc(GpuAllocEmpty):
# If the output is a constant, it will have to be deepcopied # If the output is a constant, it will have to be deepcopied
# each time the function is called. So we do not fold. # each time the function is called. So we do not fold.
return False return False
elif ( # The following ops work inplace of their input id 0. # Else if the following ops work inplace of their input id 0.
client[1] == 0 and elif(client[1] == 0 and
isinstance(client[0].op, ( isinstance(client[0].op, (
# Ops that will work inplace on the Alloc. So if they # Ops that will work inplace on the Alloc. So if they
# get constant_folded, they would copy the # get constant_folded, they would copy the
...@@ -3844,8 +3845,7 @@ class GpuAlloc(GpuAllocEmpty): ...@@ -3844,8 +3845,7 @@ class GpuAlloc(GpuAllocEmpty):
GpuAdvancedIncSubtensor1, GpuAdvancedIncSubtensor1,
theano.sandbox.cuda.blas.GpuGemm, theano.sandbox.cuda.blas.GpuGemm,
theano.sandbox.cuda.blas.GpuGemv, theano.sandbox.cuda.blas.GpuGemv,
theano.sandbox.cuda.blas.GpuGer, theano.sandbox.cuda.blas.GpuGer,))):
))):
return False return False
# If the clients is a transfer, we don't want to fold. We # If the clients is a transfer, we don't want to fold. We
# let the moving opt finish before deciding what to do. # let the moving opt finish before deciding what to do.
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
请 注册 或者 登录 后发表评论