提交 e4ae792a authored 作者: Arnaud Bergeron's avatar Arnaud Bergeron

Flake8 for elemwise.py

上级 2e3d841e
...@@ -4,7 +4,6 @@ import os ...@@ -4,7 +4,6 @@ import os
from theano.compat import izip from theano.compat import izip
import numpy import numpy
import theano
from theano import Apply, scalar, config from theano import Apply, scalar, config
from theano import scalar as scal from theano import scalar as scal
from six.moves import StringIO, xrange from six.moves import StringIO, xrange
...@@ -108,7 +107,7 @@ class GpuElemwise(GpuKernelBase, HideC, Elemwise): ...@@ -108,7 +107,7 @@ class GpuElemwise(GpuKernelBase, HideC, Elemwise):
scal_v_ins = [scalar.get_scalar_type(i.dtype) for i in node.inputs] scal_v_ins = [scalar.get_scalar_type(i.dtype) for i in node.inputs]
outs = [make_argument(o, 'o%d' % (n,)) for n, o in outs = [make_argument(o, 'o%d' % (n,)) for n, o in
enumerate(node.outputs) if not n in self.inplace_pattern] enumerate(node.outputs) if n not in self.inplace_pattern]
scal_v_outs = [scalar.get_scalar_type(o.dtype) for o in node.outputs] scal_v_outs = [scalar.get_scalar_type(o.dtype) for o in node.outputs]
fake_node = Apply(self.scalar_op, [i() for i in scal_v_ins], fake_node = Apply(self.scalar_op, [i() for i in scal_v_ins],
...@@ -132,7 +131,7 @@ class GpuElemwise(GpuKernelBase, HideC, Elemwise): ...@@ -132,7 +131,7 @@ class GpuElemwise(GpuKernelBase, HideC, Elemwise):
else: else:
scal_out.append(arg.name + '[i]') scal_out.append(arg.name + '[i]')
kop = self.scalar_op.c_code(fake_node, nodename+'_scalar', kop = self.scalar_op.c_code(fake_node, nodename + '_scalar',
scal_in, scal_out, scal_in, scal_out,
dict(fail='return;')) dict(fail='return;'))
...@@ -171,7 +170,7 @@ class GpuElemwise(GpuKernelBase, HideC, Elemwise): ...@@ -171,7 +170,7 @@ class GpuElemwise(GpuKernelBase, HideC, Elemwise):
("npy_float64", "ga_double"), ("npy_float64", "ga_double"),
]: ]:
kop = kop.replace(npy, ga) kop = kop.replace(npy, ga)
return ElemwiseKernel(None, inps+outs, kop, preamble=support_code) return ElemwiseKernel(None, inps + outs, kop, preamble=support_code)
def c_header_dirs(self): def c_header_dirs(self):
if pygpu.get_default_context().kind == 'opencl': if pygpu.get_default_context().kind == 'opencl':
...@@ -417,7 +416,7 @@ class GpuElemwise(GpuKernelBase, HideC, Elemwise): ...@@ -417,7 +416,7 @@ class GpuElemwise(GpuKernelBase, HideC, Elemwise):
GpuKernel_error(&%(kname)s, err)); GpuKernel_error(&%(kname)s, err));
%(fail)s; %(fail)s;
} }
""" % dict(kname=kname,fail=fail) """ % dict(kname=kname, fail=fail)
if config.gpuarray.sync: if config.gpuarray.sync:
code += """ code += """
err = GpuArray_sync(&%(z)s->ga); err = GpuArray_sync(&%(z)s->ga);
...@@ -495,7 +494,7 @@ class GpuDimShuffle(HideC, DimShuffle): ...@@ -495,7 +494,7 @@ class GpuDimShuffle(HideC, DimShuffle):
res = input res = input
res = res.transpose(self.shuffle+self.drop) res = res.transpose(self.shuffle + self.drop)
shape = list(res.shape[:len(self.shuffle)]) shape = list(res.shape[:len(self.shuffle)])
for augm in self.augment: for augm in self.augment:
...@@ -533,7 +532,7 @@ class GpuDimShuffle(HideC, DimShuffle): ...@@ -533,7 +532,7 @@ class GpuDimShuffle(HideC, DimShuffle):
Py_DECREF(tmp); Py_DECREF(tmp);
return res; return res;
} }
""" % dict(shuffle=', '.join(str(a) for a in (self.shuffle+self.drop)), """ % dict(shuffle=', '.join(str(a) for a in (self.shuffle + self.drop)),
name=name, nd_out=len(self.new_order), name=name, nd_out=len(self.new_order),
copy_shape=copy_shape(len(self.new_order))) copy_shape=copy_shape(len(self.new_order)))
...@@ -671,8 +670,6 @@ class GpuCAReduceCuda(GpuKernelBase, HideC, CAReduceDtype): ...@@ -671,8 +670,6 @@ class GpuCAReduceCuda(GpuKernelBase, HideC, CAReduceDtype):
if self.pre_scalar_op: if self.pre_scalar_op:
# Currently we only tested pre_scalar_op that don't cause # Currently we only tested pre_scalar_op that don't cause
# upcast. # upcast.
d1 = self.__class__(scalar_op=self.scalar_op)(Elemwise(self.pre_scalar_op)(x))
assert d1.dtype == ret.outputs[0].dtype
assert Elemwise(self.pre_scalar_op)(x).dtype == x.dtype assert Elemwise(self.pre_scalar_op)(x).dtype == x.dtype
if self.reduce_mask is None: if self.reduce_mask is None:
if self.axis is None: if self.axis is None:
...@@ -863,14 +860,16 @@ class GpuCAReduceCuda(GpuKernelBase, HideC, CAReduceDtype): ...@@ -863,14 +860,16 @@ class GpuCAReduceCuda(GpuKernelBase, HideC, CAReduceDtype):
# check if the tensor is ccontiguous, if true, use the c_code_reduce_ccontig code. # check if the tensor is ccontiguous, if true, use the c_code_reduce_ccontig code.
# TODO: check if we are ccontiguous when we un-dimshuffle # TODO: check if we are ccontiguous when we un-dimshuffle
# TODO: if only some dims are ccontiguous, call version with less dims. # TODO: if only some dims are ccontiguous, call version with less dims.
print('if(%(x)s->ga.flags & GA_C_CONTIGUOUS){'%locals(), file=sio) print('if(%(x)s->ga.flags & GA_C_CONTIGUOUS){' % locals(),
file=sio)
self.c_code_reduce_ccontig(sio, node, name, x, z, fail) self.c_code_reduce_ccontig(sio, node, name, x, z, fail)
print("}else{", file=sio) print("}else{", file=sio)
getattr(self, 'c_code_reduce_%s'%(''.join( getattr(self, 'c_code_reduce_%s' %
str(i) for i in self.reduce_mask)))(sio, node, name, x, z, fail) (''.join(str(i) for i in self.reduce_mask)))(
sio, node, name, x, z, fail)
print("}", file=sio) print("}", file=sio)
else: else:
getattr(self, 'c_code_reduce_%s'%(''.join( getattr(self, 'c_code_reduce_%s' % (''.join(
str(i) for i in self.reduce_mask)))(sio, node, name, x, z, fail) str(i) for i in self.reduce_mask)))(sio, node, name, x, z, fail)
# \end bracket the reduction ... # \end bracket the reduction ...
...@@ -1094,8 +1093,8 @@ class GpuCAReduceCuda(GpuKernelBase, HideC, CAReduceDtype): ...@@ -1094,8 +1093,8 @@ class GpuCAReduceCuda(GpuKernelBase, HideC, CAReduceDtype):
else: else:
assert isinstance(self.scalar_op, (scal.Maximum, assert isinstance(self.scalar_op, (scal.Maximum,
scal.Minimum)) scal.Minimum))
if self.pre_scalar_op: # TODO, multi_dtype! if self.pre_scalar_op: # TODO: multiple dtypes
#dtype = node.inputs[0].dtype # dtype = node.inputs[0].dtype
dtype = 'float32' dtype = 'float32'
dummy_var = scal.Scalar(dtype=dtype)() dummy_var = scal.Scalar(dtype=dtype)()
...@@ -1946,9 +1945,6 @@ class GpuCAReduceCuda(GpuKernelBase, HideC, CAReduceDtype): ...@@ -1946,9 +1945,6 @@ class GpuCAReduceCuda(GpuKernelBase, HideC, CAReduceDtype):
version = [16] # the version corresponding to the c code in this Op version = [16] # the version corresponding to the c code in this Op
# now we insert versions for the ops on which we depend... # now we insert versions for the ops on which we depend...
scalar_node = Apply(self.scalar_op,
[Scalar(dtype=input.type.dtype)() for input in node.inputs],
[Scalar(dtype=output.type.dtype)() for output in node.outputs])
version.extend(self.scalar_op.c_code_cache_version()) version.extend(self.scalar_op.c_code_cache_version())
for i in node.inputs + node.outputs: for i in node.inputs + node.outputs:
version.extend(Scalar(dtype=i.type.dtype).c_code_cache_version()) version.extend(Scalar(dtype=i.type.dtype).c_code_cache_version())
...@@ -1962,7 +1958,7 @@ class GpuCAReduceCuda(GpuKernelBase, HideC, CAReduceDtype): ...@@ -1962,7 +1958,7 @@ class GpuCAReduceCuda(GpuKernelBase, HideC, CAReduceDtype):
in_dtype = node.inputs[0].dtype in_dtype = node.inputs[0].dtype
out_dtype = node.outputs[0].dtype out_dtype = node.outputs[0].dtype
acc_dtype = self._acc_dtype(node.inputs[0].dtype) acc_dtype = self._acc_dtype(node.inputs[0].dtype)
flags=Kernel.get_flags(in_dtype, acc_dtype, out_dtype) flags = Kernel.get_flags(in_dtype, acc_dtype, out_dtype)
in_type = gpuarray.dtype_to_ctype(in_dtype) in_type = gpuarray.dtype_to_ctype(in_dtype)
out_type = gpuarray.dtype_to_ctype(out_dtype) out_type = gpuarray.dtype_to_ctype(out_dtype)
acc_type = gpuarray.dtype_to_ctype(acc_dtype) acc_type = gpuarray.dtype_to_ctype(acc_dtype)
...@@ -2106,7 +2102,7 @@ class GpuCAReduceCuda(GpuKernelBase, HideC, CAReduceDtype): ...@@ -2106,7 +2102,7 @@ class GpuCAReduceCuda(GpuKernelBase, HideC, CAReduceDtype):
] ]
kernels.append(Kernel(code=sio.getvalue(), name=kname, kernels.append(Kernel(code=sio.getvalue(), name=kname,
params=params, flags=flags, objvar=k_var)) params=params, flags=flags, objvar=k_var))
#01, 011, 0111 # 01, 011, 0111
if (0 == self.reduce_mask[0] and if (0 == self.reduce_mask[0] and
all(self.reduce_mask[1:]) and all(self.reduce_mask[1:]) and
nd_in in[2, 3, 4]): nd_in in[2, 3, 4]):
...@@ -2303,10 +2299,10 @@ class GpuCAReduceCuda(GpuKernelBase, HideC, CAReduceDtype): ...@@ -2303,10 +2299,10 @@ class GpuCAReduceCuda(GpuKernelBase, HideC, CAReduceDtype):
# this kernel uses one block for multiple column(up to 32TODO), # this kernel uses one block for multiple column(up to 32TODO),
# threads per block for each element per column. # threads per block for each element per column.
# thread.x = dim 2 contiguous # thread.x = dim 2 contiguous
# thread.y = dim 1 # thread.y = dim 1
# block.x = dim 0 # block.x = dim 0
# block.y = dim 1 rest # block.y = dim 1 rest
init = self._k_init(node, nodename) init = self._k_init(node, nodename)
decl, kname, params, k_var = self._k_decl(node, nodename, pattern="010_inner") decl, kname, params, k_var = self._k_decl(node, nodename, pattern="010_inner")
reducebuf = self._k_reduce_buf_multiple('Z[i0 * sZ0 + i2*sZ1]', reducebuf = self._k_reduce_buf_multiple('Z[i0 * sZ0 + i2*sZ1]',
...@@ -2625,7 +2621,7 @@ class GpuCAReduceCuda(GpuKernelBase, HideC, CAReduceDtype): ...@@ -2625,7 +2621,7 @@ class GpuCAReduceCuda(GpuKernelBase, HideC, CAReduceDtype):
{}, True) {}, True)
reduce_init = self._assign_init(load_in + "(A[blockIdx.x * sA1])") reduce_init = self._assign_init(load_in + "(A[blockIdx.x * sA1])")
kname = "kernel_reduce_1011" kname = "kernel_reduce_1011"
k_var= "kernel_reduce_1011_" + nodename k_var = "kernel_reduce_1011_" + nodename
sio = StringIO() sio = StringIO()
print(""" print("""
KERNEL void %(kname)s( KERNEL void %(kname)s(
...@@ -2753,7 +2749,7 @@ class GpuCAReduceCPY(GpuKernelBase, HideC, CAReduceDtype): ...@@ -2753,7 +2749,7 @@ class GpuCAReduceCPY(GpuKernelBase, HideC, CAReduceDtype):
flags=Kernel.get_flags(node.inputs[0].type.dtype, flags=Kernel.get_flags(node.inputs[0].type.dtype,
acc_dtype, acc_dtype,
node.outputs[0].type.dtype), node.outputs[0].type.dtype),
objvar='k_reduk_'+name)] objvar='k_reduk_' + name)]
def c_code(self, node, name, inp, out, sub): def c_code(self, node, name, inp, out, sub):
if not any(getattr(self, 'redux', [node.inputs[0].ndim != 0])): if not any(getattr(self, 'redux', [node.inputs[0].ndim != 0])):
...@@ -2768,7 +2764,7 @@ class GpuCAReduceCPY(GpuKernelBase, HideC, CAReduceDtype): ...@@ -2768,7 +2764,7 @@ class GpuCAReduceCPY(GpuKernelBase, HideC, CAReduceDtype):
if (%(sync)d) if (%(sync)d)
GpuArray_sync(&%(out)s->ga); GpuArray_sync(&%(out)s->ga);
""" % dict(out=out[0], inp=inp[0], fail=sub['fail'], """ % dict(out=out[0], inp=inp[0], fail=sub['fail'],
sync=bool(config.gpuarray.sync)) sync=bool(config.gpuarray.sync))
k = self.get_kernel_cache(node) k = self.get_kernel_cache(node)
_, src, _, ls = k._get_basic_kernel(k.init_local_size, _, src, _, ls = k._get_basic_kernel(k.init_local_size,
...@@ -2816,7 +2812,7 @@ class GpuCAReduceCPY(GpuKernelBase, HideC, CAReduceDtype): ...@@ -2816,7 +2812,7 @@ class GpuCAReduceCPY(GpuKernelBase, HideC, CAReduceDtype):
%(fail)s %(fail)s
} }
} }
""" % dict(output=output, nd_out=nd_out, fail=sub['fail'], """ % dict(output=output, nd_out=nd_out, fail=sub['fail'],
out_type=dtype_to_typecode(node.outputs[0].type.dtype)) out_type=dtype_to_typecode(node.outputs[0].type.dtype))
else: else:
code += """ code += """
...@@ -2828,7 +2824,7 @@ class GpuCAReduceCPY(GpuKernelBase, HideC, CAReduceDtype): ...@@ -2828,7 +2824,7 @@ class GpuCAReduceCPY(GpuKernelBase, HideC, CAReduceDtype):
%(fail)s %(fail)s
} }
} }
""" % dict(output=output, fail=sub['fail'], """ % dict(output=output, fail=sub['fail'],
out_type=dtype_to_typecode(node.outputs[0].type.dtype)) out_type=dtype_to_typecode(node.outputs[0].type.dtype))
if acc_dtype != node.outputs[0].type.dtype: if acc_dtype != node.outputs[0].type.dtype:
...@@ -2837,12 +2833,13 @@ class GpuCAReduceCPY(GpuKernelBase, HideC, CAReduceDtype): ...@@ -2837,12 +2833,13 @@ class GpuCAReduceCPY(GpuKernelBase, HideC, CAReduceDtype):
%(acc_type)s, GA_C_ORDER, pygpu_default_context(), %(acc_type)s, GA_C_ORDER, pygpu_default_context(),
Py_None); Py_None);
if (!tmp) %(fail)s if (!tmp) %(fail)s
""" % dict(output=output, fail=sub['fail'], acc_type=dtype_to_typecode(acc_dtype)) """ % dict(output=output, fail=sub['fail'],
acc_type=dtype_to_typecode(acc_dtype))
else: else:
code += """ code += """
tmp = %(output)s; tmp = %(output)s;
Py_INCREF(tmp); Py_INCREF(tmp);
""" % dict(output=output) """ % dict(output=output)
# We need the proxies since we are passing a pointer to the # We need the proxies since we are passing a pointer to the
# data into the call and therefore we need a real copy of the # data into the call and therefore we need a real copy of the
...@@ -2850,7 +2847,7 @@ class GpuCAReduceCPY(GpuKernelBase, HideC, CAReduceDtype): ...@@ -2850,7 +2847,7 @@ class GpuCAReduceCPY(GpuKernelBase, HideC, CAReduceDtype):
code += """ code += """
args[0] = &n; args[0] = &n;
args[1] = tmp->ga.data; args[1] = tmp->ga.data;
""" % dict(output=output) """ % dict(output=output)
p = 2 p = 2
for i in range(node.inputs[0].ndim): for i in range(node.inputs[0].ndim):
...@@ -2858,7 +2855,7 @@ class GpuCAReduceCPY(GpuKernelBase, HideC, CAReduceDtype): ...@@ -2858,7 +2855,7 @@ class GpuCAReduceCPY(GpuKernelBase, HideC, CAReduceDtype):
proxy_dim[%(i)s] = %(input)s->ga.dimensions[%(i)s]; proxy_dim[%(i)s] = %(input)s->ga.dimensions[%(i)s];
args[%(p)s] = &proxy_dim[%(i)s]; args[%(p)s] = &proxy_dim[%(i)s];
n *= %(input)s->ga.dimensions[%(i)s]; n *= %(input)s->ga.dimensions[%(i)s];
""" % dict(i=i, p=p, input=input) """ % dict(i=i, p=p, input=input)
p += 1 p += 1
if not redux[i]: if not redux[i]:
code += "gs *= %(input)s->ga.dimensions[%(i)s];" % dict(input=input, i=i) code += "gs *= %(input)s->ga.dimensions[%(i)s];" % dict(input=input, i=i)
...@@ -2867,14 +2864,14 @@ class GpuCAReduceCPY(GpuKernelBase, HideC, CAReduceDtype): ...@@ -2867,14 +2864,14 @@ class GpuCAReduceCPY(GpuKernelBase, HideC, CAReduceDtype):
args[%(p)s] = %(input)s->ga.data; args[%(p)s] = %(input)s->ga.data;
proxy_off = %(input)s->ga.offset; proxy_off = %(input)s->ga.offset;
args[%(p)s+1] = &proxy_off; args[%(p)s+1] = &proxy_off;
""" % dict(p=p, input=input) """ % dict(p=p, input=input)
p += 2 p += 2
for i in range(node.inputs[0].ndim): for i in range(node.inputs[0].ndim):
code += """ code += """
proxy_str[%(i)s] = %(input)s->ga.strides[%(i)s]; proxy_str[%(i)s] = %(input)s->ga.strides[%(i)s];
args[%(p)s] = &proxy_str[%(i)s]; args[%(p)s] = &proxy_str[%(i)s];
""" % dict(p=p, i=i, input=input) """ % dict(p=p, i=i, input=input)
p += 1 p += 1
code += """ code += """
...@@ -2911,7 +2908,7 @@ class GpuCAReduceCPY(GpuKernelBase, HideC, CAReduceDtype): ...@@ -2911,7 +2908,7 @@ class GpuCAReduceCPY(GpuKernelBase, HideC, CAReduceDtype):
%(fail)s %(fail)s
} }
} }
""" % dict(k_var='k_reduk_'+name, sync=bool(config.gpuarray.sync), """ % dict(k_var='k_reduk_' + name, sync=bool(config.gpuarray.sync),
ls=ls, fail=sub['fail'], output=output, input=input, ls=ls, fail=sub['fail'], output=output, input=input,
cast_out=bool(acc_dtype != node.outputs[0].type.dtype)) cast_out=bool(acc_dtype != node.outputs[0].type.dtype))
...@@ -2942,8 +2939,8 @@ class GpuCAReduceCPY(GpuKernelBase, HideC, CAReduceDtype): ...@@ -2942,8 +2939,8 @@ class GpuCAReduceCPY(GpuKernelBase, HideC, CAReduceDtype):
redux = self.redux redux = self.redux
if any(redux): if any(redux):
output[0] = self.get_kernel_cache(node)(input).astype(copy=False, output[0] = self.get_kernel_cache(node)(input).astype(
dtype=node.outputs[0].type.dtype) copy=False, dtype=node.outputs[0].type.dtype)
else: else:
output[0] = pygpu.gpuarray.array(input, copy=True, output[0] = pygpu.gpuarray.array(input, copy=True,
dtype=node.outputs[0].type.dtype) dtype=node.outputs[0].type.dtype)
......
...@@ -157,7 +157,6 @@ whitelist_flake8 = [ ...@@ -157,7 +157,6 @@ whitelist_flake8 = [
"sandbox/linalg/ops.py", "sandbox/linalg/ops.py",
"sandbox/linalg/__init__.py", "sandbox/linalg/__init__.py",
"sandbox/linalg/tests/test_linalg.py", "sandbox/linalg/tests/test_linalg.py",
"sandbox/gpuarray/elemwise.py",
"sandbox/gpuarray/type.py", "sandbox/gpuarray/type.py",
"sandbox/gpuarray/__init__.py", "sandbox/gpuarray/__init__.py",
"sandbox/gpuarray/kernel_codegen.py", "sandbox/gpuarray/kernel_codegen.py",
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论