提交 74f8fc3e authored 作者: Arnaud Bergeron's avatar Arnaud Bergeron

Flake8 for kernel_codegen.py

上级 b504c51f
...@@ -4,11 +4,11 @@ Helper routines for generating gpu kernels for nvcc. ...@@ -4,11 +4,11 @@ Helper routines for generating gpu kernels for nvcc.
""" """
try: try:
import pygpu
from pygpu import gpuarray from pygpu import gpuarray
except ImportError: except ImportError:
pass pass
def nvcc_kernel(name, params, body): def nvcc_kernel(name, params, body):
""" """
Return the c code of a kernel function. Return the c code of a kernel function.
...@@ -174,16 +174,15 @@ def inline_softmax(N, buf, buf2, threadPos, threadCount, dtype="float32"): ...@@ -174,16 +174,15 @@ def inline_softmax(N, buf, buf2, threadPos, threadCount, dtype="float32"):
""" """
ctype = gpuarray.dtype_to_ctype(dtype) ctype = gpuarray.dtype_to_ctype(dtype)
return [ # get max of buf (trashing all but buf[0])
# get max of buf (trashing all but buf[0]) return [inline_reduce_max(N, buf, threadPos, threadCount),
inline_reduce_max(N, buf, threadPos, threadCount),
'__syncthreads()', '__syncthreads()',
('%s row_max = ' + buf + '[0]') % ctype, ('%s row_max = ' + buf + '[0]') % ctype,
'__syncthreads()', '__syncthreads()',
'for(int __i=' + threadPos + '; __i<' + N + 'for(int __i=' + threadPos + '; __i<' + N +
'; __i+=' + threadCount + '){', '; __i+=' + threadCount + '){',
buf + '[__i] = exp(' + buf2 + '[__i] - row_max)', buf + '[__i] = exp(' + buf2 + '[__i] - row_max)',
buf2 + '[__i] = ' + buf + '[__i]', buf2 + '[__i] = ' + buf + '[__i]',
'}', '}',
'__syncthreads()', '__syncthreads()',
inline_reduce_sum(N, buf, threadPos, threadCount), inline_reduce_sum(N, buf, threadPos, threadCount),
...@@ -192,8 +191,8 @@ def inline_softmax(N, buf, buf2, threadPos, threadCount, dtype="float32"): ...@@ -192,8 +191,8 @@ def inline_softmax(N, buf, buf2, threadPos, threadCount, dtype="float32"):
'__syncthreads()', '__syncthreads()',
# divide each exp() result by the sum to complete the job. # divide each exp() result by the sum to complete the job.
'for(int __i=' + threadPos + '; __i<' + N + 'for(int __i=' + threadPos + '; __i<' + N +
'; __i+=' + threadCount + '){', '; __i+=' + threadCount + '){',
buf + '[__i] = ' + buf2 + '[__i] / row_sum', buf + '[__i] = ' + buf2 + '[__i] / row_sum',
'}', '}',
'__syncthreads()', '__syncthreads()',
] ]
...@@ -232,7 +231,7 @@ def inline_reduce_fixed_shared(N, buf, x, stride_x, load_x, pos, count, ...@@ -232,7 +231,7 @@ def inline_reduce_fixed_shared(N, buf, x, stride_x, load_x, pos, count,
Optional, the dtype of the output. Optional, the dtype of the output.
manner_fn manner_fn
A function that accepts strings of arguments a and b, and returns c code A function that accepts strings of arguments a and b, and returns c code
for their reduction. for their reduction.
Example: return "%(a)s + %(b)s" for a sum reduction. Example: return "%(a)s + %(b)s" for a sum reduction.
manner_init manner_init
A function that accepts strings of arguments a and returns c code for its A function that accepts strings of arguments a and returns c code for its
...@@ -259,7 +258,7 @@ def inline_reduce_fixed_shared(N, buf, x, stride_x, load_x, pos, count, ...@@ -259,7 +258,7 @@ def inline_reduce_fixed_shared(N, buf, x, stride_x, load_x, pos, count,
loop_line = manner_fn("red", manner_init("%(load_x)s(%(x)s[i * %(stride_x)s])" % loop_line = manner_fn("red", manner_init("%(load_x)s(%(x)s[i * %(stride_x)s])" %
locals())) locals()))
loop_line2 = manner_fn("%s[%s]" % (buf, pos), loop_line2 = manner_fn("%s[%s]" % (buf, pos),
"%s[i]" % buf) "%s[i]" % buf)
r_16 = manner_fn("%s[%s]" % (buf, pos), "%s[%s+16]" % (buf, pos)) r_16 = manner_fn("%s[%s]" % (buf, pos), "%s[%s+16]" % (buf, pos))
r_8 = manner_fn("%s[%s]" % (buf, pos), "%s[%s+8]" % (buf, pos)) r_8 = manner_fn("%s[%s]" % (buf, pos), "%s[%s+8]" % (buf, pos))
r_4 = manner_fn("%s[%s]" % (buf, pos), "%s[%s+4]" % (buf, pos)) r_4 = manner_fn("%s[%s]" % (buf, pos), "%s[%s+4]" % (buf, pos))
...@@ -324,7 +323,7 @@ def inline_softmax_fixed_shared(N, buf, x, stride_x, load_x, ...@@ -324,7 +323,7 @@ def inline_softmax_fixed_shared(N, buf, x, stride_x, load_x,
Parameters Parameters
---------- ----------
N N
Length of the buffer, at least warpSize (32). Length of the buffer, at least warpSize (32).
buf buf
A shared memory buffer of size warpSize * sizeof(dtype). A shared memory buffer of size warpSize * sizeof(dtype).
......
...@@ -158,7 +158,6 @@ whitelist_flake8 = [ ...@@ -158,7 +158,6 @@ whitelist_flake8 = [
"sandbox/linalg/__init__.py", "sandbox/linalg/__init__.py",
"sandbox/linalg/tests/test_linalg.py", "sandbox/linalg/tests/test_linalg.py",
"sandbox/gpuarray/__init__.py", "sandbox/gpuarray/__init__.py",
"sandbox/gpuarray/kernel_codegen.py",
"sandbox/gpuarray/conv.py", "sandbox/gpuarray/conv.py",
"sandbox/gpuarray/neighbours.py", "sandbox/gpuarray/neighbours.py",
"sandbox/gpuarray/tests/test_subtensor.py", "sandbox/gpuarray/tests/test_subtensor.py",
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论