提交 2e3d841e 作者: Arnaud Bergeron

Flake8 for nnet.py

上级 b6f9a5bd
...@@ -5,17 +5,19 @@ import os ...@@ -5,17 +5,19 @@ import os
from theano import Op, Apply, config from theano import Op, Apply, config
from six import StringIO from six import StringIO
from theano.gof.util import MethodNotDefined
try: try:
import pygpu import pygpu
from pygpu import gpuarray, elemwise from pygpu import gpuarray
except ImportError: except ImportError:
pass pass
from .basic_ops import (as_gpuarray_variable, GpuKernelBase, Kernel) from .basic_ops import (as_gpuarray_variable, GpuKernelBase, Kernel)
from .type import GpuArrayType from .type import GpuArrayType
from .kernel_codegen import (nvcc_kernel, from .kernel_codegen import (nvcc_kernel,
inline_softmax, inline_softmax,
inline_softmax_fixed_shared) inline_softmax_fixed_shared)
from .fp16_help import work_dtype, load_w, write_w from .fp16_help import work_dtype, load_w, write_w
...@@ -665,60 +667,60 @@ class GpuSoftmax(GpuKernelBase, Op): ...@@ -665,60 +667,60 @@ class GpuSoftmax(GpuKernelBase, Op):
] ]
kernels = [] kernels = []
kname = "kSoftmax" kname = "kSoftmax"
k_var= "kSoftmax_" + nodename k_var = "kSoftmax_" + nodename
code = nvcc_kernel(kname, code = nvcc_kernel(
params=['const ga_size M', 'const ga_size N', kname,
params=['const ga_size M', 'const ga_size N',
'const %s * x' % type_x, 'const ga_size offset_x', 'const %s * x' % type_x, 'const ga_size offset_x',
'const ga_ssize sx0', 'const ga_ssize sx1', 'const ga_ssize sx0', 'const ga_ssize sx1',
'%s * sm' % type_sm, 'const ga_size offset_sm', '%s * sm' % type_sm, 'const ga_size offset_sm',
'const ga_ssize sm_s0', 'const ga_ssize sm_s1'], 'const ga_ssize sm_s0', 'const ga_ssize sm_s1'],
body=[ body=["extern __shared__ %s buf[]" % type_acc,
"extern __shared__ %s buf[]" % type_acc, "%s * buf2 = buf + N" % type_acc,
"%s * buf2 = buf + N" % type_acc, "x = (const %s *)(((char *)x)+offset_x)" % type_x,
"x = (const %s *)(((char *)x)+offset_x)" % type_x, "sm = (%s *)(((char *)sm)+offset_sm)" % type_sm,
"sm = (%s *)(((char *)sm)+offset_sm)" % type_sm, "for (int blockIDX = blockIdx.x; blockIDX < M;"
"for (int blockIDX = blockIdx.x; blockIDX < M;" " blockIDX += gridDim.x){",
" blockIDX += gridDim.x){", "for (int tx = threadIdx.x; tx< N; tx += blockDim.x){",
"for (int tx = threadIdx.x; tx< N; tx += blockDim.x){", "buf[tx] = %s(x[blockIDX * sx0 + tx * sx1])" % load_x,
"buf[tx] = %s(x[blockIDX * sx0 + tx * sx1])" % load_x, "buf2[tx] = buf[tx]",
"buf2[tx] = buf[tx]", "}",
"}", "__syncthreads()",
"__syncthreads()", inline_softmax('N', 'buf', 'buf2', 'threadIdx.x',
inline_softmax('N', 'buf', 'buf2', 'threadIdx.x', 'blockDim.x', dtype=work_sm),
'blockDim.x', dtype=work_sm), "for (int tx = threadIdx.x; tx< N; tx += blockDim.x){",
"for (int tx = threadIdx.x; tx< N; tx += blockDim.x){", # This set all value correctly
# This set all value correctly "sm[blockIDX * sm_s0 + tx * sm_s1] = %s(buf[tx])" % write_sm,
"sm[blockIDX * sm_s0 + tx * sm_s1] = %s(buf[tx])" % write_sm, "}",
"}", "__syncthreads()",
"__syncthreads()", "}",
"}", ])
])
kernels.append(Kernel(code=code, name=kname, params=params, kernels.append(Kernel(code=code, name=kname, params=params,
flags=flags, objvar=k_var)) flags=flags, objvar=k_var))
kname = "kSoftmax_fixed_shared" kname = "kSoftmax_fixed_shared"
k_var= "kSoftmax_fixed_shared" + nodename k_var = "kSoftmax_fixed_shared" + nodename
code = nvcc_kernel(kname, code = nvcc_kernel(
params=['const ga_size M', 'const ga_size N', kname,
params=['const ga_size M', 'const ga_size N',
'const %s * x' % type_x, 'const ga_size offset_x', 'const %s * x' % type_x, 'const ga_size offset_x',
'const ga_ssize sx0', 'const ga_ssize sx1', 'const ga_ssize sx0', 'const ga_ssize sx1',
'%s * sm' % type_sm, 'const ga_size offset_sm', '%s * sm' % type_sm, 'const ga_size offset_sm',
'const ga_ssize sm_s0', 'const ga_ssize sm_s1'], 'const ga_ssize sm_s0', 'const ga_ssize sm_s1'],
body=[ body=["extern __shared__ %s buf[]" % type_acc,
"extern __shared__ %s buf[]" % type_acc, "x = (const %s *)(((char *)x)+offset_x)" % type_x,
"x = (const %s *)(((char *)x)+offset_x)" % type_x, "sm = (%s *)(((char *)sm)+offset_sm)" % type_sm,
"sm = (%s *)(((char *)sm)+offset_sm)" % type_sm, "for (int blockIDX = blockIdx.x; blockIDX < M;"
"for (int blockIDX = blockIdx.x; blockIDX < M;" " blockIDX += gridDim.x){",
" blockIDX += gridDim.x){", "const %s *x_ptr = &x[blockIDX * sx0]" % type_x,
"const %s *x_ptr = &x[blockIDX * sx0]" % type_x, "%s *sm_ptr = &sm[blockIDX * sm_s0]" % type_sm,
"%s *sm_ptr = &sm[blockIDX * sm_s0]" % type_sm, inline_softmax_fixed_shared('N', 'buf', 'x_ptr', 'sx1',
inline_softmax_fixed_shared('N', 'buf', 'x_ptr', 'sx1', load_x,
load_x, 'sm_ptr', 'sm_s1', write_sm,
'sm_ptr', 'sm_s1', write_sm, 'threadIdx.x', 'blockDim.x',
'threadIdx.x', 'blockDim.x', dtype=work_sm),
dtype=work_sm), "__syncthreads()",
"__syncthreads()", "}",
"}", ])
])
kernels.append(Kernel(code=code, name=kname, params=params, kernels.append(Kernel(code=code, name=kname, params=params,
flags=flags, objvar=k_var)) flags=flags, objvar=k_var))
return kernels return kernels
...@@ -743,7 +745,7 @@ class GpuSoftmaxWithBias (GpuKernelBase, Op): ...@@ -743,7 +745,7 @@ class GpuSoftmaxWithBias (GpuKernelBase, Op):
return Apply(self, [x, b], [x.type()]) return Apply(self, [x, b], [x.type()])
def infer_shape(self, node, shape): def infer_shape(self, node, shape):
return [shape[0]] return [shape[0]]
def c_code_cache_version(self): def c_code_cache_version(self):
return (13,) + inline_softmax.code_version return (13,) + inline_softmax.code_version
...@@ -892,65 +894,65 @@ class GpuSoftmaxWithBias (GpuKernelBase, Op): ...@@ -892,65 +894,65 @@ class GpuSoftmaxWithBias (GpuKernelBase, Op):
kernels = [] kernels = []
kname = "kSoftmaxWithBias" kname = "kSoftmaxWithBias"
k_var = "kSoftmaxWithBias_" + nodename k_var = "kSoftmaxWithBias_" + nodename
code = nvcc_kernel(kname, code = nvcc_kernel(
params=['const ga_size M', 'const ga_size N', kname,
'const %s * x' % type_x, 'const ga_size offset_x', params=['const ga_size M', 'const ga_size N',
'const ga_ssize sx0', 'const ga_ssize sx1', 'const %s * x' % type_x, 'const ga_size offset_x',
'const %s * b' % type_b, 'const ga_size offset_b', 'const ga_ssize sx0', 'const ga_ssize sx1',
'const ga_ssize sb0', 'const %s * b' % type_b, 'const ga_size offset_b',
'%s * sm' % type_sm, 'const ga_size offset_sm', 'const ga_ssize sb0',
'const ga_ssize sm_s0', 'const ga_ssize sm_s1'], '%s * sm' % type_sm, 'const ga_size offset_sm',
body=[ 'const ga_ssize sm_s0', 'const ga_ssize sm_s1'],
"extern __shared__ %s buf[]" % type_acc, body=["extern __shared__ %s buf[]" % type_acc,
"%s * buf2 = buf + N" % type_acc, "%s * buf2 = buf + N" % type_acc,
"x = (const %s *)(((char *)x)+offset_x)" % type_x, "x = (const %s *)(((char *)x)+offset_x)" % type_x,
"b = (const %s *)(((char *)b)+offset_b)" % type_b, "b = (const %s *)(((char *)b)+offset_b)" % type_b,
"sm = (%s *)(((char *)sm)+offset_sm)" % type_sm, "sm = (%s *)(((char *)sm)+offset_sm)" % type_sm,
"for (int blockIDX = blockIdx.x; blockIDX < M;" "for (int blockIDX = blockIdx.x; blockIDX < M;"
" blockIDX += gridDim.x){", " blockIDX += gridDim.x){",
"for (int tx = threadIdx.x; tx< N; tx += blockDim.x){", "for (int tx = threadIdx.x; tx< N; tx += blockDim.x){",
"buf[tx] = %s(x[blockIDX * sx0 + tx * sx1])" % load_x, "buf[tx] = %s(x[blockIDX * sx0 + tx * sx1])" % load_x,
"buf[tx] += %s(b[tx * sb0])" % load_b, "buf[tx] += %s(b[tx * sb0])" % load_b,
"buf2[tx] = buf[tx]", "buf2[tx] = buf[tx]",
"}", "}",
"__syncthreads()", "__syncthreads()",
inline_softmax('N', 'buf', 'buf2', inline_softmax('N', 'buf', 'buf2',
'threadIdx.x', 'blockDim.x', work_sm), 'threadIdx.x', 'blockDim.x', work_sm),
"for (int tx = threadIdx.x; tx< N; tx += blockDim.x){", "for (int tx = threadIdx.x; tx< N; tx += blockDim.x){",
"sm[blockIDX * sm_s0 + tx * sm_s1] = %s(buf[tx])" % write_sm, "sm[blockIDX * sm_s0 + tx * sm_s1] = %s(buf[tx])" % write_sm,
"}", "}",
"__syncthreads()", "__syncthreads()",
"}", "}",
]) ])
kernels.append(Kernel(code=code, name=kname, params=params, kernels.append(Kernel(code=code, name=kname, params=params,
flags=flags, objvar=k_var)) flags=flags, objvar=k_var))
kname = "kSoftmaxWithBias_fixed_shared" kname = "kSoftmaxWithBias_fixed_shared"
k_var = "kSoftmaxWithBias_fixed_shared" + nodename k_var = "kSoftmaxWithBias_fixed_shared" + nodename
code = nvcc_kernel(kname, code = nvcc_kernel(
params=['const ga_size M', 'const ga_size N', kname,
'const %s * x' % type_x, 'const ga_size offset_x', params=['const ga_size M', 'const ga_size N',
'const ga_ssize sx0', 'const ga_ssize sx1', 'const %s * x' % type_x, 'const ga_size offset_x',
'const %s * b' % type_b, 'const ga_size offset_b', 'const ga_ssize sx0', 'const ga_ssize sx1',
'const ga_ssize sb0', 'const %s * b' % type_b, 'const ga_size offset_b',
'%s * sm' % type_sm, 'const ga_size offset_sm', 'const ga_ssize sb0',
'const ga_ssize sm_s0', 'const ga_ssize sm_s1'], '%s * sm' % type_sm, 'const ga_size offset_sm',
body=[ 'const ga_ssize sm_s0', 'const ga_ssize sm_s1'],
"extern __shared__ %s buf[]" % type_acc, body=["extern __shared__ %s buf[]" % type_acc,
"x = (const %s *)(((char *)x)+offset_x)" % type_x, "x = (const %s *)(((char *)x)+offset_x)" % type_x,
"b = (const %s *)(((char *)b)+offset_b)" % type_b, "b = (const %s *)(((char *)b)+offset_b)" % type_b,
"sm = (%s *)(((char *)sm)+offset_sm)" % type_sm, "sm = (%s *)(((char *)sm)+offset_sm)" % type_sm,
"for (int blockIDX = blockIdx.x; blockIDX < M;" "for (int blockIDX = blockIdx.x; blockIDX < M;"
" blockIDX += gridDim.x){", " blockIDX += gridDim.x){",
"const %s *x_ptr = &x[blockIDX * sx0]" % type_x, "const %s *x_ptr = &x[blockIDX * sx0]" % type_x,
"%s *sm_ptr = &sm[blockIDX * sm_s0]" % type_sm, "%s *sm_ptr = &sm[blockIDX * sm_s0]" % type_sm,
inline_softmax_fixed_shared('N', 'buf', 'x_ptr', 'sx1', inline_softmax_fixed_shared('N', 'buf', 'x_ptr', 'sx1',
load_x, load_x,
'sm_ptr', 'sm_s1', write_sm, 'sm_ptr', 'sm_s1', write_sm,
'threadIdx.x', 'blockDim.x', 'threadIdx.x', 'blockDim.x',
'b', 'sb0', load_b, work_sm), 'b', 'sb0', load_b, work_sm),
"__syncthreads()", "__syncthreads()",
"}", "}",
]) ])
kernels.append(Kernel(code=code, name=kname, params=params, kernels.append(Kernel(code=code, name=kname, params=params,
flags=flags, objvar=k_var)) flags=flags, objvar=k_var))
return kernels return kernels
......
...@@ -157,7 +157,6 @@ whitelist_flake8 = [ ...@@ -157,7 +157,6 @@ whitelist_flake8 = [
"sandbox/linalg/ops.py", "sandbox/linalg/ops.py",
"sandbox/linalg/__init__.py", "sandbox/linalg/__init__.py",
"sandbox/linalg/tests/test_linalg.py", "sandbox/linalg/tests/test_linalg.py",
"sandbox/gpuarray/nnet.py",
"sandbox/gpuarray/elemwise.py", "sandbox/gpuarray/elemwise.py",
"sandbox/gpuarray/type.py", "sandbox/gpuarray/type.py",
"sandbox/gpuarray/__init__.py", "sandbox/gpuarray/__init__.py",
......
Markdown 格式
0%
您即将添加 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论