提交 2e3d841e authored 作者: Arnaud Bergeron's avatar Arnaud Bergeron

Flake8 for nnet.py

上级 b6f9a5bd
......@@ -5,17 +5,19 @@ import os
from theano import Op, Apply, config
from six import StringIO
from theano.gof.util import MethodNotDefined
try:
import pygpu
from pygpu import gpuarray, elemwise
from pygpu import gpuarray
except ImportError:
pass
from .basic_ops import (as_gpuarray_variable, GpuKernelBase, Kernel)
from .type import GpuArrayType
from .kernel_codegen import (nvcc_kernel,
inline_softmax,
inline_softmax_fixed_shared)
inline_softmax,
inline_softmax_fixed_shared)
from .fp16_help import work_dtype, load_w, write_w
......@@ -665,60 +667,60 @@ class GpuSoftmax(GpuKernelBase, Op):
]
kernels = []
kname = "kSoftmax"
k_var= "kSoftmax_" + nodename
code = nvcc_kernel(kname,
params=['const ga_size M', 'const ga_size N',
k_var = "kSoftmax_" + nodename
code = nvcc_kernel(
kname,
params=['const ga_size M', 'const ga_size N',
'const %s * x' % type_x, 'const ga_size offset_x',
'const ga_ssize sx0', 'const ga_ssize sx1',
'%s * sm' % type_sm, 'const ga_size offset_sm',
'const ga_ssize sm_s0', 'const ga_ssize sm_s1'],
body=[
"extern __shared__ %s buf[]" % type_acc,
"%s * buf2 = buf + N" % type_acc,
"x = (const %s *)(((char *)x)+offset_x)" % type_x,
"sm = (%s *)(((char *)sm)+offset_sm)" % type_sm,
"for (int blockIDX = blockIdx.x; blockIDX < M;"
" blockIDX += gridDim.x){",
"for (int tx = threadIdx.x; tx< N; tx += blockDim.x){",
"buf[tx] = %s(x[blockIDX * sx0 + tx * sx1])" % load_x,
"buf2[tx] = buf[tx]",
"}",
"__syncthreads()",
inline_softmax('N', 'buf', 'buf2', 'threadIdx.x',
'blockDim.x', dtype=work_sm),
"for (int tx = threadIdx.x; tx< N; tx += blockDim.x){",
# This set all value correctly
"sm[blockIDX * sm_s0 + tx * sm_s1] = %s(buf[tx])" % write_sm,
"}",
"__syncthreads()",
"}",
])
body=["extern __shared__ %s buf[]" % type_acc,
"%s * buf2 = buf + N" % type_acc,
"x = (const %s *)(((char *)x)+offset_x)" % type_x,
"sm = (%s *)(((char *)sm)+offset_sm)" % type_sm,
"for (int blockIDX = blockIdx.x; blockIDX < M;"
" blockIDX += gridDim.x){",
"for (int tx = threadIdx.x; tx< N; tx += blockDim.x){",
"buf[tx] = %s(x[blockIDX * sx0 + tx * sx1])" % load_x,
"buf2[tx] = buf[tx]",
"}",
"__syncthreads()",
inline_softmax('N', 'buf', 'buf2', 'threadIdx.x',
'blockDim.x', dtype=work_sm),
"for (int tx = threadIdx.x; tx< N; tx += blockDim.x){",
# This set all value correctly
"sm[blockIDX * sm_s0 + tx * sm_s1] = %s(buf[tx])" % write_sm,
"}",
"__syncthreads()",
"}",
])
kernels.append(Kernel(code=code, name=kname, params=params,
flags=flags, objvar=k_var))
kname = "kSoftmax_fixed_shared"
k_var= "kSoftmax_fixed_shared" + nodename
code = nvcc_kernel(kname,
params=['const ga_size M', 'const ga_size N',
k_var = "kSoftmax_fixed_shared" + nodename
code = nvcc_kernel(
kname,
params=['const ga_size M', 'const ga_size N',
'const %s * x' % type_x, 'const ga_size offset_x',
'const ga_ssize sx0', 'const ga_ssize sx1',
'%s * sm' % type_sm, 'const ga_size offset_sm',
'const ga_ssize sm_s0', 'const ga_ssize sm_s1'],
body=[
"extern __shared__ %s buf[]" % type_acc,
"x = (const %s *)(((char *)x)+offset_x)" % type_x,
"sm = (%s *)(((char *)sm)+offset_sm)" % type_sm,
"for (int blockIDX = blockIdx.x; blockIDX < M;"
" blockIDX += gridDim.x){",
"const %s *x_ptr = &x[blockIDX * sx0]" % type_x,
"%s *sm_ptr = &sm[blockIDX * sm_s0]" % type_sm,
inline_softmax_fixed_shared('N', 'buf', 'x_ptr', 'sx1',
load_x,
'sm_ptr', 'sm_s1', write_sm,
'threadIdx.x', 'blockDim.x',
dtype=work_sm),
"__syncthreads()",
"}",
])
body=["extern __shared__ %s buf[]" % type_acc,
"x = (const %s *)(((char *)x)+offset_x)" % type_x,
"sm = (%s *)(((char *)sm)+offset_sm)" % type_sm,
"for (int blockIDX = blockIdx.x; blockIDX < M;"
" blockIDX += gridDim.x){",
"const %s *x_ptr = &x[blockIDX * sx0]" % type_x,
"%s *sm_ptr = &sm[blockIDX * sm_s0]" % type_sm,
inline_softmax_fixed_shared('N', 'buf', 'x_ptr', 'sx1',
load_x,
'sm_ptr', 'sm_s1', write_sm,
'threadIdx.x', 'blockDim.x',
dtype=work_sm),
"__syncthreads()",
"}",
])
kernels.append(Kernel(code=code, name=kname, params=params,
flags=flags, objvar=k_var))
return kernels
......@@ -743,7 +745,7 @@ class GpuSoftmaxWithBias (GpuKernelBase, Op):
return Apply(self, [x, b], [x.type()])
def infer_shape(self, node, shape):
return [shape[0]]
return [shape[0]]
def c_code_cache_version(self):
return (13,) + inline_softmax.code_version
......@@ -892,65 +894,65 @@ class GpuSoftmaxWithBias (GpuKernelBase, Op):
kernels = []
kname = "kSoftmaxWithBias"
k_var = "kSoftmaxWithBias_" + nodename
code = nvcc_kernel(kname,
params=['const ga_size M', 'const ga_size N',
'const %s * x' % type_x, 'const ga_size offset_x',
'const ga_ssize sx0', 'const ga_ssize sx1',
'const %s * b' % type_b, 'const ga_size offset_b',
'const ga_ssize sb0',
'%s * sm' % type_sm, 'const ga_size offset_sm',
'const ga_ssize sm_s0', 'const ga_ssize sm_s1'],
body=[
"extern __shared__ %s buf[]" % type_acc,
"%s * buf2 = buf + N" % type_acc,
"x = (const %s *)(((char *)x)+offset_x)" % type_x,
"b = (const %s *)(((char *)b)+offset_b)" % type_b,
"sm = (%s *)(((char *)sm)+offset_sm)" % type_sm,
"for (int blockIDX = blockIdx.x; blockIDX < M;"
" blockIDX += gridDim.x){",
"for (int tx = threadIdx.x; tx< N; tx += blockDim.x){",
"buf[tx] = %s(x[blockIDX * sx0 + tx * sx1])" % load_x,
"buf[tx] += %s(b[tx * sb0])" % load_b,
"buf2[tx] = buf[tx]",
"}",
"__syncthreads()",
inline_softmax('N', 'buf', 'buf2',
'threadIdx.x', 'blockDim.x', work_sm),
"for (int tx = threadIdx.x; tx< N; tx += blockDim.x){",
"sm[blockIDX * sm_s0 + tx * sm_s1] = %s(buf[tx])" % write_sm,
"}",
"__syncthreads()",
"}",
])
code = nvcc_kernel(
kname,
params=['const ga_size M', 'const ga_size N',
'const %s * x' % type_x, 'const ga_size offset_x',
'const ga_ssize sx0', 'const ga_ssize sx1',
'const %s * b' % type_b, 'const ga_size offset_b',
'const ga_ssize sb0',
'%s * sm' % type_sm, 'const ga_size offset_sm',
'const ga_ssize sm_s0', 'const ga_ssize sm_s1'],
body=["extern __shared__ %s buf[]" % type_acc,
"%s * buf2 = buf + N" % type_acc,
"x = (const %s *)(((char *)x)+offset_x)" % type_x,
"b = (const %s *)(((char *)b)+offset_b)" % type_b,
"sm = (%s *)(((char *)sm)+offset_sm)" % type_sm,
"for (int blockIDX = blockIdx.x; blockIDX < M;"
" blockIDX += gridDim.x){",
"for (int tx = threadIdx.x; tx< N; tx += blockDim.x){",
"buf[tx] = %s(x[blockIDX * sx0 + tx * sx1])" % load_x,
"buf[tx] += %s(b[tx * sb0])" % load_b,
"buf2[tx] = buf[tx]",
"}",
"__syncthreads()",
inline_softmax('N', 'buf', 'buf2',
'threadIdx.x', 'blockDim.x', work_sm),
"for (int tx = threadIdx.x; tx< N; tx += blockDim.x){",
"sm[blockIDX * sm_s0 + tx * sm_s1] = %s(buf[tx])" % write_sm,
"}",
"__syncthreads()",
"}",
])
kernels.append(Kernel(code=code, name=kname, params=params,
flags=flags, objvar=k_var))
kname = "kSoftmaxWithBias_fixed_shared"
k_var = "kSoftmaxWithBias_fixed_shared" + nodename
code = nvcc_kernel(kname,
params=['const ga_size M', 'const ga_size N',
'const %s * x' % type_x, 'const ga_size offset_x',
'const ga_ssize sx0', 'const ga_ssize sx1',
'const %s * b' % type_b, 'const ga_size offset_b',
'const ga_ssize sb0',
'%s * sm' % type_sm, 'const ga_size offset_sm',
'const ga_ssize sm_s0', 'const ga_ssize sm_s1'],
body=[
"extern __shared__ %s buf[]" % type_acc,
"x = (const %s *)(((char *)x)+offset_x)" % type_x,
"b = (const %s *)(((char *)b)+offset_b)" % type_b,
"sm = (%s *)(((char *)sm)+offset_sm)" % type_sm,
"for (int blockIDX = blockIdx.x; blockIDX < M;"
" blockIDX += gridDim.x){",
"const %s *x_ptr = &x[blockIDX * sx0]" % type_x,
"%s *sm_ptr = &sm[blockIDX * sm_s0]" % type_sm,
inline_softmax_fixed_shared('N', 'buf', 'x_ptr', 'sx1',
load_x,
'sm_ptr', 'sm_s1', write_sm,
'threadIdx.x', 'blockDim.x',
'b', 'sb0', load_b, work_sm),
"__syncthreads()",
"}",
])
code = nvcc_kernel(
kname,
params=['const ga_size M', 'const ga_size N',
'const %s * x' % type_x, 'const ga_size offset_x',
'const ga_ssize sx0', 'const ga_ssize sx1',
'const %s * b' % type_b, 'const ga_size offset_b',
'const ga_ssize sb0',
'%s * sm' % type_sm, 'const ga_size offset_sm',
'const ga_ssize sm_s0', 'const ga_ssize sm_s1'],
body=["extern __shared__ %s buf[]" % type_acc,
"x = (const %s *)(((char *)x)+offset_x)" % type_x,
"b = (const %s *)(((char *)b)+offset_b)" % type_b,
"sm = (%s *)(((char *)sm)+offset_sm)" % type_sm,
"for (int blockIDX = blockIdx.x; blockIDX < M;"
" blockIDX += gridDim.x){",
"const %s *x_ptr = &x[blockIDX * sx0]" % type_x,
"%s *sm_ptr = &sm[blockIDX * sm_s0]" % type_sm,
inline_softmax_fixed_shared('N', 'buf', 'x_ptr', 'sx1',
load_x,
'sm_ptr', 'sm_s1', write_sm,
'threadIdx.x', 'blockDim.x',
'b', 'sb0', load_b, work_sm),
"__syncthreads()",
"}",
])
kernels.append(Kernel(code=code, name=kname, params=params,
flags=flags, objvar=k_var))
return kernels
......
......@@ -157,7 +157,6 @@ whitelist_flake8 = [
"sandbox/linalg/ops.py",
"sandbox/linalg/__init__.py",
"sandbox/linalg/tests/test_linalg.py",
"sandbox/gpuarray/nnet.py",
"sandbox/gpuarray/elemwise.py",
"sandbox/gpuarray/type.py",
"sandbox/gpuarray/__init__.py",
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论