提交 755d7cdf 作者: Chiheb Trabelsi

nnet.py has been modified to comply with the flake8 style.

nnet.py has been fixed: it no longer contains overly long lines.
上级 06adacd3
...@@ -578,45 +578,46 @@ class GpuSoftmax(GpuOp): ...@@ -578,45 +578,46 @@ class GpuSoftmax(GpuOp):
""" % locals() """ % locals()
def c_support_code_apply(self, node, nodename): def c_support_code_apply(self, node, nodename):
ret1 = nvcc_kernel("kSoftmax_%s" % nodename, ret1 = nvcc_kernel(
params=['int M', 'int N', "kSoftmax_%s" % nodename,
'const float * x', 'const int sx0', 'const int sx1', params=['int M', 'int N',
'float * sm', 'const int sm_s0', 'const int sm_s1'], 'const float * x',
body=[ 'const int sx0',
"extern __shared__ float buf[]", 'const int sx1',
"float * buf2 = buf + N", 'float * sm',
"for (int blockIDX = blockIdx.x; blockIDX < M;" 'const int sm_s0',
" blockIDX += gridDim.x){", 'const int sm_s1'],
"for (int tx = threadIdx.x; tx< N; tx += blockDim.x){", body=["extern __shared__ float buf[]",
"buf[tx] = x[blockIDX * sx0 + tx * sx1]", "float * buf2 = buf + N",
"buf2[tx] = buf[tx]", "for (int blockIDX = blockIdx.x; blockIDX < M;"
"}", " blockIDX += gridDim.x){",
"__syncthreads()", "for (int tx = threadIdx.x; tx< N; tx += blockDim.x){",
inline_softmax('N', 'buf', 'buf2', "buf[tx] = x[blockIDX * sx0 + tx * sx1]",
'threadIdx.x', 'blockDim.x'), "buf2[tx] = buf[tx]", "}", "__syncthreads()",
"for (int tx = threadIdx.x; tx< N; tx += blockDim.x){", inline_softmax('N',
# This set all value correctly 'buf',
"sm[blockIDX * sm_s0 + tx * sm_s1] = buf[tx]", 'buf2',
"}", 'threadIdx.x',
"__syncthreads()", 'blockDim.x'),
"}", "for (int tx = threadIdx.x; tx< N; tx += blockDim.x){",
]) # This set all value correctly
ret2 = nvcc_kernel("kSoftmax_fixed_shared%s" % nodename, "sm[blockIDX * sm_s0 + tx * sm_s1] = buf[tx]", "}",
params=['int M', 'int N', "__syncthreads()", "}", ])
'const float * x', 'const int sx0', 'const int sx1', ret2 = nvcc_kernel(
'float * sm', 'const int sm_s0', 'const int sm_s1'], "kSoftmax_fixed_shared%s" % nodename,
body=[ params=['int M', 'int N',
"extern __shared__ float buf[]", 'const float * x', 'const int sx0', 'const int sx1',
"for (int blockIDX = blockIdx.x; blockIDX < M;" 'float * sm', 'const int sm_s0', 'const int sm_s1'],
" blockIDX += gridDim.x){", body=["extern __shared__ float buf[]",
"const float *x_ptr = &x[blockIDX * sx0]", "for (int blockIDX = blockIdx.x; blockIDX < M;"
"float *sm_ptr = &sm[blockIDX * sm_s0]", " blockIDX += gridDim.x){",
inline_softmax_fixed_shared('N', 'buf', 'x_ptr', 'sx1', "const float *x_ptr = &x[blockIDX * sx0]",
'sm_ptr', 'sm_s1', "float *sm_ptr = &sm[blockIDX * sm_s0]",
'threadIdx.x', 'blockDim.x'), inline_softmax_fixed_shared('N', 'buf', 'x_ptr', 'sx1',
"__syncthreads()", 'sm_ptr', 'sm_s1',
"}", 'threadIdx.x',
]) 'blockDim.x'),
"__syncthreads()", "}", ])
return ret1 + "\n" + ret2 return ret1 + "\n" + ret2
gpu_softmax = GpuSoftmax() gpu_softmax = GpuSoftmax()
...@@ -768,25 +769,20 @@ class GpuSoftmaxWithBias(GpuOp): ...@@ -768,25 +769,20 @@ class GpuSoftmaxWithBias(GpuOp):
'const float * x', 'const int sx0', 'const int sx1', 'const float * x', 'const int sx0', 'const int sx1',
'const float * b', 'const int sb0', 'const float * b', 'const int sb0',
'float * sm', 'const int sm_s0', 'const int sm_s1'], 'float * sm', 'const int sm_s0', 'const int sm_s1'],
body=[ body=["extern __shared__ float buf[]",
"extern __shared__ float buf[]", "float * buf2 = buf + N",
"float * buf2 = buf + N", "for (int blockIDX = blockIdx.x; blockIDX < M;"
"for (int blockIDX = blockIdx.x; blockIDX < M;" " blockIDX += gridDim.x){",
" blockIDX += gridDim.x){", "for (int tx = threadIdx.x; tx< N; tx += blockDim.x){",
"for (int tx = threadIdx.x; tx< N; tx += blockDim.x){", "buf[tx] = x[blockIDX * sx0 + tx * sx1]",
"buf[tx] = x[blockIDX * sx0 + tx * sx1]", "buf[tx] += b[tx * sb0]",
"buf[tx] += b[tx * sb0]", "buf2[tx] = buf[tx]", "}",
"buf2[tx] = buf[tx]", "__syncthreads()", inline_softmax('N', 'buf', 'buf2',
"}", 'threadIdx.x',
"__syncthreads()", 'blockDim.x'),
inline_softmax('N', 'buf', 'buf2', "for (int tx = threadIdx.x; tx< N; tx += blockDim.x){",
'threadIdx.x', 'blockDim.x'), "sm[blockIDX * sm_s0 + tx * sm_s1] = buf[tx]", "}",
"for (int tx = threadIdx.x; tx< N; tx += blockDim.x){", "__syncthreads()", "}", ])
"sm[blockIDX * sm_s0 + tx * sm_s1] = buf[tx]",
"}",
"__syncthreads()",
"}",
])
ret2 = nvcc_kernel("kSoftmaxWithBias_fixed_shared%s" % nodename, ret2 = nvcc_kernel("kSoftmaxWithBias_fixed_shared%s" % nodename,
params=['int M', 'int N', params=['int M', 'int N',
'const float * x', 'const float * x',
...@@ -802,7 +798,8 @@ class GpuSoftmaxWithBias(GpuOp): ...@@ -802,7 +798,8 @@ class GpuSoftmaxWithBias(GpuOp):
"float *sm_ptr = &sm[blockIDX * sm_s0]", "float *sm_ptr = &sm[blockIDX * sm_s0]",
inline_softmax_fixed_shared('N', 'buf', inline_softmax_fixed_shared('N', 'buf',
'x_ptr', 'sx1', 'x_ptr', 'sx1',
'sm_ptr', 'sm_s1', 'sm_ptr',
'sm_s1',
'threadIdx.x', 'threadIdx.x',
'blockDim.x', 'blockDim.x',
'b', 'sb0'), 'b', 'sb0'),
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论