Commit 755d7cdf authored by Chiheb Trabelsi

nnet.py has been modified in order to respect the flake8 style

nnet.py has been fixed so that it no longer contains long lines.
Parent 06adacd3
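For context on the diff below: flake8's E501 check caps lines at 79 characters, so the edits here are pure re-wrapping of long call expressions. A minimal sketch of the pattern, with a placeholder function standing in for the real helper so the snippet runs:

    def nvcc_kernel(name, params, body):
        # Placeholder with the same call shape as Theano's helper, only so
        # this snippet is runnable; it is not the real implementation.
        return name

    # Before: everything packed onto one line that trips E501 (> 79 chars).
    ret = nvcc_kernel("kExample", params=['int M', 'int N', 'const float * x'], body=["__syncthreads()"])

    # After: the same call re-wrapped so each line stays under the limit;
    # behaviour is unchanged, only the source layout differs.
    ret = nvcc_kernel(
        "kExample",
        params=['int M', 'int N',
                'const float * x'],
        body=["__syncthreads()"])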
@@ -578,45 +578,46 @@ class GpuSoftmax(GpuOp):
         """ % locals()
 
     def c_support_code_apply(self, node, nodename):
-        ret1 = nvcc_kernel("kSoftmax_%s" % nodename,
-                params=['int M', 'int N',
-                    'const float * x', 'const int sx0', 'const int sx1',
-                    'float * sm', 'const int sm_s0', 'const int sm_s1'],
-                body=[
-                    "extern __shared__ float buf[]",
-                    "float * buf2 = buf + N",
-                    "for (int blockIDX = blockIdx.x; blockIDX < M;"
-                    " blockIDX += gridDim.x){",
-                    "for (int tx = threadIdx.x; tx< N; tx += blockDim.x){",
-                    "buf[tx] = x[blockIDX * sx0 + tx * sx1]",
-                    "buf2[tx] = buf[tx]",
-                    "}",
-                    "__syncthreads()",
-                    inline_softmax('N', 'buf', 'buf2',
-                                   'threadIdx.x', 'blockDim.x'),
-                    "for (int tx = threadIdx.x; tx< N; tx += blockDim.x){",
-                    # This set all value correctly
-                    "sm[blockIDX * sm_s0 + tx * sm_s1] = buf[tx]",
-                    "}",
-                    "__syncthreads()",
-                    "}",
-                    ])
-        ret2 = nvcc_kernel("kSoftmax_fixed_shared%s" % nodename,
-                params=['int M', 'int N',
-                    'const float * x', 'const int sx0', 'const int sx1',
-                    'float * sm', 'const int sm_s0', 'const int sm_s1'],
-                body=[
-                    "extern __shared__ float buf[]",
-                    "for (int blockIDX = blockIdx.x; blockIDX < M;"
-                    " blockIDX += gridDim.x){",
-                    "const float *x_ptr = &x[blockIDX * sx0]",
-                    "float *sm_ptr = &sm[blockIDX * sm_s0]",
-                    inline_softmax_fixed_shared('N', 'buf', 'x_ptr', 'sx1',
-                                                'sm_ptr', 'sm_s1',
-                                                'threadIdx.x', 'blockDim.x'),
-                    "__syncthreads()",
-                    "}",
-                    ])
+        ret1 = nvcc_kernel(
+            "kSoftmax_%s" % nodename,
+            params=['int M', 'int N',
+                    'const float * x',
+                    'const int sx0',
+                    'const int sx1',
+                    'float * sm',
+                    'const int sm_s0',
+                    'const int sm_s1'],
+            body=["extern __shared__ float buf[]",
+                  "float * buf2 = buf + N",
+                  "for (int blockIDX = blockIdx.x; blockIDX < M;"
+                  " blockIDX += gridDim.x){",
+                  "for (int tx = threadIdx.x; tx< N; tx += blockDim.x){",
+                  "buf[tx] = x[blockIDX * sx0 + tx * sx1]",
+                  "buf2[tx] = buf[tx]", "}", "__syncthreads()",
+                  inline_softmax('N',
+                                 'buf',
+                                 'buf2',
+                                 'threadIdx.x',
+                                 'blockDim.x'),
+                  "for (int tx = threadIdx.x; tx< N; tx += blockDim.x){",
+                  # This set all value correctly
+                  "sm[blockIDX * sm_s0 + tx * sm_s1] = buf[tx]", "}",
+                  "__syncthreads()", "}", ])
+        ret2 = nvcc_kernel(
+            "kSoftmax_fixed_shared%s" % nodename,
+            params=['int M', 'int N',
+                    'const float * x', 'const int sx0', 'const int sx1',
+                    'float * sm', 'const int sm_s0', 'const int sm_s1'],
+            body=["extern __shared__ float buf[]",
+                  "for (int blockIDX = blockIdx.x; blockIDX < M;"
+                  " blockIDX += gridDim.x){",
+                  "const float *x_ptr = &x[blockIDX * sx0]",
+                  "float *sm_ptr = &sm[blockIDX * sm_s0]",
+                  inline_softmax_fixed_shared('N', 'buf', 'x_ptr', 'sx1',
+                                              'sm_ptr', 'sm_s1',
+                                              'threadIdx.x',
+                                              'blockDim.x'),
+                  "__syncthreads()", "}", ])
         return ret1 + "\n" + ret2
 
 gpu_softmax = GpuSoftmax()
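As a reading aid for the hunk above: `nvcc_kernel` assembles the `params` and `body` string lists into the source of a single CUDA `__global__` function, so re-wrapping the Python lists does not change the generated kernel. Below is a simplified stand-in (not Theano's actual implementation, which handles more cases) showing roughly how the pieces combine:

    def nvcc_kernel_sketch(name, params, body):
        # Join the parameter declarations into a signature, and terminate
        # each body statement with ';', leaving block openers/closers as-is.
        stmts = "\n".join(
            s if s.rstrip().endswith(("{", "}")) else s + ";"
            for s in body)
        return "__global__ void %s(%s)\n{\n%s\n}\n" % (
            name, ", ".join(params), stmts)

    print(nvcc_kernel_sketch(
        "kSoftmax_demo",
        params=['int M', 'int N', 'const float * x'],
        body=["extern __shared__ float buf[]",
              "for (int tx = threadIdx.x; tx < N; tx += blockDim.x){",
              "buf[tx] = x[tx]",
              "}",
              "__syncthreads()"]))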
@@ -768,25 +769,20 @@ class GpuSoftmaxWithBias(GpuOp):
                     'const float * x', 'const int sx0', 'const int sx1',
                     'const float * b', 'const int sb0',
                     'float * sm', 'const int sm_s0', 'const int sm_s1'],
-                body=[
-                      "extern __shared__ float buf[]",
+                body=["extern __shared__ float buf[]",
                       "float * buf2 = buf + N",
                       "for (int blockIDX = blockIdx.x; blockIDX < M;"
                       " blockIDX += gridDim.x){",
                       "for (int tx = threadIdx.x; tx< N; tx += blockDim.x){",
                       "buf[tx] = x[blockIDX * sx0 + tx * sx1]",
                       "buf[tx] += b[tx * sb0]",
-                      "buf2[tx] = buf[tx]",
-                      "}",
-                      "__syncthreads()",
-                      inline_softmax('N', 'buf', 'buf2',
-                                     'threadIdx.x', 'blockDim.x'),
+                      "buf2[tx] = buf[tx]", "}",
+                      "__syncthreads()", inline_softmax('N', 'buf', 'buf2',
+                                                        'threadIdx.x',
+                                                        'blockDim.x'),
                       "for (int tx = threadIdx.x; tx< N; tx += blockDim.x){",
-                      "sm[blockIDX * sm_s0 + tx * sm_s1] = buf[tx]",
-                      "}",
-                      "__syncthreads()",
-                      "}",
-                      ])
+                      "sm[blockIDX * sm_s0 + tx * sm_s1] = buf[tx]", "}",
+                      "__syncthreads()", "}", ])
         ret2 = nvcc_kernel("kSoftmaxWithBias_fixed_shared%s" % nodename,
                            params=['int M', 'int N',
                                    'const float * x',
@@ -802,7 +798,8 @@ class GpuSoftmaxWithBias(GpuOp):
                       "float *sm_ptr = &sm[blockIDX * sm_s0]",
                       inline_softmax_fixed_shared('N', 'buf',
                                                   'x_ptr', 'sx1',
-                                                  'sm_ptr', 'sm_s1',
+                                                  'sm_ptr',
+                                                  'sm_s1',
                                                   'threadIdx.x',
                                                   'blockDim.x',
                                                   'b', 'sb0'),
......
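Behaviourally, nothing in this commit changes: both kernels still compute a row-wise softmax, with `GpuSoftmaxWithBias` first adding the bias `b` to each row (the `buf[tx] += b[tx * sb0]` line). For orientation, a NumPy reference of the computation these kernels implement, assuming the usual max-subtraction trick that `inline_softmax` uses for numerical stability:

    import numpy as np

    def softmax_with_bias(x, b=None):
        # Optionally add the bias to every row, subtract the row max for
        # numerical stability, exponentiate, and normalise by the row sum.
        z = x + b if b is not None else x
        z = z - z.max(axis=1, keepdims=True)
        e = np.exp(z)
        return e / e.sum(axis=1, keepdims=True)

    x = np.random.rand(4, 5).astype('float32')
    b = np.random.rand(5).astype('float32')
    print(softmax_with_bias(x, b).sum(axis=1))  # every row sums to ~1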