提交 374cdd74　作者：xiaoqie

style fix

上级 9eb54e01
...@@ -70,14 +70,14 @@ class GpuCrossentropySoftmaxArgmax1HotWithBias(GpuKernelBase, Op): ...@@ -70,14 +70,14 @@ class GpuCrossentropySoftmaxArgmax1HotWithBias(GpuKernelBase, Op):
else: else:
f = '' if dtype_x == 'float64' else 'f' f = '' if dtype_x == 'float64' else 'f'
params = [ params = [
gpuarray.SIZE, gpuarray.SIZE, gpuarray.SIZE, gpuarray.SIZE,
gpuarray.GpuArray, gpuarray.SIZE, gpuarray.SSIZE, gpuarray.SSIZE, gpuarray.GpuArray, gpuarray.SIZE, gpuarray.SSIZE, gpuarray.SSIZE,
gpuarray.GpuArray, gpuarray.SIZE, gpuarray.SSIZE, gpuarray.GpuArray, gpuarray.SIZE, gpuarray.SSIZE,
gpuarray.GpuArray, gpuarray.SIZE, gpuarray.SSIZE, gpuarray.GpuArray, gpuarray.SIZE, gpuarray.SSIZE,
gpuarray.GpuArray, gpuarray.SIZE, gpuarray.SSIZE, gpuarray.GpuArray, gpuarray.SIZE, gpuarray.SSIZE,
gpuarray.GpuArray, gpuarray.SIZE, gpuarray.SSIZE, gpuarray.SSIZE, gpuarray.GpuArray, gpuarray.SIZE, gpuarray.SSIZE, gpuarray.SSIZE,
gpuarray.GpuArray, gpuarray.SIZE, gpuarray.SSIZE gpuarray.GpuArray, gpuarray.SIZE, gpuarray.SSIZE
] ]
sio = StringIO() sio = StringIO()
print(""" print("""
KERNEL void %(kname)s(const ga_size M, const ga_size N, KERNEL void %(kname)s(const ga_size M, const ga_size N,
...@@ -183,7 +183,7 @@ class GpuCrossentropySoftmaxArgmax1HotWithBias(GpuKernelBase, Op): ...@@ -183,7 +183,7 @@ class GpuCrossentropySoftmaxArgmax1HotWithBias(GpuKernelBase, Op):
} }
} }
""" % locals(), file=sio) """ % locals(), file=sio)
return [Kernel(code=sio.getvalue(), name=kname, params=params, return [Kernel(code=sio.getvalue(), name=kname, params=params,
flags=flags, objvar=k_var)] flags=flags, objvar=k_var)]
...@@ -424,12 +424,12 @@ class GpuCrossentropySoftmax1HotWithBiasDx(GpuKernelBase, Op): ...@@ -424,12 +424,12 @@ class GpuCrossentropySoftmax1HotWithBiasDx(GpuKernelBase, Op):
kname = "kCrossEntropySoftmax1HotWithBiasDx" kname = "kCrossEntropySoftmax1HotWithBiasDx"
k_var = "kCrossEntropySoftmax1HotWithBiasDx_" + nodename k_var = "kCrossEntropySoftmax1HotWithBiasDx_" + nodename
params = [ params = [
gpuarray.SIZE, gpuarray.SIZE, gpuarray.SIZE, gpuarray.SIZE,
gpuarray.GpuArray, gpuarray.SIZE, gpuarray.SSIZE, gpuarray.GpuArray, gpuarray.SIZE, gpuarray.SSIZE,
gpuarray.GpuArray, gpuarray.SIZE, gpuarray.SSIZE, gpuarray.SSIZE, gpuarray.GpuArray, gpuarray.SIZE, gpuarray.SSIZE, gpuarray.SSIZE,
gpuarray.GpuArray, gpuarray.SIZE, gpuarray.SSIZE, gpuarray.GpuArray, gpuarray.SIZE, gpuarray.SSIZE,
gpuarray.GpuArray, gpuarray.SIZE, gpuarray.SSIZE, gpuarray.SSIZE, gpuarray.GpuArray, gpuarray.SIZE, gpuarray.SSIZE, gpuarray.SSIZE,
] ]
sio = StringIO() sio = StringIO()
print(""" print("""
KERNEL void %(kname)s( KERNEL void %(kname)s(
...@@ -470,6 +470,7 @@ class GpuCrossentropySoftmax1HotWithBiasDx(GpuKernelBase, Op): ...@@ -470,6 +470,7 @@ class GpuCrossentropySoftmax1HotWithBiasDx(GpuKernelBase, Op):
return [Kernel(code=sio.getvalue(), name=kname, params=params, return [Kernel(code=sio.getvalue(), name=kname, params=params,
flags=flags, objvar=k_var)] flags=flags, objvar=k_var)]
gpu_crossentropy_softmax_1hot_with_bias_dx = GpuCrossentropySoftmax1HotWithBiasDx() gpu_crossentropy_softmax_1hot_with_bias_dx = GpuCrossentropySoftmax1HotWithBiasDx()
...@@ -599,15 +600,17 @@ class GpuSoftmax(GpuKernelBase, Op): ...@@ -599,15 +600,17 @@ class GpuSoftmax(GpuKernelBase, Op):
ctype = gpuarray.dtype_to_ctype(dtype_sm) ctype = gpuarray.dtype_to_ctype(dtype_sm)
params = [ params = [
gpuarray.SIZE, gpuarray.SIZE, gpuarray.GpuArray, gpuarray.SIZE, gpuarray.SIZE, gpuarray.SIZE,
gpuarray.SSIZE, gpuarray.SSIZE, gpuarray.GpuArray, gpuarray.SIZE, gpuarray.SSIZE, gpuarray.SSIZE gpuarray.GpuArray, gpuarray.SIZE, gpuarray.SSIZE, gpuarray.SSIZE,
gpuarray.GpuArray, gpuarray.SIZE, gpuarray.SSIZE, gpuarray.SSIZE
] ]
kernels = [] kernels = []
kname = "kSoftmax" kname = "kSoftmax"
k_var = "kSoftmax_" + nodename k_var = "kSoftmax_" + nodename
code = """ code = """
KERNEL void %(kname)s (const ga_size M, const ga_size N, GLOBAL_MEM const %(type_x)s * x, const ga_size offset_x, KERNEL void %(kname)s (const ga_size M, const ga_size N,
const ga_ssize sx0, const ga_ssize sx1, GLOBAL_MEM %(type_sm)s * sm, const ga_size offset_sm, const ga_ssize sm_s0, const ga_ssize sm_s1 GA_DECL_SHARED_PARAM(%(type_acc)s, buf)) GLOBAL_MEM const %(type_x)s * x, const ga_size offset_x, const ga_ssize sx0, const ga_ssize sx1,
GLOBAL_MEM %(type_sm)s * sm, const ga_size offset_sm, const ga_ssize sm_s0, const ga_ssize sm_s1 GA_DECL_SHARED_PARAM(%(type_acc)s, buf))
{ {
GA_DECL_SHARED_BODY(%(type_acc)s, buf); GA_DECL_SHARED_BODY(%(type_acc)s, buf);
...@@ -685,11 +688,12 @@ class GpuSoftmax(GpuKernelBase, Op): ...@@ -685,11 +688,12 @@ class GpuSoftmax(GpuKernelBase, Op):
kname = "kSoftmax_fixed_shared" kname = "kSoftmax_fixed_shared"
k_var = "kSoftmax_fixed_shared" + nodename k_var = "kSoftmax_fixed_shared" + nodename
code = """ code = """
KERNEL void %(kname)s (const ga_size M, const ga_size N, GLOBAL_MEM const %(type_x)s * x, const ga_size offset_x, const ga_ssize sx0, const ga_ssize sx1, KERNEL void %(kname)s (const ga_size M, const ga_size N,
GLOBAL_MEM %(type_sm)s * sm, const ga_size offset_sm, const ga_ssize sm_s0, const ga_ssize sm_s1 GA_DECL_SHARED_PARAM(%(type_acc)s, buf)) GLOBAL_MEM const %(type_x)s * x, const ga_size offset_x, const ga_ssize sx0, const ga_ssize sx1,
GLOBAL_MEM %(type_sm)s * sm, const ga_size offset_sm, const ga_ssize sm_s0, const ga_ssize sm_s1 GA_DECL_SHARED_PARAM(%(type_acc)s, buf))
{ {
GA_DECL_SHARED_BODY(%(type_acc)s, buf); GA_DECL_SHARED_BODY(%(type_acc)s, buf);
x = (GLOBAL_MEM const %(type_x)s *)(((GLOBAL_MEM char *)x)+offset_x); x = (GLOBAL_MEM const %(type_x)s *)(((GLOBAL_MEM char *)x)+offset_x);
sm = (GLOBAL_MEM %(type_sm)s *)(((GLOBAL_MEM char *)sm)+offset_sm); sm = (GLOBAL_MEM %(type_sm)s *)(((GLOBAL_MEM char *)sm)+offset_sm);
for (ga_int blockIDX = GID_0; blockIDX < M; blockIDX += GDIM_0){ for (ga_int blockIDX = GID_0; blockIDX < M; blockIDX += GDIM_0){
...@@ -746,7 +750,7 @@ class GpuSoftmax(GpuKernelBase, Op): ...@@ -746,7 +750,7 @@ class GpuSoftmax(GpuKernelBase, Op):
local_barrier(); local_barrier();
} }
} }
local_barrier(); local_barrier();
%(ctype)s row_sum = buf[0]; %(ctype)s row_sum = buf[0];
local_barrier(); local_barrier();
...@@ -762,6 +766,7 @@ class GpuSoftmax(GpuKernelBase, Op): ...@@ -762,6 +766,7 @@ class GpuSoftmax(GpuKernelBase, Op):
flags=flags, objvar=k_var)) flags=flags, objvar=k_var))
return kernels return kernels
gpu_softmax = GpuSoftmax() gpu_softmax = GpuSoftmax()
...@@ -909,26 +914,26 @@ class GpuSoftmaxWithBias(GpuKernelBase, Op): ...@@ -909,26 +914,26 @@ class GpuSoftmaxWithBias(GpuKernelBase, Op):
type_b = gpuarray.dtype_to_ctype(dtype_b) type_b = gpuarray.dtype_to_ctype(dtype_b)
type_sm = gpuarray.dtype_to_ctype(dtype_sm) type_sm = gpuarray.dtype_to_ctype(dtype_sm)
type_acc = gpuarray.dtype_to_ctype(work_sm) type_acc = gpuarray.dtype_to_ctype(work_sm)
ctype = gpuarray.dtype_to_ctype(dtype_sm) ctype = gpuarray.dtype_to_ctype(dtype_sm)
params = [ params = [
gpuarray.SIZE, gpuarray.SIZE, gpuarray.SIZE, gpuarray.SIZE,
gpuarray.GpuArray, gpuarray.SIZE, gpuarray.SSIZE, gpuarray.SSIZE, gpuarray.GpuArray, gpuarray.SIZE, gpuarray.SSIZE, gpuarray.SSIZE,
gpuarray.GpuArray, gpuarray.SIZE, gpuarray.SSIZE, gpuarray.GpuArray, gpuarray.SIZE, gpuarray.SSIZE,
gpuarray.GpuArray, gpuarray.SIZE, gpuarray.SSIZE, gpuarray.SSIZE, gpuarray.GpuArray, gpuarray.SIZE, gpuarray.SSIZE, gpuarray.SSIZE,
] ]
kernels = [] kernels = []
kname = "kSoftmaxWithBias" kname = "kSoftmaxWithBias"
k_var = "kSoftmaxWithBias_" + nodename k_var = "kSoftmaxWithBias_" + nodename
code = """ code = """
KERNEL void %(kname)s (const ga_size M, const ga_size N, KERNEL void %(kname)s (const ga_size M, const ga_size N,
GLOBAL_MEM const %(type_x)s * x, const ga_size offset_x, const ga_ssize sx0, const ga_ssize sx1, GLOBAL_MEM const %(type_x)s * x, const ga_size offset_x, const ga_ssize sx0, const ga_ssize sx1,
GLOBAL_MEM const %(type_b)s * b, const ga_size offset_b, const ga_ssize sb0, GLOBAL_MEM const %(type_b)s * b, const ga_size offset_b, const ga_ssize sb0,
GLOBAL_MEM %(type_sm)s * sm, const ga_size offset_sm, const ga_ssize sm_s0, const ga_ssize sm_s1 GA_DECL_SHARED_PARAM(%(type_acc)s, buf)) GLOBAL_MEM %(type_sm)s * sm, const ga_size offset_sm, const ga_ssize sm_s0, const ga_ssize sm_s1 GA_DECL_SHARED_PARAM(%(type_acc)s, buf))
{ {
GA_DECL_SHARED_BODY(%(type_acc)s, buf); GA_DECL_SHARED_BODY(%(type_acc)s, buf);
LOCAL_MEM %(type_acc)s * buf2 = buf + N; LOCAL_MEM %(type_acc)s * buf2 = buf + N;
x = (GLOBAL_MEM const %(type_x)s *)(((GLOBAL_MEM char *)x)+offset_x); x = (GLOBAL_MEM const %(type_x)s *)(((GLOBAL_MEM char *)x)+offset_x);
b = (GLOBAL_MEM const %(type_b)s *)(((GLOBAL_MEM char *)b)+offset_b); b = (GLOBAL_MEM const %(type_b)s *)(((GLOBAL_MEM char *)b)+offset_b);
...@@ -959,7 +964,7 @@ class GpuSoftmaxWithBias(GpuKernelBase, Op): ...@@ -959,7 +964,7 @@ class GpuSoftmaxWithBias(GpuKernelBase, Op):
local_barrier(); local_barrier();
} }
} }
local_barrier(); local_barrier();
%(ctype)s row_max = buf[0]; %(ctype)s row_max = buf[0];
local_barrier(); local_barrier();
...@@ -987,7 +992,7 @@ class GpuSoftmaxWithBias(GpuKernelBase, Op): ...@@ -987,7 +992,7 @@ class GpuSoftmaxWithBias(GpuKernelBase, Op):
local_barrier(); local_barrier();
} }
} }
local_barrier(); local_barrier();
%(ctype)s row_sum = buf[0]; %(ctype)s row_sum = buf[0];
local_barrier(); local_barrier();
...@@ -1007,13 +1012,13 @@ class GpuSoftmaxWithBias(GpuKernelBase, Op): ...@@ -1007,13 +1012,13 @@ class GpuSoftmaxWithBias(GpuKernelBase, Op):
kname = "kSoftmaxWithBias_fixed_shared" kname = "kSoftmaxWithBias_fixed_shared"
k_var = "kSoftmaxWithBias_fixed_shared" + nodename k_var = "kSoftmaxWithBias_fixed_shared" + nodename
code = """ code = """
KERNEL void %(kname)s (const ga_size M, const ga_size N, KERNEL void %(kname)s (const ga_size M, const ga_size N,
GLOBAL_MEM const %(type_x)s * x, const ga_size offset_x, const ga_ssize sx0, const ga_ssize sx1, GLOBAL_MEM const %(type_x)s * x, const ga_size offset_x, const ga_ssize sx0, const ga_ssize sx1,
GLOBAL_MEM const %(type_b)s * b, const ga_size offset_b, const ga_ssize sb0, GLOBAL_MEM const %(type_b)s * b, const ga_size offset_b, const ga_ssize sb0,
GLOBAL_MEM %(type_sm)s * sm, const ga_size offset_sm, const ga_ssize sm_s0, const ga_ssize sm_s1 GA_DECL_SHARED_PARAM(%(type_acc)s, buf)) GLOBAL_MEM %(type_sm)s * sm, const ga_size offset_sm, const ga_ssize sm_s0, const ga_ssize sm_s1 GA_DECL_SHARED_PARAM(%(type_acc)s, buf))
{ {
GA_DECL_SHARED_BODY(%(type_acc)s, buf); GA_DECL_SHARED_BODY(%(type_acc)s, buf);
x = (GLOBAL_MEM const %(type_x)s *)(((GLOBAL_MEM char *)x)+offset_x); x = (GLOBAL_MEM const %(type_x)s *)(((GLOBAL_MEM char *)x)+offset_x);
b = (GLOBAL_MEM const %(type_b)s *)(((GLOBAL_MEM char *)b)+offset_b); b = (GLOBAL_MEM const %(type_b)s *)(((GLOBAL_MEM char *)b)+offset_b);
sm = (GLOBAL_MEM %(type_sm)s *)(((GLOBAL_MEM char *)sm)+offset_sm); sm = (GLOBAL_MEM %(type_sm)s *)(((GLOBAL_MEM char *)sm)+offset_sm);
...@@ -1044,7 +1049,7 @@ class GpuSoftmaxWithBias(GpuKernelBase, Op): ...@@ -1044,7 +1049,7 @@ class GpuSoftmaxWithBias(GpuKernelBase, Op):
local_barrier(); local_barrier();
} }
} }
local_barrier(); local_barrier();
%(ctype)s row_max = buf[0]; %(ctype)s row_max = buf[0];
local_barrier(); local_barrier();
...@@ -1072,7 +1077,7 @@ class GpuSoftmaxWithBias(GpuKernelBase, Op): ...@@ -1072,7 +1077,7 @@ class GpuSoftmaxWithBias(GpuKernelBase, Op):
local_barrier(); local_barrier();
} }
} }
local_barrier(); local_barrier();
%(ctype)s row_sum = buf[0]; %(ctype)s row_sum = buf[0];
local_barrier(); local_barrier();
...@@ -1088,4 +1093,5 @@ class GpuSoftmaxWithBias(GpuKernelBase, Op): ...@@ -1088,4 +1093,5 @@ class GpuSoftmaxWithBias(GpuKernelBase, Op):
flags=flags, objvar=k_var)) flags=flags, objvar=k_var))
return kernels return kernels
gpu_softmax_with_bias = GpuSoftmaxWithBias() gpu_softmax_with_bias = GpuSoftmaxWithBias()
Markdown 格式
0%
您即将添加 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
请 注册 或者 登录 后发表评论