提交 d521a6c8 作者: Frédéric Bastien

Merge pull request #3982 from abergeron/fix_nnet

Fix GpuCrossentropySoftmaxArgmax1HotWithBias to work properly for float64.
...@@ -108,7 +108,7 @@ class GpuCrossentropySoftmaxArgmax1HotWithBias(GpuKernelBase, Op): ...@@ -108,7 +108,7 @@ class GpuCrossentropySoftmaxArgmax1HotWithBias(GpuKernelBase, Op):
for (int j = threadIdx.x; j < N; j += blockDim.x) for (int j = threadIdx.x; j < N; j += blockDim.x)
{ {
float row_ij = %(load_x)s(x[j * xs1]) + %(load_b)s(b[j * bs0]); %(work_x)s row_ij = %(load_x)s(x[j * xs1]) + %(load_b)s(b[j * bs0]);
per_thread_row_max_j = (row_ij > per_thread_row_max) ? j : per_thread_row_max_j; per_thread_row_max_j = (row_ij > per_thread_row_max) ? j : per_thread_row_max_j;
per_thread_row_max = fmax%(f)s(row_ij, per_thread_row_max); per_thread_row_max = fmax%(f)s(row_ij, per_thread_row_max);
} }
...@@ -121,7 +121,7 @@ class GpuCrossentropySoftmaxArgmax1HotWithBias(GpuKernelBase, Op): ...@@ -121,7 +121,7 @@ class GpuCrossentropySoftmaxArgmax1HotWithBias(GpuKernelBase, Op):
row_max_threadIdx = 0; row_max_threadIdx = 0;
for (int j = 0; j < blockDim.x; j++) for (int j = 0; j < blockDim.x; j++)
{ {
float per_thread_max = per_thread_values[j]; %(work_x)s per_thread_max = per_thread_values[j];
row_max_threadIdx = (per_thread_max > row_max) ? j : row_max_threadIdx; row_max_threadIdx = (per_thread_max > row_max) ? j : row_max_threadIdx;
row_max = fmax%(f)s(per_thread_max, row_max); row_max = fmax%(f)s(per_thread_max, row_max);
} }
...@@ -340,7 +340,7 @@ class GpuCrossentropySoftmaxArgmax1HotWithBias(GpuKernelBase, Op): ...@@ -340,7 +340,7 @@ class GpuCrossentropySoftmaxArgmax1HotWithBias(GpuKernelBase, Op):
return sio.getvalue() return sio.getvalue()
def c_code_cache_version(self): def c_code_cache_version(self):
return (9,) return (10,)
gpu_crossentropy_softmax_argmax_1hot_with_bias = GpuCrossentropySoftmaxArgmax1HotWithBias() gpu_crossentropy_softmax_argmax_1hot_with_bias = GpuCrossentropySoftmaxArgmax1HotWithBias()
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论