提交 e4011c33 作者: Arnaud Bergeron

Fix forgotten GpuCAReduceCuda

上级 118d9fb2
...@@ -1253,6 +1253,8 @@ class GpuCAReduceCuda(HideC, CAReduceDtype): ...@@ -1253,6 +1253,8 @@ class GpuCAReduceCuda(HideC, CAReduceDtype):
# nb_reduce<=warpSize # nb_reduce<=warpSize
def _k_reduce_buf_multiple(self, z_pos, node, name, nb_reduce): def _k_reduce_buf_multiple(self, z_pos, node, name, nb_reduce):
reduce_fct = self._assign_reduce(node, name, 'myresult', 'buf[i]', {}, False) reduce_fct = self._assign_reduce(node, name, 'myresult', 'buf[i]', {}, False)
write_out = write_w(node.outputs[0].dtype)
return """ return """
__syncthreads(); // some kernel do multiple reduction. __syncthreads(); // some kernel do multiple reduction.
buf[threadNum] = myresult; buf[threadNum] = myresult;
...@@ -1266,7 +1268,7 @@ class GpuCAReduceCuda(HideC, CAReduceDtype): ...@@ -1266,7 +1268,7 @@ class GpuCAReduceCuda(HideC, CAReduceDtype):
{ {
%(reduce_fct)s; %(reduce_fct)s;
} }
%(z_pos)s = myresult; %(z_pos)s = %(write_out)s(myresult);
} }
""" % locals() """ % locals()
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论