提交 e4011c33 authored 作者: Arnaud Bergeron's avatar Arnaud Bergeron

Fix forgotten GpuCAReduceCuda

上级 118d9fb2
......@@ -1253,6 +1253,8 @@ class GpuCAReduceCuda(HideC, CAReduceDtype):
# nb_reduce<=warpSize
def _k_reduce_buf_multiple(self, z_pos, node, name, nb_reduce):
reduce_fct = self._assign_reduce(node, name, 'myresult', 'buf[i]', {}, False)
write_out = write_w(node.outputs[0].dtype)
return """
__syncthreads(); // some kernel do multiple reduction.
buf[threadNum] = myresult;
......@@ -1266,7 +1268,7 @@ class GpuCAReduceCuda(HideC, CAReduceDtype):
{
%(reduce_fct)s;
}
%(z_pos)s = myresult;
%(z_pos)s = %(write_out)s(myresult);
}
""" % locals()
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论