提交 1ae40316 作者: Frederic Bastien

Bugfix for GpuSum: the code crashed in some cases.

上级 0a690d51
...@@ -893,13 +893,10 @@ class GpuSum(Op): ...@@ -893,13 +893,10 @@ class GpuSum(Op):
std::min(CudaNdarray_HOST_DIMS(%(x)s)[0], std::min(CudaNdarray_HOST_DIMS(%(x)s)[0],
NUM_VECTOR_OP_THREADS_PER_BLOCK)); NUM_VECTOR_OP_THREADS_PER_BLOCK));
dim3 n_blocks(CudaNdarray_HOST_DIMS(%(x)s)[1]); dim3 n_blocks(CudaNdarray_HOST_DIMS(%(x)s)[1]);
while (n_blocks.x * n_blocks.y <= NUM_VECTOR_OP_BLOCKS) while (n_blocks.x * (n_blocks.y+1) <= NUM_VECTOR_OP_BLOCKS && n_blocks.y <= CudaNdarray_HOST_DIMS(%(x)s)[2])
{ {
if (n_blocks.y > CudaNdarray_HOST_DIMS(%(x)s)[2])
break;
n_blocks.y += 1; n_blocks.y += 1;
} }
n_blocks.y -= 1;
%(makecall)s %(makecall)s
} }
""" %locals() """ %locals()
...@@ -1095,7 +1092,7 @@ class GpuSum(Op): ...@@ -1095,7 +1092,7 @@ class GpuSum(Op):
""" %locals() """ %locals()
def c_code_cache_version(self): def c_code_cache_version(self):
return (13,) return (14,)
def c_support_code_apply(self, node, nodename): def c_support_code_apply(self, node, nodename):
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论