提交 1ae40316 authored 作者: Frederic Bastien's avatar Frederic Bastien

Bugfix for GpuSum: the code crashed in some cases.

上级 0a690d51
......@@ -893,13 +893,10 @@ class GpuSum(Op):
std::min(CudaNdarray_HOST_DIMS(%(x)s)[0],
NUM_VECTOR_OP_THREADS_PER_BLOCK));
dim3 n_blocks(CudaNdarray_HOST_DIMS(%(x)s)[1]);
while (n_blocks.x * n_blocks.y <= NUM_VECTOR_OP_BLOCKS)
while (n_blocks.x * (n_blocks.y+1) <= NUM_VECTOR_OP_BLOCKS && n_blocks.y <= CudaNdarray_HOST_DIMS(%(x)s)[2])
{
if (n_blocks.y > CudaNdarray_HOST_DIMS(%(x)s)[2])
break;
n_blocks.y += 1;
}
n_blocks.y -= 1;
%(makecall)s
}
""" %locals()
......@@ -1095,7 +1092,7 @@ class GpuSum(Op):
""" %locals()
def c_code_cache_version(self):
return (13,)
return (14,)
def c_support_code_apply(self, node, nodename):
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论