提交 da6f3836 authored 作者: Frederic's avatar Frederic

Fix crash as we used too much block in some cases.

上级 bb20af01
...@@ -1388,7 +1388,7 @@ class GpuCAReduce(GpuOp): ...@@ -1388,7 +1388,7 @@ class GpuCAReduce(GpuOp):
dim3 n_threads( dim3 n_threads(
std::min(CudaNdarray_HOST_DIMS(%(x)s)[0], std::min(CudaNdarray_HOST_DIMS(%(x)s)[0],
NUM_VECTOR_OP_THREADS_PER_BLOCK)); NUM_VECTOR_OP_THREADS_PER_BLOCK));
dim3 n_blocks(CudaNdarray_HOST_DIMS(%(x)s)[1]); dim3 n_blocks(std::min(CudaNdarray_HOST_DIMS(%(x)s)[1], NUM_VECTOR_OP_BLOCKS));
while (n_blocks.x * (n_blocks.y+1) <= NUM_VECTOR_OP_BLOCKS && n_blocks.y <= CudaNdarray_HOST_DIMS(%(x)s)[2]) while (n_blocks.x * (n_blocks.y+1) <= NUM_VECTOR_OP_BLOCKS && n_blocks.y <= CudaNdarray_HOST_DIMS(%(x)s)[2])
{ {
n_blocks.y += 1; n_blocks.y += 1;
...@@ -1565,7 +1565,7 @@ class GpuCAReduce(GpuOp): ...@@ -1565,7 +1565,7 @@ class GpuCAReduce(GpuOp):
""" % locals() """ % locals()
def c_code_cache_version_apply(self, node): def c_code_cache_version_apply(self, node):
version = [6] # the version corresponding to the c code in this Op version = [7] # the version corresponding to the c code in this Op
# now we insert versions for the ops on which we depend... # now we insert versions for the ops on which we depend...
scalar_node = Apply(self.scalar_op, scalar_node = Apply(self.scalar_op,
......
...@@ -95,7 +95,7 @@ def test_careduce(): ...@@ -95,7 +95,7 @@ def test_careduce():
((1024,33),[0,1]),((33,1024),[0,1]),#01 ((1024,33),[0,1]),((33,1024),[0,1]),#01
((1025,33),[0,1]),((33,1025),[0,1]),#01 ((1025,33),[0,1]),((33,1025),[0,1]),#01
((4100,4,3),[0]),((5,4100,3),[0]),((5,4,4100),[0]),#100 ((4100,4,3),[0]),((5,4100,3),[0]),((5,4,4100),[0]), ((3,65536,1), [0]),#100
((4100,4,3),[1]),((5,4100,3),[1]),((5,4,4100),[1]),#010 ((4100,4,3),[1]),((5,4100,3),[1]),((5,4,4100),[1]),#010
((4100,4,3),[2]),((5,4100,3),[2]),((5,4,4100),[2]),#001 ((4100,4,3),[2]),((5,4100,3),[2]),((5,4,4100),[2]),#001
((4100,4,3),[0,1]),((5,4100,3),[0,1]),((5,4,4100),[0,1]),#110 ((4100,4,3),[0,1]),((5,4100,3),[0,1]),((5,4,4100),[0,1]),#110
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论