提交 5e82e974 authored 作者: Frederic's avatar Frederic

Fix crash reported by Arjun Jain on the mailing list with gpu reduction 1111 and strided inputs.

上级 bc6dfee6
......@@ -1572,6 +1572,9 @@ class GpuCAReduce(GpuOp):
}
n_threads.z -= 1;
//Maximum for Fermi GPU on that dimensions.
n_threads.z = std::min(n_threads.z, (unsigned)64);
dim3 n_blocks(1,1,1);
%(makecall)s
}
......@@ -1602,7 +1605,7 @@ class GpuCAReduce(GpuOp):
""" % locals()
def c_code_cache_version_apply(self, node):
version = [7] # the version corresponding to the c code in this Op
version = [8] # the version corresponding to the c code in this Op
# now we insert versions for the ops on which we depend...
scalar_node = Apply(self.scalar_op,
......
......@@ -114,7 +114,7 @@ def test_careduce():
((4100,4,3,2),[1,3]),((4,4100,3,2),[1,3]),((4,3,4100,2),[1,3]),((4,3,2,4100),[1,3]),#0101
((4100,4,3,2),[0,2,3]),((4,4100,3,2),[0,2,3]),((4,3,4100,2),[0,2,3]),#((4,3,2,4100),[0,2,3]),#1011
((4100,4,3,2),[1,2,3]),((4,4100,3,2),[1,2,3]),((4,3,4100,2),[1,2,3]),((4,3,2,4100),[1,2,3]),#0111
((4100,2,3,4),[0,1,2,3]),((2,4100,3,4),[0,1,2,3]),((2,3,4100,4),[0,1,2,3]),((2,3,4,4100),[0,1,2,3]),#1111
((4100,2,3,4),[0,1,2,3]),((2,4100,3,4),[0,1,2,3]),((2,3,4100,4),[0,1,2,3]),((2,3,4,4100),[0,1,2,3]),((128,1,3,3), [0,1,2,3]),#1111
#test pattern implemented by reshape
......@@ -197,7 +197,9 @@ def test_careduce():
for shape, pattern in [#((5,),[0]),
((5,4),[0,1]),((5,4),[0]),
((5,4,3),[0]),((5,4,3),[0,1]),((5,4,3),[2]),((5,4,3),[0,1,2]),
((5,4,3,2),[0,1,2,3]), ((5,4,3,2),[0,2,3])]:
((5,4,3,2),[0,1,2,3]), ((5,4,3,2),[0,2,3]),
((128,1,3,3),[0,1,2,3]),
]:
op = careduce_op(scalar_op, axis=pattern)
pat = tensor_pattern_to_gpu_pattern(shape, pattern)
......@@ -229,7 +231,9 @@ def test_careduce():
((5,4),[0,1]),((5,4),[0]),
((5,4,3),[0]),((5,4,3),[0,1]),
((5,4,3),[2]),((5,4,3),[0,1,2]),
((5,4,3,2),[0,1,2,3]), ((5,4,3,2),[0,2,3])]:
((5,4,3,2),[0,1,2,3]), ((5,4,3,2),[0,2,3]),
((128,1,3,3),[0,1,2,3]),
]:
op = careduce_op(scalar_op, axis=pattern)
pat = tensor_pattern_to_gpu_pattern(shape, pattern)
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论