提交 de9cde80 作者: Frederic Bastien

Make GpuSum work correctly with a tensor of 0 elements.

上级 bbbe245d
......@@ -684,6 +684,9 @@ class GpuSum(Op):
def c_code_reduce_ccontig(self, sio, node, name, x, z, fail):
print >> sio, """
{
if(CudaNdarray_SIZE(%(x)s)==0){
cudaMemset(CudaNdarray_DEV_DATA(%(z)s),0,sizeof(float));
}else{
int verbose = 0;
dim3 n_threads(
std::min(CudaNdarray_SIZE(%(x)s),
......@@ -710,6 +713,7 @@ class GpuSum(Op):
n_threads.z);
%(fail)s;
}
}
}
""" %locals()
......@@ -1091,7 +1095,7 @@ class GpuSum(Op):
""" %locals()
def c_code_cache_version(self):
return (12,)
return (13,)
def c_support_code_apply(self, node, nodename):
......
......@@ -34,8 +34,7 @@ def test_sum():
test sum pattern 1, 11, 10, 01, 100, 110, 011, 001, 111, 0011, 0111, 1011, 1111
TODO: test with broadcast
"""
for shape, pattern in [((5,),[0]),
for shape, pattern in [((0,),[0]),((5,),[0]),
((5,4),[0,1]),((33,31),[0,1]),((5,4),[1]),((5,4),[0]),#need something bigger then 32 for some opt test.
((5,4,3),[0]),((5,4,3),[0,1]),((5,4,3),[2]),((5,4,3),[1,2]),((5,4,3),[0,1,2]),
((5,4,3,20),[2,3]), ((5,4,3,2),[0,1,2,3]), ((5,4,3,2),[0,2,3]),((5,4,3,2),[1,2,3])]:
......@@ -49,7 +48,10 @@ def test_sum():
f2 = theano.function([a],b, mode=mode_without_gpu)
assert tcn.GpuSum in [x.op.__class__ for x in f.maker.env.toposort()]
assert T.Sum in [x.op.__class__ for x in f2.maker.env.toposort()]
assert numpy.allclose(f2(val),f(val))
if val.size==0:
assert f2(val)==f(val)
else:
assert numpy.allclose(f2(val),f(val))
#test with dimshuffle
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论