提交 de9cde80 作者: Frederic Bastien

Make GpuSum work correctly with tensors of 0 elements.

上级 bbbe245d
...@@ -684,6 +684,9 @@ class GpuSum(Op): ...@@ -684,6 +684,9 @@ class GpuSum(Op):
def c_code_reduce_ccontig(self, sio, node, name, x, z, fail): def c_code_reduce_ccontig(self, sio, node, name, x, z, fail):
print >> sio, """ print >> sio, """
{ {
if(CudaNdarray_SIZE(%(x)s)==0){
cudaMemset(CudaNdarray_DEV_DATA(%(z)s),0,sizeof(float));
}else{
int verbose = 0; int verbose = 0;
dim3 n_threads( dim3 n_threads(
std::min(CudaNdarray_SIZE(%(x)s), std::min(CudaNdarray_SIZE(%(x)s),
...@@ -710,6 +713,7 @@ class GpuSum(Op): ...@@ -710,6 +713,7 @@ class GpuSum(Op):
n_threads.z); n_threads.z);
%(fail)s; %(fail)s;
} }
}
} }
""" %locals() """ %locals()
...@@ -1091,7 +1095,7 @@ class GpuSum(Op): ...@@ -1091,7 +1095,7 @@ class GpuSum(Op):
""" %locals() """ %locals()
def c_code_cache_version(self): def c_code_cache_version(self):
return (12,) return (13,)
def c_support_code_apply(self, node, nodename): def c_support_code_apply(self, node, nodename):
......
...@@ -34,8 +34,7 @@ def test_sum(): ...@@ -34,8 +34,7 @@ def test_sum():
test sum pattern 1, 11, 10, 01, 100, 110, 011, 001, 111, 0011, 0111, 1011, 1111 test sum pattern 1, 11, 10, 01, 100, 110, 011, 001, 111, 0011, 0111, 1011, 1111
TODO: test with broadcast TODO: test with broadcast
""" """
for shape, pattern in [((0,),[0]),((5,),[0]),
for shape, pattern in [((5,),[0]),
((5,4),[0,1]),((33,31),[0,1]),((5,4),[1]),((5,4),[0]),#need something bigger then 32 for some opt test. ((5,4),[0,1]),((33,31),[0,1]),((5,4),[1]),((5,4),[0]),#need something bigger then 32 for some opt test.
((5,4,3),[0]),((5,4,3),[0,1]),((5,4,3),[2]),((5,4,3),[1,2]),((5,4,3),[0,1,2]), ((5,4,3),[0]),((5,4,3),[0,1]),((5,4,3),[2]),((5,4,3),[1,2]),((5,4,3),[0,1,2]),
((5,4,3,20),[2,3]), ((5,4,3,2),[0,1,2,3]), ((5,4,3,2),[0,2,3]),((5,4,3,2),[1,2,3])]: ((5,4,3,20),[2,3]), ((5,4,3,2),[0,1,2,3]), ((5,4,3,2),[0,2,3]),((5,4,3,2),[1,2,3])]:
...@@ -49,7 +48,10 @@ def test_sum(): ...@@ -49,7 +48,10 @@ def test_sum():
f2 = theano.function([a],b, mode=mode_without_gpu) f2 = theano.function([a],b, mode=mode_without_gpu)
assert tcn.GpuSum in [x.op.__class__ for x in f.maker.env.toposort()] assert tcn.GpuSum in [x.op.__class__ for x in f.maker.env.toposort()]
assert T.Sum in [x.op.__class__ for x in f2.maker.env.toposort()] assert T.Sum in [x.op.__class__ for x in f2.maker.env.toposort()]
assert numpy.allclose(f2(val),f(val)) if val.size==0:
assert f2(val)==f(val)
else:
assert numpy.allclose(f2(val),f(val))
#test with dimshuffle #test with dimshuffle
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论