提交 2196f4a4 authored 作者: goodfeli

Merge pull request #945 from nouiz/gpu_reduce_shape0

Gpu reduce shape0
......@@ -684,7 +684,10 @@ class GpuCAReduce(GpuOp):
# \begin bracket the reduction in a check that there is
# actually work to do
print >> sio, """
if (CudaNdarray_SIZE(%(z)s))
if (CudaNdarray_SIZE(%(z)s) && ! CudaNdarray_SIZE(%(x)s)){
cudaMemset(%(z)s->devdata, 0, CudaNdarray_SIZE(%(z)s) * sizeof(float));
}
else if (CudaNdarray_SIZE(%(z)s))
{
""" % locals()
......@@ -1553,13 +1556,13 @@ class GpuCAReduce(GpuOp):
""" % locals()
def c_code_cache_version_apply(self, node):
version = [5] # the version corresponding to the c code in this Op
version = [6] # the version corresponding to the c code in this Op
# now we insert versions for the ops on which we depend...
scalar_node = Apply(self.scalar_op,
[Scalar(dtype=input.type.dtype)() for input in node.inputs],
[Scalar(dtype=output.type.dtype)() for output in node.outputs])
version.extend(self.scalar_op.c_code_cache_version(scalar_node))
version.extend(self.scalar_op.c_code_cache_version())
for i in node.inputs + node.outputs:
version.extend(Scalar(dtype=i.type.dtype).c_code_cache_version())
if all(version):
......
......@@ -56,7 +56,15 @@ def test_sum():
TODO: test with broadcast
"""
for shape, pattern in [((100,3,1300),[1]),
for shape, pattern in [((1,1),(1,)),
((1,0),(1,)),
((0,1),(1,)),
((0,0),(1,)),
((0,0,0),(1,2)),
((0,0,0,0),(1,2,3)),
((2,1),(1,)),
((1,2),(1,)),
((100,3,1300),[1]),
((0,),[0]),((5,),[0]),
((0,0),[0,1]),((1,0),[0,1]),((5,4),[0,1]),((33,31),[0,1]),((5,4),[1]),((5,4),[0]),#need something bigger then 32 for some opt test.
((5,4,3),[0]),((5,4,3),[1]),((5,4,3),[0,1]),((5,4,3),[2]),((5,4,3),[1,2]),((5,4,3),[0,1,2]),
......@@ -112,7 +120,7 @@ def test_sum():
assert tcn.GpuCAReduce in [x.op.__class__ for x in f.maker.fgraph.toposort()]
assert T.Sum in [x.op.__class__ for x in f2.maker.fgraph.toposort()]
if val.size == 0:
assert f2(val) == f(val), ('shape', shape, 'pattern', pattern)
assert _allclose(f2(val), f(val)), ('shape', shape, 'pattern', pattern)
else:
try:
#We raise the error threashold as we sum big matrix
......@@ -275,16 +283,6 @@ def test_max():
except ValueError, e:
exc = e
f_caused_value_error = True
except RuntimeError:
if (shape, pattern) in [((1,0),(1,)),
((0,1),(1,)),
((0,0),(1,)),
((0,0,0),(1,2)),
((0,0,0,0),(1,2,3))]:
known_fail = True
continue
else:
raise
f2 = theano.function([a], b, mode=mode_without_gpu)
try:
......@@ -372,7 +370,6 @@ def test_max():
'pattern', pattern,
sum([shape[i] for i in pattern]))
#test with broadcast
for shape, pattern in [((5,),(0,)),
((5,4),(0,1)),
......@@ -417,9 +414,6 @@ def test_max():
'pattern', pattern,
sum([shape[i] for i in pattern]))
if known_fail:
raise KnownFailureTest("GpuCAReduce does not handle some shapes"
" with 0s in them correctly.")
def test_flatten():
x = cuda.fmatrix('x')
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论