提交 2196f4a4 作者: goodfeli

Merge pull request #945 from nouiz/gpu_reduce_shape0

Gpu reduce shape0
...@@ -684,7 +684,10 @@ class GpuCAReduce(GpuOp): ...@@ -684,7 +684,10 @@ class GpuCAReduce(GpuOp):
# \begin bracket the reduction in a check that there is # \begin bracket the reduction in a check that there is
# actually work to do # actually work to do
print >> sio, """ print >> sio, """
if (CudaNdarray_SIZE(%(z)s)) if (CudaNdarray_SIZE(%(z)s) && ! CudaNdarray_SIZE(%(x)s)){
cudaMemset(%(z)s->devdata, 0, CudaNdarray_SIZE(%(z)s) * sizeof(float));
}
else if (CudaNdarray_SIZE(%(z)s))
{ {
""" % locals() """ % locals()
...@@ -1553,13 +1556,13 @@ class GpuCAReduce(GpuOp): ...@@ -1553,13 +1556,13 @@ class GpuCAReduce(GpuOp):
""" % locals() """ % locals()
def c_code_cache_version_apply(self, node): def c_code_cache_version_apply(self, node):
version = [5] # the version corresponding to the c code in this Op version = [6] # the version corresponding to the c code in this Op
# now we insert versions for the ops on which we depend... # now we insert versions for the ops on which we depend...
scalar_node = Apply(self.scalar_op, scalar_node = Apply(self.scalar_op,
[Scalar(dtype=input.type.dtype)() for input in node.inputs], [Scalar(dtype=input.type.dtype)() for input in node.inputs],
[Scalar(dtype=output.type.dtype)() for output in node.outputs]) [Scalar(dtype=output.type.dtype)() for output in node.outputs])
version.extend(self.scalar_op.c_code_cache_version(scalar_node)) version.extend(self.scalar_op.c_code_cache_version())
for i in node.inputs + node.outputs: for i in node.inputs + node.outputs:
version.extend(Scalar(dtype=i.type.dtype).c_code_cache_version()) version.extend(Scalar(dtype=i.type.dtype).c_code_cache_version())
if all(version): if all(version):
......
...@@ -56,7 +56,15 @@ def test_sum(): ...@@ -56,7 +56,15 @@ def test_sum():
TODO: test with broadcast TODO: test with broadcast
""" """
for shape, pattern in [((100,3,1300),[1]), for shape, pattern in [((1,1),(1,)),
((1,0),(1,)),
((0,1),(1,)),
((0,0),(1,)),
((0,0,0),(1,2)),
((0,0,0,0),(1,2,3)),
((2,1),(1,)),
((1,2),(1,)),
((100,3,1300),[1]),
((0,),[0]),((5,),[0]), ((0,),[0]),((5,),[0]),
((0,0),[0,1]),((1,0),[0,1]),((5,4),[0,1]),((33,31),[0,1]),((5,4),[1]),((5,4),[0]),#need something bigger then 32 for some opt test. ((0,0),[0,1]),((1,0),[0,1]),((5,4),[0,1]),((33,31),[0,1]),((5,4),[1]),((5,4),[0]),#need something bigger then 32 for some opt test.
((5,4,3),[0]),((5,4,3),[1]),((5,4,3),[0,1]),((5,4,3),[2]),((5,4,3),[1,2]),((5,4,3),[0,1,2]), ((5,4,3),[0]),((5,4,3),[1]),((5,4,3),[0,1]),((5,4,3),[2]),((5,4,3),[1,2]),((5,4,3),[0,1,2]),
...@@ -112,7 +120,7 @@ def test_sum(): ...@@ -112,7 +120,7 @@ def test_sum():
assert tcn.GpuCAReduce in [x.op.__class__ for x in f.maker.fgraph.toposort()] assert tcn.GpuCAReduce in [x.op.__class__ for x in f.maker.fgraph.toposort()]
assert T.Sum in [x.op.__class__ for x in f2.maker.fgraph.toposort()] assert T.Sum in [x.op.__class__ for x in f2.maker.fgraph.toposort()]
if val.size == 0: if val.size == 0:
assert f2(val) == f(val), ('shape', shape, 'pattern', pattern) assert _allclose(f2(val), f(val)), ('shape', shape, 'pattern', pattern)
else: else:
try: try:
#We raise the error threashold as we sum big matrix #We raise the error threashold as we sum big matrix
...@@ -275,16 +283,6 @@ def test_max(): ...@@ -275,16 +283,6 @@ def test_max():
except ValueError, e: except ValueError, e:
exc = e exc = e
f_caused_value_error = True f_caused_value_error = True
except RuntimeError:
if (shape, pattern) in [((1,0),(1,)),
((0,1),(1,)),
((0,0),(1,)),
((0,0,0),(1,2)),
((0,0,0,0),(1,2,3))]:
known_fail = True
continue
else:
raise
f2 = theano.function([a], b, mode=mode_without_gpu) f2 = theano.function([a], b, mode=mode_without_gpu)
try: try:
...@@ -372,7 +370,6 @@ def test_max(): ...@@ -372,7 +370,6 @@ def test_max():
'pattern', pattern, 'pattern', pattern,
sum([shape[i] for i in pattern])) sum([shape[i] for i in pattern]))
#test with broadcast #test with broadcast
for shape, pattern in [((5,),(0,)), for shape, pattern in [((5,),(0,)),
((5,4),(0,1)), ((5,4),(0,1)),
...@@ -417,9 +414,6 @@ def test_max(): ...@@ -417,9 +414,6 @@ def test_max():
'pattern', pattern, 'pattern', pattern,
sum([shape[i] for i in pattern])) sum([shape[i] for i in pattern]))
if known_fail:
raise KnownFailureTest("GpuCAReduce does not handle some shapes"
" with 0s in them correctly.")
def test_flatten(): def test_flatten():
x = cuda.fmatrix('x') x = cuda.fmatrix('x')
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论