提交 0478beb3 authored 作者: Ian Goodfellow's avatar Ian Goodfellow

added test of GpuCAReduce max

上级 b9d126e6
......@@ -187,6 +187,169 @@ def test_sum():
'pattern', pattern,
sum([shape[i] for i in pattern]))
def test_max():
"""
test GpuMax pattern 01, 011, 0111 (tensor.max pattern (1,), (1,2), (1,2,3) )
TODO: are others currently implemented by reshape?
"""
def tensor_pattern_to_gpu_pattern(shape, pattern):
gpu_pattern = [ 0 for elem in shape ]
for idx in pattern:
gpu_pattern[idx] = 1
gpu_pattern = tuple(gpu_pattern)
return gpu_pattern
for shape, pattern in [((100,3,1300),[1]),
((0,),[0]),((5,),[0]),
((0,0),[0,1]),((1,0),[0,1]),((5,4),[0,1]),((33,31),[0,1]),((5,4),[1]),((5,4),[0]),#need something bigger then 32 for some opt test.
((5,4,3),[0]),((5,4,3),[1]),((5,4,3),[0,1]),((5,4,3),[2]),((5,4,3),[1,2]),((5,4,3),[0,1,2]),
((0,0,0,0),[0,1,2,3]),
((5,4,3,20),[2,3]), ((5,4,3,2),[0,1,2,3]), ((5,4,3,2),[0,2,3]),((5,4,3,2),[1,2,3]),
((5,4,3,10,11),[1,2]),
((5,4,3,20),[2,3]), ((5,4,3,2),[0,1,2,3]), ((5,4,3,2),[0,2,3]),((5,4,3,2),[1,2,3]),
#test shape bigger then 4096 on each dimension to make sure that we work correctly when we don't have enough thread/block in each dimensions
((4100,3),[0]),((3,4101),[0]),#10
((1024,33),[0]),((33,1024),[0]),#10
((1025,33),[0]),((33,1025),[0]),#10
((4100,3),[1]),((3,4101),[1]),#01
((1024,33),[1]),((33,1024),[1]),#01
((1025,33),[1]),((33,1025),[1]),#01
((4100,3),[0,1]),((3,4101),[0,1]),#11
((1024,33),[0,1]),((33,1024),[0,1]),#01
((1025,33),[0,1]),((33,1025),[0,1]),#01
((4100,4,3),[0]),((5,4100,3),[0]),((5,4,4100),[0]),#100
((4100,4,3),[1]),((5,4100,3),[1]),((5,4,4100),[1]),#010
((4100,4,3),[2]),((5,4100,3),[2]),((5,4,4100),[2]),#001
((4100,4,3),[0,1]),((5,4100,3),[0,1]),((5,4,4100),[0,1]),#110
((4100,4,3),[1,2]),((5,4100,3),[1,2]),((5,4,4100),[1,2]),#011
#((4100,4,3),[0,2]),((5,4100,3),[0,2]),((5,4,4100),[0,2]),#101 ##not implemented
((4100,4,3),[0,1,2]),((5,4100,3),[0,1,2]),((5,4,4100),[0,1,2]),#111
((4100,4,3,2),[2,3]),((4,4100,3,2),[2,3]),((4,3,4100,2),[2,3]),((4,3,2,4100),[2,3]),#0011
((4100,4,3,2),[1,3]),((4,4100,3,2),[1,3]),((4,3,4100,2),[1,3]),((4,3,2,4100),[1,3]),#0101
((4100,4,3,2),[0,2,3]),((4,4100,3,2),[0,2,3]),((4,3,4100,2),[0,2,3]),#((4,3,2,4100),[0,2,3]),#1011
((4100,4,3,2),[1,2,3]),((4,4100,3,2),[1,2,3]),((4,3,4100,2),[1,2,3]),((4,3,2,4100),[1,2,3]),#0111
((4100,2,3,4),[0,1,2,3]),((2,4100,3,4),[0,1,2,3]),((2,3,4100,4),[0,1,2,3]),((2,3,4,4100),[0,1,2,3]),#1111
#test pattern implemented by reshape
((4100,4,3,2),[0]),((4,4100,3,2),[0]),((4,3,4100,2),[0]),((4,3,2,4100),[0]),#1000
((4100,4,3,2),[1]),((4,4100,3,2),[1]),((4,3,4100,2),[1]),((4,3,2,4100),[1]),#0100
((4100,4,3,2),[2]),((4,4100,3,2),[2]),((4,3,4100,2),[2]),((4,3,2,4100),[2]),#0010
((4100,4,3,2),[3]),((4,4100,3,2),[3]),((4,3,4100,2),[3]),((4,3,2,4100),[3]),#0001
((1100,2,3,4,5),[0,1,2,3,4]),((2,1100,3,4,5),[0,1,2,3,4]),((2,3,1100,4,5),[0,1,2,3,4]),((2,3,4,1100,5),[0,1,2,3,4]),((2,3,4,5,1100),[0,1,2,3,4]),#11111
]:
# Don't test patterns that aren't implemented for max yet
if tensor_pattern_to_gpu_pattern(shape, pattern) not in \
[ (0,1), (0,1,1), (0,1,1) ]:
continue
a = tensor.TensorType('float32', (False,) * len(shape))()
b = T.max(a, pattern)
val = numpy.random.rand(numpy.prod(shape)).reshape(shape)
# val = numpy.ones(shape)
# val = numpy.arange(numpy.prod(shape)).reshape(shape)
val = theano._asarray(val, dtype='float32')
f = theano.function([a], b, mode=mode_with_gpu)
f2 = theano.function([a], b, mode=mode_without_gpu)
assert tcn.GpuCAReduce in [x.op.__class__ for x in f.maker.fgraph.toposort()]
assert T.CAReduce in [x.op.__class__ for x in f2.maker.fgraph.toposort()]
if val.size == 0:
assert f2(val) == f(val), ('shape', shape, 'pattern', pattern)
else:
try:
#We raise the error threashold as we sum big matrix
#and this cause small rounding difference with some seed
#example in debug mode with unittests.rseed=9275
orig_rtol = theano.tensor.basic.float32_rtol
theano.tensor.basic.float32_rtol = 2e-5
assert _allclose(f2(val), f(val)), ('shape', shape,
'pattern', pattern,
sum([shape[i] for i in pattern]),
f2(val), f(val), val)
finally:
theano.tensor.basic.float32_rtol = orig_rtol
#test with dimshuffle
#we shuffle the 2 outer dims.
for shape, pattern in [#((5,),[0]),
((5,4),(0,1)),((5,4),[0]),
((5,4,3),[0]),((5,4,3),[0,1]),((5,4,3),[2]),((5,4,3),[0,1,2]),
((5,4,3,2),[0,1,2,3]), ((5,4,3,2),[0,2,3])]:
# Don't test patterns that aren't implemented for max yet
if tensor_pattern_to_gpu_pattern(shape, pattern) not in \
[ (0,1), (0,1,1), (0,1,1) ]:
continue
a = tensor.TensorType('float32', (False,) * len(shape))()
dim_pattern = range(len(shape))
dim_pattern[0] = 1
dim_pattern[1] = 0
a = a.dimshuffle(dim_pattern)
b = T.max(a, pattern)
val = numpy.random.rand(numpy.prod(shape)).reshape(shape)
# val = numpy.ones(shape)
# val = numpy.arange(numpy.prod(shape)).reshape(shape)
val = theano._asarray(val, dtype='float32')
f = theano.function([a], b, mode=mode_with_gpu)
f2 = theano.function([a], b, mode=mode_without_gpu)
assert tcn.GpuCAReduce in [x.op.__class__ for x in f.maker.fgraph.toposort()]
assert T.CAReduce in [x.op.__class__ for x in f2.maker.fgraph.toposort()]
assert _allclose(f2(val), f(val)), ('shape', shape,
'pattern', pattern,
sum([shape[i] for i in pattern]))
#test with broadcast
for shape, pattern in [((5,),(0,)),
((5,4),(0,1)),
((5,4),(0,)),
((5,4,3),(0,)),
((5,4,3),(0,1)),
((5,4,3),(2,)),
((5,4,3),(0,1,2)),
((5,4,3,2),(0,1,2,3)),
((5,4,3,2),(0,2,3))]:
# Don't test patterns that aren't implemented for max yet
if tensor_pattern_to_gpu_pattern(shape, pattern) not in \
[ (0,1), (0,1,1), (0,1,1) ]:
continue
shape = numpy.asarray(shape) * 2
a = tensor.TensorType('float32', (False,) * len(shape))()
a2 = tcn.CudaNdarrayType((False,) * len(shape))()
b = T.max(a, pattern)
b2 = T.max(a2, pattern)
val = numpy.random.rand(numpy.prod(shape)).reshape(shape)
# val = numpy.ones(shape)
# val = numpy.arange(numpy.prod(shape)).reshape(shape)
val = theano._asarray(val, dtype='float32')
val2 = cuda.CudaNdarray(val)
if len(shape) == 1:
val = val[::2]
val2 = val2[::2]
elif len(shape) == 2:
val = val[::2, ::2]
val2 = val2[::2, ::2]
elif len(shape) == 3:
val = val[::2, ::2, ::2]
val2 = val2[::2, ::2, ::2]
elif len(shape) == 4:
val = val[::2, ::2, ::2, ::2]
val2 = val2[::2, ::2, ::2, ::2]
f = theano.function([a], b, mode=mode_without_gpu)
f2 = theano.function([a2], b2, mode=mode_with_gpu)
assert tcn.GpuCAReduce in [x.op.__class__ for x in f2.maker.fgraph.toposort()]
assert T.CAReduce in [x.op.__class__ for x in f.maker.fgraph.toposort()]
assert _allclose(f2(val2), f(val)), ('shape', shape,
'pattern', pattern,
sum([shape[i] for i in pattern]))
def test_flatten():
x = cuda.fmatrix('x')
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论