提交 330f40aa authored 作者: Frederic's avatar Frederic

merged the test for gpumax and gpusum.

上级 e0516eef
...@@ -42,7 +42,15 @@ def tes_use(): ...@@ -42,7 +42,15 @@ def tes_use():
tcn.use() tcn.use()
def test_sum(): def tensor_pattern_to_gpu_pattern(shape, pattern):
gpu_pattern = [0 for elem in shape]
for idx in pattern:
gpu_pattern[idx] = 1
gpu_pattern = tuple(gpu_pattern)
return gpu_pattern
def test_careduce():
""" """
test sum pattern 1, 11, 10, 01, 001, 010, 100, 110, 011, 111, test sum pattern 1, 11, 10, 01, 001, 010, 100, 110, 011, 111,
0011, 0101, 0111, 1011, 1111 0011, 0101, 0111, 1011, 1111
...@@ -56,363 +64,198 @@ def test_sum(): ...@@ -56,363 +64,198 @@ def test_sum():
TODO: test with broadcast TODO: test with broadcast
""" """
for shape, pattern in [((1,1),(1,)), for scalar_op in [theano.scalar.add, theano.scalar.maximum]:
((1,0),(1,)), for shape, pattern in [((1,1),(1,)),
((0,1),(1,)), ((1,0),(1,)),
((0,0),(1,)), ((0,1),(1,)),
((0,0,0),(1,2)), ((0,0),(1,)),
((0,0,0,0),(1,2,3)), ((0,0,0),(1,2)),
((2,1),(1,)), ((0,0,0,0),(1,2,3)),
((1,2),(1,)), ((2,1),(1,)),
((100,3,1300),[1]), ((1,2),(1,)),
((0,),[0]),((5,),[0]), ((100,3,1300),[1]),
((0,0),[0,1]),((1,0),[0,1]),((5,4),[0,1]),((33,31),[0,1]),((5,4),[1]),((5,4),[0]),#need something bigger then 32 for some opt test. ((0,),[0]),((5,),[0]),
((5,4,3),[0]),((5,4,3),[1]),((5,4,3),[0,1]),((5,4,3),[2]),((5,4,3),[1,2]),((5,4,3),[0,1,2]), ((0,0),[0,1]),((1,0),[0,1]),((5,4),[0,1]),((33,31),[0,1]),((5,4),[1]),((5,4),[0]),#need something bigger then 32 for some opt test.
((0,0,0,0),[0,1,2,3]), ((5,4,3),[0]),((5,4,3),[1]),((5,4,3),[0,1]),((5,4,3),[2]),((5,4,3),[1,2]),((5,4,3),[0,1,2]),
((5,4,3,20),[2,3]), ((5,4,3,2),[0,1,2,3]), ((5,4,3,2),[0,2,3]),((5,4,3,2),[1,2,3]), ((0,0,0,0),[0,1,2,3]),
((5,4,3,10,11),[1,2]), ((5,4,3,20),[2,3]), ((5,4,3,2),[0,1,2,3]), ((5,4,3,2),[0,2,3]),((5,4,3,2),[1,2,3]),
((5,4,3,20),[2,3]), ((5,4,3,2),[0,1,2,3]), ((5,4,3,2),[0,2,3]),((5,4,3,2),[1,2,3]), ((5,4,3,10,11),[1,2]),
((5,4,3,20),[2,3]), ((5,4,3,2),[0,1,2,3]), ((5,4,3,2),[0,2,3]),((5,4,3,2),[1,2,3]),
#test shape bigger then 4096 on each dimension to make sure that we work correctly when we don't have enough thread/block in each dimensions
((4100,3),[0]),((3,4101),[0]),#10 #test shape bigger then 4096 on each dimension to make sure that we work correctly when we don't have enough thread/block in each dimensions
((1024,33),[0]),((33,1024),[0]),#10 ((4100,3),[0]),((3,4101),[0]),#10
((1025,33),[0]),((33,1025),[0]),#10 ((1024,33),[0]),((33,1024),[0]),#10
((1025,33),[0]),((33,1025),[0]),#10
((4100,3),[1]),((3,4101),[1]),#01
((1024,33),[1]),((33,1024),[1]),#01 ((4100,3),[1]),((3,4101),[1]),#01
((1025,33),[1]),((33,1025),[1]),#01 ((1024,33),[1]),((33,1024),[1]),#01
((1025,33),[1]),((33,1025),[1]),#01
((4100,3),[0,1]),((3,4101),[0,1]),#11
((1024,33),[0,1]),((33,1024),[0,1]),#01 ((4100,3),[0,1]),((3,4101),[0,1]),#11
((1025,33),[0,1]),((33,1025),[0,1]),#01 ((1024,33),[0,1]),((33,1024),[0,1]),#01
((1025,33),[0,1]),((33,1025),[0,1]),#01
((4100,4,3),[0]),((5,4100,3),[0]),((5,4,4100),[0]),#100
((4100,4,3),[1]),((5,4100,3),[1]),((5,4,4100),[1]),#010 ((4100,4,3),[0]),((5,4100,3),[0]),((5,4,4100),[0]),#100
((4100,4,3),[2]),((5,4100,3),[2]),((5,4,4100),[2]),#001 ((4100,4,3),[1]),((5,4100,3),[1]),((5,4,4100),[1]),#010
((4100,4,3),[0,1]),((5,4100,3),[0,1]),((5,4,4100),[0,1]),#110 ((4100,4,3),[2]),((5,4100,3),[2]),((5,4,4100),[2]),#001
((4100,4,3),[1,2]),((5,4100,3),[1,2]),((5,4,4100),[1,2]),#011 ((4100,4,3),[0,1]),((5,4100,3),[0,1]),((5,4,4100),[0,1]),#110
#((4100,4,3),[0,2]),((5,4100,3),[0,2]),((5,4,4100),[0,2]),#101 ##not implemented ((4100,4,3),[1,2]),((5,4100,3),[1,2]),((5,4,4100),[1,2]),#011
((4100,4,3),[0,1,2]),((5,4100,3),[0,1,2]),((5,4,4100),[0,1,2]),#111 #((4100,4,3),[0,2]),((5,4100,3),[0,2]),((5,4,4100),[0,2]),#101 ##not implemented
((4100,4,3),[0,1,2]),((5,4100,3),[0,1,2]),((5,4,4100),[0,1,2]),#111
((4100,4,3,2),[2,3]),((4,4100,3,2),[2,3]),((4,3,4100,2),[2,3]),((4,3,2,4100),[2,3]),#0011
((4100,4,3,2),[1,3]),((4,4100,3,2),[1,3]),((4,3,4100,2),[1,3]),((4,3,2,4100),[1,3]),#0101 ((4100,4,3,2),[2,3]),((4,4100,3,2),[2,3]),((4,3,4100,2),[2,3]),((4,3,2,4100),[2,3]),#0011
((4100,4,3,2),[0,2,3]),((4,4100,3,2),[0,2,3]),((4,3,4100,2),[0,2,3]),#((4,3,2,4100),[0,2,3]),#1011 ((4100,4,3,2),[1,3]),((4,4100,3,2),[1,3]),((4,3,4100,2),[1,3]),((4,3,2,4100),[1,3]),#0101
((4100,4,3,2),[1,2,3]),((4,4100,3,2),[1,2,3]),((4,3,4100,2),[1,2,3]),((4,3,2,4100),[1,2,3]),#0111 ((4100,4,3,2),[0,2,3]),((4,4100,3,2),[0,2,3]),((4,3,4100,2),[0,2,3]),#((4,3,2,4100),[0,2,3]),#1011
((4100,2,3,4),[0,1,2,3]),((2,4100,3,4),[0,1,2,3]),((2,3,4100,4),[0,1,2,3]),((2,3,4,4100),[0,1,2,3]),#1111 ((4100,4,3,2),[1,2,3]),((4,4100,3,2),[1,2,3]),((4,3,4100,2),[1,2,3]),((4,3,2,4100),[1,2,3]),#0111
((4100,2,3,4),[0,1,2,3]),((2,4100,3,4),[0,1,2,3]),((2,3,4100,4),[0,1,2,3]),((2,3,4,4100),[0,1,2,3]),#1111
#test pattern implemented by reshape
((4100,4,3,2),[0]),((4,4100,3,2),[0]),((4,3,4100,2),[0]),((4,3,2,4100),[0]),#1000 #test pattern implemented by reshape
((4100,4,3,2),[1]),((4,4100,3,2),[1]),((4,3,4100,2),[1]),((4,3,2,4100),[1]),#0100 ((4100,4,3,2),[0]),((4,4100,3,2),[0]),((4,3,4100,2),[0]),((4,3,2,4100),[0]),#1000
((4100,4,3,2),[2]),((4,4100,3,2),[2]),((4,3,4100,2),[2]),((4,3,2,4100),[2]),#0010 ((4100,4,3,2),[1]),((4,4100,3,2),[1]),((4,3,4100,2),[1]),((4,3,2,4100),[1]),#0100
((4100,4,3,2),[3]),((4,4100,3,2),[3]),((4,3,4100,2),[3]),((4,3,2,4100),[3]),#0001 ((4100,4,3,2),[2]),((4,4100,3,2),[2]),((4,3,4100,2),[2]),((4,3,2,4100),[2]),#0010
((1100,2,3,4,5),[0,1,2,3,4]),((2,1100,3,4,5),[0,1,2,3,4]),((2,3,1100,4,5),[0,1,2,3,4]),((2,3,4,1100,5),[0,1,2,3,4]),((2,3,4,5,1100),[0,1,2,3,4]),#11111 ((4100,4,3,2),[3]),((4,4100,3,2),[3]),((4,3,4100,2),[3]),((4,3,2,4100),[3]),#0001
((1100,2,3,4,5),[0,1,2,3,4]),((2,1100,3,4,5),[0,1,2,3,4]),((2,3,1100,4,5),[0,1,2,3,4]),((2,3,4,1100,5),[0,1,2,3,4]),((2,3,4,5,1100),[0,1,2,3,4]),#11111
]:
a = tensor.TensorType('float32', (False,) * len(shape))() ]:
b = T.Sum(pattern)(a)
val = numpy.random.rand(numpy.prod(shape)).reshape(shape) op = tensor.CAReduce(scalar_op, axis=pattern)
# val = numpy.ones(shape) pat = tensor_pattern_to_gpu_pattern(shape, pattern)
# val = numpy.arange(numpy.prod(shape)).reshape(shape) #GpuCAReduce{maximum} support only those patterns
val = theano._asarray(val, dtype='float32') if scalar_op is theano.scalar.maximum and pat not in [
f = theano.function([a], b, mode=mode_with_gpu) (0, 1), (0, 1, 1), (0, 1, 1)]:
f2 = theano.function([a], b, mode=mode_without_gpu) continue
assert tcn.GpuCAReduce in [x.op.__class__ for x in f.maker.fgraph.toposort()]
assert T.Sum in [x.op.__class__ for x in f2.maker.fgraph.toposort()]
if val.size == 0:
assert _allclose(f2(val), f(val)), ('shape', shape, 'pattern', pattern)
else:
try:
#We raise the error threashold as we sum big matrix
#and this cause small rounding difference with some seed
#example in debug mode with unittests.rseed=9275
orig_rtol = theano.tensor.basic.float32_rtol
theano.tensor.basic.float32_rtol = 2e-5
assert _allclose(f2(val), f(val)), ('shape', shape,
'pattern', pattern,
sum([shape[i] for i in pattern]),
f2(val), f(val), val)
finally:
theano.tensor.basic.float32_rtol = orig_rtol
#test with dimshuffle
#we shuffle the 2 outer dims.
for shape, pattern in [#((5,),[0]),
((5,4),[0,1]),((5,4),[0]),
((5,4,3),[0]),((5,4,3),[0,1]),((5,4,3),[2]),((5,4,3),[0,1,2]),
((5,4,3,2),[0,1,2,3]), ((5,4,3,2),[0,2,3])]:
a = tensor.TensorType('float32', (False,) * len(shape))()
dim_pattern = range(len(shape))
dim_pattern[0] = 1
dim_pattern[1] = 0
a = a.dimshuffle(dim_pattern)
b = T.Sum(pattern)(a)
val = numpy.random.rand(numpy.prod(shape)).reshape(shape)
# val = numpy.ones(shape)
# val = numpy.arange(numpy.prod(shape)).reshape(shape)
val = theano._asarray(val, dtype='float32')
f = theano.function([a], b, mode=mode_with_gpu)
f2 = theano.function([a], b, mode=mode_without_gpu)
assert tcn.GpuCAReduce in [x.op.__class__ for x in f.maker.fgraph.toposort()]
assert T.Sum in [x.op.__class__ for x in f2.maker.fgraph.toposort()]
assert _allclose(f2(val), f(val)), ('shape', shape,
'pattern', pattern,
sum([shape[i] for i in pattern]))
#test with broadcast
for shape, pattern in [((5,),[0]),
((5,4),[0,1]),((5,4),[0]),
((5,4,3),[0]),((5,4,3),[0,1]),((5,4,3),[2]),((5,4,3),[0,1,2]),
((5,4,3,2),[0,1,2,3]), ((5,4,3,2),[0,2,3])]:
shape = numpy.asarray(shape) * 2
a = tensor.TensorType('float32', (False,) * len(shape))()
a2 = tcn.CudaNdarrayType((False,) * len(shape))()
b = T.Sum(pattern)(a)
b2 = T.Sum(pattern)(a2)
val = numpy.random.rand(numpy.prod(shape)).reshape(shape)
# val = numpy.ones(shape)
# val = numpy.arange(numpy.prod(shape)).reshape(shape)
val = theano._asarray(val, dtype='float32')
val2 = cuda.CudaNdarray(val)
if len(shape) == 1:
val = val[::2]
val2 = val2[::2]
elif len(shape) == 2:
val = val[::2, ::2]
val2 = val2[::2, ::2]
elif len(shape) == 3:
val = val[::2, ::2, ::2]
val2 = val2[::2, ::2, ::2]
elif len(shape) == 4:
val = val[::2, ::2, ::2, ::2]
val2 = val2[::2, ::2, ::2, ::2]
f = theano.function([a], b, mode=mode_without_gpu)
f2 = theano.function([a2], b2, mode=mode_with_gpu)
assert tcn.GpuCAReduce in [x.op.__class__ for x in f2.maker.fgraph.toposort()]
assert T.Sum in [x.op.__class__ for x in f.maker.fgraph.toposort()]
assert _allclose(f2(val2), f(val)), ('shape', shape,
'pattern', pattern,
sum([shape[i] for i in pattern]))
def test_max():
"""
test GpuMax pattern 01, 011, 0111 (tensor.max pattern (1,), (1,2), (1,2,3) )
TODO: are others currently implemented by reshape? a = tensor.TensorType('float32', (False,) * len(shape))()
""" b = op(a)
val = numpy.random.rand(numpy.prod(shape)).reshape(shape)
# val = numpy.ones(shape)
# val = numpy.arange(numpy.prod(shape)).reshape(shape)
val = theano._asarray(val, dtype='float32')
f = theano.function([a], b, mode=mode_with_gpu)
f2 = theano.function([a], b, mode=mode_without_gpu)
assert tcn.GpuCAReduce in [x.op.__class__
for x in f.maker.fgraph.toposort()]
assert op.__class__ in [x.op.__class__
for x in f2.maker.fgraph.toposort()]
f_caused_value_error = False
try:
f_out = f(val)
except ValueError, e:
exc = e
f_caused_value_error = True
def tensor_pattern_to_gpu_pattern(shape, pattern):
gpu_pattern = [ 0 for elem in shape ]
for idx in pattern:
gpu_pattern[idx] = 1
gpu_pattern = tuple(gpu_pattern)
return gpu_pattern
known_fail = False
for shape, pattern in [((1,1),(1,)),
((1,0),(1,)),
((0,1),(1,)),
((0,0),(1,)),
((0,0,0),(1,2)),
((0,0,0,0),(1,2,3)),
((2,1),(1,)),
((1,2),(1,)),
((100,3,1300),[1]),
((0,),[0]),((5,),[0]),
((0,0),[0,1]),((1,0),[0,1]),((5,4),[0,1]),((33,31),[0,1]),((5,4),[1]),((5,4),[0]),#need something bigger then 32 for some opt test.
((5,4,3),[0]),((5,4,3),[1]),((5,4,3),[0,1]),((5,4,3),[2]),((5,4,3),[1,2]),((5,4,3),[0,1,2]),
((0,0,0,0),[0,1,2,3]),
((5,4,3,20),[2,3]), ((5,4,3,2),[0,1,2,3]), ((5,4,3,2),[0,2,3]),((5,4,3,2),[1,2,3]),
((5,4,3,10,11),[1,2]),
((5,4,3,20),[2,3]), ((5,4,3,2),[0,1,2,3]), ((5,4,3,2),[0,2,3]),((5,4,3,2),[1,2,3]),
#test shape bigger then 4096 on each dimension to make sure that we work correctly when we don't have enough thread/block in each dimensions
((4100,3),[0]),((3,4101),[0]),#10
((1024,33),[0]),((33,1024),[0]),#10
((1025,33),[0]),((33,1025),[0]),#10
((4100,3),[1]),((3,4101),[1]),#01
((1024,33),[1]),((33,1024),[1]),#01
((1025,33),[1]),((33,1025),[1]),#01
((4100,3),[0,1]),((3,4101),[0,1]),#11
((1024,33),[0,1]),((33,1024),[0,1]),#01
((1025,33),[0,1]),((33,1025),[0,1]),#01
((4100,4,3),[0]),((5,4100,3),[0]),((5,4,4100),[0]),#100
((4100,4,3),[1]),((5,4100,3),[1]),((5,4,4100),[1]),#010
((4100,4,3),[2]),((5,4100,3),[2]),((5,4,4100),[2]),#001
((4100,4,3),[0,1]),((5,4100,3),[0,1]),((5,4,4100),[0,1]),#110
((4100,4,3),[1,2]),((5,4100,3),[1,2]),((5,4,4100),[1,2]),#011
#((4100,4,3),[0,2]),((5,4100,3),[0,2]),((5,4,4100),[0,2]),#101 ##not implemented
((4100,4,3),[0,1,2]),((5,4100,3),[0,1,2]),((5,4,4100),[0,1,2]),#111
((4100,4,3,2),[2,3]),((4,4100,3,2),[2,3]),((4,3,4100,2),[2,3]),((4,3,2,4100),[2,3]),#0011
((4100,4,3,2),[1,3]),((4,4100,3,2),[1,3]),((4,3,4100,2),[1,3]),((4,3,2,4100),[1,3]),#0101
((4100,4,3,2),[0,2,3]),((4,4100,3,2),[0,2,3]),((4,3,4100,2),[0,2,3]),#((4,3,2,4100),[0,2,3]),#1011
((4100,4,3,2),[1,2,3]),((4,4100,3,2),[1,2,3]),((4,3,4100,2),[1,2,3]),((4,3,2,4100),[1,2,3]),#0111
((4100,2,3,4),[0,1,2,3]),((2,4100,3,4),[0,1,2,3]),((2,3,4100,4),[0,1,2,3]),((2,3,4,4100),[0,1,2,3]),#1111
#test pattern implemented by reshape
((4100,4,3,2),[0]),((4,4100,3,2),[0]),((4,3,4100,2),[0]),((4,3,2,4100),[0]),#1000
((4100,4,3,2),[1]),((4,4100,3,2),[1]),((4,3,4100,2),[1]),((4,3,2,4100),[1]),#0100
((4100,4,3,2),[2]),((4,4100,3,2),[2]),((4,3,4100,2),[2]),((4,3,2,4100),[2]),#0010
((4100,4,3,2),[3]),((4,4100,3,2),[3]),((4,3,4100,2),[3]),((4,3,2,4100),[3]),#0001
((1100,2,3,4,5),[0,1,2,3,4]),((2,1100,3,4,5),[0,1,2,3,4]),((2,3,1100,4,5),[0,1,2,3,4]),((2,3,4,1100,5),[0,1,2,3,4]),((2,3,4,5,1100),[0,1,2,3,4]),#11111
]:
# Don't test patterns that aren't implemented for max yet
if tensor_pattern_to_gpu_pattern(shape, pattern) not in \
[ (0,1), (0,1,1), (0,1,1) ]:
continue
a = tensor.TensorType('float32', (False,) * len(shape))()
b = T.max(a, pattern)
val = numpy.random.rand(numpy.prod(shape)).reshape(shape)
# val = numpy.ones(shape)
# val = numpy.arange(numpy.prod(shape)).reshape(shape)
val = theano._asarray(val, dtype='float32')
f = theano.function([a], b, mode=mode_with_gpu)
f_caused_value_error = False
try:
f_out = f(val)
except ValueError, e:
exc = e
f_caused_value_error = True
f2 = theano.function([a], b, mode=mode_without_gpu)
try:
f2_out = f2(val)
f2_caused_value_error = False f2_caused_value_error = False
except ValueError, e: try:
exc2 = e f2_out = f2(val)
f2_caused_value_error = True except ValueError, e:
exc2 = e
f2_caused_value_error = True
assert tcn.GpuCAReduce in [x.op.__class__ for x in f.maker.fgraph.toposort()]
assert T.CAReduce in [x.op.__class__ for x in f2.maker.fgraph.toposort()] if f_caused_value_error != f2_caused_value_error:
if f_caused_value_error:
print 'f caused this value error:'
print exc
else:
print 'f did not raise a value error, but should have'
if f2_caused_value_error:
print 'f2 caused this value error:'
print exc2
else:
print 'f should not have raised a value error'
print 'shape was: ', shape
print 'pattern was: ', pattern
assert False
# Check that 0 shape matrices are invalid in the same cases
if f_caused_value_error != f2_caused_value_error:
if f_caused_value_error:
print 'f caused this value error:'
print exc
else:
print 'f did not raise a value error, but should have'
if f2_caused_value_error:
print 'f2 caused this value error:'
print exc2
else:
print 'f should not have raised a value error'
print 'shape was: ',shape
print 'pattern was: ',pattern
assert False
if f_caused_value_error:
continue
if val.size == 0:
assert f2(val).size == f(val).size
assert f2(val).shape == f(val).shape
else:
try: try:
#We raise the error threashold as we sum big matrix #We raise the error threashold as we sum big matrix
#and this cause small rounding difference with some seed #and this cause small rounding difference with some seed
#example in debug mode with unittests.rseed=9275 #example in debug mode with unittests.rseed=9275
orig_rtol = theano.tensor.basic.float32_rtol orig_rtol = theano.tensor.basic.float32_rtol
theano.tensor.basic.float32_rtol = 2e-5 theano.tensor.basic.float32_rtol = 2e-5
f2_val = f2(val) assert _allclose(f_out, f2_out), ('shape', shape,
f_val = f(val) 'pattern', pattern,
if not _allclose(f2_val, f_val): sum([shape[i] for i in pattern]),
print 'failed for the following arguments: ' f2(val), f(val), val)
print 'shape:',shape
print 'pattern: ',pattern
print 'input:'
print val
print 'correct output: '
print f2_val
print 'actual output: '
print f_val
assert False
finally: finally:
theano.tensor.basic.float32_rtol = orig_rtol theano.tensor.basic.float32_rtol = orig_rtol
#test with dimshuffle #test with dimshuffle
#we shuffle the 2 outer dims. #we shuffle the 2 outer dims.
for shape, pattern in [#((5,),[0]), for shape, pattern in [#((5,),[0]),
((5,4),(0,1)),((5,4),[0]), ((5,4),[0,1]),((5,4),[0]),
((5,4,3),[0]),((5,4,3),[0,1]),((5,4,3),[2]),((5,4,3),[0,1,2]), ((5,4,3),[0]),((5,4,3),[0,1]),((5,4,3),[2]),((5,4,3),[0,1,2]),
((5,4,3,2),[0,1,2,3]), ((5,4,3,2),[0,2,3])]: ((5,4,3,2),[0,1,2,3]), ((5,4,3,2),[0,2,3])]:
# Don't test patterns that aren't implemented for max yet op = tensor.CAReduce(scalar_op, axis=pattern)
if tensor_pattern_to_gpu_pattern(shape, pattern) not in \ pat = tensor_pattern_to_gpu_pattern(shape, pattern)
[ (0,1), (0,1,1), (0,1,1) ]: #GpuCAReduce{maximum} support only those patterns
continue if scalar_op is theano.scalar.maximum and pat not in [
a = tensor.TensorType('float32', (False,) * len(shape))() (0, 1), (0, 1, 1), (0, 1, 1)]:
dim_pattern = range(len(shape)) continue
dim_pattern[0] = 1 a = tensor.TensorType('float32', (False,) * len(shape))()
dim_pattern[1] = 0 dim_pattern = range(len(shape))
a = a.dimshuffle(dim_pattern) dim_pattern[0] = 1
b = T.max(a, pattern) dim_pattern[1] = 0
val = numpy.random.rand(numpy.prod(shape)).reshape(shape) a = a.dimshuffle(dim_pattern)
# val = numpy.ones(shape) b = op(a)
# val = numpy.arange(numpy.prod(shape)).reshape(shape) val = numpy.random.rand(numpy.prod(shape)).reshape(shape)
val = theano._asarray(val, dtype='float32') # val = numpy.ones(shape)
f = theano.function([a], b, mode=mode_with_gpu) # val = numpy.arange(numpy.prod(shape)).reshape(shape)
f2 = theano.function([a], b, mode=mode_without_gpu) val = theano._asarray(val, dtype='float32')
assert tcn.GpuCAReduce in [x.op.__class__ for x in f.maker.fgraph.toposort()] f = theano.function([a], b, mode=mode_with_gpu)
assert T.CAReduce in [x.op.__class__ for x in f2.maker.fgraph.toposort()] f2 = theano.function([a], b, mode=mode_without_gpu)
assert _allclose(f2(val), f(val)), ('shape', shape, assert tcn.GpuCAReduce in [x.op.__class__
'pattern', pattern, for x in f.maker.fgraph.toposort()]
sum([shape[i] for i in pattern])) assert op.__class__ in [x.op.__class__
for x in f2.maker.fgraph.toposort()]
#test with broadcast assert _allclose(f2(val), f(val)), ('shape', shape,
for shape, pattern in [((5,),(0,)), 'pattern', pattern,
((5,4),(0,1)), sum([shape[i] for i in pattern]))
((5,4),(0,)),
((5,4,3),(0,)), #test with broadcast
((5,4,3),(0,1)), for shape, pattern in [((5,),[0]),
((5,4,3),(2,)), ((5,4),[0,1]),((5,4),[0]),
((5,4,3),(0,1,2)), ((5,4,3),[0]),((5,4,3),[0,1]),
((5,4,3,2),(0,1,2,3)), ((5,4,3),[2]),((5,4,3),[0,1,2]),
((5,4,3,2),(0,2,3))]: ((5,4,3,2),[0,1,2,3]), ((5,4,3,2),[0,2,3])]:
# Don't test patterns that aren't implemented for max yet op = tensor.CAReduce(scalar_op, axis=pattern)
if tensor_pattern_to_gpu_pattern(shape, pattern) not in \ pat = tensor_pattern_to_gpu_pattern(shape, pattern)
[ (0,1), (0,1,1), (0,1,1) ]: #GpuCAReduce{maximum} support only those patterns
continue if scalar_op is theano.scalar.maximum and pat not in [
shape = numpy.asarray(shape) * 2 (0, 1), (0, 1, 1), (0, 1, 1)]:
a = tensor.TensorType('float32', (False,) * len(shape))() continue
a2 = tcn.CudaNdarrayType((False,) * len(shape))() shape = numpy.asarray(shape) * 2
b = T.max(a, pattern) a = tensor.TensorType('float32', (False,) * len(shape))()
b2 = T.max(a2, pattern) a2 = tcn.CudaNdarrayType((False,) * len(shape))()
val = numpy.random.rand(numpy.prod(shape)).reshape(shape) b = op(a)
# val = numpy.ones(shape) b2 = op(a2)
# val = numpy.arange(numpy.prod(shape)).reshape(shape) val = numpy.random.rand(numpy.prod(shape)).reshape(shape)
val = theano._asarray(val, dtype='float32') # val = numpy.ones(shape)
val2 = cuda.CudaNdarray(val) # val = numpy.arange(numpy.prod(shape)).reshape(shape)
if len(shape) == 1: val = theano._asarray(val, dtype='float32')
val = val[::2] val2 = cuda.CudaNdarray(val)
val2 = val2[::2] if len(shape) == 1:
elif len(shape) == 2: val = val[::2]
val = val[::2, ::2] val2 = val2[::2]
val2 = val2[::2, ::2] elif len(shape) == 2:
elif len(shape) == 3: val = val[::2, ::2]
val = val[::2, ::2, ::2] val2 = val2[::2, ::2]
val2 = val2[::2, ::2, ::2] elif len(shape) == 3:
elif len(shape) == 4: val = val[::2, ::2, ::2]
val = val[::2, ::2, ::2, ::2] val2 = val2[::2, ::2, ::2]
val2 = val2[::2, ::2, ::2, ::2] elif len(shape) == 4:
f = theano.function([a], b, mode=mode_without_gpu) val = val[::2, ::2, ::2, ::2]
f2 = theano.function([a2], b2, mode=mode_with_gpu) val2 = val2[::2, ::2, ::2, ::2]
assert tcn.GpuCAReduce in [x.op.__class__ for x in f2.maker.fgraph.toposort()] f = theano.function([a], b, mode=mode_without_gpu)
assert T.CAReduce in [x.op.__class__ for x in f.maker.fgraph.toposort()] f2 = theano.function([a2], b2, mode=mode_with_gpu)
assert _allclose(f2(val2), f(val)), ('shape', shape, assert tcn.GpuCAReduce in [x.op.__class__
'pattern', pattern, for x in f2.maker.fgraph.toposort()]
sum([shape[i] for i in pattern])) assert op.__class__ in [x.op.__class__
for x in f.maker.fgraph.toposort()]
assert _allclose(f2(val2), f(val)), ('shape', shape,
'pattern', pattern,
sum([shape[i] for i in pattern]))
def test_flatten(): def test_flatten():
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论