added test of GpuCAReduce max

0478beb3 · Ian Goodfellow · b9d126e6 · 0478beb3
--- a/theano/sandbox/cuda/tests/test_basic_ops.py
+++ b/theano/sandbox/cuda/tests/test_basic_ops.py
@@ -187,6 +187,169 @@ def test_sum():
                                             'pattern', pattern,
                                             sum([shape[i] for i in pattern]))

+def test_max():
+    """
+    test GpuMax pattern 01, 011, 0111 (tensor.max pattern (1,), (1,2), (1,2,3) )
+
+    TODO: are others currently implemented by reshape?
+    """
+
+    def tensor_pattern_to_gpu_pattern(shape, pattern):
+        gpu_pattern = [ 0 for elem in shape ]
+        for idx in pattern:
+            gpu_pattern[idx] = 1
+        gpu_pattern = tuple(gpu_pattern)
+        return gpu_pattern
+
+    for shape, pattern in [((100,3,1300),[1]),
+                           ((0,),[0]),((5,),[0]),
+                           ((0,0),[0,1]),((1,0),[0,1]),((5,4),[0,1]),((33,31),[0,1]),((5,4),[1]),((5,4),[0]),#need something bigger then 32 for some opt test.
+                           ((5,4,3),[0]),((5,4,3),[1]),((5,4,3),[0,1]),((5,4,3),[2]),((5,4,3),[1,2]),((5,4,3),[0,1,2]),
+                           ((0,0,0,0),[0,1,2,3]),
+                           ((5,4,3,20),[2,3]), ((5,4,3,2),[0,1,2,3]), ((5,4,3,2),[0,2,3]),((5,4,3,2),[1,2,3]),
+                           ((5,4,3,10,11),[1,2]),
+                           ((5,4,3,20),[2,3]), ((5,4,3,2),[0,1,2,3]), ((5,4,3,2),[0,2,3]),((5,4,3,2),[1,2,3]),
+
+                           #test shape bigger then 4096 on each dimension to make sure that we work correctly when we don't have enough thread/block in each dimensions
+                           ((4100,3),[0]),((3,4101),[0]),#10
+                           ((1024,33),[0]),((33,1024),[0]),#10
+                           ((1025,33),[0]),((33,1025),[0]),#10
+
+                           ((4100,3),[1]),((3,4101),[1]),#01
+                           ((1024,33),[1]),((33,1024),[1]),#01
+                           ((1025,33),[1]),((33,1025),[1]),#01
+
+                           ((4100,3),[0,1]),((3,4101),[0,1]),#11
+                           ((1024,33),[0,1]),((33,1024),[0,1]),#01
+                           ((1025,33),[0,1]),((33,1025),[0,1]),#01
+
+                           ((4100,4,3),[0]),((5,4100,3),[0]),((5,4,4100),[0]),#100
+                           ((4100,4,3),[1]),((5,4100,3),[1]),((5,4,4100),[1]),#010
+                           ((4100,4,3),[2]),((5,4100,3),[2]),((5,4,4100),[2]),#001
+                           ((4100,4,3),[0,1]),((5,4100,3),[0,1]),((5,4,4100),[0,1]),#110
+                           ((4100,4,3),[1,2]),((5,4100,3),[1,2]),((5,4,4100),[1,2]),#011
+                           #((4100,4,3),[0,2]),((5,4100,3),[0,2]),((5,4,4100),[0,2]),#101 ##not implemented
+                           ((4100,4,3),[0,1,2]),((5,4100,3),[0,1,2]),((5,4,4100),[0,1,2]),#111
+
+                           ((4100,4,3,2),[2,3]),((4,4100,3,2),[2,3]),((4,3,4100,2),[2,3]),((4,3,2,4100),[2,3]),#0011
+                           ((4100,4,3,2),[1,3]),((4,4100,3,2),[1,3]),((4,3,4100,2),[1,3]),((4,3,2,4100),[1,3]),#0101
+                           ((4100,4,3,2),[0,2,3]),((4,4100,3,2),[0,2,3]),((4,3,4100,2),[0,2,3]),#((4,3,2,4100),[0,2,3]),#1011
+                           ((4100,4,3,2),[1,2,3]),((4,4100,3,2),[1,2,3]),((4,3,4100,2),[1,2,3]),((4,3,2,4100),[1,2,3]),#0111
+                           ((4100,2,3,4),[0,1,2,3]),((2,4100,3,4),[0,1,2,3]),((2,3,4100,4),[0,1,2,3]),((2,3,4,4100),[0,1,2,3]),#1111
+
+
+                           #test pattern implemented by reshape
+                           ((4100,4,3,2),[0]),((4,4100,3,2),[0]),((4,3,4100,2),[0]),((4,3,2,4100),[0]),#1000
+                           ((4100,4,3,2),[1]),((4,4100,3,2),[1]),((4,3,4100,2),[1]),((4,3,2,4100),[1]),#0100
+                           ((4100,4,3,2),[2]),((4,4100,3,2),[2]),((4,3,4100,2),[2]),((4,3,2,4100),[2]),#0010
+                           ((4100,4,3,2),[3]),((4,4100,3,2),[3]),((4,3,4100,2),[3]),((4,3,2,4100),[3]),#0001
+                           ((1100,2,3,4,5),[0,1,2,3,4]),((2,1100,3,4,5),[0,1,2,3,4]),((2,3,1100,4,5),[0,1,2,3,4]),((2,3,4,1100,5),[0,1,2,3,4]),((2,3,4,5,1100),[0,1,2,3,4]),#11111
+
+                           ]:
+        # Don't test patterns that aren't implemented for max yet
+        if tensor_pattern_to_gpu_pattern(shape, pattern) not in \
+            [ (0,1), (0,1,1), (0,1,1) ]:
+            continue
+
+        a = tensor.TensorType('float32', (False,) * len(shape))()
+        b = T.max(a, pattern)
+        val = numpy.random.rand(numpy.prod(shape)).reshape(shape)
+#        val = numpy.ones(shape)
+#        val = numpy.arange(numpy.prod(shape)).reshape(shape)
+        val = theano._asarray(val, dtype='float32')
+        f = theano.function([a], b, mode=mode_with_gpu)
+        f2 = theano.function([a], b, mode=mode_without_gpu)
+        assert tcn.GpuCAReduce in [x.op.__class__ for x in f.maker.fgraph.toposort()]
+        assert T.CAReduce in [x.op.__class__ for x in f2.maker.fgraph.toposort()]
+        if val.size == 0:
+            assert f2(val) == f(val), ('shape', shape, 'pattern', pattern)
+        else:
+            try:
+                #We raise the error threashold as we sum big matrix
+                #and this cause small rounding difference with some seed
+                #example in debug mode with unittests.rseed=9275
+                orig_rtol = theano.tensor.basic.float32_rtol
+                theano.tensor.basic.float32_rtol = 2e-5
+                assert _allclose(f2(val), f(val)), ('shape', shape,
+                                                    'pattern', pattern,
+                                                    sum([shape[i] for i in pattern]),
+                                                    f2(val), f(val), val)
+            finally:
+                theano.tensor.basic.float32_rtol = orig_rtol
+
+
+        #test with dimshuffle
+        #we shuffle the 2 outer dims.
+    for shape, pattern in [#((5,),[0]),
+                           ((5,4),(0,1)),((5,4),[0]),
+                           ((5,4,3),[0]),((5,4,3),[0,1]),((5,4,3),[2]),((5,4,3),[0,1,2]),
+                           ((5,4,3,2),[0,1,2,3]), ((5,4,3,2),[0,2,3])]:
+        # Don't test patterns that aren't implemented for max yet
+        if tensor_pattern_to_gpu_pattern(shape, pattern) not in \
+            [ (0,1), (0,1,1), (0,1,1) ]:
+            continue
+        a = tensor.TensorType('float32', (False,) * len(shape))()
+        dim_pattern = range(len(shape))
+        dim_pattern[0] = 1
+        dim_pattern[1] = 0
+        a = a.dimshuffle(dim_pattern)
+        b = T.max(a, pattern)
+        val = numpy.random.rand(numpy.prod(shape)).reshape(shape)
+#        val = numpy.ones(shape)
+#        val = numpy.arange(numpy.prod(shape)).reshape(shape)
+        val = theano._asarray(val, dtype='float32')
+        f = theano.function([a], b, mode=mode_with_gpu)
+        f2 = theano.function([a], b, mode=mode_without_gpu)
+        assert tcn.GpuCAReduce in [x.op.__class__ for x in f.maker.fgraph.toposort()]
+        assert T.CAReduce in [x.op.__class__ for x in f2.maker.fgraph.toposort()]
+        assert _allclose(f2(val), f(val)), ('shape', shape,
+                                            'pattern', pattern,
+                                            sum([shape[i] for i in pattern]))
+
+
+        #test with broadcast
+    for shape, pattern in [((5,),(0,)),
+                           ((5,4),(0,1)),
+                           ((5,4),(0,)),
+                           ((5,4,3),(0,)),
+                           ((5,4,3),(0,1)),
+                           ((5,4,3),(2,)),
+                           ((5,4,3),(0,1,2)),
+                           ((5,4,3,2),(0,1,2,3)),
+                           ((5,4,3,2),(0,2,3))]:
+        # Don't test patterns that aren't implemented for max yet
+        if tensor_pattern_to_gpu_pattern(shape, pattern) not in \
+            [ (0,1), (0,1,1), (0,1,1) ]:
+            continue
+        shape = numpy.asarray(shape) * 2
+        a = tensor.TensorType('float32', (False,) * len(shape))()
+        a2 = tcn.CudaNdarrayType((False,) * len(shape))()
+        b = T.max(a, pattern)
+        b2 = T.max(a2, pattern)
+        val = numpy.random.rand(numpy.prod(shape)).reshape(shape)
+#        val = numpy.ones(shape)
+#        val = numpy.arange(numpy.prod(shape)).reshape(shape)
+        val = theano._asarray(val, dtype='float32')
+        val2 = cuda.CudaNdarray(val)
+        if len(shape) == 1:
+            val = val[::2]
+            val2 = val2[::2]
+        elif len(shape) == 2:
+            val = val[::2, ::2]
+            val2 = val2[::2, ::2]
+        elif len(shape) == 3:
+            val = val[::2, ::2, ::2]
+            val2 = val2[::2, ::2, ::2]
+        elif len(shape) == 4:
+            val = val[::2, ::2, ::2, ::2]
+            val2 = val2[::2, ::2, ::2, ::2]
+        f = theano.function([a], b, mode=mode_without_gpu)
+        f2 = theano.function([a2], b2, mode=mode_with_gpu)
+        assert tcn.GpuCAReduce in [x.op.__class__ for x in f2.maker.fgraph.toposort()]
+        assert T.CAReduce in [x.op.__class__ for x in f.maker.fgraph.toposort()]
+        assert _allclose(f2(val2), f(val)), ('shape', shape,
+                                             'pattern', pattern,
+                                             sum([shape[i] for i in pattern]))

 def test_flatten():
    x = cuda.fmatrix('x')