Merge pull request #2783 from nouiz/pool_average

Average pool CPU with python code

Merge pull request #2783 from nouiz/pool_average
9dc07802 · abergeron · 54363a8d · 8df6d348 · 9dc07802 · 9dc07802
--- a/theano/sandbox/cuda/dnn.py
+++ b/theano/sandbox/cuda/dnn.py
@@ -721,7 +721,8 @@ class GpuDnnPoolDesc(GpuOp):
    :param ws: windows size
    :param stride: (dx, dy)
-    :param mode: 'max' or 'average'
+    :param mode: 'max', 'average_inc_pad' or 'average_exc_pad'
+        The old deprecated name 'average' corresponds to 'average_inc_pad'
    :param pad: (padX, padY) padding information.
        padX is the size of the left and right borders,
        padY is the size of the top and bottom borders.
@@ -744,7 +745,9 @@ class GpuDnnPoolDesc(GpuOp):
        return False
    def __init__(self, ws=(1, 1), stride=(1, 1), mode='max', pad=(0, 0)):
-        assert mode in ('max', 'average')
+        if mode == 'average':
+            mode = 'average_inc_pad'
+        assert mode in ('max', 'average_inc_pad', 'average_exc_pad')
        self.mode = mode
        assert len(ws) == 2
        self.ws = ws
@@ -772,8 +775,12 @@ class GpuDnnPoolDesc(GpuOp):
        if self.mode == 'max':
            mode_flag = 'CUDNN_POOLING_MAX'
-        elif self.mode == "average":
+        elif self.mode == "average_inc_pad":
            mode_flag = 'CUDNN_POOLING_AVERAGE_COUNT_INCLUDE_PADDING'
+        elif self.mode == "average_exc_pad":
+            mode_flag = 'CUDNN_POOLING_AVERAGE_COUNT_EXCLUDE_PADDING'
+            if version() == -1:
+                raise Exception("cudnn v1 do not support average_exc_pad")
        else:
            raise NotImplementedError("Unsupported pooling model.")
@@ -1194,7 +1201,8 @@ def dnn_pool(img, ws, stride=(1, 1), mode='max', pad=(0, 0)):
    :param img: images to do the pooling over
    :param ws: subsampling window size
    :param stride: subsampling stride (default: (1, 1))
-    :param mode: one of 'max', 'average' (default: 'max')
+    :param mode: one of 'max', 'average_inc_pad' or 'average_exc_pad'
+        (default: 'max')
    :param pad: (padX, padY) padding information.
        padX is the size of the left and right borders,
        padY is the size of the top and bottom borders.
@@ -1625,7 +1633,7 @@ if True:
    @register_opt('cudnn')
    @local_optimizer([DownsampleFactorMax])
-    def local_pool_dnn_stride(node):
+    def local_pool_dnn_alternative(node):
        if not dnn_available():
            return
        if isinstance(node.op, DownsampleFactorMax):
@@ -1635,9 +1643,10 @@ if True:
            ds = node.op.ds
            stride = node.op.st
            pad = node.op.padding
+            mode = node.op.mode
            if (img.owner and isinstance(img.owner.op, HostFromGpu)):
                ret = dnn_pool(gpu_contiguous(img.owner.inputs[0]),
-                               ds, stride=stride, pad=pad)
+                               ds, stride=stride, pad=pad, mode=mode)
                return [host_from_gpu(ret)]
    @register_opt('cudnn')
@@ -1667,12 +1676,13 @@ if True:
            ds = node.op.ds
            st = node.op.st
            pad = node.op.padding
+            mode = node.op.mode
            if ((inp.owner and isinstance(inp.owner.op, HostFromGpu)) or
                (out.owner and isinstance(out.owner.op, HostFromGpu)) or
                (inp_grad.owner and isinstance(inp_grad.owner.op,
                                               HostFromGpu))):
-                desc = GpuDnnPoolDesc(ws=ds, stride=st, mode="max", pad=pad)()
+                desc = GpuDnnPoolDesc(ws=ds, stride=st, mode=mode, pad=pad)()
                if not node.op.ignore_border:
                    return
                ret = GpuDnnPoolGrad()(gpu_contiguous(inp),

--- a/theano/sandbox/cuda/opt.py
+++ b/theano/sandbox/cuda/opt.py
@@ -1648,8 +1648,9 @@ import theano.tensor.signal.downsample as downsample
 def local_gpu_downsample_factor_max(node):
    if (isinstance(node.op, downsample.DownsampleFactorMax)
        and node.op.ds == node.op.st):
-        assert node.op.__props__ == ('ds', 'ignore_border', 'st', 'padding')
+        assert node.op.__props__ == ('ds', 'ignore_border', 'st', 'padding',
-        if node.op.padding != (0, 0):
+                                     'mode')
+        if node.op.padding != (0, 0) or node.op.mode != 'max':
            return
        x, = node.inputs
        if (x.owner and isinstance(x.owner.op, HostFromGpu)):
@@ -1662,8 +1663,9 @@ def local_gpu_downsample_factor_max(node):
 def local_gpu_downsample_factor_max_grad(node):
    if (isinstance(node.op, downsample.DownsampleFactorMaxGrad) and
        node.op.ds == node.op.st):
-        assert node.op.__props__ == ('ds', 'ignore_border', 'st', 'padding')
+        assert node.op.__props__ == ('ds', 'ignore_border', 'st', 'padding',
-        if node.op.padding != (0, 0):
+                                     'mode')
+        if node.op.padding != (0, 0) or node.op.mode != 'max':
            return
        x, z, gz = node.inputs
        if (x.owner and isinstance(x.owner.op, HostFromGpu)):
@@ -1678,6 +1680,8 @@ def local_gpu_downsample_factor_max_grad(node):
 @local_optimizer([downsample.DownsampleFactorMaxGradGrad])
 def local_gpu_downsample_factor_max_grad_grad(node):
    if isinstance(node.op, downsample.DownsampleFactorMaxGradGrad):
+        assert node.op.__props__ == ('ds', 'ignore_border', 'st')
        x, z, gx = node.inputs
        if (x.owner and isinstance(x.owner.op, HostFromGpu)):
            op = GpuDownsampleFactorMaxGradGrad(node.op.ds,

--- a/theano/sandbox/cuda/tests/test_dnn.py
+++ b/theano/sandbox/cuda/tests/test_dnn.py
@@ -183,8 +183,12 @@ def test_pooling():
        raise SkipTest(cuda.dnn.dnn_available.msg)
    x = T.ftensor4()
-    for func, pad in product((T.max, T.mean),
+    for mode, pad in product(('max', 'average_inc_pad', 'average_exc_pad'),
                             ((0, 0), (1, 0), (1, 0), (2, 3), (3, 2))):
+        if mode == 'max':
+            func = T.max
+        else:
+            func = T.mean
        if pad != (0, 0) and cuda.dnn.version() == -1:
            continue
@@ -195,29 +199,23 @@ def test_pooling():
            for stride in (2, 3):
                if stride > ws:
                    continue
-                if func is T.max:
+                if pad[0] > stride or pad[1] > stride:
-                    if pad[0] > stride or pad[1] > stride:
+                    # Not implemented
-                        # Not implemented
+                    continue
-                        continue
+                # We will check that the opt introduced it.
-                    # We will check that the opt introduced it.
+                out1 = max_pool_2d(x, (ws, ws),
-                    out1 = max_pool_2d(x, (ws, ws),
+                                   st=(stride, stride),
-                                       st=(stride, stride),
+                                   ignore_border=True,
-                                       ignore_border=True,
+                                   padding=pad, mode=mode)
-                                       padding=pad)
-                else:
-                    out1 = cuda.dnn.dnn_pool(
-                        x, ws=(ws, ws),
-                        stride=(stride, stride),
-                        pad=pad,
-                        mode='max' if func is T.max else "average")
                out2 = pool_2d_i2n(x, ds=(ws, ws), strides=(stride, stride),
                                   pad=pad,
                                   pool_function=func)
+                mode_without_gpu2 = mode_without_gpu.including()
+                mode_without_gpu2.check_isfinite = False
                f1 = theano.function([x], out1, mode=mode_with_gpu)
                assert any([isinstance(node.op, cuda.dnn.GpuDnnPool)
                            for node in f1.maker.fgraph.apply_nodes])
-                f2 = theano.function([x], out2, mode=mode_without_gpu)
+                f2 = theano.function([x], out2, mode=mode_without_gpu2)
                assert not any([isinstance(node.op, cuda.dnn.GpuDnnPool)
                                for node in f2.maker.fgraph.apply_nodes])
                for shp in [(1, 10, 100, 100),
@@ -245,7 +243,7 @@ def test_pooling():
            # This test the CPU grad + opt + GPU implemtentation
            def fn(x):
                return max_pool_2d(x, (ws, ws), ignore_border=True,
-                                   padding=pad)
+                                   padding=pad, mode=mode)
            theano.tests.unittest_tools.verify_grad(fn, [data],
                                                    cast_to_output_type=False,
                                                    mode=mode_with_gpu)
@@ -261,7 +259,7 @@ def test_pooling():
                    x, ws=(ws, ws),
                    stride=(stride, stride),
                    pad=pad,
-                    mode='max' if func is T.max else "average")
+                    mode=mode)
                return dnn_op
            theano.tests.unittest_tools.verify_grad(
                fn, [data],
@@ -274,17 +272,16 @@ def test_pooling():
                        for node in fg.maker.fgraph.toposort()])
            g_out = fg(data)
-            if func is T.max:
+            # Compare against the CPU result
-                # Compare again the CPU result
+            out = max_pool_2d(x, (ws, ws),
-                out = max_pool_2d(x, (ws, ws),
+                              padding=pad,
-                                  padding=pad,
+                              ignore_border=True, mode=mode)
-                                  ignore_border=True)
+            fc = theano.function([x], theano.grad(out.sum(), x),
-                fc = theano.function([x], theano.grad(out.sum(), x),
+                                 mode=mode_without_gpu)
-                                     mode=mode_without_gpu)
+            assert any([isinstance(node.op, DownsampleFactorMaxGrad)
-                assert any([isinstance(node.op, DownsampleFactorMaxGrad)
+                        for node in fc.maker.fgraph.toposort()])
-                            for node in fc.maker.fgraph.toposort()])
+            c_out = fc(data)
-                c_out = fc(data)
+            assert numpy.allclose(c_out, g_out)
-                assert numpy.allclose(c_out, g_out)
 def test_pooling_opt():
@@ -523,7 +520,7 @@ class TestDnnInferShapes(utt.InferShapeTester):
        for params in product(
            [(1, 1), (2, 2), (3, 3)],
            [(1, 1), (2, 2), (3, 3)],
-            ['max', 'average']
+            ['max', 'average_inc_pad', 'average_exc_pad']
        ):
            desc = dnn.GpuDnnPoolDesc(
                ws=params[0],
@@ -559,7 +556,7 @@ class TestDnnInferShapes(utt.InferShapeTester):
        for params in product(
            [(1, 1), (2, 2), (3, 3)],
            [(1, 1), (2, 2), (3, 3)],
-            ['max', 'average']
+            ['max', 'average_inc_pad']
        ):
            desc = dnn.GpuDnnPoolDesc(
                ws=params[0],

--- a/theano/tensor/signal/downsample.py
+++ b/theano/tensor/signal/downsample.py
--- a/theano/tensor/signal/tests/test_downsample.py
+++ b/theano/tensor/signal/tests/test_downsample.py