提交 ad73ad3e authored 作者: Alexander Matyasko's avatar Alexander Matyasko

Refactor pooling to separate file

上级 e422c2af
...@@ -1550,201 +1550,6 @@ class GpuCorr3dMM_gradInputs(BaseGpuCorr3dMM): ...@@ -1550,201 +1550,6 @@ class GpuCorr3dMM_gradInputs(BaseGpuCorr3dMM):
return [[1], [1], [0], [0], [0]] # no connection to height, width, depth return [[1], [1], [0], [0], [0]] # no connection to height, width, depth
class GpuPool(CGpuKernelBase):
    """
    Implement the max and average pooling on the gpu.

    Parameters
    ----------
    ignore_border : bool
        Whether to discard border regions that do not fit a full
        pooling window.
    mode : str
        One of 'max', 'sum', 'average_inc_pad', 'average_exc_pad'.
        'average' is accepted as an alias for 'average_inc_pad'.
    ndim : int
        Number of pooled dimensions, 2 or 3.
    """
    __props__ = ('ignore_border', 'mode', 'ndim')
    def __init__(self, ignore_border, mode='max', ndim=2):
        self.ndim = ndim
        self.ignore_border = ignore_border
        # Normalize the 'average' alias before storing the mode.
        if mode == 'average':
            mode = 'average_inc_pad'
        self.mode = mode
        # C implementation lives in pool.c; the kernel entry point is
        # name-mangled per Apply node via APPLY_SPECIFIC.
        CGpuKernelBase.__init__(self, ['pool.c'],
                                'APPLY_SPECIFIC(pool)')
        assert mode in ('max', 'sum', 'average_inc_pad', 'average_exc_pad')
        assert self.ndim in [2, 3]
    def c_headers(self):
        return ['gpuarray_api.h', 'gpuarray_helper.h',
                'numpy_compat.h', 'float.h']
    def c_header_dirs(self):
        return [os.path.dirname(__file__), pygpu.get_include()]
    def make_node(self, inp, ws, stride, pad):
        # inp: tensor to pool; ws/stride/pad: 1-d vectors of window
        # size, stride and padding per pooled dimension.
        ctx_name = infer_context_name(inp)
        inp = as_gpuarray_variable(inp, ctx_name)
        # NOTE(review): only checks 4d/5d here, not inp.ndim == self.ndim + 2.
        assert (inp.ndim in [4, 5])
        ws = as_tensor_variable(ws)
        stride = as_tensor_variable(stride)
        pad = as_tensor_variable(pad)
        assert ws.type.ndim == stride.type.ndim and ws.type.ndim == pad.type.ndim
        assert ws.type.ndim == 1
        return Apply(self, [inp, ws, stride, pad], [inp.type()])
    def get_params(self, node):
        # The GPU context is passed to the C code as the Op params.
        return node.inputs[0].type.context
    def get_op_params(self):
        # Translate the Python-level mode into C preprocessor defines
        # consumed by pool.c.
        ignore_border = int(self.ignore_border)
        max_pool = int(self.mode == 'max')
        inc_pad = int(self.mode != 'average_exc_pad')
        sum_mode = int(self.mode == 'sum')
        return [('IGNORE_BORDER', ignore_border),
                ('INC_PAD', inc_pad),
                ('MAX_POOL', max_pool),
                ('SUM_MODE', sum_mode)]
class GpuMaxPoolGrad(CGpuKernelBase):
    """
    Implement the grad of max pooling on the gpu.

    Only ``mode='max'`` is supported; ``ndim`` selects 2d or 3d pooling.
    """
    __props__ = ('ignore_border', 'mode', 'ndim')
    def __init__(self, ignore_border, mode='max', ndim=2):
        self.ndim = ndim
        self.ignore_border = ignore_border
        self.mode = mode
        CGpuKernelBase.__init__(self, ['pool_max_grad.c'],
                                'APPLY_SPECIFIC(max_pool_grad)')
        assert mode == 'max'
        assert ndim in [2, 3]
    def c_headers(self):
        return ['gpuarray_api.h', 'gpuarray_helper.h', 'numpy_compat.h']
    def c_header_dirs(self):
        return [os.path.dirname(__file__), pygpu.get_include()]
    def make_node(self, inp, out, out_grad, ws, stride, pad):
        # inp: forward input, out: forward pooled output,
        # out_grad: gradient w.r.t. `out`.  All three must share one
        # GPU context and the same rank (4d or 5d).
        ctx_name = infer_context_name(inp, out, out_grad)
        inp = as_gpuarray_variable(inp, ctx_name)
        assert (inp.ndim in [4, 5])
        out = as_gpuarray_variable(out, ctx_name)
        assert(out.ndim in [4, 5])
        out_grad = as_gpuarray_variable(out_grad, ctx_name)
        assert (out_grad.ndim in [4, 5])
        assert (out_grad.ndim == inp.ndim)
        assert (inp.ndim == out.ndim)
        ws = as_tensor_variable(ws)
        stride = as_tensor_variable(stride)
        pad = as_tensor_variable(pad)
        assert ws.type.ndim == stride.type.ndim and ws.type.ndim == pad.type.ndim
        assert ws.type.ndim == 1
        # Gradient w.r.t. the input has the input's type/shape.
        return Apply(self, [inp, out, out_grad, ws, stride, pad], [inp.type()])
    def get_params(self, node):
        return node.inputs[0].type.context
class GpuAveragePoolGrad(CGpuKernelBase):
    """
    Implement the grad of average pooling on the gpu.

    Parameters
    ----------
    ignore_border : bool
        Whether borders were ignored in the forward pooling.
    mode : str
        One of 'sum', 'average_inc_pad', 'average_exc_pad'.
        'average' is accepted as an alias for 'average_inc_pad'.
    ndim : int
        Number of pooled dimensions, 2 or 3.
    """
    __props__ = ('ignore_border', 'mode', 'ndim')
    def __init__(self, ignore_border, mode='average_inc_pad', ndim=2):
        # BUG FIX: the previous default, mode='max', always failed the
        # assertion below, so constructing this Op without an explicit
        # mode raised AssertionError.  Default to a valid averaging mode.
        self.ndim = ndim
        self.ignore_border = ignore_border
        if mode == 'average':
            mode = 'average_inc_pad'
        self.mode = mode
        CGpuKernelBase.__init__(self, ['pool_ave_grad.c'],
                                'APPLY_SPECIFIC(ave_pool_grad)')
        assert mode in ('sum', 'average_inc_pad', 'average_exc_pad')
        assert ndim in [2, 3]
    def c_headers(self):
        return ['gpuarray_api.h', 'gpuarray_helper.h', 'numpy_compat.h']
    def c_header_dirs(self):
        return [os.path.dirname(__file__), pygpu.get_include()]
    def make_node(self, inp, out_grad, ws, stride, pad):
        # inp: forward input, out_grad: gradient w.r.t. the pooled
        # output.  Both must share one GPU context and the same rank.
        ctx_name = infer_context_name(inp, out_grad)
        inp = as_gpuarray_variable(inp, ctx_name)
        assert (inp.ndim in [4, 5])
        out_grad = as_gpuarray_variable(out_grad, ctx_name)
        assert (out_grad.ndim in [4, 5])
        assert (out_grad.ndim == inp.ndim)
        ws = as_tensor_variable(ws)
        stride = as_tensor_variable(stride)
        pad = as_tensor_variable(pad)
        assert ws.type.ndim == stride.type.ndim and ws.type.ndim == pad.type.ndim
        assert ws.type.ndim == 1
        return Apply(self, [inp, out_grad, ws, stride, pad], [inp.type()])
    def get_params(self, node):
        return node.inputs[0].type.context
    def get_op_params(self):
        # C preprocessor defines consumed by pool_ave_grad.c.
        inc_pad = int(self.mode == 'average_inc_pad')
        sum_mode = int(self.mode == 'sum')
        return [('INC_PAD', inc_pad),
                ('SUM_MODE', sum_mode)]
class GpuDownsampleFactorMaxGradGrad(CGpuKernelBase):
    """
    Implement the grad of downsample with max on the gpu.

    Only ``mode='max'`` is supported; ``ndim`` selects 2d or 3d pooling.
    """
    __props__ = ('ignore_border', 'mode', 'ndim')
    def __init__(self, ignore_border, mode='max', ndim=2):
        self.ndim = ndim
        self.ignore_border = ignore_border
        self.mode = mode
        CGpuKernelBase.__init__(self, ['pool_grad_grad.c'],
                                'APPLY_SPECIFIC(pool_grad_grad)')
        assert self.mode == 'max'
        assert self.ndim in [2, 3]
    def c_headers(self):
        return ['gpuarray_api.h', 'gpuarray_helper.h', 'numpy_compat.h']
    def c_header_dirs(self):
        return [os.path.dirname(__file__), pygpu.get_include()]
    def make_node(self, inp, out, out_grad, ws, stride, pad):
        # inp: forward input, out: forward pooled output,
        # out_grad: gradient w.r.t. `out`.
        # BUG FIX: the ndim assertions for `out` and `out_grad` were
        # swapped relative to their conversions — `out_grad.ndim` was
        # checked before as_gpuarray_variable() had been applied to it.
        # Each variable is now checked right after its own conversion.
        ctx_name = infer_context_name(inp, out, out_grad)
        inp = as_gpuarray_variable(inp, ctx_name)
        assert (inp.ndim in [4, 5])
        out = as_gpuarray_variable(out, ctx_name)
        assert (out.ndim in [4, 5])
        out_grad = as_gpuarray_variable(out_grad, ctx_name)
        assert (out_grad.ndim in [4, 5])
        assert (out_grad.ndim == inp.ndim)
        assert (inp.ndim == out.ndim)
        ws = as_tensor_variable(ws)
        stride = as_tensor_variable(stride)
        pad = as_tensor_variable(pad)
        assert ws.type.ndim == stride.type.ndim and ws.type.ndim == pad.type.ndim
        assert ws.type.ndim == 1
        return Apply(self, [inp, out, out_grad, ws, stride, pad], [inp.type()])
    def get_params(self, node):
        return node.inputs[0].type.context
@inplace_allocempty(GpuGemv, 0) @inplace_allocempty(GpuGemv, 0)
def local_inplace_gpuagemv(node, inputs): def local_inplace_gpuagemv(node, inputs):
return [gpugemv_inplace(*inputs)] return [gpugemv_inplace(*inputs)]
......
...@@ -47,8 +47,8 @@ from .blas import (gpu_dot22, GpuGemm, GpuGer, GpuGemmBatch, ...@@ -47,8 +47,8 @@ from .blas import (gpu_dot22, GpuGemm, GpuGer, GpuGemmBatch,
gpugemmbatch_no_inplace, gpugemmbatch_no_inplace,
gpugemv_no_inplace, gpugemv_inplace, gpugemv_no_inplace, gpugemv_inplace,
GpuCorrMM, GpuCorrMM_gradInputs, GpuCorrMM_gradWeights, GpuCorrMM, GpuCorrMM_gradInputs, GpuCorrMM_gradWeights,
GpuCorr3dMM, GpuCorr3dMM_gradInputs, GpuCorr3dMM_gradWeights, GpuCorr3dMM, GpuCorr3dMM_gradInputs, GpuCorr3dMM_gradWeights)
GpuPool, GpuMaxPoolGrad, GpuAveragePoolGrad, from .pool import (GpuPool, GpuMaxPoolGrad, GpuAveragePoolGrad,
GpuDownsampleFactorMaxGradGrad) GpuDownsampleFactorMaxGradGrad)
from .blocksparse import (GpuSparseBlockGemv, GpuSparseBlockOuter, from .blocksparse import (GpuSparseBlockGemv, GpuSparseBlockOuter,
gpu_sparse_block_outer, gpu_sparse_block_outer,
......
import os.path
import pygpu
from theano import Apply
from theano.gpuarray import as_gpuarray_variable
from theano.gpuarray.basic_ops import CGpuKernelBase, infer_context_name
from theano.tensor.basic import as_tensor_variable
class GpuPool(CGpuKernelBase):
    """
    Max and average pooling on the gpu.

    Parameters
    ----------
    ignore_border : bool
        Whether to discard border regions that do not fit a full
        pooling window.
    mode : str
        One of 'max', 'sum', 'average_inc_pad', 'average_exc_pad'.
        'average' is accepted as an alias for 'average_inc_pad'.
    ndim : int
        Number of pooled dimensions, 2 or 3.
    """
    __props__ = ('ignore_border', 'mode', 'ndim')
    def __init__(self, ignore_border, mode='max', ndim=2):
        self.ndim = ndim
        self.ignore_border = ignore_border
        # 'average' is shorthand for 'average_inc_pad'.
        self.mode = 'average_inc_pad' if mode == 'average' else mode
        CGpuKernelBase.__init__(self, ['pool.c'],
                                'APPLY_SPECIFIC(pool)')
        assert self.mode in ('max', 'sum', 'average_inc_pad', 'average_exc_pad')
        assert self.ndim in [2, 3]
    def c_headers(self):
        # NOTE(review): the pre-refactor version also listed 'float.h'
        # here — confirm pool.c no longer needs it.
        return ['gpuarray_api.h', 'gpuarray_helper.h', 'numpy_compat.h']
    def c_header_dirs(self):
        return [os.path.dirname(__file__), pygpu.get_include()]
    def make_node(self, inp, ws, stride, pad):
        # ws/stride/pad are 1-d vectors with one entry per pooled dim.
        ctx_name = infer_context_name(inp)
        inp = as_gpuarray_variable(inp, ctx_name)
        assert inp.ndim == self.ndim + 2
        ws = as_tensor_variable(ws)
        stride = as_tensor_variable(stride)
        pad = as_tensor_variable(pad)
        assert ws.type.ndim == stride.type.ndim == pad.type.ndim == 1
        return Apply(self, [inp, ws, stride, pad], [inp.type()])
    def get_params(self, node):
        # The GPU context is handed to the C code as the Op params.
        return node.inputs[0].type.context
    def get_op_params(self):
        # C preprocessor defines consumed by pool.c.
        return [('IGNORE_BORDER', int(self.ignore_border)),
                ('INC_PAD', int(self.mode != 'average_exc_pad')),
                ('MAX_POOL', int(self.mode == 'max')),
                ('SUM_MODE', int(self.mode == 'sum'))]
class GpuMaxPoolGrad(CGpuKernelBase):
    """
    Implement the grad of max pooling on the gpu.

    Only ``mode='max'`` is supported; ``ndim`` selects 2d or 3d pooling.
    """
    __props__ = ('ignore_border', 'mode', 'ndim')
    def __init__(self, ignore_border, mode='max', ndim=2):
        self.ndim = ndim
        self.ignore_border = ignore_border
        self.mode = mode
        CGpuKernelBase.__init__(self, ['pool_max_grad.c'],
                                'APPLY_SPECIFIC(max_pool_grad)')
        assert mode == 'max'
        assert ndim in [2, 3]
    def c_headers(self):
        return ['gpuarray_api.h', 'gpuarray_helper.h', 'numpy_compat.h']
    def c_header_dirs(self):
        return [os.path.dirname(__file__), pygpu.get_include()]
    def make_node(self, inp, out, out_grad, ws, stride, pad):
        # inp: forward input, out: forward pooled output,
        # out_grad: gradient w.r.t. `out`.  All three must share one
        # GPU context and have rank self.ndim + 2.
        ctx_name = infer_context_name(inp, out, out_grad)
        inp = as_gpuarray_variable(inp, ctx_name)
        assert (inp.ndim == self.ndim + 2)
        out = as_gpuarray_variable(out, ctx_name)
        assert (out.ndim == self.ndim + 2)
        out_grad = as_gpuarray_variable(out_grad, ctx_name)
        # CONSISTENCY FIX: this check previously used the loose
        # `out_grad.ndim in [4, 5]` form left over from before the
        # refactor; use the strict `ndim + 2` check like the others.
        assert (out_grad.ndim == self.ndim + 2)
        assert (out_grad.ndim == inp.ndim)
        assert (inp.ndim == out.ndim)
        ws = as_tensor_variable(ws)
        stride = as_tensor_variable(stride)
        pad = as_tensor_variable(pad)
        assert ws.type.ndim == stride.type.ndim and ws.type.ndim == pad.type.ndim
        assert ws.type.ndim == 1
        return Apply(self, [inp, out, out_grad, ws, stride, pad], [inp.type()])
    def get_params(self, node):
        return node.inputs[0].type.context
class GpuAveragePoolGrad(CGpuKernelBase):
    """
    Implement the grad of average pooling on the gpu.

    Parameters
    ----------
    ignore_border : bool
        Whether borders were ignored in the forward pooling.
    mode : str
        One of 'sum', 'average_inc_pad', 'average_exc_pad'.
        'average' is accepted as an alias for 'average_inc_pad'.
    ndim : int
        Number of pooled dimensions, 2 or 3.
    """
    __props__ = ('ignore_border', 'mode', 'ndim')
    def __init__(self, ignore_border, mode='average_inc_pad', ndim=2):
        # BUG FIX: the previous default, mode='max', always failed the
        # assertion below, so constructing this Op without an explicit
        # mode raised AssertionError.  Default to a valid averaging mode.
        self.ndim = ndim
        self.ignore_border = ignore_border
        if mode == 'average':
            mode = 'average_inc_pad'
        self.mode = mode
        CGpuKernelBase.__init__(self, ['pool_ave_grad.c'],
                                'APPLY_SPECIFIC(ave_pool_grad)')
        assert mode in ('sum', 'average_inc_pad', 'average_exc_pad')
        assert ndim in [2, 3]
    def c_headers(self):
        return ['gpuarray_api.h', 'gpuarray_helper.h', 'numpy_compat.h']
    def c_header_dirs(self):
        return [os.path.dirname(__file__), pygpu.get_include()]
    def make_node(self, inp, out_grad, ws, stride, pad):
        # inp: forward input, out_grad: gradient w.r.t. the pooled
        # output.  Both must share one GPU context and have rank
        # self.ndim + 2.
        ctx_name = infer_context_name(inp, out_grad)
        inp = as_gpuarray_variable(inp, ctx_name)
        assert (inp.ndim == self.ndim + 2)
        out_grad = as_gpuarray_variable(out_grad, ctx_name)
        assert (out_grad.ndim == self.ndim + 2)
        assert (out_grad.ndim == inp.ndim)
        ws = as_tensor_variable(ws)
        stride = as_tensor_variable(stride)
        pad = as_tensor_variable(pad)
        assert ws.type.ndim == stride.type.ndim and ws.type.ndim == pad.type.ndim
        assert ws.type.ndim == 1
        return Apply(self, [inp, out_grad, ws, stride, pad], [inp.type()])
    def get_params(self, node):
        return node.inputs[0].type.context
    def get_op_params(self):
        # C preprocessor defines consumed by pool_ave_grad.c.
        inc_pad = int(self.mode == 'average_inc_pad')
        sum_mode = int(self.mode == 'sum')
        return [('INC_PAD', inc_pad),
                ('SUM_MODE', sum_mode)]
class GpuDownsampleFactorMaxGradGrad(CGpuKernelBase):
    """
    Implement the grad of downsample with max on the gpu.

    Only ``mode='max'`` is supported; ``ndim`` selects 2d or 3d pooling.
    """
    __props__ = ('ignore_border', 'mode', 'ndim')
    def __init__(self, ignore_border, mode='max', ndim=2):
        self.ndim = ndim
        self.ignore_border = ignore_border
        self.mode = mode
        CGpuKernelBase.__init__(self, ['pool_grad_grad.c'],
                                'APPLY_SPECIFIC(pool_grad_grad)')
        assert self.mode == 'max'
        assert self.ndim in [2, 3]
    def c_headers(self):
        return ['gpuarray_api.h', 'gpuarray_helper.h', 'numpy_compat.h']
    def c_header_dirs(self):
        return [os.path.dirname(__file__), pygpu.get_include()]
    def make_node(self, inp, out, out_grad, ws, stride, pad):
        # inp: forward input, out: forward pooled output,
        # out_grad: gradient w.r.t. `out`.
        # BUG FIX: the ndim assertions for `out` and `out_grad` were
        # swapped relative to their conversions — `out_grad.ndim` was
        # checked before as_gpuarray_variable() had been applied to it.
        # Each variable is now checked right after its own conversion.
        ctx_name = infer_context_name(inp, out, out_grad)
        inp = as_gpuarray_variable(inp, ctx_name)
        assert (inp.ndim == self.ndim + 2)
        out = as_gpuarray_variable(out, ctx_name)
        assert (out.ndim == self.ndim + 2)
        out_grad = as_gpuarray_variable(out_grad, ctx_name)
        assert (out_grad.ndim == self.ndim + 2)
        assert (out_grad.ndim == inp.ndim)
        assert (inp.ndim == out.ndim)
        ws = as_tensor_variable(ws)
        stride = as_tensor_variable(stride)
        pad = as_tensor_variable(pad)
        assert ws.type.ndim == stride.type.ndim and ws.type.ndim == pad.type.ndim
        assert ws.type.ndim == 1
        return Apply(self, [inp, out, out_grad, ws, stride, pad], [inp.type()])
    def get_params(self, node):
        return node.inputs[0].type.context
\ No newline at end of file
...@@ -2,28 +2,21 @@ from __future__ import absolute_import, print_function, division ...@@ -2,28 +2,21 @@ from __future__ import absolute_import, print_function, division
from unittest import TestCase from unittest import TestCase
from nose.plugins.skip import SkipTest from nose.plugins.skip import SkipTest
import itertools import itertools
import copy
import numpy
import theano import theano
from theano import gradient
from theano import tensor from theano import tensor
from theano.tests import unittest_tools as utt from theano.tests import unittest_tools as utt
from theano.tensor.blas import gemv_inplace, gemm_inplace, _dot22, batched_dot from theano.tensor.blas import gemv_inplace, gemm_inplace, _dot22, batched_dot
from theano.tensor.tests.test_blas import TestGer, BaseGemv from theano.tensor.tests.test_blas import TestGer, BaseGemv
from theano.tensor.signal.pool import (Pool, MaxPoolGrad, AveragePoolGrad,
DownsampleFactorMaxGradGrad)
from .. import gpuarray_shared_constructor from .. import gpuarray_shared_constructor
from .config import mode_with_gpu, mode_without_gpu from .config import mode_with_gpu
from .test_basic_ops import makeTester, rand from .test_basic_ops import makeTester, rand
from ..blas import (gpugemv_inplace, gpugemv_no_inplace, from ..blas import (gpugemv_inplace, gpugemv_no_inplace,
gpugemm_inplace, gpugemmbatch_no_inplace, gpugemm_inplace, gpugemmbatch_no_inplace,
gpuger_inplace, gpuger_no_inplace, gpuger_inplace, gpuger_no_inplace,
GpuGer, gpu_dot22, GpuPool, GpuMaxPoolGrad, GpuGer, gpu_dot22)
GpuAveragePoolGrad, GpuDownsampleFactorMaxGradGrad)
GpuGemvTester = makeTester( GpuGemvTester = makeTester(
...@@ -135,199 +128,3 @@ GpuDot22Tester = makeTester( ...@@ -135,199 +128,3 @@ GpuDot22Tester = makeTester(
# test9=[rand(0, 0), rand(0, 0)], # test9=[rand(0, 0), rand(0, 0)],
) )
) )
def test_pool2d():
    """Compare 2d GPU pooling (forward, grad, grad-grad) to the CPU reference.

    For every shape/window/stride/pad/mode combination, builds the same
    graph under a GPU mode and a CPU reference mode, checks the expected
    Ops appear in each compiled graph, and checks numerical agreement.
    """
    # Shapes cover trailing dims of 1, odd sizes, and >1024 extents
    # (to exercise GPU kernel block-size limits).
    shps = [(1, 12),
            (1, 1, 12),
            (1, 1, 1, 12),
            (1, 1, 2, 2),
            (1, 1, 1, 1),
            (1, 1, 4, 4),
            (1, 1, 10, 11),
            (1, 2, 2, 2),
            (3, 5, 4, 4),
            (25, 1, 7, 7),
            (1, 1, 12, 12),
            (1, 1, 2, 14),
            (1, 1, 12, 14),
            (1, 1, 14, 14),
            (1, 1, 16, 16),
            (1, 1, 18, 18),
            (1, 1, 24, 24),
            (1, 6, 24, 24),
            (10, 1, 24, 24),
            (10, 6, 24, 24),
            (30, 6, 12, 12),
            (30, 2, 24, 24),
            (30, 6, 24, 24),
            (10, 10, 10, 11),
            (1, 1, 10, 1025),
            (1, 1, 10, 1023),
            (1, 1, 1025, 10),
            (1, 1, 1023, 10), ]
    # Shuffle deterministically so failures are reproducible via the seed.
    numpy.random.RandomState(utt.fetch_seed()).shuffle(shps)
    test_ws = (2, 2), (3, 2), (1, 1)
    test_st = (2, 2), (3, 2), (1, 1)
    test_mode = ['max', 'sum', 'average_inc_pad', 'average_exc_pad']
    ref_mode = copy.copy(mode_without_gpu)
    ref_mode.check_py_code = False
    gpu_mode = copy.copy(mode_with_gpu)
    gpu_mode.check_py_code = False
    for shp in shps:
        for mode, ws, st in itertools.product(test_mode, test_ws, test_st):
            # Skip windows larger than the pooled dimensions.
            if ws[0] > shp[-2] or ws[1] > shp[-1]:
                continue
            for ignore_border, pad in zip((True, False), [(1, 1), (0, 0)]):
                if pad[0] >= ws[0] or pad[1] >= ws[1]:
                    continue
                # average_exc_pad does not support padding.
                if mode == 'average_exc_pad' and (pad[0] > 0 or pad[1] > 0):
                    continue
                # print('test_pool2d', shp, ws, st, pad, mode, ignore_border)
                ds_op = Pool(ndim=len(ws), mode=mode, ignore_border=ignore_border)
                a = theano.shared(rand(*shp), 'a')
                a_pooled = ds_op(tensor.as_tensor_variable(a), ws, st, pad)
                # Forward: GPU graph must use GpuPool, reference must use Pool.
                f = theano.function([], a_pooled, mode=gpu_mode)
                f2 = theano.function([], a_pooled, mode=ref_mode)
                assert any([isinstance(node.op, GpuPool)
                            for node in f.maker.fgraph.toposort()])
                assert any([isinstance(node.op, Pool)
                            for node in f2.maker.fgraph.toposort()])
                assert numpy.allclose(f(), f2()), (shp, ws, st, pad, mode, ignore_border)
                # Gradient: expected grad Op depends on the pooling mode.
                a_pooled_grad = tensor.grad(a_pooled.sum(), a)
                g = theano.function([], a_pooled_grad, mode=gpu_mode)
                g2 = theano.function([], a_pooled_grad, mode=ref_mode)
                if mode == 'max':
                    gop = GpuMaxPoolGrad
                    gop2 = MaxPoolGrad
                else:
                    gop = GpuAveragePoolGrad
                    gop2 = AveragePoolGrad
                assert any([isinstance(node.op, gop)
                            for node in g.maker.fgraph.toposort()])
                assert any([isinstance(node.op, gop2)
                            for node in g2.maker.fgraph.toposort()])
                assert numpy.allclose(g(), g2()), (shp, ws, st, pad, mode, ignore_border)
                # test grad grad for max pooling
                # for average pooling grad grad is just average pooling grad
                if mode != 'max':
                    continue
                ggf = gradient.Lop(tensor.grad((a_pooled**2).sum(), a), a, a)
                gg = theano.function([], ggf, mode=gpu_mode)
                gg2 = theano.function([], ggf, mode=ref_mode)
                assert any([
                    isinstance(node.op, GpuDownsampleFactorMaxGradGrad)
                    for node in gg.maker.fgraph.toposort()
                ])
                assert any([
                    isinstance(node.op, DownsampleFactorMaxGradGrad)
                    for node in gg2.maker.fgraph.toposort()
                ])
                assert numpy.allclose(gg(), gg2()), (shp, ws, st, pad, mode, ignore_border)
def test_pool3d():
    """Compare 3d GPU pooling (forward, grad, grad-grad) to the CPU reference.

    Same structure as test_pool2d, with 5d inputs and 3-element
    window/stride/pad vectors.
    """
    # Shapes cover unit dims, odd sizes, and >1024 extents
    # (to exercise GPU kernel block-size limits).
    shps = [(1, 1, 12),
            (1, 1, 1, 1, 1),
            (1, 1, 1, 1, 1025),
            (1, 1, 2, 2, 2),
            (1, 1, 7, 7, 7),
            (1, 1, 9, 10, 11),
            (1, 6, 18, 18, 18),
            (1, 1, 6, 24, 24),
            (1, 10, 1, 24, 24),
            (1, 10, 6, 24, 24),
            (1, 30, 6, 12, 12),
            (1, 30, 2, 24, 24),
            (1, 30, 6, 24, 24),
            (1, 10, 10, 10, 11),
            (1, 1, 10, 10, 1025),
            (1, 1, 10, 10, 1023),
            (1, 1, 10, 1025, 10),
            (1, 1, 10, 1023, 10), ]
    # Shuffle deterministically so failures are reproducible via the seed.
    numpy.random.RandomState(utt.fetch_seed()).shuffle(shps)
    test_ws = (2, 2, 2), (3, 2, 3), (1, 1, 1)
    test_st = (2, 2, 2), (2, 3, 2), (1, 1, 1)
    test_mode = ['max', 'sum', 'average_inc_pad', 'average_exc_pad']
    ref_mode = copy.copy(mode_without_gpu)
    ref_mode.check_py_code = False
    gpu_mode = copy.copy(mode_with_gpu)
    gpu_mode.check_py_code = False
    for shp in shps:
        for mode, ws, st in itertools.product(test_mode, test_ws, test_st):
            # Skip windows larger than the pooled dimensions.
            if ws[0] > shp[-3] or ws[1] > shp[-2] or ws[2] > shp[-1]:
                continue
            for ignore_border, pad in zip((True, False), [(1, 1, 1), (0, 0, 0)]):
                if pad[0] >= ws[0] or pad[1] >= ws[1] or pad[2] >= ws[2]:
                    continue
                # average_exc_pad does not support padding.
                if mode == 'average_exc_pad' and (pad[0] > 0 or pad[1] > 0 or pad[2] > 0):
                    continue
                # print('test_pool3d', shp, ws, st, pad, mode, ignore_border)
                ds_op = Pool(ndim=len(ws), mode=mode, ignore_border=ignore_border)
                a = theano.shared(rand(*shp), 'a')
                a_pooled = ds_op(tensor.as_tensor_variable(a), ws, st, pad)
                # Forward: GPU graph must use GpuPool, reference must use Pool.
                f = theano.function([], a_pooled, mode=gpu_mode)
                f2 = theano.function([], a_pooled, mode=ref_mode)
                assert any([isinstance(node.op, GpuPool)
                            for node in f.maker.fgraph.toposort()])
                assert any([isinstance(node.op, Pool)
                            for node in f2.maker.fgraph.toposort()])
                assert numpy.allclose(f(), f2()), (shp, ws, st, pad, mode, ignore_border)
                # Gradient: expected grad Op depends on the pooling mode.
                a_pooled_grad = tensor.grad(a_pooled.sum(), a)
                g = theano.function([], a_pooled_grad, mode=gpu_mode)
                g2 = theano.function([], a_pooled_grad, mode=ref_mode)
                if mode == 'max':
                    gop = GpuMaxPoolGrad
                    gop2 = MaxPoolGrad
                else:
                    gop = GpuAveragePoolGrad
                    gop2 = AveragePoolGrad
                assert any([isinstance(node.op, gop)
                            for node in g.maker.fgraph.toposort()])
                assert any([isinstance(node.op, gop2)
                            for node in g2.maker.fgraph.toposort()])
                assert numpy.allclose(g(), g2()), (shp, ws, st, pad, mode, ignore_border)
                # test grad grad for max pooling
                # for average pooling grad grad is just average pooling grad
                if mode != 'max':
                    continue
                ggf = gradient.Lop(tensor.grad((a_pooled**2).sum(), a), a, a)
                gg = theano.function([], ggf, mode=gpu_mode)
                gg2 = theano.function([], ggf, mode=ref_mode)
                assert any([
                    isinstance(node.op, GpuDownsampleFactorMaxGradGrad)
                    for node in gg.maker.fgraph.toposort()
                ])
                assert any([
                    isinstance(node.op, DownsampleFactorMaxGradGrad)
                    for node in gg2.maker.fgraph.toposort()
                ])
                assert numpy.allclose(gg(), gg2()), (shp, ws, st, pad, mode, ignore_border)
from __future__ import absolute_import, print_function, division
import copy
import itertools
import numpy
import theano
from theano import gradient
from theano import tensor
from theano.tensor.signal.pool import (Pool, MaxPoolGrad, AveragePoolGrad,
DownsampleFactorMaxGradGrad)
from theano.tests import unittest_tools as utt
from .config import mode_with_gpu, mode_without_gpu
from .test_basic_ops import rand
from ..pool import (GpuPool, GpuMaxPoolGrad, GpuAveragePoolGrad,
GpuDownsampleFactorMaxGradGrad)
def test_pool2d():
    """Compare 2d GPU pooling (forward, grad, grad-grad) to the CPU reference.

    For every shape/window/stride/pad/mode combination, builds the same
    graph under a GPU mode and a CPU reference mode, checks the expected
    Ops appear in each compiled graph, and checks numerical agreement.
    """
    # Shapes cover trailing dims of 1, odd sizes, and >1024 extents
    # (to exercise GPU kernel block-size limits).
    shps = [(1, 12),
            (1, 1, 12),
            (1, 1, 1, 12),
            (1, 1, 2, 2),
            (1, 1, 1, 1),
            (1, 1, 4, 4),
            (1, 1, 10, 11),
            (1, 2, 2, 2),
            (3, 5, 4, 4),
            (25, 1, 7, 7),
            (1, 1, 12, 12),
            (1, 1, 2, 14),
            (1, 1, 12, 14),
            (1, 1, 14, 14),
            (1, 1, 16, 16),
            (1, 1, 18, 18),
            (1, 1, 24, 24),
            (1, 6, 24, 24),
            (10, 1, 24, 24),
            (10, 6, 24, 24),
            (30, 6, 12, 12),
            (30, 2, 24, 24),
            (30, 6, 24, 24),
            (10, 10, 10, 11),
            (1, 1, 10, 1025),
            (1, 1, 10, 1023),
            (1, 1, 1025, 10),
            (1, 1, 1023, 10), ]
    # Shuffle deterministically so failures are reproducible via the seed.
    numpy.random.RandomState(utt.fetch_seed()).shuffle(shps)
    test_ws = (2, 2), (3, 2), (1, 1)
    test_st = (2, 2), (3, 2), (1, 1)
    test_mode = ['max', 'sum', 'average_inc_pad', 'average_exc_pad']
    ref_mode = copy.copy(mode_without_gpu)
    ref_mode.check_py_code = False
    gpu_mode = copy.copy(mode_with_gpu)
    gpu_mode.check_py_code = False
    for shp in shps:
        for mode, ws, st in itertools.product(test_mode, test_ws, test_st):
            # Skip windows larger than the pooled dimensions.
            if ws[0] > shp[-2] or ws[1] > shp[-1]:
                continue
            for ignore_border, pad in zip((True, False), [(1, 1), (0, 0)]):
                if pad[0] >= ws[0] or pad[1] >= ws[1]:
                    continue
                # average_exc_pad does not support padding.
                if mode == 'average_exc_pad' and (pad[0] > 0 or pad[1] > 0):
                    continue
                # print('test_pool2d', shp, ws, st, pad, mode, ignore_border)
                ds_op = Pool(ndim=len(ws), mode=mode, ignore_border=ignore_border)
                a = theano.shared(rand(*shp), 'a')
                a_pooled = ds_op(tensor.as_tensor_variable(a), ws, st, pad)
                # Forward: GPU graph must use GpuPool, reference must use Pool.
                f = theano.function([], a_pooled, mode=gpu_mode)
                f2 = theano.function([], a_pooled, mode=ref_mode)
                assert any([isinstance(node.op, GpuPool)
                            for node in f.maker.fgraph.toposort()])
                assert any([isinstance(node.op, Pool)
                            for node in f2.maker.fgraph.toposort()])
                assert numpy.allclose(f(), f2()), (shp, ws, st, pad, mode, ignore_border)
                # Gradient: expected grad Op depends on the pooling mode.
                a_pooled_grad = tensor.grad(a_pooled.sum(), a)
                g = theano.function([], a_pooled_grad, mode=gpu_mode)
                g2 = theano.function([], a_pooled_grad, mode=ref_mode)
                if mode == 'max':
                    gop = GpuMaxPoolGrad
                    gop2 = MaxPoolGrad
                else:
                    gop = GpuAveragePoolGrad
                    gop2 = AveragePoolGrad
                assert any([isinstance(node.op, gop)
                            for node in g.maker.fgraph.toposort()])
                assert any([isinstance(node.op, gop2)
                            for node in g2.maker.fgraph.toposort()])
                assert numpy.allclose(g(), g2()), (shp, ws, st, pad, mode, ignore_border)
                # test grad grad for max pooling
                # for average pooling grad grad is just average pooling grad
                if mode != 'max':
                    continue
                ggf = gradient.Lop(tensor.grad((a_pooled**2).sum(), a), a, a)
                gg = theano.function([], ggf, mode=gpu_mode)
                gg2 = theano.function([], ggf, mode=ref_mode)
                assert any([
                    isinstance(node.op, GpuDownsampleFactorMaxGradGrad)
                    for node in gg.maker.fgraph.toposort()
                ])
                assert any([
                    isinstance(node.op, DownsampleFactorMaxGradGrad)
                    for node in gg2.maker.fgraph.toposort()
                ])
                assert numpy.allclose(gg(), gg2()), (shp, ws, st, pad, mode, ignore_border)
def test_pool3d():
    """Compare 3d GPU pooling (forward, grad, grad-grad) to the CPU reference.

    Same structure as test_pool2d, with 5d inputs and 3-element
    window/stride/pad vectors.
    """
    # Shapes cover unit dims, odd sizes, and >1024 extents
    # (to exercise GPU kernel block-size limits).
    shps = [(1, 1, 12),
            (1, 1, 1, 1, 1),
            (1, 1, 1, 1, 1025),
            (1, 1, 2, 2, 2),
            (1, 1, 7, 7, 7),
            (1, 1, 9, 10, 11),
            (1, 6, 18, 18, 18),
            (1, 1, 6, 24, 24),
            (1, 10, 1, 24, 24),
            (1, 10, 6, 24, 24),
            (1, 30, 6, 12, 12),
            (1, 30, 2, 24, 24),
            (1, 30, 6, 24, 24),
            (1, 10, 10, 10, 11),
            (1, 1, 10, 10, 1025),
            (1, 1, 10, 10, 1023),
            (1, 1, 10, 1025, 10),
            (1, 1, 10, 1023, 10), ]
    # Shuffle deterministically so failures are reproducible via the seed.
    numpy.random.RandomState(utt.fetch_seed()).shuffle(shps)
    test_ws = (2, 2, 2), (3, 2, 3), (1, 1, 1)
    test_st = (2, 2, 2), (2, 3, 2), (1, 1, 1)
    test_mode = ['max', 'sum', 'average_inc_pad', 'average_exc_pad']
    ref_mode = copy.copy(mode_without_gpu)
    ref_mode.check_py_code = False
    gpu_mode = copy.copy(mode_with_gpu)
    gpu_mode.check_py_code = False
    for shp in shps:
        for mode, ws, st in itertools.product(test_mode, test_ws, test_st):
            # Skip windows larger than the pooled dimensions.
            if ws[0] > shp[-3] or ws[1] > shp[-2] or ws[2] > shp[-1]:
                continue
            for ignore_border, pad in zip((True, False), [(1, 1, 1), (0, 0, 0)]):
                if pad[0] >= ws[0] or pad[1] >= ws[1] or pad[2] >= ws[2]:
                    continue
                # average_exc_pad does not support padding.
                if mode == 'average_exc_pad' and (pad[0] > 0 or pad[1] > 0 or pad[2] > 0):
                    continue
                # print('test_pool3d', shp, ws, st, pad, mode, ignore_border)
                ds_op = Pool(ndim=len(ws), mode=mode, ignore_border=ignore_border)
                a = theano.shared(rand(*shp), 'a')
                a_pooled = ds_op(tensor.as_tensor_variable(a), ws, st, pad)
                # Forward: GPU graph must use GpuPool, reference must use Pool.
                f = theano.function([], a_pooled, mode=gpu_mode)
                f2 = theano.function([], a_pooled, mode=ref_mode)
                assert any([isinstance(node.op, GpuPool)
                            for node in f.maker.fgraph.toposort()])
                assert any([isinstance(node.op, Pool)
                            for node in f2.maker.fgraph.toposort()])
                assert numpy.allclose(f(), f2()), (shp, ws, st, pad, mode, ignore_border)
                # Gradient: expected grad Op depends on the pooling mode.
                a_pooled_grad = tensor.grad(a_pooled.sum(), a)
                g = theano.function([], a_pooled_grad, mode=gpu_mode)
                g2 = theano.function([], a_pooled_grad, mode=ref_mode)
                if mode == 'max':
                    gop = GpuMaxPoolGrad
                    gop2 = MaxPoolGrad
                else:
                    gop = GpuAveragePoolGrad
                    gop2 = AveragePoolGrad
                assert any([isinstance(node.op, gop)
                            for node in g.maker.fgraph.toposort()])
                assert any([isinstance(node.op, gop2)
                            for node in g2.maker.fgraph.toposort()])
                assert numpy.allclose(g(), g2()), (shp, ws, st, pad, mode, ignore_border)
                # test grad grad for max pooling
                # for average pooling grad grad is just average pooling grad
                if mode != 'max':
                    continue
                ggf = gradient.Lop(tensor.grad((a_pooled**2).sum(), a), a, a)
                gg = theano.function([], ggf, mode=gpu_mode)
                gg2 = theano.function([], ggf, mode=ref_mode)
                assert any([
                    isinstance(node.op, GpuDownsampleFactorMaxGradGrad)
                    for node in gg.maker.fgraph.toposort()
                ])
                assert any([
                    isinstance(node.op, DownsampleFactorMaxGradGrad)
                    for node in gg2.maker.fgraph.toposort()
                ])
                assert numpy.allclose(gg(), gg2()), (shp, ws, st, pad, mode, ignore_border)
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论