Commit 09010219 authored by Maxim Kochurov, committed by Maxim Kochurov

remove pytensor.tensor.signal module as it was marked deprecated

Parent: b6fb55d5
"""
Contains a wrapper function for tensor.nnet.ConvOp, which can be used to perform
generic 2D convolution.
"""
import logging
import warnings
from pytensor import tensor as at
from pytensor.tensor.nnet import conv
from pytensor.tensor.shape import reshape
# The whole `pytensor.tensor.signal` package is deprecated: warn once at
# import time so downstream users see it when they first load this module.
warnings.warn(
    "The module `pytensor.tensor.signal` is deprecated and will "
    "be removed from PyTensor in version 2.9.0.",
    DeprecationWarning,
    stacklevel=2,
)

__docformat__ = "restructuredtext en"

# Module-level logger; the name mirrors the module's import path.
_logger = logging.getLogger("pytensor.tensor.signal.conv")
def conv2d(
    input,
    filters,
    image_shape=None,
    filter_shape=None,
    border_mode="valid",
    subsample=(1, 1),
    **kargs,
):
    """
    signal.conv.conv2d performs a basic 2D convolution of the input with the
    given filters. The input parameter can be a single 2D image or a 3D tensor,
    containing a set of images. Similarly, filters can be a single 2D filter or
    a 3D tensor, corresponding to a set of 2D filters.

    Shape parameters are optional and will result in faster execution.

    Parameters
    ----------
    input : Symbolic pytensor tensor for images to be filtered.
        Dimensions: ([num_images], image height, image width)
    filters : Symbolic pytensor tensor for convolution filter(s).
        Dimensions: ([num_filters], filter height, filter width)
    border_mode: {'valid', 'full'}
        See scipy.signal.convolve2d.
    subsample
        Factor by which to subsample output.
    image_shape : tuple of length 2 or 3
        ([num_images,] image height, image width).
    filter_shape : tuple of length 2 or 3
        ([num_filters,] filter height, filter width).
    kwargs
        See pytensor.tensor.nnet.conv.conv2d.
        (Note: the parameter is actually spelled ``kargs`` in the signature;
        extra keyword arguments are forwarded to ``conv.ConvOp`` unchanged.)

    Returns
    -------
    symbolic 2D,3D or 4D tensor
        Tensor of filtered images, with shape
        ([number images,] [number filters,] image height, image width).
    """
    # Only single images/filters (2D) or stacks of them (3D) are supported.
    assert input.ndim in (2, 3)
    assert filters.ndim in (2, 3)

    # use shape information if it is given to us ###
    # Static shapes are only forwarded to ConvOp when BOTH are provided;
    # ConvOp can use them to pick a faster implementation.
    if filter_shape and image_shape:
        if input.ndim == 3:
            bsize = image_shape[0]
        else:
            bsize = 1
        # Single input channel: the leading 1 is the channel dimension.
        imshp = (1,) + tuple(image_shape[-2:])
        if filters.ndim == 3:
            nkern = filter_shape[0]
        else:
            nkern = 1
        kshp = filter_shape[-2:]
    else:
        nkern, kshp = None, None
        bsize, imshp = None, None

    # reshape tensors to 4D, for compatibility with ConvOp ###
    # Symbolic batch / filter counts, falling back to 1 for the 2D case.
    if input.ndim == 3:
        sym_bsize = input.shape[0]
    else:
        sym_bsize = 1
    if filters.ndim == 3:
        sym_nkern = filters.shape[0]
    else:
        sym_nkern = 1

    # (batch, 1 channel, height, width)
    new_input_shape = at.join(0, at.stack([sym_bsize, 1]), input.shape[-2:])
    input4D = reshape(input, new_input_shape, ndim=4)

    # (num filters, 1 channel, filter height, filter width)
    new_filter_shape = at.join(0, at.stack([sym_nkern, 1]), filters.shape[-2:])
    filters4D = reshape(filters, new_filter_shape, ndim=4)

    # perform actual convolution ###
    op = conv.ConvOp(
        output_mode=border_mode,
        dx=subsample[0],
        dy=subsample[1],
        imshp=imshp,
        kshp=kshp,
        nkern=nkern,
        bsize=bsize,
        **kargs,
    )
    output = op(input4D, filters4D)

    # flatten to 3D tensor if convolving with single filter or single image
    # ConvOp always returns a 4D result; drop the singleton batch/filter axes
    # introduced above. The transpose trick keeps the trailing image dims.
    if input.ndim == 2 and filters.ndim == 2:
        output = at.flatten(output.T, ndim=2).T
    elif input.ndim == 2 or filters.ndim == 2:
        output = at.flatten(output.T, ndim=3).T

    return output
"""
Ops for downsampling images.
Planned:
Pool, DownsampleAvg, DownsampleSoftmax.
"""
import itertools
import warnings
import numpy as np
import pytensor.tensor.basic as at
import pytensor.tensor.math as tm
from pytensor.gradient import DisconnectedType
from pytensor.graph.basic import Apply, Constant, Variable
from pytensor.graph.utils import MethodNotDefined
from pytensor.link.c.op import OpenMPOp
from pytensor.link.c.params_type import ParamsType
from pytensor.scalar import bool as bool_t
from pytensor.tensor.type import TensorType, int_dtypes
# Import-time deprecation warning for the pooling half of the package.
# NOTE(review): this message names version 2.8.5 while the conv module's
# warning names 2.9.0 — the removal versions are inconsistent; confirm which
# is intended.
warnings.warn(
    "The module `pytensor.tensor.signal` is deprecated and will "
    "be removed from PyTensor in version 2.8.5.",
    DeprecationWarning,
    stacklevel=2,
)
def max_pool_2d_same_size(input, patch_size):
    """
    Zero out every non-maximum entry of each non-overlapping patch.

    Takes a 4-D tensor and, for each (patch_size[0], patch_size[1]) patch of
    the last two dimensions, keeps only the maximum value and sets the rest
    to zero. The result has exactly the same shape as the input.

    Parameters
    ----------
    input : 4-D pytensor tensor of input images
        Input images. Max pooling will be done over the 2 last dimensions.
    patch_size : tuple of length 2 or pytensor vector of ints of size 2.
        Size of the patch (patch height, patch width).
        (2,2) will retain only one non-zero value per patch of 4 values.
    """
    # Pool down to the per-patch maxima, then scatter them back to their
    # original positions via the max-pool gradient with the pooled output
    # reused as the incoming gradient.
    pooled = Pool(True)(input, patch_size)
    return MaxPoolGrad(True)(input, pooled, pooled, patch_size)
def pool_2d(
    input,
    ws=None,
    ignore_border=None,
    stride=None,
    pad=(0, 0),
    mode="max",
    ds=None,
    st=None,
    padding=None,
):
    """Downscale the input by a specified factor

    Takes as input a N-D tensor, where N >= 2. It downscales the input image by
    the specified factor, by keeping only the maximum value of non-overlapping
    patches of size (ws[0],ws[1])

    Parameters
    ----------
    input : N-D pytensor tensor of input images
        Input images. Max pooling will be done over the 2 last dimensions.
    ws : tuple of length 2 or pytensor vector of ints of size 2.
        Factor by which to downscale (vertical ws, horizontal ws).
        (2,2) will halve the image in each dimension.
    ignore_border : bool (default None, will print a warning and set to False)
        When True, (5,5) input with ws=(2,2) will generate a (2,2) output.
        (3,3) otherwise.
    stride : tuple of two ints or pytensor vector of ints of size 2.
        Stride size, which is the number of shifts over rows/cols to get the
        next pool region. If stride is None, it is considered equal to ws
        (no overlap on pooling regions), eg: stride=(1,1) will shifts over
        one row and one col for every iteration.
    pad : tuple of two ints or pytensor vector of ints of size 2.
        (pad_h, pad_w), pad zeros to extend beyond four borders of the
        images, pad_h is the size of the top and bottom margins, and
        pad_w is the size of the left and right margins.
    mode : {'max', 'sum', 'average_inc_pad', 'average_exc_pad'}
        Operation executed on each window. `max` and `sum` always exclude
        the padding in the computation. `average` gives you the choice to
        include or exclude it.
    ds
        *deprecated*, use parameter ws instead.
    st
        *deprecated*, use parameter stride instead.
    padding
        *deprecated*, use parameter pad instead.
    """
    # check for deprecated parameter names
    # Each legacy name (ds/st/padding) either conflicts with its replacement
    # (error) or is accepted with a DeprecationWarning and copied over.
    if ds is not None:
        if ws is not None:
            raise ValueError(
                "You can't provide a tuple value to both 'ws' and 'ds'."
                " Please provide a value only to 'ws'."
            )
        else:
            warnings.warn(
                "The 'ds' parameter is not going to exist"
                " anymore as it is going to be replaced by the parameter"
                " 'ws'.",
                category=DeprecationWarning,
                stacklevel=2,
            )
            ws = ds
    elif ds is None and ws is None:
        raise ValueError("You must provide a tuple value for the window size.")

    if st is not None:
        if stride is not None:
            raise ValueError(
                "You can't provide a tuple value to both 'st and 'stride'."
                " Please provide a value only to 'stride'."
            )
        else:
            warnings.warn(
                "The 'st' parameter is not going to exist"
                " anymore as it is going to be replaced by the parameter"
                " 'stride'.",
                category=DeprecationWarning,
                stacklevel=2,
            )
            stride = st

    if padding is not None:
        # pad defaults to (0, 0), so an untouched pad does not conflict.
        if pad not in {None, (0, 0)}:
            raise ValueError(
                "You can't provide a tuple value to both 'padding' and pad."
                " Please provide a value only to pad."
            )
        else:
            warnings.warn(
                "The 'padding' parameter is not going to exist"
                " anymore as it is going to be replaced by the parameter"
                " 'pad'.",
                category=DeprecationWarning,
                stacklevel=2,
            )
            pad = padding

    if input.ndim < 2:
        raise NotImplementedError("pool_2d requires a dimension >= 2")

    # Default for ignore_border is transitional: warn and behave as False.
    if ignore_border is None:
        warnings.warn(
            "pool_2d() will have the parameter ignore_border"
            " default value changed to True (currently"
            " False). To have consistent behavior with all PyTensor"
            " version, explicitly add the parameter ignore_border=True.",
            category=DeprecationWarning,
            stacklevel=2,
        )
        ignore_border = False

    op = Pool(ignore_border, ndim=2, mode=mode)
    output = op(input, ws, stride, pad)
    return output
def pool_3d(
    input,
    ws=None,
    ignore_border=None,
    stride=None,
    pad=(0, 0, 0),
    mode="max",
    ds=None,
    st=None,
    padding=None,
):
    """Downscale the input by a specified factor

    Takes as input a N-D tensor, where N >= 3. It downscales the input image by
    the specified factor, by keeping only the maximum value of non-overlapping
    patches of size (ws[0],ws[1],ws[2])

    Parameters
    ----------
    input : N-D pytensor tensor of input images
        Input images. Max pooling will be done over the 3 last dimensions.
    ws : tuple of length 3 or pytensor vector of ints of size 3
        Factor by which to downscale (vertical ws, horizontal ws, depth ws).
        (2,2,2) will halve the image in each dimension.
    ignore_border : bool (default None, will print a warning and set to False)
        When True, (5,5,5) input with ws=(2,2,2) will generate a (2,2,2) output.
        (3,3,3) otherwise.
    stride : tuple of three ints or pytensor vector of ints of size 3
        Stride size, which is the number of shifts over rows/cols/slices to get
        the next pool region. If stride is None, it is considered equal to ws
        (no overlap on pooling regions).
    pad : tuple of three ints or pytensor vector of ints of size 3
        (pad_h, pad_w, pad_d), pad zeros to extend beyond six borders of the
        images, pad_h is the size of the top and bottom margins,
        pad_w is the size of the left and right margins, and pad_d is the size
        of the front and back margins
    mode : {'max', 'sum', 'average_inc_pad', 'average_exc_pad'}
        Operation executed on each window. `max` and `sum` always exclude
        the padding in the computation. `average` gives you the choice to
        include or exclude it.
    ds
        *deprecated*, use parameter ws instead.
    st
        *deprecated*, use parameter stride instead.
    padding
        *deprecated*, use parameter pad instead.
    """
    # check for deprecated parameter names
    # Same validation scheme as pool_2d, with 3-element tuples.
    if ds is not None:
        if ws is not None:
            raise ValueError(
                "You can't provide a tuple value to both 'ws' and 'ds'."
                " Please provide a value only to 'ws'."
            )
        else:
            warnings.warn(
                "The 'ds' parameter is not going to exist"
                " anymore as it is going to be replaced by the parameter"
                " 'ws'.",
                category=DeprecationWarning,
                stacklevel=2,
            )
            ws = ds
    elif ds is None and ws is None:
        raise ValueError("You must provide a tuple value for the window size.")

    if st is not None:
        if stride is not None:
            raise ValueError(
                "You can't provide a tuple value to both 'st and 'stride'."
                " Please provide a value only to 'stride'."
            )
        else:
            warnings.warn(
                "The 'st' parameter is not going to exist"
                " anymore as it is going to be replaced by the parameter"
                " 'stride'.",
                category=DeprecationWarning,
                stacklevel=2,
            )
            stride = st

    if padding is not None:
        if pad not in {None, (0, 0, 0)}:
            raise ValueError(
                "You can't provide a tuple value to both 'padding' and pad."
                " Please provide a value only to pad."
            )
        else:
            warnings.warn(
                "The 'padding' parameter is not going to exist"
                " anymore as it is going to be replaced by the parameter"
                " 'pad'.",
                category=DeprecationWarning,
                stacklevel=2,
            )
            pad = padding

    if input.ndim < 3:
        raise NotImplementedError("pool_3d requires a dimension >= 3")

    # Default for ignore_border is transitional: warn and behave as False.
    if ignore_border is None:
        warnings.warn(
            "pool_3d() will have the parameter ignore_border"
            " default value changed to True (currently"
            " False). To have consistent behavior with all PyTensor"
            " version, explicitly add the parameter ignore_border=True.",
            category=DeprecationWarning,
            stacklevel=2,
        )
        ignore_border = False

    op = Pool(ignore_border, ndim=3, mode=mode)
    output = op(input, ws, stride, pad)
    return output
class Pool(OpenMPOp):
    """
    sum or average over different patches.

    Parameters
    ----------
    ws : list or tuple of N ints
        Downsample factor over rows, columns etc.
        ws indicates the size of the pooling region.
    ignore_border : bool
        If ws doesn't divide imgshape, do we include an extra row/col/slice
        of partial downsampling (False) or ignore it (True).
    stride : list or tuple of N ints or None
        Stride size, which is the number of shifts over rows/cols/slices to get the
        next pool region. If stride is None, it is considered equal to ws
        (no overlap on pooling regions).
    pad : tuple of N ints or None
        For each downsampling dimension, this specifies the number of zeros to
        add as padding on both sides. For 2D and (pad_h, pad_w), pad_h specifies the
        size of the top and bottom margins, pad_w specifies the size of the left and
        right margins. No padding is added if pad is None.
    mode : {'max', 'sum', 'average_inc_pad', 'average_exc_pad'}
        ('average_exc_pad' excludes the padding from the count,
        'average_inc_pad' includes it)
    ndim : int
        The number of pooling dimensions N.
        The default is 2.
    ds
        *deprecated*, use parameter ws instead.
    st
        *deprecated*, use parameter stride instead.
    padding
        *deprecated*, use parameter pad instead.
    """

    __props__ = ("ignore_border", "mode", "ndim")
    # Only ignore_border is needed by the generated C code at runtime;
    # mode/ndim are baked into the code itself.
    params_type = ParamsType(
        ignore_border=bool_t,
    )
    @staticmethod
    def out_shape(
        imgshape,
        ws=None,
        ignore_border=False,
        stride=None,
        pad=None,
        ndim=2,
        ds=None,
        st=None,
        padding=None,
    ):
        """
        Return the shape of the output from this op, for input of given
        shape and flags.

        Parameters
        ----------
        imgshape : tuple, list, or similar of integer or scalar PyTensor variable
            The shape of a tensor of images. The last N elements are
            interpreted as the number of rows, and the number of cols.
        ws : list or tuple of N ints
            Downsample factor over rows and column.
            ws indicates the pool region size.
        ignore_border : bool
            If ws doesn't divide imgshape, do we include an extra row/col/slice
            of partial downsampling (False) or ignore it (True).
        stride : list or tuple of N ints or None
            Stride size, which is the number of shifts over rows/cols/slices to get the
            next pool region. If stride is None, it is considered equal to ws
            (no overlap on pooling regions).
        pad : tuple of N ints or None
            For each downsampling dimension, this specifies the number of zeros to
            add as padding on both sides. For 2D and (pad_h, pad_w), pad_h specifies the
            size of the top and bottom margins, pad_w specifies the size of the left and
            right margins. No padding is added if pad is None.
        ndim : int
            The number of pooling dimensions N.
            The default is 2.
        ds
            *deprecated*, use parameter ws instead.
        st
            *deprecated*, use parameter stride instead.
        padding
            *deprecated*, use parameter pad instead.

        Returns
        -------
        list
            The shape of the output from this op, for input of given shape.
            This will have the same length as imgshape, but with last N
            elements reduced as per the downsampling & ignore_border flags.
        """
        # check for deprecated parameter names
        if ds is not None:
            if ws is not None:
                raise ValueError(
                    "You can't provide a tuple value to both 'ws' and 'ds'."
                    " Please provide a value only to 'ws'."
                )
            else:
                warnings.warn(
                    "The 'ds' parameter is not going to exist"
                    " anymore as it is going to be replaced by the parameter"
                    " 'ws'.",
                    category=DeprecationWarning,
                    stacklevel=2,
                )
                ws = ds
        elif ds is None and ws is None:
            raise ValueError("You must provide a tuple value for the window size.")

        if st is not None:
            if stride is not None:
                raise ValueError(
                    "You can't provide a tuple value to both 'st and 'stride'."
                    " Please provide a value only to 'stride'."
                )
            else:
                warnings.warn(
                    "The 'st' parameter is not going to exist"
                    " anymore as it is going to be replaced by the parameter"
                    " 'stride'.",
                    category=DeprecationWarning,
                    stacklevel=2,
                )
                stride = st

        if padding is not None:
            # An explicit all-zero pad counts as "not provided".
            zero_pad = (0,) * ndim
            if pad not in {None, zero_pad}:
                raise ValueError(
                    "You can't provide a tuple value to both 'padding' and pad."
                    " Please provide a value only to pad."
                )
            else:
                warnings.warn(
                    "The 'padding' parameter is not going to"
                    " exist anymore as it is going to be replaced by the"
                    " parameter 'pad'.",
                    category=DeprecationWarning,
                    stacklevel=2,
                )
                pad = padding

        if ndim is None:
            ndim = 2
        assert ndim > 0
        if len(imgshape) < ndim:
            raise TypeError(f"imgshape must have at least {ndim} dimensions")

        if stride is None:
            stride = ws
        if pad is None:
            pad = (0,) * ndim

        # Size of each pooled dimension after padding both sides.
        patch_shape = tuple(
            at.extract_constant(imgshape[-ndim + i]) + pad[i] * 2 for i in range(ndim)
        )

        def compute_out(v, downsample, stride):
            # Works for both Python ints and symbolic Variables; the symbolic
            # branches mirror the numeric ones.
            if ignore_border:
                if downsample == stride:
                    return v // stride
                else:
                    out = (v - downsample) // stride + 1
                    if isinstance(out, Variable):
                        return tm.maximum(out, 0)
                    else:
                        return np.maximum(out, 0)
            else:
                # Partial windows at the border contribute one extra output.
                if isinstance(v, Variable):
                    return at.switch(
                        tm.ge(stride, downsample),
                        (v - 1) // stride + 1,
                        tm.maximum(0, (v - 1 - downsample) // stride + 1) + 1,
                    )
                elif stride >= downsample:
                    return (v - 1) // stride + 1
                else:
                    return max(0, (v - 1 - downsample + stride) // stride) + 1

        out_shape = [compute_out(patch_shape[i], ws[i], stride[i]) for i in range(ndim)]

        # Non-pooled leading dimensions pass through unchanged.
        rval = list(imgshape[:-ndim]) + out_shape
        return rval
def __init__(self, ignore_border=False, mode="max", ndim=2, openmp=None):
super().__init__(openmp=openmp)
self.ndim = ndim
self.ignore_border = ignore_border
if mode == "max_deterministic":
# It seems max pool algo is already deterministic in CPU.
mode = "max"
if mode not in ("max", "average_inc_pad", "average_exc_pad", "sum"):
raise ValueError(
"Pool mode parameter only support 'max', 'sum',"
f" 'average_inc_pad' and 'average_exc_pad'. Got {mode}"
)
self.mode = mode
def prepare_node(self, node, storage_map, compute_map, impl):
if len(node.inputs) == 1:
# Old interface
self.ndim = len(node.op.ds)
self.mode = node.op.mode
ws = at.constant(node.op.ds)
st = at.constant(node.op.st)
pad = at.constant(node.op.padding)
node.inputs.append(ws)
node.inputs.append(st)
node.inputs.append(pad)
if isinstance(ws, Constant):
storage_map[ws] = [ws.data]
compute_map[ws] = [True]
else:
storage_map[ws] = [None]
compute_map[ws] = [False]
if isinstance(st, Constant):
storage_map[st] = [st.data]
compute_map[st] = [True]
else:
storage_map[st] = [None]
compute_map[st] = [False]
if isinstance(pad, Constant):
storage_map[pad] = [pad.data]
compute_map[pad] = [True]
else:
storage_map[pad] = [None]
compute_map[pad] = [False]
    def make_node(self, x, ws, stride=None, pad=None):
        """Build the Apply node: inputs are (x, ws, stride, pad).

        ``stride`` defaults to ``ws`` and ``pad`` defaults to all-zero.
        Padding is only supported with ``ignore_border=True`` and must be
        strictly smaller than the window size in every dimension.
        """
        # TODO: consider restricting the dtype?
        x = at.as_tensor_variable(x)
        nd = self.ndim
        if stride is None:
            stride = ws
        if pad is None:
            pad = (0,) * nd
        elif isinstance(pad, (tuple, list)):
            if max(pad) != 0 and not self.ignore_border:
                raise NotImplementedError("padding works only with ignore_border=True")
            if isinstance(ws, (tuple, list)):
                # NOTE(review): the check compares pad against the window size
                # (ws), but the message says "strides" — the wording looks
                # misleading; confirm before changing the user-facing text.
                if any(pad[i] >= ws[i] for i in range(nd)):
                    raise NotImplementedError("padding must be smaller than strides")
        ws = at.as_tensor_variable(ws)
        stride = at.as_tensor_variable(stride)
        pad = at.as_tensor_variable(pad)
        assert ws.ndim == 1
        assert stride.ndim == 1
        assert pad.ndim == 1
        if x.type.ndim < nd:
            raise TypeError()
        if ws.dtype not in int_dtypes:
            raise TypeError("Pool downsample parameters must be ints.")
        if stride.dtype not in int_dtypes:
            raise TypeError("Stride parameters must be ints.")
        if pad.dtype not in int_dtypes:
            raise TypeError("Padding parameters must be ints.")
        # If the input shape are broadcastable we can have 0 in the output shape
        # Leading (non-pooled) dims keep a static size of 1 where known; the
        # pooled trailing dims are unknown until runtime.
        out_shape = tuple(
            1 if s == 1 else None for s in x.type.shape[:-nd] + (None,) * nd
        )
        out = TensorType(x.dtype, shape=out_shape)
        return Apply(self, [x, ws, stride, pad], [out()])
def perform(self, node, inp, out, params):
x, ws, stride, pad = inp
(z,) = out
nd = self.ndim
assert ws.shape == stride.shape == pad.shape == (nd,)
if len(x.shape) < nd:
raise NotImplementedError(
f"Pool requires input with {nd} or more dimensions"
)
z_shape = self.out_shape(x.shape, ws, params.ignore_border, stride, pad, nd)
if not params.ignore_border:
assert all(z > 0 for z in z_shape[-nd:])
if (z[0] is None) or (z[0].shape != z_shape):
z[0] = np.empty(z_shape, dtype=x.dtype)
zz = z[0]
# size of pooling output
pool_out_shp = zz.shape[-nd:]
img_shp = tuple(x.shape[-nd + i] + 2 * pad[i] for i in range(nd))
inc_pad = self.mode == "average_inc_pad"
# pad the image
if max(pad) != 0:
y = np.zeros(x.shape[:-nd] + img_shp, dtype=x.dtype)
y[
(slice(None),) * (len(x.shape) - nd)
+ tuple(slice(pad[i], img_shp[i] - pad[i]) for i in range(nd))
] = x
else:
y = x
func = np.max
if self.mode == "sum":
func = np.sum
elif self.mode != "max":
func = np.average
# precompute the region boundaries for each dimension
region_slices = [[] for i in range(nd)]
for i in range(nd):
for j in range(pool_out_shp[i]):
start = j * stride[i]
end = min(start + ws[i], img_shp[i])
if not inc_pad:
start = max(start, pad[i])
end = min(end, img_shp[i] - pad[i])
region_slices[i].append(slice(start, end))
# iterate over non-pooling dimensions
for k in np.ndindex(*x.shape[:-nd]):
zzk = zz[k]
yk = y[k]
# iterate over pooling regions
for r in np.ndindex(*pool_out_shp):
zzk[r] = func(yk[[region_slices[i][r[i]] for i in range(nd)]])
def infer_shape(self, fgraph, node, in_shapes):
ws, stride, pad = [node.inputs[1], node.inputs[2], node.inputs[3]]
shp = self.out_shape(
in_shapes[0], ws, self.ignore_border, stride, pad, self.ndim
)
return [shp]
def L_op(self, inputs, outputs, grads):
x, ws, stride, pad = inputs
(gz,) = grads
disc = [DisconnectedType()() for i in inputs[1:]]
if self.mode == "max":
return [
MaxPoolGrad(ndim=self.ndim, ignore_border=self.ignore_border)(
x, outputs[0], gz, ws=ws, stride=stride, pad=pad
)
] + disc
else:
return [
AveragePoolGrad(
ndim=self.ndim, ignore_border=self.ignore_border, mode=self.mode
)(x, gz, ws=ws, stride=stride, pad=pad)
] + disc
def connection_pattern(self, node):
return [[1], [0], [0], [0]]
def R_op(self, inputs, eval_points):
if self.mode != "max":
# Rop for average or sum is simply pooling evaluated at eval point
eval_inputs = [eval_points[0]] + inputs[1:]
return [self(*eval_inputs)]
# R_op can receive None as eval_points.
# That mean there is no diferientiable path through that input
# If this imply that you cannot compute some outputs,
# return None for those.
if eval_points[0] is None:
return [None]
z = self(*inputs)
x, ws, stride, pad = inputs
return [
DownsampleFactorMaxGradGrad(self.ignore_border, self.mode, self.ndim)(
x, z, eval_points[0], ws, stride, pad
)
]
def c_headers(self, **kwargs):
headers = ["<algorithm>"]
headers += super().c_headers(**kwargs)
return headers
    def c_code(self, node, name, inp, out, sub):
        """Emit the C implementation of the pooling loop.

        The code is assembled from string templates filled via old-style
        ``%`` formatting (``return ccode % locals()`` at the end), with one
        nested loop emitted per pooling dimension. The C string contents are
        runtime output and must not be altered.
        """
        if self.mode not in ("max", "sum", "average_exc_pad", "average_inc_pad"):
            raise MethodNotDefined()
        x, ws, stride, pad = inp
        (z,) = out
        nd = self.ndim
        total_ndim = node.inputs[0].ndim
        non_pool_ndim = total_ndim - nd
        fail = sub["fail"]
        params = sub["params"]
        if self.openmp:
            # run in parallel over each pooling block
            omp_parallel = "#pragma omp parallel for private(r_st, r_end, r_idx, i_idx, o_idx, collector) schedule(static)"
        else:
            omp_parallel = ""
        # Prologue: validate input ranks/sizes, compute output shape z[],
        # (re)allocate the output array, and set up the index scratch arrays.
        ccode = """
int typenum = PyArray_ObjectType((PyObject*)%(x)s, 0);
if(PyArray_NDIM(%(x)s)!=%(total_ndim)s)
{
PyErr_SetString(PyExc_ValueError, "x must be a %(total_ndim)sD ndarray");
%(fail)s;
}
if(PyArray_DIM(%(ws)s, 0)!=%(nd)s)
{
PyErr_SetString(PyExc_ValueError, "ws must be a vector of size %(nd)s");
%(fail)s;
}
if(PyArray_DIM(%(stride)s, 0)!=%(nd)s)
{
PyErr_SetString(PyExc_ValueError, "stride must be a vector of size %(nd)s");
%(fail)s;
}
if(PyArray_DIM(%(pad)s, 0)!=%(nd)s)
{
PyErr_SetString(PyExc_ValueError, "pad must be a vector of size %(nd)s");
%(fail)s;
}
npy_intp z[%(nd)s]; // shape of the output
npy_intp r[%(nd)s]; // shape of the padded_input
npy_intp ws[%(nd)s];
npy_intp st[%(nd)s];
npy_intp pd[%(nd)s];
int nonzero_padding;
nonzero_padding = 0;
for (int i=0; i<%(nd)s; i++)
{
ws[i] = *((dtype_%(ws)s*)PyArray_GETPTR1(%(ws)s, i));
st[i] = *((dtype_%(stride)s*)PyArray_GETPTR1(%(stride)s, i));
pd[i] = *((dtype_%(pad)s*)PyArray_GETPTR1(%(pad)s, i));
r[i] = PyArray_DIMS(%(x)s)[%(non_pool_ndim)s + i] + 2 * pd[i];
if (pd[i]>0)
nonzero_padding = 1;
}
if (!%(params)s->ignore_border && nonzero_padding)
{
PyErr_SetString(PyExc_ValueError,
"padding must be zero when ignore border is False");
%(fail)s;
}
if (%(params)s->ignore_border)
{
for (int i=0; i<%(nd)s; i++)
{
// '/' in C is different from '/' in python
if (r[i] - ws[i] < 0)
{
z[i] = 0;
}
else
{
z[i] = (r[i] - ws[i]) / st[i] + 1;
}
}
}
else
{
for (int i=0; i<%(nd)s; i++)
{
// decide how many rows/cols the output has
if (st[i] >= ws[i])
{
z[i] = (r[i] - 1) / st[i] + 1;
}
else
{
z[i] = std::max((npy_intp)0, (r[i] - 1 - ws[i] + st[i]) / st[i]) + 1;
}
assert(z[i] > 0);
}
}
// memory allocation of z if necessary
int mem_nec;
mem_nec = 0;
if ((!%(z)s) || *PyArray_DIMS(%(z)s)!=%(total_ndim)s)
{
mem_nec = 1;
}
if (!mem_nec)
{
for (int i=0; i<%(non_pool_ndim)s; i++)
{
if (PyArray_DIMS(%(z)s)[i] != PyArray_DIMS(%(x)s)[i])
{
mem_nec = 1;
break;
}
}
}
if (!mem_nec)
{
for (int i=0; i<%(nd)s; i++)
{
if (PyArray_DIMS(%(z)s)[%(non_pool_ndim)s + i] != z[i])
{
mem_nec = 1;
break;
}
}
}
if (mem_nec)
{
if (%(z)s) Py_XDECREF(%(z)s);
npy_intp dims[%(total_ndim)s];
for (int i=0; i<%(non_pool_ndim)s; i++)
{
dims[i] = PyArray_DIMS(%(x)s)[i];
}
for (int i=0; i<%(nd)s; i++)
{
dims[%(non_pool_ndim)s + i] = z[i];
}
//TODO: zeros not necessary
%(z)s = (PyArrayObject*) PyArray_ZEROS(%(total_ndim)s, dims, typenum,0);
}
// initialize temp var for the value in a region
dtype_%(x)s collector;
npy_intp z_prod;
// do not run if any z[i] is zero
z_prod = 1;
for (int i=0; i<%(nd)s; i++)
{
z_prod *= z[i];
}
if (z_prod)
{
// will be used to hold start and end index of a region
npy_intp r_st[%(nd)s];
npy_intp r_end[%(nd)s];
// index for iterating over the pooling regions
npy_intp r_idx[%(nd)s];
// placeholder for PyArray indexing (output)
npy_intp o_idx[%(total_ndim)s];
// placeholder for PyArray indexing (input)
npy_intp i_idx[%(total_ndim)s];
// loop over non-pooling dimensions
npy_intp non_pooling_prod = 1;
for (int i=0; i<%(non_pool_ndim)s; i++)
{
non_pooling_prod *= PyArray_DIMS(%(x)s)[i];
}
%(omp_parallel)s
// first loop over non-pooling dimensions
for (npy_intp t=0; t<non_pooling_prod; t++)
{
// compute the non-pooling index in each dimension
if (%(non_pool_ndim)s!=0)
{
o_idx[0] = t;
i_idx[0] = t;
for (int i=1; i<%(non_pool_ndim)s; i++)
{
o_idx[i] = o_idx[i - 1] / PyArray_DIMS(%(x)s)[i - 1];
o_idx[i - 1] = o_idx[i - 1] %% PyArray_DIMS(%(x)s)[i - 1];
i_idx[i] = o_idx[i];
i_idx[i - 1] = o_idx[i - 1];
}
}
// then loop over each region in each pooling dimension
"""
        # One window-iteration loop header per pooling dimension: compute the
        # clipped [r_st, r_end) range in un-padded input coordinates.
        for i in range(nd):
            ccode += """
for (r_idx[%(i)s]=0; r_idx[%(i)s] < z[%(i)s]; r_idx[%(i)s]++) {
r_st[%(i)s] = r_idx[%(i)s] * st[%(i)s];
r_end[%(i)s] = r_st[%(i)s] + ws[%(i)s];
// skip the padding
r_st[%(i)s] = r_st[%(i)s] < pd[%(i)s] ? pd[%(i)s] : r_st[%(i)s];
r_end[%(i)s] = r_end[%(i)s] > (r[%(i)s] - pd[%(i)s]) ? r[%(i)s] - pd[%(i)s] : r_end[%(i)s];
// from padded_img space to img space
r_st[%(i)s] -= pd[%(i)s];
r_end[%(i)s] -= pd[%(i)s];
// handle the case where no padding, ignore border is True
if (%(params)s->ignore_border)
{
r_end[%(i)s] = r_end[%(i)s] > r[%(i)s] ? r[%(i)s] : r_end[%(i)s];
}
// use the index to find the correct position in the output
o_idx[%(non_pool_ndim)s + %(i)s] = r_idx[%(i)s];
""" % dict(
                i=i, non_pool_ndim=non_pool_ndim, params=sub["params"]
            )

        # Fetch a pointer to the output element for the current window.
        ccode += """
// get a pointer to the correct position in the output
dtype_%(z)s * z;
if (%(total_ndim)s == 4)
z = ((dtype_%(z)s*)(PyArray_GETPTR4(%(z)s, o_idx[0], o_idx[1], o_idx[2], o_idx[3])));
else
z = ((dtype_%(z)s*)(PyArray_GetPtr(%(z)s, o_idx)));
"""
        if self.mode == "max":
            # Seed the collector with the first element of the window, then
            # scan the window updating the running maximum.
            for i in range(nd):
                ccode += """
// set the first index of dimension %(i)s
i_idx[%(non_pool_ndim)s + %(i)s] = r_st[%(i)s];
""" % dict(
                    i=i, non_pool_ndim=non_pool_ndim
                )
            ccode += """
// use the first element as the initial value of collector
if (%(total_ndim)s == 4)
collector = ((dtype_%(x)s*)(PyArray_GETPTR4(%(x)s,i_idx[0],i_idx[1],i_idx[2],i_idx[3])))[0];
else
collector = ((dtype_%(x)s*)(PyArray_GetPtr(%(x)s,i_idx)))[0];
"""
            for i in range(nd):
                ccode += """
// go through the pooled region in the unpadded input
for(npy_intp m%(i)s=r_st[%(i)s]; m%(i)s<r_end[%(i)s]; m%(i)s++)
{
i_idx[%(non_pool_ndim)s + %(i)s] = m%(i)s;
""" % dict(
                    i=i, non_pool_ndim=non_pool_ndim
                )
            ccode += """
// update maximum
dtype_%(x)s a;
if (%(total_ndim)s == 4)
a = ((dtype_%(x)s*)(PyArray_GETPTR4(%(x)s,i_idx[0],i_idx[1],i_idx[2],i_idx[3])))[0];
else
a = ((dtype_%(x)s*)(PyArray_GetPtr(%(x)s,i_idx)))[0];
collector = (a > collector) ? a : collector;
"""
            for i in range(nd):
                ccode += """
} // for loop over region
"""
            ccode += """
z[0] = collector;
"""
        elif self.mode in ("sum", "average_exc_pad", "average_inc_pad"):
            # Accumulate the window sum, then optionally divide by the
            # region size for the average modes.
            ccode += """
// initialize the sum at zero
collector = ((dtype_%(x)s)(0));
"""
            for i in range(nd):
                ccode += """
// go through the pooled region in the unpadded input
for(npy_intp m%(i)s=r_st[%(i)s]; m%(i)s<r_end[%(i)s]; m%(i)s++)
{
i_idx[%(non_pool_ndim)s + %(i)s] = m%(i)s;
""" % dict(
                    i=i, non_pool_ndim=non_pool_ndim
                )
            ccode += """
// update sum
dtype_%(x)s a;
if (%(total_ndim)s == 4)
a = ((dtype_%(x)s*)(PyArray_GETPTR4(%(x)s,i_idx[0],i_idx[1],i_idx[2],i_idx[3])))[0];
else
a = ((dtype_%(x)s*)(PyArray_GetPtr(%(x)s,i_idx)))[0];
collector += a;
"""
            for i in range(nd):
                ccode += """
} // for loop over region
"""
            if self.mode == "sum":
                ccode += """
z[0] = collector;
"""
            elif self.mode == "average_inc_pad" and self.ignore_border:
                # region size = product over all pooling dimensions
                region_size = " * ".join(f"ws[{i}]" for i in range(nd))
                ccode += """
z[0] = collector / (%(region_size)s);
""" % dict(
                    region_size=region_size
                )
            else:
                # region size = number elements of in this region
                region_size = " * ".join(f"(r_end[{i}]-r_st[{i}])" for i in range(nd))
                ccode += """
z[0] = collector / (%(region_size)s);
""" % dict(
                    region_size=region_size
                )
        # Close one loop per pooling dimension, then the outer structure.
        for i in range(nd):
            ccode += """
} // loop over pooling dimension
"""
        ccode += """
} // for loop over non-pooling dimensions
} // if z_prod
"""
        return ccode % locals()
    def c_code_cache_version(self):
        # Bump the integer whenever the generated C code changes; the openmp
        # flag is part of the key because it changes the emitted pragma.
        return (10, self.openmp)
class PoolGrad(OpenMPOp):
    """Shared base class for the gradient Ops of `Pool`.

    Subclasses (e.g. MaxPoolGrad, AveragePoolGrad) provide the actual
    gradient computation; this base holds the common shape logic and the
    legacy-interface upgrade path.
    """

    __props__ = ("ignore_border", "mode", "ndim")
    @staticmethod
    def out_shape(
        imgshape,
        ws=None,
        ignore_border=False,
        stride=None,
        pad=None,
        ndim=2,
        ds=None,
        st=None,
        padding=None,
    ):
        """Return the shape of the output from this op, for input of given
        shape and flags.

        Parameters
        ----------
        imgshape : tuple of integers or scalar PyTensor variables
            the shape of a tensor of images. The last N elements are
            interpreted as the downsampling dimensions.
        ws : tuple of N ints
            downsample factor over rows and columns this parameter
            indicates the size of the pooling region
        ignore_border : bool
            If ws doesn't divide imgshape, do we include an extra row/col/slice
            of partial downsampling (False) or ignore it (True).
        stride : list or tuple of N ints or None
            Stride size, which is the number of shifts over rows/cols/slices to get the
            next pool region. If stride is None, it is considered equal to ws
            (no overlap on pooling regions).
        pad : tuple of N ints or None
            For each downsampling dimension, this specifies the number of zeros to
            add as padding on both sides. For 2D and (pad_h, pad_w), pad_h specifies the
            size of the top and bottom margins, pad_w specifies the size of the left and
            right margins. No padding is added if pad is None.
        ndim : int
            The number of pooling dimensions N.
            The default is 2.
        ds
            *deprecated*, use parameter ws instead.
        st
            *deprecated*, use parameter stride instead.
        padding
            *deprecated*, use parameter pad instead.

        Returns
        -------
        list :
            the shape of the output from this op, for input of given
            shape. This will have the same length as imgshape, but
            with last N elements reduced as per the downsampling &
            ignore_border flags.
        """
        # check for deprecated parameter names
        if ds is not None:
            if ws is not None:
                raise ValueError(
                    "You can't provide a tuple value to both 'ws' and 'ds'."
                    " Please provide a value only to 'ws'."
                )
            else:
                warnings.warn(
                    "The 'ds' parameter in PoolGrad is not going"
                    " to exist anymore as it is going to be replaced by the"
                    " parameter 'ws'.",
                    category=DeprecationWarning,
                    stacklevel=2,
                )
                ws = ds
        elif ds is None and ws is None:
            raise ValueError("You must provide a tuple value for the window size.")

        if st is not None:
            if stride is not None:
                raise ValueError(
                    "You can't provide a tuple value to both 'st and 'stride'."
                    " Please provide a value only to 'stride'."
                )
            else:
                warnings.warn(
                    "The 'st' parameter in PoolGrad is not going"
                    " to exist anymore as it is going to be replaced by the"
                    " parameter 'stride'.",
                    category=DeprecationWarning,
                    stacklevel=2,
                )
                stride = st

        if padding is not None:
            # NOTE(review): unlike Pool.out_shape, which tolerates an explicit
            # all-zero `pad` alongside `padding`, any non-None `pad` raises
            # here — confirm whether this asymmetry is intentional.
            if pad is not None:
                raise ValueError(
                    "You can't provide a tuple value to both 'padding' and pad."
                    " Please provide a value only to pad."
                )
            else:
                warnings.warn(
                    "The 'padding' parameter in PoolGrad is not"
                    " going to exist anymore as it is going to be replaced"
                    " by the parameter 'pad'.",
                    category=DeprecationWarning,
                    stacklevel=2,
                )
                pad = padding

        if len(imgshape) < ndim:
            raise TypeError(f"imgshape must have at least {ndim} dimensions")
        if stride is None:
            stride = ws
        if pad is None:
            pad = (0,) * ndim

        # Size of each pooled dimension after padding both sides.
        patch_shape = tuple(
            at.extract_constant(imgshape[-ndim + i]) + pad[i] * 2 for i in range(ndim)
        )

        def compute_out(v, downsample, stride):
            # Same formulas as Pool.out_shape (without its ds == stride
            # shortcut, which yields the same value).
            if ignore_border:
                out = (v - downsample) // stride + 1
                if isinstance(out, Variable):
                    return tm.maximum(out, 0)
                else:
                    return np.maximum(out, 0)
            else:
                if isinstance(v, Variable):
                    return at.switch(
                        tm.ge(stride, downsample),
                        (v - 1) // stride + 1,
                        tm.maximum(0, (v - 1 - downsample) // stride + 1) + 1,
                    )
                elif stride >= downsample:
                    return (v - 1) // stride + 1
                else:
                    return max(0, (v - 1 - downsample) // stride + 1) + 1

        out_shape = [compute_out(patch_shape[i], ws[i], stride[i]) for i in range(ndim)]

        # Non-pooled leading dimensions pass through unchanged.
        rval = list(imgshape[:-ndim]) + out_shape
        return rval
def __init__(self, ignore_border, mode="max", ndim=2, openmp=None):
self.ndim = ndim
self.ignore_border = ignore_border
if mode == "max_deterministic":
# It seems max pool grad algo is already deterministic in CPU.
mode = "max"
if mode not in ("max", "sum", "average_inc_pad", "average_exc_pad"):
raise ValueError(
"Pool mode parameter only support 'max', 'sum',"
" 'average_inc_pad' and 'average_exc_pad'. Got {mode}"
)
self.mode = mode
super().__init__(openmp=openmp)
    def prepare_node(self, node, storage_map, compute_map, impl):
        """Upgrade nodes built with the legacy interface, in place.

        Old graphs stored the window size, stride and padding as op
        attributes (``ds``, ``st``, ``padding``) instead of node inputs.
        When such a node is detected (fewer than 5 inputs), append them as
        constant inputs and register each one in ``storage_map`` /
        ``compute_map`` so the node matches the current interface.
        """
        if len(node.inputs) < 5:  # 5 for AveragePoolGrad, 6 for MaxPoolGrad
            # Old interface
            self.ndim = len(node.op.ds)
            self.mode = node.op.mode
            ws = at.constant(node.op.ds)
            st = at.constant(node.op.st)
            pad = at.constant(node.op.padding)
            node.inputs.append(ws)
            node.inputs.append(st)
            node.inputs.append(pad)
            # For constants the data is already available, so mark them
            # computed; otherwise leave an empty storage cell to be filled.
            if isinstance(ws, Constant):
                storage_map[ws] = [ws.data]
                compute_map[ws] = [True]
            else:
                storage_map[ws] = [None]
                compute_map[ws] = [False]
            if isinstance(st, Constant):
                storage_map[st] = [st.data]
                compute_map[st] = [True]
            else:
                storage_map[st] = [None]
                compute_map[st] = [False]
            if isinstance(pad, Constant):
                storage_map[pad] = [pad.data]
                compute_map[pad] = [True]
            else:
                storage_map[pad] = [None]
                compute_map[pad] = [False]
def infer_shape(self, fgraph, node, in_shapes):
return [in_shapes[0]]
class MaxPoolGrad(PoolGrad):
    """Gradient of max pooling.

    For each pooling region, the output gradient ``gz`` is accumulated into
    every input position whose value equals the region's maximum ``maxout``
    (so on ties, all maximal positions receive the gradient).
    """

    # params_type ignore_border don't change c code
    def __init__(self, ignore_border, ndim=2, openmp=None):
        # Mode is always "max" for this op.
        PoolGrad.__init__(self, ignore_border, mode="max", ndim=ndim, openmp=openmp)

    def make_node(self, x, maxout, gz, ws, stride=None, pad=None):
        """Build the Apply node; the single output has the type of ``x``."""
        # make_node should only be called by the grad function of
        # Pool, so these asserts should not fail.
        x = at.as_tensor_variable(x)
        maxout = at.as_tensor_variable(maxout)
        gz = at.as_tensor_variable(gz)
        nd = self.ndim
        if stride is None:
            # Default stride equals the window size: non-overlapping regions.
            stride = ws
        if pad is None:
            pad = (0,) * nd
        ws = at.as_tensor_variable(ws)
        stride = at.as_tensor_variable(stride)
        pad = at.as_tensor_variable(pad)
        assert isinstance(x, Variable) and x.ndim >= nd
        assert isinstance(maxout, Variable) and maxout.ndim >= nd
        assert isinstance(gz, Variable) and gz.ndim >= nd
        assert isinstance(ws, Variable) and ws.ndim == 1
        assert isinstance(stride, Variable) and stride.ndim == 1
        assert isinstance(pad, Variable) and pad.ndim == 1
        assert x.ndim == maxout.ndim == gz.ndim >= nd
        if ws.dtype not in int_dtypes:
            raise TypeError("Pool downsample parameters must be ints.")
        if stride.dtype not in int_dtypes:
            raise TypeError("Stride parameters must be ints.")
        if pad.dtype not in int_dtypes:
            raise TypeError("Padding parameters must be ints.")
        return Apply(self, [x, maxout, gz, ws, stride, pad], [x.type()])

    def perform(self, node, inp, out):
        """Python fallback: scatter ``gz`` onto the argmax positions of each region."""
        assert self.mode == "max"
        x, maxout, gz, ws, stride, pad = inp
        (gx_stg,) = out
        nd = self.ndim
        assert ws.shape == stride.shape == pad.shape == (nd,)
        if len(x.shape) < nd:
            raise NotImplementedError(
                f"MaxPoolGrad requires input with {nd} or more dimensions"
            )
        pool_out_shp = maxout.shape[-nd:]
        img_shp = tuple(x.shape[-nd + i] + 2 * pad[i] for i in range(nd))
        # pad the image
        if max(pad) != 0:
            y = np.zeros(x.shape[:-nd] + img_shp, dtype=x.dtype)
            y[
                (slice(None),) * (len(x.shape) - nd)
                + tuple(slice(pad[i], img_shp[i] - pad[i]) for i in range(nd))
            ] = x
        else:
            y = x
        gx = np.zeros_like(y)
        # precompute the region boundaries for each dimension
        region_ranges = [[] for i in range(nd)]
        for i in range(nd):
            for j in range(pool_out_shp[i]):
                # clip the region to the non-padding part of the image
                start = max(j * stride[i], pad[i])
                end = min(start + ws[i], img_shp[i])
                region_ranges[i].append(range(start, end))
        # iterate over non-pooling dimensions
        for k in np.ndindex(*x.shape[:-nd]):
            gxk = gx[k]
            gzk = gz[k]
            yk = y[k]
            maxoutk = maxout[k]
            # iterate over pooling regions
            for r in np.ndindex(*pool_out_shp):
                maxout_value = maxoutk[r]
                # iterate inside region
                for c in itertools.product(
                    *[region_ranges[i][r[i]] for i in range(nd)]
                ):
                    if maxout_value == yk[c]:
                        gxk[c] += gzk[r]
        # unpad the image
        gx = gx[
            (slice(None),) * (len(x.shape) - nd)
            + tuple(slice(pad[i], img_shp[i] - pad[i]) for i in range(nd))
        ]
        gx_stg[0] = gx

    def grad(self, inp, grads):
        """Second-order gradient, delegated to `DownsampleFactorMaxGradGrad`."""
        x, maxout, gz, ws, stride, pad = inp
        (ggx,) = grads
        return [
            at.zeros_like(x),
            at.zeros_like(maxout),
            DownsampleFactorMaxGradGrad(
                ndim=self.ndim, ignore_border=self.ignore_border
            )(x, maxout, ggx, ws, stride, pad),
        ] + [DisconnectedType()() for i in inp[3:]]

    def connection_pattern(self, node):
        # Only x, maxout and gz are connected; ws/stride/pad carry no gradient.
        return [[1], [1], [1], [0], [0], [0]]

    def c_code(self, node, name, inp, out, sub):
        """Generate the C implementation (optionally OpenMP-parallelized)."""
        assert self.mode == "max"
        x, z, gz, ws, stride, pad = inp
        (gx,) = out
        nd = self.ndim
        total_ndim = node.inputs[0].ndim
        non_pool_ndim = total_ndim - nd
        fail = sub["fail"]
        if self.openmp:
            # run in parallel over each pooling block
            omp_parallel = "#pragma omp parallel for private(r_st, r_end, r_idx, i_idx, o_idx, maximum) schedule(static)"
        else:
            omp_parallel = ""
        ccode = """
        // sanity checks
        int x_typenum = PyArray_ObjectType((PyObject*)%(x)s, 0);
        int z_typenum = PyArray_ObjectType((PyObject*)%(z)s, 0);
        int gz_typenum = PyArray_ObjectType((PyObject*)%(gz)s, 0);
        if ((x_typenum != z_typenum) || (x_typenum != gz_typenum))
        {
            PyErr_SetString(PyExc_ValueError, "input types must all match");
            %(fail)s;
        }
        if(PyArray_NDIM(%(x)s)!=%(total_ndim)s)
        {
            PyErr_SetString(PyExc_ValueError, "x must be a %(total_ndim)sD ndarray");
            %(fail)s;
        }
        if(PyArray_NDIM(%(z)s)!=%(total_ndim)s)
        {
            PyErr_SetString(PyExc_ValueError, "z must be a %(total_ndim)sD ndarray");
            %(fail)s;
        }
        if(PyArray_NDIM(%(gz)s)!=%(total_ndim)s)
        {
            PyErr_SetString(PyExc_ValueError, "gz must be a %(total_ndim)sD ndarray");
            %(fail)s;
        }
        if(PyArray_DIM(%(ws)s, 0)!=%(nd)s)
        {
            PyErr_SetString(PyExc_ValueError, "ws must be a vector of size %(nd)s");
            %(fail)s;
        }
        if(PyArray_DIM(%(stride)s, 0)!=%(nd)s)
        {
            PyErr_SetString(PyExc_ValueError, "stride must be a vector of size %(nd)s");
            %(fail)s;
        }
        if(PyArray_DIM(%(pad)s, 0)!=%(nd)s)
        {
            PyErr_SetString(PyExc_ValueError, "pad must be a vector of size %(nd)s");
            %(fail)s;
        }
        npy_intp z[%(nd)s]; // shape of the output
        npy_intp r[%(nd)s]; // shape of the padded_input
        npy_intp ws[%(nd)s];
        npy_intp st[%(nd)s];
        npy_intp pd[%(nd)s];
        int nonzero_padding;
        nonzero_padding = 0;
        for (int i=0; i<%(nd)s; i++)
        {
            ws[i] = *((dtype_%(ws)s*)PyArray_GETPTR1(%(ws)s, i));
            st[i] = *((dtype_%(stride)s*)PyArray_GETPTR1(%(stride)s, i));
            pd[i] = *((dtype_%(pad)s*)PyArray_GETPTR1(%(pad)s, i));
            z[i] = PyArray_DIMS(%(z)s)[%(non_pool_ndim)s + i];
            r[i] = PyArray_DIMS(%(x)s)[%(non_pool_ndim)s + i] + 2 * pd[i];
            if (pd[i]>0)
                nonzero_padding = 1;
        }
        // allocating memory for output, if necessary
        int mem_nec;
        mem_nec = 0;
        if ((!%(gx)s) || !PyArray_ISCONTIGUOUS(%(gx)s)
            || *PyArray_DIMS(%(gx)s)!=%(total_ndim)s)
        {
            mem_nec = 1;
        }
        if (!mem_nec)
        {
            for (int i=0; i<%(total_ndim)s; i++)
            {
                if (PyArray_DIMS(%(gx)s)[i] != PyArray_DIMS(%(x)s)[i])
                {
                    mem_nec = 1;
                    break;
                }
            }
        }
        if (mem_nec)
        {
          Py_XDECREF(%(gx)s);
          %(gx)s = (PyArrayObject*) PyArray_ZEROS(%(total_ndim)s, PyArray_DIMS(%(x)s), x_typenum,0);
        }
        else {
          PyArray_FILLWBYTE(%(gx)s, 0);
        }
        dtype_%(z)s maximum; // temp var for maximum value in a region
        npy_intp z_prod;
        // do not run if any z[i] is zero
        z_prod = 1;
        for (int i=0; i<%(nd)s; i++)
        {
            z_prod *= z[i];
        }
        if (z_prod)
        {
            // will be used to hold start and end index of a region
            npy_intp r_st[%(nd)s];
            npy_intp r_end[%(nd)s];
            // index for iterating over the pooling regions
            npy_intp r_idx[%(nd)s];
            // placeholder for PyArray indexing (output)
            npy_intp o_idx[%(total_ndim)s];
            // placeholder for PyArray indexing (input)
            npy_intp i_idx[%(total_ndim)s];
            // loop over non-pooling dimensions
            npy_intp non_pooling_prod = 1;
            for (int i=0; i<%(non_pool_ndim)s; i++)
            {
                non_pooling_prod *= PyArray_DIMS(%(x)s)[i];
            }
            %(omp_parallel)s
            // first loop over non-pooling dimensions
            for (npy_intp t=0; t<non_pooling_prod; t++)
            {
                // compute the non-pooling index in each dimension
                if (%(non_pool_ndim)s!=0)
                {
                    o_idx[0] = t;
                    i_idx[0] = t;
                    for (int i=1; i<%(non_pool_ndim)s; i++)
                    {
                        o_idx[i] = o_idx[i - 1] / PyArray_DIMS(%(x)s)[i - 1];
                        o_idx[i - 1] =o_idx[i - 1] %% PyArray_DIMS(%(x)s)[i - 1];
                        i_idx[i] = o_idx[i];
                        i_idx[i - 1] = o_idx[i - 1];
                    }
                }

                // then loop over each region in each pooling dimension
        """
        # Emit one nested C for-loop per pooling dimension.
        for i in range(nd):
            ccode += """
                for (r_idx[%(i)s]=0; r_idx[%(i)s] < z[%(i)s]; r_idx[%(i)s]++) {
                  r_st[%(i)s] = r_idx[%(i)s] * st[%(i)s];
                  r_end[%(i)s] = r_st[%(i)s] + ws[%(i)s];
                  // skip the padding
                  r_st[%(i)s] = r_st[%(i)s] < pd[%(i)s] ? pd[%(i)s] : r_st[%(i)s];
                  r_end[%(i)s] = r_end[%(i)s] > (r[%(i)s] - pd[%(i)s]) ? r[%(i)s] - pd[%(i)s] : r_end[%(i)s];
                  // from padded_img space to img space
                  r_st[%(i)s] -= pd[%(i)s];
                  r_end[%(i)s] -= pd[%(i)s];
                  // use the index to find the correct position in the output
                  o_idx[%(non_pool_ndim)s + %(i)s] = r_idx[%(i)s];
            """ % dict(
                i=i, non_pool_ndim=non_pool_ndim
            )
        ccode += """
                  dtype_%(gz)s * gz;
                  if (%(total_ndim)s == 4)
                  {
                    // the maximum value
                    maximum = ((dtype_%(z)s*)(PyArray_GETPTR4(%(z)s,o_idx[0],o_idx[1],o_idx[2],o_idx[3])))[0];
                    // the gradient corresponding to this maximum value in z
                    gz = ((dtype_%(gz)s*)(PyArray_GETPTR4(%(gz)s, o_idx[0],o_idx[1],o_idx[2],o_idx[3])));
                  }
                  else
                  {
                    // the maximum value
                    maximum = ((dtype_%(z)s*)(PyArray_GetPtr(%(z)s,o_idx)))[0];
                    // the gradient corresponding to this maximum value in z
                    gz = ((dtype_%(gz)s*)(PyArray_GetPtr(%(gz)s, o_idx)));
                  }
        """
        # Emit the inner per-region scan loops.
        for i in range(nd):
            ccode += """
                  // go through the pooled region in the unpadded input
                  for(npy_intp m%(i)s=r_st[%(i)s]; m%(i)s<r_end[%(i)s]; m%(i)s++)
                  {
                    i_idx[%(non_pool_ndim)s + %(i)s] = m%(i)s;
            """ % dict(
                i=i, non_pool_ndim=non_pool_ndim
            )
        ccode += """
                    dtype_%(x)s a;
                    dtype_%(gx)s * gx;
                    if (%(total_ndim)s == 4)
                    {
                      a = ((dtype_%(x)s*)(PyArray_GETPTR4(%(x)s,i_idx[0],i_idx[1],i_idx[2],i_idx[3])))[0];
                      gx = ((dtype_%(gx)s*)(PyArray_GETPTR4(%(gx)s, i_idx[0],i_idx[1],i_idx[2],i_idx[3])));
                    }
                    else
                    {
                      a = ((dtype_%(x)s*)(PyArray_GetPtr(%(x)s,i_idx)))[0];
                      gx = ((dtype_%(gx)s*)(PyArray_GetPtr(%(gx)s, i_idx)));
                    }
                    if (a == maximum){
                      gx[0] = gx[0] + gz[0];
                    }
        """
        # Close the inner region loops.
        for i in range(nd):
            ccode += """
                  } // for loop over region
            """
        # Close the per-dimension pooling loops.
        for i in range(nd):
            ccode += """
                } // loop over pooling dimension
            """
        ccode += """
            } // for loop over non-pooling dimensions
        } // if z_prod
        """
        return ccode % locals()

    def c_code_cache_version(self):
        # Bump when the generated C code changes.
        return (0, 11, self.openmp)
class AveragePoolGrad(PoolGrad):
    """Gradient of sum / average pooling.

    Each output gradient is spread back (uniformly, after division by the
    region size for the average modes) over the whole pooling region that
    produced it.
    """

    # ignore_border is used for perform, but not c code. No need in params_type
    def __init__(self, ignore_border, mode="average_inc_pad", ndim=2):
        assert mode in ("sum", "average_inc_pad", "average_exc_pad")
        PoolGrad.__init__(self, ignore_border, mode, ndim)

    # There is an extra dummy parameter to match the parameter count
    # of MaxPoolGrad. They have to keep the same interface because of
    # the DownsampleFactorMaxGrad trick to keep old scripts working
    # (see downsample.py for details on this).
    def make_node(self, x, gz, ws, stride=None, pad=None, dummy=None):
        """Build the Apply node; the single output has the type of ``x``."""
        # make_node should only be called by the grad function of
        # Pool, so these asserts should not fail.
        x = at.as_tensor_variable(x)
        gz = at.as_tensor_variable(gz)
        nd = self.ndim
        if stride is None:
            # Default stride equals the window size: non-overlapping regions.
            stride = ws
        if pad is None:
            pad = (0,) * nd
        ws = at.as_tensor_variable(ws)
        stride = at.as_tensor_variable(stride)
        pad = at.as_tensor_variable(pad)
        assert isinstance(x, Variable) and x.ndim >= nd
        assert isinstance(gz, Variable) and gz.ndim >= nd
        assert isinstance(ws, Variable) and ws.ndim == 1
        assert isinstance(stride, Variable) and stride.ndim == 1
        assert x.ndim == gz.ndim >= nd
        assert isinstance(pad, Variable) and pad.ndim == 1
        if ws.dtype not in int_dtypes:
            raise TypeError("Pool downsample parameters must be ints.")
        if stride.dtype not in int_dtypes:
            raise TypeError("Stride parameters must be ints.")
        if pad.dtype not in int_dtypes:
            raise TypeError("Padding parameters must be ints.")
        return Apply(self, [x, gz, ws, stride, pad], [x.type()])

    def perform(self, node, inp, out):
        """Python fallback: spread each ``gz`` value over its pooling region."""
        x, gz, ws, stride, pad = inp
        (gx_stg,) = out
        nd = self.ndim
        assert ws.shape == stride.shape == pad.shape == (nd,)
        if len(x.shape) < nd:
            raise NotImplementedError(
                f"AveragePoolGrad requires input with {nd} or more dimensions"
            )
        if self.mode == "average_exc_pad" and max(pad) != 0:
            raise NotImplementedError()
        z_shape = self.out_shape(x.shape, ws, self.ignore_border, stride, pad, nd)
        if (gx_stg[0] is None) or (gx_stg[0].shape != z_shape):
            gx_stg[0] = np.empty(z_shape, dtype=x.dtype)
        zz = gx_stg[0]
        # size of pooling output
        pool_out_shp = zz.shape[-nd:]
        img_shp = tuple(x.shape[-nd + i] + 2 * pad[i] for i in range(nd))
        inc_pad = self.mode == "average_inc_pad"
        sum_mode = self.mode == "sum"
        # initialize the padded output
        gx = np.zeros((x.shape[:-nd] + img_shp), dtype=x.dtype)
        # precompute the region boundaries and sizes for each dimension
        region_slices = [[] for i in range(nd)]
        region_sizes = [[] for i in range(nd)]
        for i in range(nd):
            for j in range(pool_out_shp[i]):
                if sum_mode or inc_pad:
                    start = j * stride[i]
                else:
                    # average_exc_pad: the region never covers the padding
                    start = max(j * stride[i], pad[i])
                end = min(start + ws[i], img_shp[i])
                region_slices[i].append(slice(start, end))
                region_sizes[i].append(end - start)
        # iterate over non-pooling dimensions
        region_slice = [None] * nd
        for k in np.ndindex(*x.shape[:-nd]):
            gzk = gz[k]
            gxk = gx[k]
            # iterate over pooling regions
            for r in np.ndindex(*pool_out_shp):
                region_size = 1
                for i in range(nd):
                    region_slice[i] = region_slices[i][r[i]]
                    region_size *= region_sizes[i][r[i]]
                if sum_mode:
                    val = gzk[r]
                else:
                    # divide by region size
                    val = gzk[r] / region_size
                # BUG FIX: indexing with a *list* of slices was deprecated in
                # NumPy 1.15 and raises IndexError in modern NumPy; a tuple is
                # the supported form for multidimensional indexing.
                gxk[tuple(region_slice)] += val
        # unpad the image
        gx = gx[
            (slice(None),) * (len(x.shape) - nd)
            + tuple(slice(pad[i], img_shp[i] - pad[i]) for i in range(nd))
        ]
        gx_stg[0] = gx

    def grad(self, inp, grads):
        """Second-order gradient: pooling the incoming gradient again."""
        x, gz, ws, stride, pad = inp
        (ggx,) = grads
        return [
            at.zeros_like(x),
            Pool(ignore_border=self.ignore_border, ndim=self.ndim, mode=self.mode)(
                ggx, ws, stride, pad
            ),
        ] + [DisconnectedType()() for i in inp[2:]]

    def connection_pattern(self, node):
        # Only x and gz are connected; ws/stride/pad carry no gradient.
        return [[1], [1], [0], [0], [0]]

    def c_code(self, node, name, inp, out, sub):
        """Generate the C implementation (optionally OpenMP-parallelized)."""
        x, gz, ws, stride, pad = inp
        (gx,) = out
        nd = self.ndim
        total_ndim = node.inputs[0].ndim
        non_pool_ndim = total_ndim - nd
        fail = sub["fail"]
        inc_pad = int(self.mode == "average_inc_pad")
        sum_mode = int(self.mode == "sum")
        if self.openmp:
            # run in parallel over each pooling block
            omp_parallel = "#pragma omp parallel for private(r_st, r_end, r_pad_width, r_idx, i_idx, o_idx) schedule(static)"
        else:
            omp_parallel = ""
        ccode = """
        // sanity checks
        int x_typenum = PyArray_ObjectType((PyObject*)%(x)s, 0);
        int gz_typenum = PyArray_ObjectType((PyObject*)%(gz)s, 0);
        if (x_typenum != gz_typenum)
        {
            PyErr_SetString(PyExc_ValueError, "input types must all match");
            %(fail)s;
        }
        if(PyArray_NDIM(%(x)s)!=%(total_ndim)s)
        {
            PyErr_SetString(PyExc_ValueError, "x must be a %(total_ndim)sD ndarray");
            %(fail)s;
        }
        if(PyArray_NDIM(%(gz)s)!=%(total_ndim)s)
        {
            PyErr_SetString(PyExc_ValueError, "gz must be a %(total_ndim)sD ndarray");
            %(fail)s;
        }
        if(PyArray_DIM(%(ws)s, 0)!=%(nd)s)
        {
            PyErr_SetString(PyExc_ValueError, "ws must be a vector of size %(nd)s");
            %(fail)s;
        }
        if(PyArray_DIM(%(stride)s, 0)!=%(nd)s)
        {
            PyErr_SetString(PyExc_ValueError, "stride must be a vector of size %(nd)s");
            %(fail)s;
        }
        if(PyArray_DIM(%(pad)s, 0)!=%(nd)s)
        {
            PyErr_SetString(PyExc_ValueError, "pad must be a vector of size %(nd)s");
            %(fail)s;
        }
        npy_intp z[%(nd)s]; // shape of the output
        npy_intp r[%(nd)s]; // shape of the padded_input
        npy_intp ws[%(nd)s];
        npy_intp st[%(nd)s];
        npy_intp pd[%(nd)s];
        int nonzero_padding;
        nonzero_padding = 0;
        for (int i=0; i<%(nd)s; i++)
        {
            ws[i] = *((dtype_%(ws)s*)PyArray_GETPTR1(%(ws)s, i));
            st[i] = *((dtype_%(stride)s*)PyArray_GETPTR1(%(stride)s, i));
            pd[i] = *((dtype_%(pad)s*)PyArray_GETPTR1(%(pad)s, i));
            z[i] = PyArray_DIMS(%(gz)s)[%(non_pool_ndim)s + i];
            r[i] = PyArray_DIMS(%(x)s)[%(non_pool_ndim)s + i] + 2 * pd[i];
            if (pd[i]>0)
                nonzero_padding = 1;
        }
        if (!%(inc_pad)s && !%(sum_mode)s && nonzero_padding)
        {
            PyErr_SetString(PyExc_ValueError,
              "padding must be zero for average_exc_pad");
            %(fail)s;
        }
        // allocating memory for output, if necessary
        int mem_nec;
        mem_nec = 0;
        if ((!%(gx)s) || !PyArray_ISCONTIGUOUS(%(gx)s)
            || *PyArray_DIMS(%(gx)s)!=%(total_ndim)s)
        {
            mem_nec = 1;
        }
        if (!mem_nec)
        {
            for (int i=0; i<%(total_ndim)s; i++)
            {
                if (PyArray_DIMS(%(gx)s)[i] != PyArray_DIMS(%(x)s)[i])
                {
                    mem_nec = 1;
                    break;
                }
            }
        }
        if (mem_nec)
        {
          Py_XDECREF(%(gx)s);
          %(gx)s = (PyArrayObject*) PyArray_ZEROS(%(total_ndim)s, PyArray_DIMS(%(x)s), x_typenum,0);
        }
        else {
          PyArray_FILLWBYTE(%(gx)s, 0);
        }
        npy_intp z_prod;
        // do not run if any z[i] is zero
        z_prod = 1;
        for (int i=0; i<%(nd)s; i++)
        {
            z_prod *= z[i];
        }
        if (z_prod)
        {
            // will be used to hold start and end index of a region
            npy_intp r_st[%(nd)s];
            npy_intp r_end[%(nd)s];
            // padded region size
            npy_intp r_pad_width[%(nd)s];
            // index for iterating over the pooling regions
            npy_intp r_idx[%(nd)s];
            // placeholder for PyArray indexing (output)
            npy_intp o_idx[%(total_ndim)s];
            // placeholder for PyArray indexing (input)
            npy_intp i_idx[%(total_ndim)s];
            // loop over non-pooling dimensions
            npy_intp non_pooling_prod = 1;
            for (int i=0; i<%(non_pool_ndim)s; i++)
            {
                non_pooling_prod *= PyArray_DIMS(%(x)s)[i];
            }
            %(omp_parallel)s
            // first loop over non-pooling dimensions
            for (npy_intp t=0; t<non_pooling_prod; t++)
            {
                // compute the non-pooling index in each dimension
                if (%(non_pool_ndim)s!=0)
                {
                    o_idx[0] = t;
                    i_idx[0] = t;
                    for (int i=1; i<%(non_pool_ndim)s; i++)
                    {
                        o_idx[i] = o_idx[i - 1] / PyArray_DIMS(%(x)s)[i - 1];
                        o_idx[i - 1] =o_idx[i - 1] %% PyArray_DIMS(%(x)s)[i - 1];
                        i_idx[i] = o_idx[i];
                        i_idx[i - 1] = o_idx[i - 1];
                    }
                }

                // then loop over each region in each pooling dimension
        """
        # Emit one nested C for-loop per pooling dimension.
        for i in range(nd):
            ccode += """
                for (r_idx[%(i)s]=0; r_idx[%(i)s] < z[%(i)s]; r_idx[%(i)s]++) {
                  r_st[%(i)s] = r_idx[%(i)s] * st[%(i)s];
                  if (!%(sum_mode)s && !%(inc_pad)s && r_st[%(i)s] < pd[%(i)s])
                  {
                    r_st[%(i)s] = pd[%(i)s];
                  }
                  r_end[%(i)s] = r_st[%(i)s] + ws[%(i)s];
                  r_end[%(i)s] = r_end[%(i)s] > r[%(i)s] ? r[%(i)s] : r_end[%(i)s];
                  r_pad_width[%(i)s] = r_end[%(i)s] - r_st[%(i)s];
                  // from padded_img space to img space
                  r_st[%(i)s] = r_st[%(i)s] - pd[%(i)s] > 0 ? r_st[%(i)s] - pd[%(i)s] : 0;
                  r_end[%(i)s] = r_end[%(i)s] > r[%(i)s] - pd[%(i)s] ? r[%(i)s] - 2 * pd[%(i)s] : r_end[%(i)s] - pd[%(i)s];
                  // use the index to find the correct position in the output
                  o_idx[%(non_pool_ndim)s + %(i)s] = r_idx[%(i)s];
            """ % dict(
                i=i, sum_mode=sum_mode, inc_pad=inc_pad, non_pool_ndim=non_pool_ndim
            )
        ccode += """
                  dtype_%(gz)s * gz;
                  dtype_%(gz)s val;
                  if (%(total_ndim)s == 4)
                  {
                    // the gradient for this region
                    gz = ((dtype_%(gz)s*)(PyArray_GETPTR4(%(gz)s, o_idx[0],o_idx[1],o_idx[2],o_idx[3])));
                  }
                  else
                  {
                    // the gradient for this region
                    gz = ((dtype_%(gz)s*)(PyArray_GetPtr(%(gz)s, o_idx)));
                  }
                  // compute the contribution
                  if (%(sum_mode)s)
                  {
                    val = gz[0];
                  }
                  else
                  {
                    val = gz[0] / (%(region_size)s);
                  }
        """
        # Product of the per-dimension padded region widths (the divisor).
        region_size = " * ".join(f"r_pad_width[{i}]" for i in range(nd))
        # Emit the inner per-region scan loops.
        for i in range(nd):
            ccode += """
                  // go through the pooled region in the unpadded input
                  for(npy_intp m%(i)s=r_st[%(i)s]; m%(i)s<r_end[%(i)s]; m%(i)s++)
                  {
                    i_idx[%(non_pool_ndim)s + %(i)s] = m%(i)s;
            """ % dict(
                i=i, non_pool_ndim=non_pool_ndim
            )
        ccode += """
                    dtype_%(gx)s * gx;
                    if (%(total_ndim)s == 4)
                    {
                      gx = ((dtype_%(gx)s*)(PyArray_GETPTR4(%(gx)s, i_idx[0],i_idx[1],i_idx[2],i_idx[3])));
                    }
                    else
                    {
                      gx = ((dtype_%(gx)s*)(PyArray_GetPtr(%(gx)s, i_idx)));
                    }
                    gx[0] = gx[0] + val;
        """
        # Close the inner region loops.
        for i in range(nd):
            ccode += """
                  } // for loop over region
            """
        # Close the per-dimension pooling loops.
        for i in range(nd):
            ccode += """
                } // loop over pooling dimension
            """
        ccode += """
            } // for loop over non-pooling dimensions
        } // if z_prod
        """
        return ccode % locals()

    def c_code_cache_version(self):
        # Bump when the generated C code changes.
        return (0, 4, self.openmp)
class DownsampleFactorMaxGradGrad(OpenMPOp):
    """Second-order gradient of max pooling (gradient of `MaxPoolGrad`).

    Gathers the gradient-of-gradient ``ggx`` from the argmax positions of
    each pooling region into an output shaped like ``maxout``.
    """

    __props__ = ("ignore_border", "mode", "ndim")

    def __init__(self, ignore_border, mode="max", ndim=2, openmp=None):
        self.ndim = ndim
        self.ignore_border = ignore_border
        self.mode = mode
        super().__init__(openmp=openmp)
        # Only the max mode has a second-order gradient implemented here.
        assert self.mode == "max"

    def make_node(self, x, maxout, gz, ws, stride=None, pad=None):
        """Build the Apply node; the single output has the type of ``x``."""
        # make_node should only be called by the grad function of
        # MaxPoolGrad, so these asserts should not fail.
        x = at.as_tensor_variable(x)
        maxout = at.as_tensor_variable(maxout)
        gz = at.as_tensor_variable(gz)
        nd = self.ndim
        if stride is None:
            # Default stride equals the window size: non-overlapping regions.
            stride = ws
        if pad is None:
            pad = (0,) * nd
        elif isinstance(pad, (tuple, list)):
            if max(pad) != 0 and not self.ignore_border:
                raise NotImplementedError("padding works only with ignore_border=True")
            if isinstance(ws, (tuple, list)):
                if any(pad[i] >= ws[i] for i in range(nd)):
                    raise NotImplementedError("padding must be smaller than strides")
        ws = at.as_tensor_variable(ws)
        stride = at.as_tensor_variable(stride)
        pad = at.as_tensor_variable(pad)
        assert ws.ndim == 1
        assert stride.ndim == 1
        assert pad.ndim == 1
        assert x.ndim == maxout.ndim == gz.ndim >= nd
        if ws.dtype not in int_dtypes:
            raise TypeError("Pool downsample parameters must be ints.")
        if stride.dtype not in int_dtypes:
            raise TypeError("Stride parameters must be ints.")
        if pad.dtype not in int_dtypes:
            raise TypeError("Padding parameters must be ints.")
        return Apply(self, [x, maxout, gz, ws, stride, pad], [x.type()])

    def perform(self, node, inp, out):
        """Python fallback: accumulate ``ggx`` at the argmax of each region."""
        x, maxout, ggx, ws, stride, pad = inp
        (z,) = out
        nd = self.ndim
        assert ws.shape == stride.shape == pad.shape == (nd,)
        if len(x.shape) < nd:
            raise NotImplementedError(
                "DownsampleFactorMaxGradGrad requires input "
                "with {} or more dimensions".format(nd)
            )
        # NOTE(review): a reused output buffer (same shape as maxout) is not
        # re-zeroed before the `+=` accumulation below — this looks like it
        # relies on a fresh zero allocation each call; confirm against the
        # op's output-storage contract.
        if (z[0] is None) or (z[0].shape != maxout.shape):
            z[0] = np.zeros(maxout.shape, dtype=x.dtype)
        ggz = z[0]  # grad wrt maxout_grad has the same shape as maxout
        # size of pooling output
        pool_out_shp = ggz.shape[-nd:]
        img_shp = tuple(x.shape[-nd + i] + 2 * pad[i] for i in range(nd))
        # pad the image and its gradients
        if max(pad) > 0:
            y_padded = np.zeros(x.shape[:-nd] + img_shp, dtype=x.dtype)
            y_padded[
                (slice(None),) * (len(x.shape) - nd)
                + tuple(slice(pad[i], img_shp[i] - pad[i]) for i in range(nd))
            ] = x
            ggx_padded = np.zeros(x.shape[:-nd] + img_shp, dtype=x.dtype)
            ggx_padded[
                (slice(None),) * (len(x.shape) - nd)
                + tuple(slice(pad[i], img_shp[i] - pad[i]) for i in range(nd))
            ] = ggx
        else:
            y_padded = x
            ggx_padded = ggx
        # precompute the region boundaries for each dimension
        region_ranges = [[] for i in range(nd)]
        for i in range(nd):
            for j in range(pool_out_shp[i]):
                start = j * stride[i]
                end = min(start + ws[i], img_shp[i])
                region_ranges[i].append(range(start, end))
        # iterate over non-pooling dimensions
        for k in np.ndindex(*x.shape[:-nd]):
            ggxk = ggx_padded[k]
            ggzk = ggz[k]
            yk = y_padded[k]
            maxoutk = maxout[k]
            # iterate over pooling regions
            for r in np.ndindex(*pool_out_shp):
                # iterate inside region
                maxout_value = maxoutk[r]
                for c in itertools.product(
                    *[region_ranges[i][r[i]] for i in range(nd)]
                ):
                    # ties: every position equal to the max contributes
                    if maxout_value == yk[c]:
                        ggzk[r] += ggxk[c]

    def infer_shape(self, fgraph, node, in_shapes):
        # Output matches maxout (the second input).
        return [in_shapes[1]]

    def grad(self, inp, grads):
        """Third-order gradient, delegated back to `MaxPoolGrad`."""
        x, maxout, ggx, ws, stride, pad = inp
        (gz,) = grads
        return [
            at.zeros_like(x),
            at.zeros_like(maxout),
            MaxPoolGrad(ignore_border=self.ignore_border, ndim=self.ndim)(
                x, maxout, gz, ws, stride, pad
            ),
            DisconnectedType()(),
            DisconnectedType()(),
            DisconnectedType()(),
        ]

    def connection_pattern(self, node):
        # Only x, maxout and ggx are connected; ws/stride/pad carry no gradient.
        return [[1], [1], [1], [0], [0], [0]]

    def c_code(self, node, name, inp, out, sub):
        """Generate the C implementation (max mode only)."""
        if self.mode != "max":
            raise MethodNotDefined()
        x, maxout, ggx, ws, stride, pad = inp
        (z,) = out  # the grad of grad
        nd = self.ndim
        total_ndim = node.inputs[0].ndim
        non_pool_ndim = total_ndim - nd
        fail = sub["fail"]
        if self.openmp:
            # run in parallel over each pooling block
            omp_parallel = "#pragma omp parallel for private(r_st, r_end, r_idx, i_idx, o_idx, maximum) schedule(static)"
        else:
            omp_parallel = ""
        ccode = """
        int z_typenum = PyArray_ObjectType((PyObject*)%(maxout)s, 0);
        npy_intp z[%(nd)s]; // shape of the output
        npy_intp r[%(nd)s]; // shape of the padded_input
        npy_intp ws[%(nd)s];
        npy_intp st[%(nd)s];
        npy_intp pd[%(nd)s];
        if(PyArray_DIM(%(ws)s, 0)!=%(nd)s)
        {
            PyErr_SetString(PyExc_ValueError, "ws must be a vector of size %(nd)s");
            %(fail)s;
        }
        if(PyArray_DIM(%(stride)s, 0)!=%(nd)s)
        {
            PyErr_SetString(PyExc_ValueError, "stride must be a vector of size %(nd)s");
            %(fail)s;
        }
        if(PyArray_DIM(%(pad)s, 0)!=%(nd)s)
        {
            PyErr_SetString(PyExc_ValueError, "pad must be a vector of size %(nd)s");
            %(fail)s;
        }
        for (int i=0; i<%(nd)s; i++)
        {
            ws[i] = *((dtype_%(ws)s*)PyArray_GETPTR1(%(ws)s, i));
            st[i] = *((dtype_%(stride)s*)PyArray_GETPTR1(%(stride)s, i));
            pd[i] = *((dtype_%(pad)s*)PyArray_GETPTR1(%(pad)s, i));
            z[i] = PyArray_DIMS(%(maxout)s)[%(non_pool_ndim)s + i];
            r[i] = PyArray_DIMS(%(x)s)[%(non_pool_ndim)s + i] + 2 * pd[i];
        }
        // allocating memory for output, if necessary
        int mem_nec;
        mem_nec = 0;
        if ((!%(z)s) || !PyArray_ISCONTIGUOUS(%(z)s)
            || *PyArray_DIMS(%(z)s)!=%(total_ndim)s)
        {
            mem_nec = 1;
        }
        if (!mem_nec)
        {
            for (int i=0; i<%(total_ndim)s; i++)
            {
                if (PyArray_DIMS(%(z)s)[i] != PyArray_DIMS(%(maxout)s)[i])
                {
                    mem_nec = 1;
                    break;
                }
            }
        }
        if (mem_nec)
        {
          Py_XDECREF(%(z)s);
          %(z)s = (PyArrayObject*) PyArray_ZEROS(%(total_ndim)s, PyArray_DIMS(%(maxout)s), z_typenum,0);
        }
        else {
          PyArray_FILLWBYTE(%(z)s, 0);
        }
        dtype_%(maxout)s maximum; // temp var for maximum value in a region
        // will be used to hold start and end index of a region
        npy_intp r_st[%(nd)s];
        npy_intp r_end[%(nd)s];
        // index for iterating over the pooling regions
        npy_intp r_idx[%(nd)s];
        // placeholder for PyArray indexing (output)
        npy_intp o_idx[%(total_ndim)s];
        // placeholder for PyArray indexing (input)
        npy_intp i_idx[%(total_ndim)s];
        // loop over non-pooling dimensions
        npy_intp non_pooling_prod;
        non_pooling_prod = 1;
        for (int i=0; i<%(non_pool_ndim)s; i++)
        {
            non_pooling_prod *= PyArray_DIMS(%(x)s)[i];
        }
        %(omp_parallel)s
        // first loop over non-pooling dimensions
        for (npy_intp t=0; t<non_pooling_prod; t++)
        {
            // compute the non-pooling index in each dimension
            if (%(non_pool_ndim)s!=0)
            {
                o_idx[0] = t;
                i_idx[0] = t;
                for (int i=1; i<%(non_pool_ndim)s; i++)
                {
                    o_idx[i] = o_idx[i - 1] / PyArray_DIMS(%(x)s)[i - 1];
                    o_idx[i - 1] = o_idx[i - 1] %% PyArray_DIMS(%(x)s)[i - 1];
                    i_idx[i] = o_idx[i];
                    i_idx[i - 1] = o_idx[i - 1];
                }
            }

            // then loop over each region in each pooling dimension
        """
        # Emit one nested C for-loop per pooling dimension.
        for i in range(nd):
            ccode += """
                for (r_idx[%(i)s]=0; r_idx[%(i)s] < z[%(i)s]; r_idx[%(i)s]++) {
                  r_st[%(i)s] = r_idx[%(i)s] * st[%(i)s];
                  r_end[%(i)s] = r_st[%(i)s] + ws[%(i)s];
                  // skip the padding
                  r_st[%(i)s] = r_st[%(i)s] < pd[%(i)s] ? pd[%(i)s] : r_st[%(i)s];
                  r_end[%(i)s] = r_end[%(i)s] > (r[%(i)s] - pd[%(i)s]) ? r[%(i)s] - pd[%(i)s] : r_end[%(i)s];
                  // from padded_img space to img space
                  r_st[%(i)s] -= pd[%(i)s];
                  r_end[%(i)s] -= pd[%(i)s];
                  // use the index to find the correct position in the output
                  o_idx[%(non_pool_ndim)s + %(i)s] = r_idx[%(i)s];
            """ % dict(
                i=i, non_pool_ndim=non_pool_ndim
            )
        ccode += """
                  dtype_%(z)s * z;
                  if (%(total_ndim)s == 4)
                  {
                    // the maximum value
                    maximum = ((dtype_%(maxout)s*)(PyArray_GETPTR4(%(maxout)s,o_idx[0],o_idx[1],o_idx[2],o_idx[3])))[0];
                    // z at this position
                    z = ((dtype_%(z)s*)(PyArray_GETPTR4(%(z)s,o_idx[0],o_idx[1],o_idx[2],o_idx[3])));
                  }
                  else
                  {
                    // the maximum value
                    maximum = ((dtype_%(maxout)s*)(PyArray_GetPtr(%(maxout)s,o_idx)))[0];
                    // z at this position
                    z = ((dtype_%(z)s*)(PyArray_GetPtr(%(z)s,o_idx)));
                  }
        """
        # Emit the inner per-region scan loops.
        for i in range(nd):
            ccode += """
                  // go through the pooled region in the unpadded input
                  for(npy_intp m%(i)s=r_st[%(i)s]; m%(i)s<r_end[%(i)s]; m%(i)s++)
                  {
                    i_idx[%(non_pool_ndim)s + %(i)s] = m%(i)s;
            """ % dict(
                i=i, non_pool_ndim=non_pool_ndim
            )
        ccode += """
                    dtype_%(x)s a;
                    dtype_%(ggx)s * ggx;
                    if (%(total_ndim)s == 4)
                    {
                      a = ((dtype_%(x)s*)(PyArray_GETPTR4(%(x)s,i_idx[0],i_idx[1],i_idx[2],i_idx[3])))[0];
                      ggx = ((dtype_%(ggx)s*)(PyArray_GETPTR4(%(ggx)s,i_idx[0],i_idx[1],i_idx[2],i_idx[3])));
                    }
                    else
                    {
                      a = ((dtype_%(x)s*)(PyArray_GetPtr(%(x)s,i_idx)))[0];
                      ggx = ((dtype_%(ggx)s*)(PyArray_GetPtr(%(ggx)s,i_idx)));
                    }
                    if (a == maximum){
                      z[0] += ggx[0];
                    }
        """
        # Close the inner region loops.
        for i in range(nd):
            ccode += """
                  } // for loop over region
            """
        # Close the per-dimension pooling loops.
        for i in range(nd):
            ccode += """
                } // loop over pooling dimension
            """
        ccode += """
        } // for loop over non-pooling dimensions
        """
        return ccode % locals()

    def c_code_cache_version(self):
        # Bump when the generated C code changes.
        return (0, 5, self.openmp)
class MaxPoolRop(OpenMPOp):
"""
Implements the R-operator for the downsample operation.
Parameters
----------
ws : list or tuple of N ints
Downsample factor over rows, columns etc.
ws indicates the size of the pooling region.
ignore_border : bool
If ws doesn't divide imgshape, do we include an extra row/col/slice
of partial downsampling (False) or ignore it (True).
stride : list or tuple of N ints or None
Stride size, which is the number of shifts over rows/cols/slices to get the
next pool region. If stride is None, it is considered equal to ws
(no overlap on pooling regions).
pad : tuple of N ints or None
For each downsampling dimension, this specifies the number of zeros to
add as padding on both sides. For 2D and (pad_h, pad_w), pad_h specifies the
size of the top and bottom margins, pad_w specifies the size of the left and
right margins. No padding is added if pad is None.
mode : {'max', 'sum', 'average_inc_pad', 'average_exc_pad'}
('average_inc_pad' excludes the padding from the count,
'average_exc_pad' include it)
ndim : int
The number of pooling dimensions N.
The default is 2.
"""
__props__ = ("ignore_border", "mode", "ndim")
params_type = ParamsType(
ignore_border=bool_t,
)
def __init__(self, ignore_border=False, mode="max", ndim=2, openmp=None):
super().__init__(openmp=openmp)
self.ndim = ndim
self.ignore_border = ignore_border
self.mode = mode
assert mode == "max"
def make_node(self, x, eval_point, ws, stride=None, pad=None):
# TODO: consider restricting the dtype?
x = at.as_tensor_variable(x)
eval_point = at.as_tensor_variable(eval_point)
nd = self.ndim
if stride is None:
stride = ws
if pad is None:
pad = (0,) * nd
elif isinstance(pad, (tuple, list)):
if max(pad) != 0 and not self.ignore_border:
raise NotImplementedError("padding works only with ignore_border=True")
if isinstance(ws, (tuple, list)):
if any(pad[i] >= ws[i] for i in range(nd)):
raise NotImplementedError("padding must be smaller than strides")
ws = at.as_tensor_variable(ws)
stride = at.as_tensor_variable(stride)
pad = at.as_tensor_variable(pad)
assert ws.ndim == 1
assert stride.ndim == 1
assert pad.ndim == 1
if x.type.ndim < nd:
raise TypeError()
if not ws.dtype.startswith("int"):
raise TypeError("Pool downsample parameters must be ints.")
if not stride.dtype.startswith("int"):
raise TypeError("Stride parameters must be ints.")
if not pad.dtype.startswith("int"):
raise TypeError("Padding parameters must be ints.")
# If the input shape are broadcastable we can have 0 in the output shape
out_shape = tuple(
1 if s == 1 else None for s in x.type.shape[:-nd] + (None,) * nd
)
out = TensorType(eval_point.dtype, shape=out_shape)
return Apply(self, [x, eval_point, ws, stride, pad], [out()])
def perform(self, node, inp, out, params):
    """Python implementation: for each pooling window, store the value of
    the eval-point tensor ``ex`` at the position where ``x`` is maximal.

    ``z[0]`` is (re)allocated when missing or mis-shaped; the output shape
    comes from ``Pool.out_shape`` using ``params.ignore_border``.
    """
    x, ex, ws, stride, pad = inp
    (z,) = out
    nd = self.ndim
    assert ws.shape == stride.shape == pad.shape == (nd,)
    if len(x.shape) < nd:
        raise NotImplementedError(
            f"Pool requires input with {nd} or more dimensions"
        )
    z_shape = Pool.out_shape(x.shape, ws, params.ignore_border, stride, pad, nd)
    if not self.ignore_border:
        assert all(z > 0 for z in z_shape[-nd:])
    if (z[0] is None) or (z[0].shape != z_shape):
        z[0] = np.empty(z_shape, dtype=x.dtype)
    zz = z[0]
    # size of pooling output
    pool_out_shp = zz.shape[-nd:]
    img_shp = tuple(x.shape[-nd + i] + 2 * pad[i] for i in range(nd))
    inc_pad = self.mode == "average_inc_pad"
    # pad the image and the eval point
    if max(pad) != 0:
        y = np.zeros(x.shape[:-nd] + img_shp, dtype=x.dtype)
        y[
            (slice(None),) * (len(x.shape) - nd)
            + tuple(slice(pad[i], img_shp[i] - pad[i]) for i in range(nd))
        ] = x
        ey = np.zeros(ex.shape[:-nd] + img_shp, dtype=ex.dtype)
        ey[
            (slice(None),) * (len(ex.shape) - nd)
            + tuple(slice(pad[i], img_shp[i] - pad[i]) for i in range(nd))
        ] = ex
    else:
        y = x
        ey = ex
    # precompute the region boundaries for each dimension
    region_slices = [[] for i in range(nd)]
    for i in range(nd):
        for j in range(pool_out_shp[i]):
            start = j * stride[i]
            end = min(start + ws[i], img_shp[i])
            if not inc_pad:
                start = max(start, pad[i])
                end = min(end, img_shp[i] - pad[i])
            region_slices[i].append(slice(start, end))
    # iterate over non-pooling dimensions
    for k in np.ndindex(*x.shape[:-nd]):
        zzk = zz[k]
        yk = y[k]
        eyk = ey[k]
        # iterate over pooling regions
        for r in np.ndindex(*pool_out_shp):
            # BUG FIX: N-d indexing needs a *tuple* of slices — indexing
            # with a list of slices was deprecated and later removed in
            # NumPy (the rest of this file already uses tuples).
            region = tuple(region_slices[i][r[i]] for i in range(nd))
            # current slice in padded input
            ykslice = yk[region]
            # current slice in eval points
            eykslice = eyk[region]
            # indices of maximum
            idx = np.unravel_index(np.argmax(ykslice), ykslice.shape)
            zzk[r] = eykslice[idx]
def c_headers(self, **kwargs):
    """C headers required by the generated code (``std::max`` needs <algorithm>)."""
    return ["<algorithm>", *super().c_headers(**kwargs)]
def c_code(self, node, name, inp, out, sub):
    """Generate C code computing, per pooling window, the value of the
    eval-point tensor at the argmax position of ``x`` (R-op of max pooling).

    Only ``mode == "max"`` has a C implementation; other modes raise
    ``MethodNotDefined`` so the Python ``perform`` is used instead.
    """
    if self.mode != "max":
        raise MethodNotDefined()
    x, ex, ws, stride, pad = inp
    (z,) = out
    nd = self.ndim
    total_ndim = node.inputs[0].ndim
    non_pool_ndim = total_ndim - nd
    fail = sub["fail"]
    params = sub["params"]
    if self.openmp:
        # run in parallel over each pooling block
        omp_parallel = "#pragma omp parallel for private(r_st, r_end, r_idx, i_idx, o_idx, collector, eval_collector) schedule(static)"
    else:
        omp_parallel = ""
    # --- argument validation, output shape, and (re)allocation of z ---
    ccode = """
        int typenum = PyArray_ObjectType((PyObject*)%(x)s, 0);
        if(PyArray_NDIM(%(x)s)!=%(total_ndim)s)
        {
            PyErr_SetString(PyExc_ValueError, "x must be a %(total_ndim)sD ndarray");
            %(fail)s;
        }
        if(PyArray_NDIM(%(ex)s)!=%(total_ndim)s)
        {
            PyErr_SetString(PyExc_ValueError, "eval_point must be a %(total_ndim)sD ndarray");
            %(fail)s;
        }
        if(PyArray_DIM(%(ws)s, 0)!=%(nd)s)
        {
            PyErr_SetString(PyExc_ValueError, "ws must be a vector of size %(nd)s");
            %(fail)s;
        }
        if(PyArray_DIM(%(stride)s, 0)!=%(nd)s)
        {
            PyErr_SetString(PyExc_ValueError, "stride must be a vector of size %(nd)s");
            %(fail)s;
        }
        if(PyArray_DIM(%(pad)s, 0)!=%(nd)s)
        {
            PyErr_SetString(PyExc_ValueError, "pad must be a vector of size %(nd)s");
            %(fail)s;
        }
        npy_intp z[%(nd)s]; // shape of the output
        npy_intp r[%(nd)s]; // shape of the padded_input
        npy_intp ws[%(nd)s];
        npy_intp st[%(nd)s];
        npy_intp pd[%(nd)s];
        int nonzero_padding;
        nonzero_padding = 0;
        for (int i=0; i<%(nd)s; i++)
        {
            ws[i] = *((dtype_%(ws)s*)PyArray_GETPTR1(%(ws)s, i));
            st[i] = *((dtype_%(stride)s*)PyArray_GETPTR1(%(stride)s, i));
            pd[i] = *((dtype_%(pad)s*)PyArray_GETPTR1(%(pad)s, i));
            r[i] = PyArray_DIMS(%(x)s)[%(non_pool_ndim)s + i] + 2 * pd[i];
            if (pd[i]>0)
                nonzero_padding = 1;
        }
        if (!%(params)s->ignore_border && nonzero_padding)
        {
            PyErr_SetString(PyExc_ValueError,
              "padding must be zero when ignore border is False");
            %(fail)s;
        }
        if (%(params)s->ignore_border)
        {
            for (int i=0; i<%(nd)s; i++)
            {
                // '/' in C is different from '/' in python
                if (r[i] - ws[i] < 0)
                {
                    z[i] = 0;
                }
                else
                {
                    z[i] = (r[i] - ws[i]) / st[i] + 1;
                }
            }
        }
        else
        {
            for (int i=0; i<%(nd)s; i++)
            {
                // decide how many rows/cols the output has
                if (st[i] >= ws[i])
                {
                    z[i] = (r[i] - 1) / st[i] + 1;
                }
                else
                {
                    z[i] = std::max((npy_intp)0, (r[i] - 1 - ws[i] + st[i]) / st[i]) + 1;
                }
                assert(z[i] > 0);
            }
        }
        // memory allocation of z if necessary
        int mem_nec;
        mem_nec = 0;
        if ((!%(z)s) || *PyArray_DIMS(%(z)s)!=%(total_ndim)s)
        {
            mem_nec = 1;
        }
        if (!mem_nec)
        {
            for (int i=0; i<%(non_pool_ndim)s; i++)
            {
                if (PyArray_DIMS(%(z)s)[i] != PyArray_DIMS(%(x)s)[i])
                {
                    mem_nec = 1;
                    break;
                }
            }
        }
        if (!mem_nec)
        {
            for (int i=0; i<%(nd)s; i++)
            {
                if (PyArray_DIMS(%(z)s)[%(non_pool_ndim)s + i] != z[i])
                {
                    mem_nec = 1;
                    break;
                }
            }
        }
        if (mem_nec)
        {
          if (%(z)s) Py_XDECREF(%(z)s);
          npy_intp dims[%(total_ndim)s];
          for (int i=0; i<%(non_pool_ndim)s; i++)
          {
              dims[i] = PyArray_DIMS(%(x)s)[i];
          }
          for (int i=0; i<%(nd)s; i++)
          {
              dims[%(non_pool_ndim)s + i] = z[i];
          }
          //TODO: zeros not necessary
          %(z)s = (PyArrayObject*) PyArray_ZEROS(%(total_ndim)s, dims, typenum,0);
        }
        // initialize temp var for the value in a region
        dtype_%(x)s collector;
        dtype_%(ex)s eval_collector;
        npy_intp z_prod;
        // do not run if any z[i] is zero
        z_prod = 1;
        for (int i=0; i<%(nd)s; i++)
        {
            z_prod *= z[i];
        }
        if (z_prod)
        {
            // will be used to hold start and end index of a region
            npy_intp r_st[%(nd)s];
            npy_intp r_end[%(nd)s];
            // index for iterating over the pooling regions
            npy_intp r_idx[%(nd)s];
            // placeholder for PyArray indexing (output)
            npy_intp o_idx[%(total_ndim)s];
            // placeholder for PyArray indexing (input)
            npy_intp i_idx[%(total_ndim)s];
            // loop over non-pooling dimensions
            npy_intp non_pooling_prod = 1;
            for (int i=0; i<%(non_pool_ndim)s; i++)
            {
                non_pooling_prod *= PyArray_DIMS(%(x)s)[i];
            }
            %(omp_parallel)s
            // first loop over non-pooling dimensions
            for (npy_intp t=0; t<non_pooling_prod; t++)
            {
                // compute the non-pooling index in each dimension
                if (%(non_pool_ndim)s!=0)
                {
                    o_idx[0] = t;
                    i_idx[0] = t;
                    for (int i=1; i<%(non_pool_ndim)s; i++)
                    {
                        o_idx[i] = o_idx[i - 1] / PyArray_DIMS(%(x)s)[i - 1];
                        o_idx[i - 1] = o_idx[i - 1] %% PyArray_DIMS(%(x)s)[i - 1];
                        i_idx[i] = o_idx[i];
                        i_idx[i - 1] = o_idx[i - 1];
                    }
                }
                // then loop over each region in each pooling dimension
    """
    # --- one nested region loop per pooled dimension ---
    for i in range(nd):
        ccode += """
                for (r_idx[%(i)s]=0; r_idx[%(i)s] < z[%(i)s]; r_idx[%(i)s]++) {
                  r_st[%(i)s] = r_idx[%(i)s] * st[%(i)s];
                  r_end[%(i)s] = r_st[%(i)s] + ws[%(i)s];
                  // skip the padding
                  r_st[%(i)s] = r_st[%(i)s] < pd[%(i)s] ? pd[%(i)s] : r_st[%(i)s];
                  r_end[%(i)s] = r_end[%(i)s] > (r[%(i)s] - pd[%(i)s]) ? r[%(i)s] - pd[%(i)s] : r_end[%(i)s];
                  // from padded_img space to img space
                  r_st[%(i)s] -= pd[%(i)s];
                  r_end[%(i)s] -= pd[%(i)s];
                  // handle the case where no padding, ignore border is True
                  if (%(params)s->ignore_border)
                  {
                    r_end[%(i)s] = r_end[%(i)s] > r[%(i)s] ? r[%(i)s] : r_end[%(i)s];
                  }
                  // use the index to find the correct position in the output
                  o_idx[%(non_pool_ndim)s + %(i)s] = r_idx[%(i)s];
        """ % dict(
            i=i, params=sub["params"], non_pool_ndim=non_pool_ndim
        )
    # --- locate the output cell for the current region ---
    ccode += """
                  // get a pointer to the correct position in the output
                  dtype_%(z)s * z;
                  if (%(total_ndim)s == 4)
                    z = ((dtype_%(z)s*)(PyArray_GETPTR4(%(z)s, o_idx[0], o_idx[1], o_idx[2], o_idx[3])));
                  else
                    z = ((dtype_%(z)s*)(PyArray_GetPtr(%(z)s, o_idx)));
    """
    # --- seed collectors with the region's first element ---
    for i in range(nd):
        ccode += """
                  // set the first index of dimension %(i)s
                  i_idx[%(non_pool_ndim)s + %(i)s] = r_st[%(i)s];
        """ % dict(
            i=i, non_pool_ndim=non_pool_ndim
        )
    ccode += """
                  // use the first element as the initial value of collector
                  if (%(total_ndim)s == 4) {
                    collector = ((dtype_%(x)s*)(PyArray_GETPTR4(%(x)s,i_idx[0],i_idx[1],i_idx[2],i_idx[3])))[0];
                    eval_collector = ((dtype_%(ex)s*)(PyArray_GETPTR4(%(ex)s,i_idx[0],i_idx[1],i_idx[2],i_idx[3])))[0];
                  } else {
                    collector = ((dtype_%(x)s*)(PyArray_GetPtr(%(x)s,i_idx)))[0];
                    eval_collector = ((dtype_%(ex)s*)(PyArray_GetPtr(%(ex)s,i_idx)))[0];
                  }
    """
    # --- scan the region, tracking the max of x and ex at the argmax ---
    for i in range(nd):
        ccode += """
                  // go through the pooled region in the unpadded input
                  for(npy_intp m%(i)s=r_st[%(i)s]; m%(i)s<r_end[%(i)s]; m%(i)s++)
                  {
                    i_idx[%(non_pool_ndim)s + %(i)s] = m%(i)s;
        """ % dict(
            i=i, non_pool_ndim=non_pool_ndim
        )
    ccode += """
                    // update maximum
                    dtype_%(x)s a;
                    dtype_%(ex)s ea;
                    if (%(total_ndim)s == 4) {
                      a = ((dtype_%(x)s*)(PyArray_GETPTR4(%(x)s,i_idx[0],i_idx[1],i_idx[2],i_idx[3])))[0];
                      ea = ((dtype_%(ex)s*)(PyArray_GETPTR4(%(ex)s,i_idx[0],i_idx[1],i_idx[2],i_idx[3])))[0];
                    }
                    else {
                      a = ((dtype_%(x)s*)(PyArray_GetPtr(%(x)s,i_idx)))[0];
                      ea = ((dtype_%(ex)s*)(PyArray_GetPtr(%(ex)s,i_idx)))[0];
                    }
                    if (a > collector) {
                      collector = a;
                      eval_collector = ea;
                    }
    """
    # --- close the region-scan loops ---
    for i in range(nd):
        ccode += """
                  } // for loop over region
        """
    ccode += """
                  z[0] = eval_collector;
    """
    # --- close the per-dimension region loops ---
    for i in range(nd):
        ccode += """
                } // loop over pooling dimension
        """
    ccode += """
              } // for loop over non-pooling dimensions
            } // if z_prod
    """
    return ccode % locals()
def c_code_cache_version(self):
    """Cache key for the generated C code; varies with the OpenMP setting."""
    version = 2
    return (version, self.openmp)
import builtins
from itertools import product
import numpy as np
import pytest
import pytensor
import pytensor.tensor as at
from pytensor import function
from pytensor.tensor.math import sum as at_sum
from pytensor.tensor.signal.pool import (
AveragePoolGrad,
DownsampleFactorMaxGradGrad,
MaxPoolGrad,
Pool,
PoolGrad,
max_pool_2d_same_size,
pool_2d,
pool_3d,
)
from pytensor.tensor.type import (
TensorType,
dmatrix,
dtensor3,
dtensor4,
fmatrix,
ftensor3,
ftensor4,
ivector,
tensor,
tensor4,
vector,
)
from tests import unittest_tools as utt
class TestDownsampleFactorMax(utt.InferShapeTester):
def test_out_shape(self):
    """Pool.out_shape with a (2, 2) window on 3D and 2D input shapes."""
    for in_shape, expected in (((9, 8, 6), [9, 4, 3]), ((8, 6), [4, 3])):
        assert Pool.out_shape(in_shape, (2, 2)) == expected
@staticmethod
def numpy_max_pool_2d(input, ws, ignore_border=False, mode="max"):
"""Helper function, implementing pool_2d in pure numpy"""
if len(input.shape) < 2:
raise NotImplementedError(
f"input should have at least 2 dim, shape is {input.shape}"
)
xi = 0
yi = 0
if not ignore_border:
if input.shape[-2] % ws[0]:
xi += 1
if input.shape[-1] % ws[1]:
yi += 1
out_shp = list(input.shape[:-2])
out_shp.append(input.shape[-2] // ws[0] + xi)
out_shp.append(input.shape[-1] // ws[1] + yi)
output_val = np.zeros(out_shp)
func = np.max
if mode == "sum":
func = np.sum
elif mode != "max":
func = np.average
for k in np.ndindex(*input.shape[:-2]):
for i in range(output_val.shape[-2]):
ii = i * ws[0]
for j in range(output_val.shape[-1]):
jj = j * ws[1]
patch = input[k][ii : ii + ws[0], jj : jj + ws[1]]
output_val[k][i, j] = func(patch)
return output_val
@staticmethod
def numpy_max_pool_nd(input, ws, ignore_border=False, mode="max"):
"""Helper function, implementing pool_nd in pure numpy"""
if len(input.shape) < len(ws):
raise NotImplementedError(
f"input should have at least {ws} dim, shape is {input.shape}"
)
nd = len(ws)
si = [0] * nd
if not ignore_border:
for i in range(nd):
if input.shape[-nd + i] % ws[i]:
si[i] += 1
out_shp = list(input.shape[:-nd])
for i in range(nd):
out_shp.append(input.shape[-nd + i] // ws[i] + si[i])
output_val = np.zeros(out_shp)
func = np.max
if mode == "sum":
func = np.sum
elif mode != "max":
func = np.average
for l in np.ndindex(*input.shape[:-nd]):
for r in np.ndindex(*output_val.shape[-nd:]):
patch = input[l][
tuple(slice(r[i] * ws[i], (r[i] + 1) * ws[i]) for i in range(nd))
]
output_val[l][r] = func(patch)
return output_val
@staticmethod
def numpy_max_pool_2d_stride_pad(
x, ws, ignore_border=True, stride=None, pad=(0, 0), mode="max"
):
assert ignore_border
pad_h = pad[0]
pad_w = pad[1]
h = x.shape[-2]
w = x.shape[-1]
assert ws[0] > pad_h
assert ws[1] > pad_w
def pad_img(x):
y = np.zeros(
(
x.shape[0],
x.shape[1],
x.shape[2] + pad_h * 2,
x.shape[3] + pad_w * 2,
),
dtype=x.dtype,
)
y[:, :, pad_h : (x.shape[2] + pad_h), pad_w : (x.shape[3] + pad_w)] = x
return y
img_rows = h + 2 * pad_h
img_cols = w + 2 * pad_w
out_r = (img_rows - ws[0]) // stride[0] + 1
out_c = (img_cols - ws[1]) // stride[1] + 1
out_shp = list(x.shape[:-2])
out_shp.append(out_r)
out_shp.append(out_c)
ws0, ws1 = ws
stride0, stride1 = stride
output_val = np.zeros(out_shp)
y = pad_img(x)
func = np.max
if mode == "sum":
func = np.sum
elif mode != "max":
func = np.average
inc_pad = mode == "average_inc_pad"
for k in np.ndindex(*x.shape[:-2]):
for i in range(output_val.shape[-2]):
ii_stride = i * stride[0]
ii_end = builtins.min(ii_stride + ws[0], img_rows)
if not inc_pad:
ii_stride = builtins.max(ii_stride, pad_h)
ii_end = builtins.min(ii_end, h + pad_h)
for j in range(output_val.shape[-1]):
jj_stride = j * stride[1]
jj_end = builtins.min(jj_stride + ws[1], img_cols)
if not inc_pad:
jj_stride = builtins.max(jj_stride, pad_w)
jj_end = builtins.min(jj_end, w + pad_w)
patch = y[k][ii_stride:ii_end, jj_stride:jj_end]
output_val[k][i, j] = func(patch)
return output_val
@staticmethod
def numpy_max_pool_nd_stride_pad(
input, ws, ignore_border=True, stride=None, pad=None, mode="max"
):
assert ignore_border
nd = len(ws)
if pad is None:
pad = (0,) * nd
if stride is None:
stride = (0,) * nd
assert len(pad) == len(ws) == len(stride)
assert all(ws[i] > pad[i] for i in range(nd))
def pad_img(x):
# initialize padded input
y = np.zeros(
x.shape[0:-nd]
+ tuple(x.shape[-nd + i] + pad[i] * 2 for i in range(nd)),
dtype=x.dtype,
)
# place the unpadded input in the center
block = (slice(None),) * (len(x.shape) - nd) + tuple(
slice(pad[i], x.shape[-nd + i] + pad[i]) for i in range(nd)
)
y[block] = x
return y
pad_img_shp = list(input.shape[:-nd])
out_shp = list(input.shape[:-nd])
for i in range(nd):
padded_size = input.shape[-nd + i] + 2 * pad[i]
pad_img_shp.append(padded_size)
out_shp.append((padded_size - ws[i]) // stride[i] + 1)
output_val = np.zeros(out_shp)
padded_input = pad_img(input)
func = np.max
if mode == "sum":
func = np.sum
elif mode != "max":
func = np.average
inc_pad = mode == "average_inc_pad"
for l in np.ndindex(*input.shape[:-nd]):
for r in np.ndindex(*output_val.shape[-nd:]):
region = []
for i in range(nd):
r_stride = r[i] * stride[i]
r_end = builtins.min(r_stride + ws[i], pad_img_shp[-nd + i])
if not inc_pad:
r_stride = builtins.max(r_stride, pad[i])
r_end = builtins.min(r_end, input.shape[-nd + i] + pad[i])
region.append(slice(r_stride, r_end))
patch = padded_input[l][tuple(region)]
output_val[l][r] = func(patch)
return output_val
@staticmethod
def numpy_max_pool_2d_stride(
input, ws, ignore_border=False, stride=None, mode="max"
):
"""Helper function, implementing pool_2d in pure numpy
this function provides stride input to indicate the stide size
for the pooling regions. if not indicated, stride == ws."""
if len(input.shape) < 2:
raise NotImplementedError(
f"input should have at least 2 dim, shape is {input.shape}"
)
if stride is None:
stride = ws
img_rows = input.shape[-2]
img_cols = input.shape[-1]
out_r = 0
out_c = 0
if img_rows - ws[0] >= 0:
out_r = (img_rows - ws[0]) // stride[0] + 1
if img_cols - ws[1] >= 0:
out_c = (img_cols - ws[1]) // stride[1] + 1
if not ignore_border:
if out_r > 0:
if img_rows - ((out_r - 1) * stride[0] + ws[0]) > 0:
rr = img_rows - out_r * stride[0]
if rr > 0:
out_r += 1
else:
if img_rows > 0:
out_r += 1
if out_c > 0:
if img_cols - ((out_c - 1) * stride[1] + ws[1]) > 0:
cr = img_cols - out_c * stride[1]
if cr > 0:
out_c += 1
else:
if img_cols > 0:
out_c += 1
out_shp = list(input.shape[:-2])
out_shp.append(out_r)
out_shp.append(out_c)
func = np.max
if mode == "sum":
func = np.sum
elif mode != "max":
func = np.average
output_val = np.zeros(out_shp)
for k in np.ndindex(*input.shape[:-2]):
for i in range(output_val.shape[-2]):
ii_stride = i * stride[0]
ii_end = builtins.min(ii_stride + ws[0], img_rows)
for j in range(output_val.shape[-1]):
jj_stride = j * stride[1]
jj_end = builtins.min(jj_stride + ws[1], img_cols)
patch = input[k][ii_stride:ii_end, jj_stride:jj_end]
output_val[k][i, j] = func(patch)
return output_val
@staticmethod
def numpy_max_pool_nd_stride(
input, ws, ignore_border=False, stride=None, mode="max"
):
"""Helper function, implementing pooling in pure numpy
this function provides stride input to indicate the stide size
for the pooling regions. if not indicated, stride == ws."""
nd = len(ws)
if stride is None:
stride = ws
assert len(stride) == len(ws)
out_shp = list(input.shape[:-nd])
for i in range(nd):
out = 0
if input.shape[-nd + i] - ws[i] >= 0:
out = (input.shape[-nd + i] - ws[i]) // stride[i] + 1
if not ignore_border:
if out > 0:
if input.shape[-nd + i] - ((out - 1) * stride[i] + ws[i]) > 0:
if input.shape[-nd + i] - out * stride[i] > 0:
out += 1
else:
if input.shape[-nd + i] > 0:
out += 1
out_shp.append(out)
func = np.max
if mode == "sum":
func = np.sum
elif mode != "max":
func = np.average
output_val = np.zeros(out_shp)
for l in np.ndindex(*input.shape[:-nd]):
for r in np.ndindex(*output_val.shape[-nd:]):
region = []
for i in range(nd):
r_stride = r[i] * stride[i]
r_end = builtins.min(r_stride + ws[i], input.shape[-nd + i])
region.append(slice(r_stride, r_end))
patch = input[l][tuple(region)]
output_val[l][r] = func(patch)
return output_val
def test_DownsampleFactorMax(self):
    """Compare Pool and the pool_2d/pool_3d helpers against the pure-numpy
    reference over window shapes, input ranks, border handling and modes."""
    rng = np.random.default_rng(utt.fetch_seed())
    # maxpool, input size
    examples = (
        ((2,), (16,)),
        ((2,), (4, 16,)),
        ((2,), (4, 2, 16,)),
        ((1, 1), (4, 2, 16, 16)),
        ((2, 2), (4, 2, 16, 16)),
        ((3, 3), (4, 2, 16, 16)),
        ((3, 2), (4, 2, 16, 16)),
        ((3, 2, 2), (3, 2, 16, 16, 16)),
        ((2, 2, 3, 2), (3, 2, 6, 6, 6, 5)),
    )
    for example, ignore_border, mode in product(
        examples,
        [True, False],
        ["max", "sum", "average_inc_pad", "average_exc_pad"],
    ):
        (maxpoolshp, inputsize) = example
        imval = rng.random(inputsize)
        images = pytensor.shared(imval)
        # Pure Numpy computation
        numpy_output_val = self.numpy_max_pool_nd(
            imval, maxpoolshp, ignore_border, mode=mode
        )
        # The pool_2d or pool_3d helper methods
        if len(maxpoolshp) == 2:
            output = pool_2d(images, maxpoolshp, ignore_border, mode=mode)
            f = function(
                [],
                [
                    output,
                ],
            )
            output_val = f()
            utt.assert_allclose(output_val, numpy_output_val)
        elif len(maxpoolshp) == 3:
            output = pool_3d(images, maxpoolshp, ignore_border, mode=mode)
            f = function(
                [],
                [
                    output,
                ],
            )
            output_val = f()
            utt.assert_allclose(output_val, numpy_output_val)
        # Pool op
        maxpool_op = Pool(
            ndim=len(maxpoolshp), ignore_border=ignore_border, mode=mode
        )(images, maxpoolshp)
        output_shape = Pool.out_shape(
            imval.shape,
            maxpoolshp,
            ndim=len(maxpoolshp),
            ignore_border=ignore_border,
        )
        utt.assert_allclose(np.asarray(output_shape), numpy_output_val.shape)
        f = function([], maxpool_op)
        output_val = f()
        utt.assert_allclose(output_val, numpy_output_val)
def test_DownsampleFactorMaxStride(self):
    """Pool with explicit strides vs the numpy reference; also validates
    the hard-coded expected output shapes in ``examples``."""
    rng = np.random.default_rng(utt.fetch_seed())
    # maxpool, stride, ignore_border, input, output sizes
    examples = (
        ((1, 1), (1, 1), True, (4, 10, 16, 16), (4, 10, 16, 16)),
        ((1, 1), (5, 7), True, (4, 10, 16, 16), (4, 10, 4, 3)),
        ((1, 1), (1, 1), False, (4, 10, 16, 16), (4, 10, 16, 16)),
        ((1, 1), (5, 7), False, (4, 10, 16, 16), (4, 10, 4, 3)),
        ((3, 3), (1, 1), True, (4, 10, 16, 16), (4, 10, 14, 14)),
        ((3, 3), (3, 3), True, (4, 10, 16, 16), (4, 10, 5, 5)),
        ((3, 3), (5, 7), True, (4, 10, 16, 16), (4, 10, 3, 2)),
        ((3, 3), (1, 1), False, (4, 10, 16, 16), (4, 10, 14, 14)),
        ((3, 3), (3, 3), False, (4, 10, 16, 16), (4, 10, 6, 6)),
        ((3, 3), (5, 7), False, (4, 10, 16, 16), (4, 10, 4, 3)),
        ((5, 3), (1, 1), True, (4, 10, 16, 16), (4, 10, 12, 14)),
        ((5, 3), (3, 3), True, (4, 10, 16, 16), (4, 10, 4, 5)),
        ((5, 3), (5, 7), True, (4, 10, 16, 16), (4, 10, 3, 2)),
        ((5, 3), (1, 1), False, (4, 10, 16, 16), (4, 10, 12, 14)),
        ((5, 3), (3, 3), False, (4, 10, 16, 16), (4, 10, 5, 6)),
        ((5, 3), (5, 7), False, (4, 10, 16, 16), (4, 10, 4, 3)),
        ((16, 16), (1, 1), True, (4, 10, 16, 16), (4, 10, 1, 1)),
        ((16, 16), (5, 7), True, (4, 10, 16, 16), (4, 10, 1, 1)),
        ((16, 16), (1, 1), False, (4, 10, 16, 16), (4, 10, 1, 1)),
        ((16, 16), (5, 7), False, (4, 10, 16, 16), (4, 10, 1, 1)),
        ((3,), (5,), True, (16,), (3,)),
        ((3,), (5,), True, (2, 16,), (2, 3,)),
        ((5,), (3,), True, (2, 3, 16,), (2, 3, 4,)),
        ((5, 1, 3), (3, 3, 3), True, (2, 16, 16, 16), (2, 4, 6, 5)),
        ((5, 1, 3), (3, 3, 3), True, (4, 2, 16, 16, 16), (4, 2, 4, 6, 5)),
    )
    for example, mode in product(
        examples, ["max", "sum", "average_inc_pad", "average_exc_pad"]
    ):
        (maxpoolshp, stride, ignore_border, inputshp, outputshp) = example
        # generate random images
        imval = rng.random(inputshp)
        images = pytensor.shared(imval)
        # Pool op
        numpy_output_val = self.numpy_max_pool_nd_stride(
            imval, maxpoolshp, ignore_border, stride, mode
        )
        assert (
            numpy_output_val.shape == outputshp
        ), f"outshape is {outputshp}, calculated shape is {numpy_output_val.shape}"
        maxpool_op = Pool(
            ndim=len(maxpoolshp), ignore_border=ignore_border, mode=mode
        )(images, maxpoolshp, stride)
        f = function([], maxpool_op)
        output_val = f()
        utt.assert_allclose(output_val, numpy_output_val)
def test_DownsampleFactorMaxStrideExtra(self):
    """Extra stride cases (including strides larger than the image) checked
    against the 2D numpy reference and hard-coded output shapes."""
    rng = np.random.default_rng(utt.fetch_seed())
    maxpoolshps = ((5, 3), (5, 3), (5, 3), (5, 5), (3, 2), (7, 7), (9, 9))
    stridesizes = ((3, 2), (7, 5), (10, 6), (1, 1), (2, 3), (10, 10), (1, 1))
    imvsizs = ((16, 16), (16, 16), (16, 16), (8, 5), (8, 5), (8, 5), (8, 5))
    # Two entries per config: ignore_border=True, then ignore_border=False.
    outputshps = (
        (4, 10, 4, 7),
        (4, 10, 5, 8),
        (4, 10, 2, 3),
        (4, 10, 3, 4),
        (4, 10, 2, 3),
        (4, 10, 2, 3),
        (4, 10, 4, 1),
        (4, 10, 4, 1),
        (4, 10, 3, 2),
        (4, 10, 4, 2),
        (4, 10, 1, 0),
        (4, 10, 1, 1),
        (4, 10, 0, 0),
        (4, 10, 1, 1),
    )
    images = dtensor4()
    for indx in np.arange(len(maxpoolshps)):
        imvsize = imvsizs[indx]
        imval = rng.random((4, 10, imvsize[0], imvsize[1]))
        stride = stridesizes[indx]
        maxpoolshp = maxpoolshps[indx]
        for ignore_border, mode in product(
            [True, False], ["max", "sum", "average_inc_pad", "average_exc_pad"]
        ):
            indx_out = indx * 2
            if not ignore_border:
                indx_out += 1
            outputshp = outputshps[indx_out]
            # Pool op
            numpy_output_val = self.numpy_max_pool_2d_stride(
                imval, maxpoolshp, ignore_border, stride, mode
            )
            assert (
                numpy_output_val.shape == outputshp
            ), "outshape is {}, calculated shape is {}".format(
                outputshp,
                numpy_output_val.shape,
            )
            maxpool_op = Pool(
                ignore_border=ignore_border, ndim=len(maxpoolshp), mode=mode
            )(images, maxpoolshp, stride)
            f = function([images], maxpool_op)
            output_val = f(imval)
            utt.assert_allclose(output_val, numpy_output_val)
def test_DownsampleFactorMaxPaddingStride(self):
    """Pool with padding and stride vs the numpy reference, for all modes."""
    ignore_border = True  # padding does not support ignore_border=False
    rng = np.random.default_rng(utt.fetch_seed())
    # maxpool, stride, pad, input sizes
    examples = (
        ((3,), (2,), (2,), (5,)),
        ((3,), (2,), (2,), (4, 5)),
        ((3,), (2,), (2,), (4, 2, 5, 5)),
        ((3, 3), (2, 2), (2, 2), (4, 2, 5, 5)),
        ((4, 4), (2, 2), (1, 2), (4, 2, 5, 5)),
        ((3, 4), (1, 1), (2, 1), (4, 2, 5, 6)),
        ((4, 3), (1, 2), (0, 0), (4, 2, 6, 5)),
        ((2, 2), (2, 2), (1, 1), (4, 2, 5, 5)),
        ((4, 3, 2), (1, 2, 2), (0, 2, 1), (4, 6, 6, 5)),
        ((4, 3, 2), (1, 2, 2), (0, 2, 1), (4, 2, 6, 5, 5)),
    )
    for example, mode in product(
        examples, ["max", "sum", "average_inc_pad", "average_exc_pad"]
    ):
        (maxpoolshp, stridesize, padsize, inputsize) = example
        imval = rng.random(inputsize) - 0.5
        images = pytensor.shared(imval)
        numpy_output_val = self.numpy_max_pool_nd_stride_pad(
            imval, maxpoolshp, ignore_border, stridesize, padsize, mode
        )
        maxpool_op = Pool(
            ndim=len(maxpoolshp), ignore_border=ignore_border, mode=mode
        )(images, maxpoolshp, stridesize, padsize)
        f = function([], maxpool_op)
        output_val = f()
        utt.assert_allclose(output_val, numpy_output_val)
def test_DownsampleFactorMaxPaddingStride_grad(self):
    """Numeric gradient of Pool with padding and stride (max/sum only)."""
    rng = np.random.default_rng(utt.fetch_seed())
    # maxpool, stride, pad, input sizes
    examples = (
        ((10,), (5,), (3,), (2,)),
        ((10,), (5,), (3,), (2, 2)),
        ((10,), (5,), (3,), (1, 1, 2)),
        ((10, 10), (5, 3), (3, 2), (1, 1, 2, 2)),
        ((10, 5), (3, 5), (2, 3), (1, 1, 2, 1)),
        ((5, 5), (3, 3), (3, 3), (1, 1, 2, 2)),
        ((5, 5, 5), (3, 3, 3), (3, 3, 3), (1, 1, 2, 2, 2)),
    )
    # average_inc_pad and average_exc_pad do not
    # support grad with padding
    for mode in ["max", "sum"]:
        for example in examples:
            (maxpoolshp, stridesize, padsize, inputsize) = example
            imval = rng.random(inputsize) * 10.0

            def mp(input):
                return Pool(
                    ndim=len(maxpoolshp),
                    ignore_border=True,
                    mode=mode,
                )(input, maxpoolshp, stridesize, padsize)

            utt.verify_grad(mp, [imval], rng=rng)
def test_DownsampleFactorMax_grad(self):
    """Numeric gradient of Pool (no stride/pad) over ranks and modes."""
    rng = np.random.default_rng(utt.fetch_seed())
    # maxpool, input sizes
    examples = (
        ((2,), (3,)),
        ((2,), (2, 3)),
        ((2,), (2, 3, 3)),
        ((1, 1), (2, 3, 3, 4)),
        ((3, 2), (2, 3, 3, 4)),
        ((2, 3), (2, 3, 3, 4)),
        ((1, 1, 1), (2, 3, 3)),
        ((3, 2, 2), (2, 3, 3, 4)),
        ((2, 2, 3), (2, 3, 3, 4, 4)),
    )
    for example, ignore_border, mode in product(
        examples,
        [True, False],
        ["max", "sum", "average_inc_pad", "average_exc_pad"],
    ):
        (maxpoolshp, inputsize) = example
        imval = rng.random(inputsize) * 10.0

        # more variance means numeric gradient will be more accurate
        def mp(input):
            return Pool(
                ndim=len(maxpoolshp), ignore_border=ignore_border, mode=mode
            )(input, maxpoolshp)

        utt.verify_grad(mp, [imval], rng=rng)
# pool, stride, input sizes
# Shared fixture consumed by the parametrized *_grad_stride tests below.
pool_grad_stride_examples = (
    ((1,), (1,), (16,)),
    ((1,), (3,), (1, 16)),
    ((1,), (5,), (1, 2, 16)),
    ((2,), (1,), (16,)),
    ((2,), (3,), (1, 16)),
    ((2,), (5,), (1, 2, 16)),
    ((1, 1), (1, 1), (1, 2, 16, 16)),
    ((1, 1), (3, 3), (1, 2, 16, 16)),
    ((1, 1), (5, 7), (1, 2, 16, 16)),
    ((3, 3), (1, 1), (1, 2, 16, 16)),
    ((3, 3), (3, 3), (1, 2, 16, 16)),
    ((3, 3), (5, 7), (1, 2, 16, 16)),
    ((5, 3), (1, 1), (1, 2, 16, 16)),
    ((5, 3), (3, 3), (1, 2, 16, 16)),
    ((5, 3), (5, 7), (1, 2, 16, 16)),
    ((5, 1, 2), (1, 1, 1), (16, 3, 16)),
    ((5, 1, 2), (3, 1, 2), (1, 16, 3, 16)),
    ((5, 1, 2), (5, 1, 4), (1, 2, 16, 3, 16)),
    ((5, 3), (3, 2), (1, 2, 16, 16)),
    ((5, 3), (7, 5), (1, 2, 16, 16)),
    ((5, 3), (10, 6), (1, 2, 16, 16)),
    ((5, 5), (1, 1), (1, 2, 8, 5)),
    ((3, 2), (2, 3), (1, 2, 8, 5)),
    ((7, 7), (10, 10), (1, 2, 8, 5)),
    ((9, 9), (1, 1), (1, 2, 8, 5)),
)
@pytest.mark.slow
@pytest.mark.parametrize(
    "example, ignore_border, mode",
    product(
        pool_grad_stride_examples,
        [True, False],
        ["max", "sum", "average_inc_pad", "average_exc_pad"],
    ),
)
def test_DownsampleFactorMax_grad_stride(self, example, ignore_border, mode):
    """Numeric gradient of Pool when an explicit stride is used."""
    # checks the gradient for the case that stride is used
    rng = np.random.default_rng(utt.fetch_seed())
    (maxpoolshp, stridesize, inputsize) = example
    imval = rng.random(inputsize)

    def mp(input):
        return Pool(ndim=len(maxpoolshp), ignore_border=ignore_border, mode=mode)(
            input, maxpoolshp, stridesize
        )

    utt.verify_grad(mp, [imval], rng=rng)
def test_DownsampleFactorMaxGrad_grad(self):
    """Numeric gradient of MaxPoolGrad (the gradient of max pooling)."""
    rng = np.random.default_rng(utt.fetch_seed())
    # maxpool, input sizes
    examples = (
        ((2,), (2,)),
        ((2,), (2, 3)),
        ((1, 1), (2, 3, 3, 4)),
        ((3, 2), (2, 3, 3, 4)),
        ((2, 3), (2, 3, 3, 4)),
        ((1, 1, 1), (2, 3, 3, 4)),
        ((3, 2, 2), (2, 3, 3, 4)),
        ((2, 3, 2), (2, 3, 3, 4)),
        ((2, 2, 3), (2, 3, 3, 4)),
        ((2, 2, 3), (2, 1, 3, 3, 4)),
    )
    for (maxpoolshp, inputsize) in examples:
        imval = rng.random(inputsize) * 10.0
        # more variance means numeric gradient will be more accurate
        for ignore_border in [True, False]:
            # print 'maxpoolshp =', maxpoolshp
            # print 'ignore_border =', ignore_border
            # The shape of the gradient will be the shape of the output
            grad_shape = Pool.out_shape(
                imval.shape,
                maxpoolshp,
                ndim=len(maxpoolshp),
                ignore_border=ignore_border,
            )
            grad_val = rng.random(grad_shape) * 10.0

            def mp(input, grad):
                out = Pool(ndim=len(maxpoolshp), ignore_border=ignore_border)(
                    input, maxpoolshp
                )
                grad_op = MaxPoolGrad(
                    ndim=len(maxpoolshp), ignore_border=ignore_border
                )
                return grad_op(input, out, grad, maxpoolshp)

            utt.verify_grad(mp, [imval, grad_val], rng=rng)
def test_AveragePoolGrad_grad(self):
    """Numeric gradient of AveragePoolGrad for sum/average modes."""
    rng = np.random.default_rng(utt.fetch_seed())
    # avgpool, input sizes
    examples = (
        ((2,), (2,)),
        ((2,), (2, 3)),
        ((1, 1), (2, 3, 3, 4)),
        ((3, 2), (2, 3, 3, 4)),
        ((2, 3), (2, 3, 3, 4)),
        ((3, 2, 2), (2, 3, 3, 4)),
        ((2, 2, 3), (2, 3, 3, 4)),
    )
    for (avgpoolshp, inputsize) in examples:
        imval = rng.random(inputsize) * 10.0
        # more variance means numeric gradient will be more accurate
        for ignore_border in [True, False]:
            for mode in ["sum", "average_inc_pad", "average_exc_pad"]:
                # print 'maxpoolshp =', maxpoolshp
                # print 'ignore_border =', ignore_border
                # The shape of the gradient will be the shape of the output
                grad_shape = Pool.out_shape(
                    imval.shape,
                    avgpoolshp,
                    ndim=len(avgpoolshp),
                    ignore_border=ignore_border,
                )
                grad_val = rng.random(grad_shape) * 10.0

                def mp(input, grad):
                    grad_op = AveragePoolGrad(
                        ndim=len(avgpoolshp), ignore_border=ignore_border, mode=mode
                    )
                    return grad_op(input, grad, avgpoolshp)

                utt.verify_grad(mp, [imval, grad_val], rng=rng)
@pytest.mark.parametrize(
    "example, ignore_border", product(pool_grad_stride_examples, [True, False])
)
def test_DownsampleFactorMaxGrad_grad_stride(self, example, ignore_border):
    """Gradient of MaxPoolGrad when an explicit stride is used."""
    # checks the gradient of the gradient for
    # the case that stride is used
    rng = np.random.default_rng(utt.fetch_seed())
    (maxpoolshp, stride, inputsize) = example
    imval = rng.random(inputsize)
    grad_shape = Pool.out_shape(
        imval.shape,
        maxpoolshp,
        ndim=len(maxpoolshp),
        ignore_border=ignore_border,
        stride=stride,
    )
    # skip the grad verification when the output is empty
    if np.prod(grad_shape) != 0:
        grad_val = rng.random(grad_shape)

        def mp(input, grad):
            out = Pool(ndim=len(maxpoolshp), ignore_border=ignore_border)(
                input, maxpoolshp, stride
            )
            grad_op = MaxPoolGrad(ndim=len(maxpoolshp), ignore_border=ignore_border)
            return grad_op(input, out, grad, maxpoolshp, stride)

        utt.verify_grad(mp, [imval, grad_val], rng=rng)
@pytest.mark.slow
@pytest.mark.parametrize(
    "example, ignore_border, mode",
    product(
        pool_grad_stride_examples,
        [True, False],
        ["sum", "average_inc_pad", "average_exc_pad"],
    ),
)
def test_AveragePoolGrad_grad_stride(self, example, ignore_border, mode):
    """Gradient of AveragePoolGrad when an explicit stride is used."""
    # checks the gradient of the gradient for
    # the case that stride is used
    rng = np.random.default_rng(utt.fetch_seed())
    (avgpoolshp, stride, inputsize) = example
    imval = rng.random(inputsize)
    grad_shape = Pool.out_shape(
        imval.shape,
        avgpoolshp,
        ndim=len(avgpoolshp),
        ignore_border=ignore_border,
        stride=stride,
    )
    # skip the grad verification when the output is empty
    if np.prod(grad_shape) != 0:
        grad_val = rng.random(grad_shape)

        def mp(input, grad):
            grad_op = AveragePoolGrad(
                ndim=len(avgpoolshp), ignore_border=ignore_border, mode=mode
            )
            return grad_op(input, grad, avgpoolshp, stride)

        utt.verify_grad(mp, [imval, grad_val], rng=rng)
def test_DownsampleFactorMaxPaddingStride_grad_grad(self):
    """Gradient of MaxPoolGrad with padding and stride."""
    rng = np.random.default_rng(utt.fetch_seed())
    # maxpool, stride, pad, input sizes
    examples = (
        ((3,), (2,), (2,), (10,)),
        ((3,), (2,), (2,), (2, 10,)),
        ((3,), (2,), (2,), (2, 1, 10,)),
        ((5, 3), (3, 2), (2, 2), (1, 1, 10, 10)),
        ((3, 5), (2, 3), (2, 1), (1, 1, 10, 5)),
        ((5, 3, 3), (3, 2, 2), (2, 2, 2), (1, 1, 10, 5, 5)),
        ((3, 3, 5), (2, 2, 3), (2, 2, 1), (1, 1, 5, 5, 10)),
    )
    for (maxpoolshp, stridesize, padsize, inputsize) in examples:
        imval = rng.random(inputsize) * 10.0
        grad_shape = Pool.out_shape(
            imval.shape,
            maxpoolshp,
            ndim=len(maxpoolshp),
            stride=stridesize,
            ignore_border=True,
            pad=padsize,
        )
        grad_val = rng.random(grad_shape) * 10.0

        def mp(input, grad):
            out = Pool(
                ndim=len(maxpoolshp),
                ignore_border=True,
            )(input, maxpoolshp, stridesize, padsize)
            grad_op = MaxPoolGrad(ndim=len(maxpoolshp), ignore_border=True)
            return grad_op(input, out, grad, maxpoolshp, stridesize, padsize)

        utt.verify_grad(mp, [imval, grad_val], rng=rng)
def test_AveragePoolPaddingStride_grad_grad(self):
    """Gradient of AveragePoolGrad with padding and stride."""
    rng = np.random.default_rng(utt.fetch_seed())
    # avgpool, stride, pad, input sizes
    examples = (
        ((3,), (2,), (2,), (10,)),
        ((3,), (2,), (2,), (2, 10,)),
        ((3,), (2,), (2,), (2, 1, 10,)),
        ((5, 3), (3, 2), (2, 2), (1, 1, 10, 10)),
        ((3, 5), (2, 3), (2, 1), (1, 1, 10, 5)),
        ((5, 3, 2), (3, 2, 1), (2, 2, 2), (1, 1, 10, 5, 5)),
    )
    for (avgpoolshp, stridesize, padsize, inputsize) in examples:
        imval = rng.random(inputsize) * 10.0
        # 'average_exc_pad' with non-zero padding is not implemented
        for mode in ["sum", "average_inc_pad"]:
            grad_shape = Pool.out_shape(
                imval.shape,
                avgpoolshp,
                ndim=len(avgpoolshp),
                stride=stridesize,
                ignore_border=True,
                pad=padsize,
            )
            grad_val = rng.random(grad_shape) * 10.0

            def mp(input, grad):
                grad_op = AveragePoolGrad(
                    ndim=len(avgpoolshp), ignore_border=True, mode=mode
                )
                return grad_op(input, grad, avgpoolshp, stridesize, padsize)

            utt.verify_grad(mp, [imval, grad_val], rng=rng)
def test_DownsampleFactorMax_hessian(self):
    """Hessian of a scalar cost built on a pooled outer product.

    Example provided by Frans Cronje, see
    https://groups.google.com/d/msg/theano-users/qpqUy_3glhw/JMwIvlN5wX4J
    """
    v = vector("x")
    outer = at.dot(v.dimshuffle(0, "x"), v.dimshuffle("x", 0))
    pooled = pool_2d(input=outer, ws=(2, 2), ignore_border=True)
    cost = at.exp(at_sum(pooled))
    hess = pytensor.gradient.hessian(cost=cost, wrt=v)
    hess_fn = function(inputs=[v], outputs=hess)
    # The value has been manually computed from the theoretical gradient,
    # and confirmed by the implementation.
    expected = [[0.0, 0.0], [0.0, 982.7667]]
    assert np.allclose(hess_fn([1, 2]), expected)
def test_DownsampleFactorMaxGradGrad_grad(self):
    """Verify the gradient of DownsampleFactorMaxGradGrad with padding
    and strides, for 1D/2D/3D pool windows."""
    rng = np.random.default_rng(utt.fetch_seed())
    # Each case: (pool window, stride, padding, input shape).
    cases = (
        ((3,), (2,), (2,), (10,)),
        ((3,), (2,), (2,), (2, 10)),
        ((3,), (2,), (2,), (2, 1, 10)),
        ((5, 3), (3, 2), (2, 2), (1, 1, 10, 10)),
        ((3, 5), (2, 3), (2, 1), (1, 1, 10, 5)),
        ((3, 3), (3, 3), (2, 2), (1, 1, 5, 5)),
        ((5, 3, 3), (3, 2, 2), (2, 2, 2), (1, 1, 10, 5, 5)),
        ((3, 3, 5), (2, 2, 3), (2, 2, 1), (1, 1, 5, 5, 10)),
    )
    for ws, stride, pad, in_shape in cases:
        nd = len(ws)
        img = rng.random(in_shape) * 10.0
        gz = rng.random(in_shape) * 10.0

        def mp(input1, input2):
            pooled = Pool(ndim=nd, ignore_border=True)(input1, ws, stride, pad)
            grad_grad_op = DownsampleFactorMaxGradGrad(ndim=nd, ignore_border=True)
            return grad_grad_op(input1, pooled, input2, ws, stride, pad)

        utt.verify_grad(mp, [img, gz], rng=rng)
def test_max_pool_2d_2D(self):
    """pool_2d on a plain 2D input: compare against the NumPy reference
    implementation and verify the gradient, for all supported modes."""
    rng = np.random.default_rng(utt.fetch_seed())
    imval = rng.random((4, 5))
    images = dmatrix()
    modes = ["max", "sum", "average_inc_pad", "average_exc_pad"]
    for ws, ignore_border, mode in product(((1, 1), (3, 2)), [True, False], modes):
        expected = self.numpy_max_pool_2d(imval, ws, ignore_border, mode=mode)
        got = function([images], pool_2d(images, ws, ignore_border, mode=mode))(imval)
        utt.assert_allclose(got, expected)

        def mp(input):
            return pool_2d(input, ws, ignore_border, mode=mode)

        utt.verify_grad(mp, [imval], rng=rng)
def test_max_pool_3d_3D(self):
    """pool_3d on a plain 3D input: compare against the NumPy reference
    implementation and verify the gradient, for all supported modes."""
    rng = np.random.default_rng(utt.fetch_seed())
    imval = rng.random((4, 5, 6))
    images = dtensor3()
    modes = ["max", "sum", "average_inc_pad", "average_exc_pad"]
    for ws, ignore_border, mode in product(((1, 1, 1), (3, 2, 1)), [True, False], modes):
        expected = self.numpy_max_pool_nd(imval, ws, ignore_border, mode=mode)
        got = function([images], pool_3d(images, ws, ignore_border, mode=mode))(imval)
        utt.assert_allclose(got, expected)

        def mp(input):
            return pool_3d(input, ws, ignore_border, mode=mode)

        utt.verify_grad(mp, [imval], rng=rng)
def test_max_pool_3d_3D_deprecated_interface(self):
    """pool_3d invoked through its deprecated keyword names (``ds``,
    ``st``, ``padding``) must match the NumPy reference output.

    Only the forward output is checked here; the gradient path is
    covered by ``test_max_pool_3d_3D``.  (A dead, never-called nested
    ``mp`` helper — leftover from a dropped ``verify_grad`` call — was
    removed; if gradient coverage through the deprecated interface is
    wanted, restore the ``utt.verify_grad`` call instead.)
    """
    rng = np.random.default_rng(utt.fetch_seed())
    maxpoolshps = ((1, 1, 1), (3, 2, 1))
    imval = rng.random((4, 5, 6))
    images = dtensor3()
    for maxpoolshp, ignore_border, mode in product(
        maxpoolshps,
        [True, False],
        ["max", "sum", "average_inc_pad", "average_exc_pad"],
    ):
        numpy_output_val = self.numpy_max_pool_nd(
            imval, maxpoolshp, ignore_border, mode=mode
        )
        # Deprecated spellings: ds -> ws, st -> stride, padding -> pad.
        output = pool_3d(
            input=images,
            ds=maxpoolshp,
            ignore_border=ignore_border,
            st=maxpoolshp,
            padding=(0, 0, 0),
            mode=mode,
        )
        output_val = function([images], output)(imval)
        utt.assert_allclose(output_val, numpy_output_val)
def test_max_pool_2d_2D_same_size(self):
    """max_pool_2d_same_size keeps the input shape: only each patch's
    maximum survives, all other positions are zeroed."""
    rng = np.random.default_rng(utt.fetch_seed())
    floatX = pytensor.config.floatX
    x_val = np.array(
        [[[[1.0, 2.0, 3.0, 4.0], [5.0, 6.0, 7.0, 8.0]]]], dtype=floatX
    )
    expected = np.array(
        [[[[0.0, 0.0, 0.0, 0.0], [0.0, 6.0, 0.0, 8.0]]]], dtype=floatX
    )
    x = tensor4(name="input")
    patch_size = (2, 2)
    out = max_pool_2d_same_size(x, patch_size)
    out_val = function([x], out)(x_val)
    utt.assert_allclose(out_val, expected)

    def mp(input):
        return max_pool_2d_same_size(input, patch_size)

    utt.verify_grad(mp, [x_val], rng=rng)
def test_max_pool_2d_3D(self):
    """pool_2d on a 3D input: pooling applies to the last two axes."""
    rng = np.random.default_rng(utt.fetch_seed())
    imval = rng.random((2, 3, 4))
    images = dtensor3()
    modes = ["max", "sum", "average_inc_pad", "average_exc_pad"]
    for ws, ignore_border, mode in product([(1, 2)], [True, False], modes):
        expected = self.numpy_max_pool_2d(imval, ws, ignore_border, mode)
        got = function([images], pool_2d(images, ws, ignore_border, mode=mode))(imval)
        utt.assert_allclose(got, expected)
    # Gradient check intentionally omitted: already exercised by
    # test_max_pool_2d_2D, and it makes debug-mode runs too slow.
def test_max_pool_2d_6D(self):
    """pool_2d on a 6D input: only the two trailing axes are pooled."""
    rng = np.random.default_rng(utt.fetch_seed())
    imval = rng.random((2, 1, 1, 1, 3, 4))
    images = TensorType("float64", shape=(None,) * 6)()
    modes = ["max", "sum", "average_inc_pad", "average_exc_pad"]
    for ws, ignore_border, mode in product([(3, 2)], [True, False], modes):
        expected = self.numpy_max_pool_2d(imval, ws, ignore_border, mode=mode)
        got = function([images], pool_2d(images, ws, ignore_border, mode=mode))(imval)
        utt.assert_allclose(got, expected)
    # Gradient check intentionally omitted: already exercised by
    # test_max_pool_2d_2D, and it makes debug-mode runs too slow.
def test_infer_shape(self):
    """Shape inference for Pool and MaxPoolGrad.

    ``out_shapes`` is indexed as ``out_shapes[k][i][j]`` where ``k``
    runs over the padding options, ``i`` over the pool shapes and ``j``
    over ignore_border in (True, False).  ``None`` entries mark
    combinations the op does not support (e.g. padding together with
    ignore_border=False) and are skipped.
    """
    image = dtensor4()
    maxout = dtensor4()
    gz = dtensor4()
    rng = np.random.default_rng(utt.fetch_seed())
    maxpoolshps = ((1, 1), (2, 2), (3, 3), (2, 3), (3, 2))
    image_val = rng.random((4, 6, 7, 9))
    # Expected output shapes, indexed [pad_idx][poolshape_idx][ignore_border_idx].
    out_shapes = [
        [
            [[4, 6, 7, 9], [4, 6, 7, 9]],
            [[4, 6, 3, 4], [4, 6, 4, 5]],
            [[4, 6, 2, 3], [4, 6, 3, 3]],
            [[4, 6, 3, 3], [4, 6, 4, 3]],
            [[4, 6, 2, 4], [4, 6, 3, 5]],
        ],
        [
            [None, None],
            [[4, 6, 4, 5], None],
            [[4, 6, 3, 3], None],
            [[4, 6, 4, 3], None],
            [[4, 6, 3, 5], None],
        ],
        [
            [None, None],
            [None, None],
            [[4, 6, 3, 4], None],
            [[4, 6, 4, 4], None],
            [None, None],
        ],
    ]
    for i, maxpoolshp in enumerate(maxpoolshps):
        for j, ignore_border in enumerate([True, False]):
            for k, pad in enumerate([(0, 0), (1, 1), (1, 2)]):
                if out_shapes[k][i][j] is None:
                    # Unsupported combination; nothing to check.
                    continue
                # checking shapes generated by Pool
                self._compile_and_check(
                    [image],
                    [Pool(ignore_border=ignore_border)(image, maxpoolshp, pad=pad)],
                    [image_val],
                    Pool,
                )
                # checking shapes generated by MaxPoolGrad
                maxout_val = rng.random(out_shapes[k][i][j])
                gz_val = rng.random(out_shapes[k][i][j])
                self._compile_and_check(
                    [image, maxout, gz],
                    [
                        MaxPoolGrad(ignore_border=ignore_border)(
                            image, maxout, gz, maxpoolshp, pad=pad
                        )
                    ],
                    [image_val, maxout_val, gz_val],
                    MaxPoolGrad,
                    warn=False,
                )
    # checking with broadcastable input
    image = tensor(dtype="float64", shape=(None, None, 1, 1))
    image_val = rng.random((4, 6, 1, 1))
    self._compile_and_check(
        [image],
        [Pool(ignore_border=True)(image, (2, 2), pad=(0, 0))],
        [image_val],
        Pool,
    )
def test_pooling_with_tensor_vars(self):
    """pool_2d with symbolic (ivector) window/stride/padding must match
    pool_2d with the equivalent fixed Python-tuple parameters, for both
    the forward output and the gradient w.r.t. the input."""
    x = ftensor4()
    window_size = ivector()
    stride = ivector()
    padding = ivector()
    data = np.random.normal(0, 1, (1, 1, 5, 5)).astype("float32")
    # checking variable params vs fixed params
    for ignore_border in [True, False]:
        for mode in ["max", "sum", "average_inc_pad", "average_exc_pad"]:
            # Graph whose pooling parameters are run-time tensors.
            y = pool_2d(x, window_size, ignore_border, stride, padding, mode)
            dx = pytensor.gradient.grad(y.sum(), x)
            var_fct = pytensor.function([x, window_size, stride, padding], [y, dx])
            for ws in (4, 2, 5):
                for st in (2, 3):
                    for pad in (0, 1):
                        if (
                            pad > st
                            or st > ws
                            or (pad != 0 and not ignore_border)
                            or (mode == "average_exc_pad" and pad != 0)
                        ):
                            # Skip parameter combinations the op rejects.
                            continue
                        # Equivalent graph with compile-time tuple parameters.
                        y = pool_2d(
                            x, (ws, ws), ignore_border, (st, st), (pad, pad), mode
                        )
                        dx = pytensor.gradient.grad(y.sum(), x)
                        fix_fct = pytensor.function([x], [y, dx])
                        var_y, var_dx = var_fct(
                            data, (ws, ws), (st, st), (pad, pad)
                        )
                        fix_y, fix_dx = fix_fct(data)
                        utt.assert_allclose(var_y, fix_y)
                        utt.assert_allclose(var_dx, fix_dx)
def test_pooling_with_tensor_vars_deprecated_interface(self):
    """Same variable-vs-fixed parameter comparison as
    ``test_pooling_with_tensor_vars``, but calling pool_2d through the
    deprecated keyword names (``ds``, ``st``, ``padding``), with a
    single fixed (ws, st, pad) combination per mode."""
    x = ftensor4()
    window_size = ivector()
    stride = ivector()
    padding = ivector()
    data = np.random.normal(0, 1, (1, 1, 5, 5)).astype("float32")
    # checking variable params vs fixed params
    for ignore_border in [True, False]:
        for mode in ["max", "sum", "average_inc_pad", "average_exc_pad"]:
            # Graph with run-time tensor parameters, via deprecated kwargs.
            y = pool_2d(
                input=x,
                ds=window_size,
                ignore_border=ignore_border,
                st=stride,
                padding=padding,
                mode=mode,
            )
            dx = pytensor.gradient.grad(y.sum(), x)
            var_fct = pytensor.function([x, window_size, stride, padding], [y, dx])
            ws = 5
            st = 3
            pad = 1
            # Skip combinations the op rejects (same filter as the
            # non-deprecated test); with pad=1 this skips ignore_border=False
            # and 'average_exc_pad'.
            if (
                pad > st
                or st > ws
                or (pad != 0 and not ignore_border)
                or (mode == "average_exc_pad" and pad != 0)
            ):
                continue
            # Equivalent graph with compile-time tuple parameters.
            y = pool_2d(
                input=x,
                ds=(ws, ws),
                ignore_border=ignore_border,
                st=(st, st),
                padding=(pad, pad),
                mode=mode,
            )
            dx = pytensor.gradient.grad(y.sum(), x)
            fix_fct = pytensor.function([x], [y, dx])
            var_y, var_dx = var_fct(data, (ws, ws), (st, st), (pad, pad))
            fix_y, fix_dx = fix_fct(data)
            utt.assert_allclose(var_y, fix_y)
            utt.assert_allclose(var_dx, fix_dx)
@staticmethod
def checks_helper(func, x, ws, stride, pad):
    """Exercise the keyword-validation errors shared by the pooling APIs.

    ``func`` is one of pool_2d / pool_3d / Pool.out_shape /
    PoolGrad.out_shape.  Each call must raise a ValueError when both a
    new keyword (ws/stride/pad) and its deprecated alias
    (ds/st/padding) are supplied, or when the window size is missing.
    The match strings (including their odd quoting) mirror the exact
    error messages raised by the implementation.
    """
    with pytest.raises(
        ValueError, match=r"You can't provide a tuple value to both 'ws' and 'ds'."
    ):
        func(x, ds=ws, ws=ws)
    with pytest.raises(
        ValueError, match="You must provide a tuple value for the window size."
    ):
        func(x)
    with pytest.raises(
        ValueError,
        match=r"You can't provide a tuple value to both 'st and 'stride'.",
    ):
        func(x, ws=ws, st=stride, stride=stride)
    with pytest.raises(
        ValueError,
        match=r"You can't provide a tuple value to both 'padding' and pad.",
    ):
        func(x, ws=ws, pad=pad, padding=pad)
def test_pool_2d_checks(self):
    """Input validation for pool_2d: conflicting keywords, minimum input
    rank, and the deprecation warning when ignore_border is left unset
    (defaulting to False)."""
    x = fmatrix()
    self.checks_helper(pool_2d, x, ws=(1, 1), stride=(1, 1), pad=(1, 1))
    with pytest.raises(
        NotImplementedError, match="pool_2d requires a dimension >= 2"
    ):
        pool_2d(input=vector(), ws=(1, 1))
    with pytest.deprecated_call():
        out = pool_2d(input=x, ws=(1, 1))
    assert not out.owner.op.ignore_border
def test_pool_3d_checks(self):
    """Input validation for pool_3d: conflicting keywords, minimum input
    rank, and the deprecation warning when ignore_border is left unset
    (defaulting to False)."""
    x = ftensor3()
    self.checks_helper(pool_3d, x, ws=(1, 1, 1), stride=(1, 1, 1), pad=(1, 1, 1))
    with pytest.raises(
        NotImplementedError, match="pool_3d requires a dimension >= 3"
    ):
        pool_3d(input=fmatrix(), ws=(1, 1, 1))
    with pytest.deprecated_call():
        out = pool_3d(input=x, ws=(1, 1, 1))
    assert not out.owner.op.ignore_border
@pytest.mark.parametrize("func", [Pool.out_shape, PoolGrad.out_shape])
def test_Pool_out_shape_checks(self, func):
    """out_shape must reject conflicting keywords and an imgshape with
    fewer dimensions than the requested ndim."""
    imgshape = (10, 10)
    self.checks_helper(func, imgshape, ws=(1, 1), stride=(1, 1), pad=(1, 1))
    with pytest.raises(TypeError, match="imgshape must have at least 3 dimensions"):
        func(imgshape, (2, 2), ndim=3)
def test_Pool_make_node_checks(self):
    """Pool.make_node argument validation: padding rules, ndim/window
    mismatch, and integer-only ws/stride/pad parameters."""
    x = fmatrix()
    with pytest.raises(
        NotImplementedError, match="padding works only with ignore_border=True"
    ):
        Pool(ignore_border=False, ndim=2)(x, (1, 1), pad=(1, 1))
    with pytest.raises(
        NotImplementedError, match="padding must be smaller than strides"
    ):
        Pool(ignore_border=True, ndim=2)(x, (1, 1), pad=(2, 2))
    # A 2-element window cannot feed a 3D pool.
    with pytest.raises(TypeError):
        Pool(ignore_border=True, ndim=3)(x, (1, 1))
    op = Pool(ignore_border=True, ndim=2)
    with pytest.raises(TypeError, match="Pool downsample parameters must be ints."):
        op(x, (1.0, 1.0))
    with pytest.raises(TypeError, match="Stride parameters must be ints."):
        op(x, (1, 1), stride=(1.0, 1.0))
    with pytest.raises(TypeError, match="Padding parameters must be ints."):
        op(x, (2, 2), pad=(1.0, 1.0))
def test_MaxPoolGrad_make_node_checks(self):
    """MaxPoolGrad.make_node must reject non-integer ws/stride/pad.

    The first case originally passed ``ws=(1.0, 1, 0)`` — a
    comma-for-period typo with the wrong arity for ndim=2; fixed to the
    intended ``(1.0, 1.0)``.  The same TypeError fires either way since
    a float element is present.
    """
    x = fmatrix()
    op = MaxPoolGrad(ignore_border=True, ndim=2)
    with pytest.raises(TypeError, match="Pool downsample parameters must be ints."):
        op(x, maxout=[[1, 1], [1, 1]], gz=[[1, 1], [1, 1]], ws=(1.0, 1.0))
    with pytest.raises(TypeError, match="Stride parameters must be ints."):
        op(
            x,
            maxout=[[1, 1], [1, 1]],
            gz=[[1, 1], [1, 1]],
            ws=(1, 1),
            stride=(1.0, 1.0),
        )
    with pytest.raises(TypeError, match="Padding parameters must be ints."):
        op(
            x,
            maxout=[[1, 1], [1, 1]],
            gz=[[1, 1], [1, 1]],
            ws=(1, 1),
            pad=(1.0, 1.0),
        )
...@@ -12,8 +12,6 @@ check_nondiff_rop, ...@@ -12,8 +12,6 @@ check_nondiff_rop,
""" """
import itertools
import numpy as np import numpy as np
import pytest import pytest
...@@ -26,7 +24,6 @@ from pytensor.graph.op import Op ...@@ -26,7 +24,6 @@ from pytensor.graph.op import Op
from pytensor.tensor.math import argmax, dot from pytensor.tensor.math import argmax, dot
from pytensor.tensor.math import max as at_max from pytensor.tensor.math import max as at_max
from pytensor.tensor.shape import unbroadcast from pytensor.tensor.shape import unbroadcast
from pytensor.tensor.signal.pool import Pool
from pytensor.tensor.type import matrix, vector from pytensor.tensor.type import matrix, vector
from tests import unittest_tools as utt from tests import unittest_tools as utt
...@@ -248,63 +245,6 @@ class TestRopLop(RopLopChecker): ...@@ -248,63 +245,6 @@ class TestRopLop(RopLopChecker):
unbroadcast(self.x[:4].dimshuffle("x", 0), 0).sum(axis=1), (1,) unbroadcast(self.x[:4].dimshuffle("x", 0), 0).sum(axis=1), (1,)
) )
@pytest.mark.slow
def test_downsample(self):
rng = np.random.default_rng(utt.fetch_seed())
# ws, shp
examples = (
((2,), (16,)),
(
(2,),
(
4,
16,
),
),
(
(2,),
(
4,
2,
16,
),
),
((1, 1), (4, 2, 16, 16)),
((2, 2), (4, 2, 16, 16)),
((3, 3), (4, 2, 16, 16)),
((3, 2), (4, 2, 16, 16)),
((3, 2, 2), (3, 2, 16, 16, 16)),
((2, 3, 2), (3, 2, 16, 16, 16)),
((2, 2, 3), (3, 2, 16, 16, 16)),
((2, 2, 3, 2), (3, 2, 6, 6, 6, 5)),
)
for example, ignore_border in itertools.product(examples, [True, False]):
(ws, shp) = example
vx = rng.random(shp)
vex = rng.random(shp)
x = pytensor.shared(vx)
ex = pytensor.shared(vex)
maxpool_op = Pool(ignore_border, ndim=len(ws))
a_pooled = maxpool_op(x, ws).flatten()
yv = Rop(a_pooled, x, ex)
mode = None
if pytensor.config.mode == "FAST_COMPILE":
mode = "FAST_RUN"
rop_f = function([], yv, on_unused_input="ignore", mode=mode)
sy, _ = pytensor.scan(
lambda i, y, x, v: (grad(y[i], x) * v).sum(),
sequences=at.arange(a_pooled.shape[0]),
non_sequences=[a_pooled, x, ex],
mode=mode,
)
scan_f = function([], sy, on_unused_input="ignore", mode=mode)
v1 = rop_f()
v2 = scan_f()
assert np.allclose(v1, v2), f"Rop mismatch: {v1} {v2}"
def test_join(self): def test_join(self):
tv = np.asarray(self.rng.uniform(size=(10,)), pytensor.config.floatX) tv = np.asarray(self.rng.uniform(size=(10,)), pytensor.config.floatX)
t = pytensor.shared(tv) t = pytensor.shared(tv)
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论