Merge pull request #3644 from laurent-dinh/conv_infer_shape

Factoring inference of convolution output shape

Merge pull request #3644 from laurent-dinh/conv_infer_shape
9b8847df · Pascal Lamblin · 755f2218 · c10feb1a · 9b8847df · 9b8847df
--- a/theano/sandbox/gpuarray/dnn.py
+++ b/theano/sandbox/gpuarray/dnn.py
@@ -12,6 +12,7 @@ from theano.gof.type import CDataType, Generic
 from theano.compile import optdb
 from theano.compile.ops import shape_i
 from theano.tensor.nnet import SoftmaxGrad
+from theano.tensor.nnet.abstract_conv2d import get_conv_output_shape
 from theano.tensor.signal.downsample import (
    DownsampleFactorMax, MaxPoolGrad, AveragePoolGrad)
@@ -473,48 +474,11 @@ class GpuDnnConv(DnnBase):
        or scalar.
        """
-        b = ishape[0]  # Number of inputs
+        return get_conv_output_shape(
-        h = ishape[2]  # Height of input feature maps
+            ishape,
-        w = ishape[3]  # Width of input feature maps
+            kshape,
-        nb = kshape[0]  # Number of output feature maps
+            border_mode,
-        kh = kshape[2]  # Height of each filter
+            subsample)
-        kw = kshape[3]  # Width of each filter
-        nd = len(subsample)
-        if nd > 2:
-            d = ishape[4]
-            kd = ishape[4]
-        sh = subsample[0]
-        sw = subsample[1]
-        if nd > 2:
-            sd = subsample[2]
-        if border_mode == 'full':
-            padh = kh - 1
-            padw = kw - 1
-            if nd > 4:
-                padd = kd - 1
-        elif isinstance(border_mode, tuple):
-            padh = border_mode[0]
-            padw = border_mode[1]
-            if nd > 2:
-                padd = border_mode[2]
-        else:
-            assert border_mode == 'valid'
-            padh = 0
-            padw = 0
-            padd = 0
-        res = [b, nb,
-               (h + 2 * padh - kh) // sh + 1,
-               (w + 2 * padw - kw) // sw + 1]
-        if nd > 2:
-            res.append(d + 2 * padd - kd // sd + 1)
-        return res
    def infer_shape(self, node, shape):
        return [shape[2]]

--- a/theano/tensor/nnet/abstract_conv2d.py
+++ b/theano/tensor/nnet/abstract_conv2d.py
@@ -13,6 +13,89 @@ __docformat__ = "restructuredtext en"
 _logger = logging.getLogger("theano.tensor.nnet.conv2d")
+def get_conv_output_shape(image_shape, kernel_shape,
+                          border_mode, subsample):
+    """
+    Compute the output shape of a convolution operation.
+    The batch size and the number of output channels are taken directly from
+    the inputs; each spatial axis is computed by `get_conv_shape_1axis`.
+    Parameters
+    ----------
+    image_shape: tuple of int (symbolic or numeric) corresponding to the input
+        image shape. Its four (or five) elements must correspond respectively
+        to: batch size, number of input channels, height and width (and
+        possibly depth) of the image. None where undefined.
+    kernel_shape: tuple of int (symbolic or numeric) corresponding to the
+        kernel shape. Its four (or five) elements must correspond respectively
+        to: number of output channels, number of input channels, height and
+        width (and possibly depth) of the kernel. None where undefined.
+    border_mode: string, int (symbolic or numeric) or tuple of int (symbolic
+        or numeric). If it is a string, it must be 'valid', 'half' or 'full'.
+        If it is a tuple, its two (or three) elements respectively correspond
+        to the padding on height and width (and possibly depth) axis. Any
+        non-tuple value is applied unchanged to every spatial axis.
+    subsample: tuple of int (symbolic or numeric). Its two or three elements
+        respectively correspond to the subsampling on height and width (and
+        possibly depth) axis. Its length determines how many spatial axes
+        are computed.
+    Returns
+    -------
+    output_shape: tuple of int corresponding to the output image shape. Its
+        four (or five) elements correspond respectively to: batch size,
+        number of output channels, height and width (and possibly depth) of
+        the image. None where undefined.
+    """
+    # Split off the leading entries; only the spatial dimensions (index 2 and
+    # onwards) take part in the per-axis computation.
+    bsize, imshp = image_shape[0], list(image_shape[2:])
+    nkern, kshp = kernel_shape[0], list(kernel_shape[2:])
+    if isinstance(border_mode, tuple):
+        # Tuple border_mode: each axis uses its own padding entry.
+        out_shp = tuple(get_conv_shape_1axis(
+            imshp[i], kshp[i], border_mode[i], subsample[i])
+            for i in range(len(subsample)))
+    else:
+        # Non-tuple border_mode: the same value is passed for every axis.
+        out_shp = tuple(get_conv_shape_1axis(
+            imshp[i], kshp[i], border_mode, subsample[i])
+            for i in range(len(subsample)))
+    return (bsize, nkern) + out_shp
+def get_conv_shape_1axis(image_shape, kernel_shape,
+                         border_mode, subsample):
+    """
+    Compute the output shape of a convolution operation along a single axis.
+    Parameters
+    ----------
+    image_shape: int or None. Corresponds to the input image shape on a
+        given axis. None if undefined.
+    kernel_shape: int or None. Corresponds to the kernel shape on a given
+        axis. None if undefined.
+    border_mode: string or int. If it is a string, it must be
+        'valid', 'half' or 'full'. If it is an integer, it must correspond to
+        the padding on the considered axis.
+    subsample: int. It must correspond to the subsampling on the
+        considered axis.
+    Returns
+    -------
+    out_shp: int corresponding to the output image shape on the
+        considered axis. None if undefined.
+    Raises
+    ------
+    ValueError: if border_mode is given as a negative padding value.
+    """
+    # Any undefined (None) argument makes the output undefined as well.
+    if None in [image_shape, kernel_shape, border_mode, subsample]:
+        return None
+    # Translate border_mode into a padding amount for this axis.
+    if border_mode == "half":
+        pad = kernel_shape // 2
+    elif border_mode == "full":
+        pad = kernel_shape - 1
+    elif border_mode == "valid":
+        pad = 0
+    else:
+        # Anything else is used directly as the padding amount.
+        pad = border_mode
+        if pad < 0:
+            raise ValueError("border_mode must be >= 0")
+    # Padded length is image_shape + 2 * pad; floor-divide the span left after
+    # placing the kernel by the subsampling step, then add one.
+    out_shp = (image_shape + 2 * pad - kernel_shape) // subsample + 1
+    return out_shp
 def conv2d(input,
           filters,
           input_shape=None,

--- a/theano/tensor/nnet/conv.py
+++ b/theano/tensor/nnet/conv.py
@@ -14,12 +14,14 @@ import logging
 import numpy
 from six.moves import xrange
+import warnings
 import theano
 from theano import OpenMPOp
 from theano.tensor import (as_tensor_variable, blas, get_scalar_constant_value,
                           patternbroadcast, NotScalarConstantError)
 from theano.gof import Apply
+from theano.tensor.nnet.abstract_conv2d import get_conv_output_shape
 try:
    # TODO: move these back out to global scope when they no longer
@@ -363,10 +365,13 @@ class ConvOp(OpenMPOp):
        # The formula would be ceil((i + s * k - s * 1) / float(d)),
        # with s=1 for mode=='full' and s=-1 for mode=='valid'.
        # To support symbolic shapes, we express this with integer arithmetics.
-        return tuple(None if i is None or k is None
+        warnings.warn("The method `getOutputShape` is deprecated use"
-                     else ((i - k) // d + 1) if mode == 'valid'
+                      "`get_conv_output_shape` instead.")
-                     else ((i + k + d - 2) // d)
+        return get_conv_output_shape(
-                     for i, k, d in zip(inshp, kshp, stride))
+            image_shape=(None, None, inshp[0], inshp[1]),
+            kernel_shape=(None, None, kshp[0], kshp[1]),
+            border_mode=mode,
+            subsample=stride)
    def __init__(self, imshp=None, kshp=None, nkern=None, bsize=None,
                 dx=1, dy=1,
@@ -511,12 +516,16 @@ class ConvOp(OpenMPOp):
                _logger.warn(warnstr, self.unroll_kern, self.nkern, new)
                self.unroll_kern = new
-        self.outshp = ConvOp.getOutputShape(self.imshp_logical[1:],
+        self.outshp = get_conv_output_shape(
-                                            self.kshp_logical, (dx, dy),
+            (None,) + self.imshp_logical,
-                                            output_mode)
+            (None, None,) + self.kshp_logical,
-        self.fulloutshp = ConvOp.getOutputShape(self.imshp_logical[1:],
+            output_mode,
-                                                self.kshp_logical, (1, 1),
+            (dx, dy))[2:]
-                                                output_mode)
+        self.fulloutshp = get_conv_output_shape(
+            (None,) + self.imshp_logical,
+            (None, None,) + self.kshp_logical,
+            output_mode,
+            (1, 1))[2:]
        self.out_mode = output_mode
@@ -669,9 +678,12 @@ class ConvOp(OpenMPOp):
            if self.kshp_logical[i] is not None:
                kshp[i] = self.kshp_logical[i]
        # infer output shape from what we have
-        outshp = ConvOp.getOutputShape(imshp[1:], kshp, (self.dx, self.dy),
+        res = get_conv_output_shape(
-                                       self.out_mode)
+            (bsize,) + tuple(imshp),
-        return [(bsize, nkern) + outshp]
+            (nkern, None,) + tuple(kshp),
+            self.out_mode,
+            (self.dx, self.dy))
+        return [res]
    def perform(self, node, inp, out):
        """
@@ -737,8 +749,11 @@ class ConvOp(OpenMPOp):
        if all(shp is not None for shp in self.fulloutshp):
            fulloutshp = tuple(self.fulloutshp)
        else:
-            fulloutshp = tuple(ConvOp.getOutputShape(imshp_logical[
+            fulloutshp = get_conv_output_shape(
-                1:], kshp_logical, (1, 1), self.out_mode))
+                (None,) + imshp_logical,
+                (None, None,) + kshp_logical,
+                self.out_mode,
+                (1, 1))[2:]
        if z[0] is None or z[0].shape != (bsize, nkern,) + fulloutshp:
            z[0] = numpy.zeros((bsize, nkern,) + fulloutshp,

--- a/theano/tensor/nnet/corr.py
+++ b/theano/tensor/nnet/corr.py
@@ -5,6 +5,7 @@ import theano
 from theano import Apply
 from theano import gof
 from theano.tensor import as_tensor_variable, TensorType
+from theano.tensor.nnet.abstract_conv2d import get_conv_output_shape
 from theano.tensor.blas_headers import blas_header_text
 from theano.tensor.blas import ldflags
@@ -370,37 +371,14 @@ class CorrMM(BaseCorrMM):
        return Apply(self, [img, kern], [TensorType(dtype, broadcastable)()])
    def infer_shape(self, node, input_shape):
-        if self.border_mode == "half":
-            padH = padW = -1
-        elif self.border_mode == "full":
-            padH = padW = -2
-        elif isinstance(self.border_mode, tuple):
-            padH, padW = self.border_mode
-        else:
-            assert self.border_mode == "valid"
-            padH = padW = 0
-        dH, dW = self.subsample
        imshp = input_shape[0]
        kshp = input_shape[1]
-        bsize, imshp = imshp[0], list(imshp[2:])
+        res = get_conv_output_shape(
-        nkern, kshp = kshp[0], list(kshp[2:])
+            imshp,
-        kH, kW = kshp
+            kshp,
-        if padH == -1:
+            self.border_mode,
-            padH = kH // 2
+            self.subsample)
-        elif padH == -2:
+        return [res]
-            padH = kH - 1
-        elif padH < 0:
-            raise ValueError("CorrMM: border_mode must be >= 0")
-        if padW == -1:
-            padW = kW // 2
-        elif padW == -2:
-            padW = kW - 1
-        elif padW < 0:
-            raise ValueError("CorrMM: border_mode must be >= 0")
-        out_shp0 = (imshp[0] + 2 * padH - kshp[0]) // dH + 1
-        out_shp1 = (imshp[1] + 2 * padW - kshp[1]) // dW + 1
-        out_shp = (out_shp0, out_shp1)
-        return [(bsize, nkern) + out_shp]
    def c_code(self, node, nodename, inp, out_, sub):
        bottom, weights = inp