adding fractional bilinear upsampling

83a288fd · erakra · 404cea07 · 83a288fd · 83a288fd
--- a/theano/tensor/nnet/abstract_conv.py
+++ b/theano/tensor/nnet/abstract_conv.py
@@ -6,6 +6,7 @@ from __future__ import absolute_import, print_function, division
 import logging
 from six import reraise, integer_types
 import sys
+from fractions import gcd
 import theano
@@ -1508,8 +1509,14 @@ def bilinear_kernel_2D(ratio, normalize=True):
    """
-    hkern = bilinear_kernel_1D(ratio=ratio, normalize=normalize).dimshuffle('x', 0)
+    if isinstance(ratio, tuple):
-    vkern = bilinear_kernel_1D(ratio=ratio, normalize=normalize).dimshuffle(0, 'x')
+        ratio_h = ratio[1]
+        ratio_v = ratio[0]
+    else:
+        ratio_h = ratio
+        ratio_v = ratio
+    hkern = bilinear_kernel_1D(ratio=ratio_h, normalize=normalize).dimshuffle('x', 0)
+    vkern = bilinear_kernel_1D(ratio=ratio_v, normalize=normalize).dimshuffle(0, 'x')
    kern = hkern * vkern
    return kern
@@ -1547,6 +1554,126 @@ def bilinear_kernel_1D(ratio, normalize=True):
    return kern
+def frac_bilinear_upsampling(input,
+                             ratio=None,
+                             frac_ratio=None,
+                             use_1D_kernel=False):
+    """Compute bilinear upsampling
+    This function will build the symbolic graph for upsampling
+    a tensor by the given ratio using bilinear interpolation.
+    Parameters
+    ----------
+    input: symbolic 4D tensor
+        mini-batch of feature map stacks, of shape (batch size,
+        input channels, input rows, input columns) that will be upsampled.
+    ratio: `int or Constant or Scalar Tensor of int* dtype`
+        the ratio by which the input is upsampled in the 2D space (row and
+        col size).
+    frac_ratio: None, tuple of int or tuple of tuples of int
+        The tuple defining the fractional ratio by which the input is
+        upsampled in the 2D space. One fractional ratio should be
+        represented as (numerator, denominator). If row and col ratios are
+        different frac_ratio should be a tuple of fractional ratios, i.e
+        a tuple of tuples.
+    use_1D_kernel: bool
+        if set to true, row and column will be upsampled seperately by 1D
+        kernels, otherwise they are upsampled together using a 2D kernel. The
+        final result is the same, only the speed can differ, given factors such
+        as upsampling ratio.
+    Returns
+    -------
+    symbolic 4D tensor
+        set of feature maps generated by bilinear upsampling. Tensor
+        is of shape (batch size, num_input_channels, input row size * row ratio,
+        input column size * column ratio). Each of these ratios can be fractional.
+    Notes
+    -----
+    :note: The kernel used for bilinear interpolation is fixed (not learned).
+    :note: When the upsampling ratio is even, the last row and column is
+        repeated one extra time compared to the first row and column which makes
+        the upsampled tensor asymmetrical on both sides. This does not happen when
+        the upsampling ratio is odd.
+    :note: This function must get either ratio or frac_ratio as parameter and
+        never both at once.
+    """
+    if ratio and frac_ratio:
+        raise ValueError("can't use ratio and frac_ratio together")
+    if not (ratio or frac_ratio):
+        raise ValueError("No ratio (or frac_ratio) provided")
+    T = theano.tensor
+    row, col = input.shape[2:]
+    up_input = input.reshape((-1, 1, row, col))
+    # redefince the ratio depending of the case
+    if frac_ratio is None:
+        if not isinstance(ratio, tuple):
+            ratio = (ratio, ratio)
+        subsample = (1, 1)
+    else:
+        if not isinstance(frac_ratio, tuple):
+            raise ValueError("frac_ratio must be a tuple")
+        else:
+            if isinstance(frac_ratio[0], tuple):
+                f_r = []
+                for i, fr in enumerate(frac_ratio):
+                    p, q = fr
+                    div = gcd(p, q)
+                    f_r.append(tuple(np.array(fr) // div))
+                frac_ratio = tuple(f_r)
+                ratio = (frac_ratio[0][0], frac_ratio[1][0])
+                subsample = (frac_ratio[0][1], frac_ratio[1][1])
+            else:
+                p, q = frac_ratio
+                div = gcd(p, q)
+                frac_ratio = tuple(np.array(frac_ratio) // div)
+                ratio = (frac_ratio[0], frac_ratio[0])
+                subsample = (frac_ratio[1], frac_ratio[1])
+    # duplicate borders of the input
+    concat_mat = T.concatenate((up_input[:, :, :1, :], up_input,
+                                up_input[:, :, -1:, :]), axis=2)
+    concat_mat = T.concatenate((concat_mat[:, :, :, :1], concat_mat,
+                                concat_mat[:, :, :, -1:]), axis=3)
+    # add padding for the pyramidal kernel
+    double_pad = (2 * T.as_tensor([row, col]) - 1) * np.array(ratio) + 1
+    pad = double_pad // 2
+    # build pyramidal kernel
+    if use_1D_kernel:
+        kern = bilinear_kernel_1D(ratio=ratio[0])[np.newaxis, np.newaxis,
+                                                  :, np.newaxis]
+    else:
+        kern = bilinear_kernel_2D(ratio=ratio)[np.newaxis, np.newaxis, :, :]
+    pad_kern = T.concatenate((T.zeros(tuple(kern.shape[:2]) + (pad[0], kern.shape[-1])),
+                              kern,
+                              T.zeros(tuple(kern.shape[:2]) + (double_pad[0]-pad[0], kern.shape[-1]))),
+                             axis=2)
+    if use_1D_kernel:
+        # for 1D kernel, upsample along rows
+        upsamp = T.nnet.conv2d(pad_kern, concat_mat, border_mode='valid', filter_dilation=(ratio[0], 1))
+        upsamp = upsamp.dimshuffle((1, 0, 2, 3))
+        pad_kern = bilinear_kernel_1D(ratio=ratio[1])[np.newaxis, np.newaxis, np.newaxis, :]
+    pad_kern = T.concatenate((T.zeros(tuple(pad_kern.shape[:3]) + (pad[1],)),
+                              pad_kern,
+                              T.zeros(tuple(pad_kern.shape[:3]) + (double_pad[1]-pad[1],))),
+                             axis=3)
+    if use_1D_kernel:
+        upsamp = T.nnet.conv2d(pad_kern, upsamp, border_mode='valid', filter_dilation=(1, ratio[1]),
+                               subsample=(1, 1))
+    else:
+        upsamp = T.nnet.conv2d(pad_kern, concat_mat, border_mode='valid', filter_dilation=ratio,
+                               subsample=subsample)
+    up_img_sh = T.ceil(T.as_tensor([row, col]) * np.array(ratio) / np.array(subsample)).astype('int64')
+    return upsamp.reshape((input.shape[0], input.shape[1], up_img_sh[0], up_img_sh[1]))
 def bilinear_upsampling(input,
                        ratio,
                        batch_size=None,

--- a/theano/tensor/nnet/tests/test_abstract_conv.py
+++ b/theano/tensor/nnet/tests/test_abstract_conv.py
@@ -23,7 +23,9 @@ from theano.tensor.nnet.abstract_conv import AbstractConv2d_gradWeights
 from theano.tensor.nnet.abstract_conv import bilinear_kernel_1D
 from theano.tensor.nnet.abstract_conv import bilinear_kernel_2D
 from theano.tensor.nnet.abstract_conv import bilinear_upsampling
+from theano.tensor.nnet.abstract_conv import frac_bilinear_upsampling
 from theano.tensor.nnet.abstract_conv import separable_conv2d, separable_conv3d
+from theano.tensor.nnet.conv import ConvOp
 from theano.tensor.nnet.corr import (CorrMM, CorrMM_gradWeights,
                                     CorrMM_gradInputs)
 from theano.tensor.nnet.corr3d import (Corr3dMM, Corr3dMM_gradWeights,
@@ -1289,6 +1291,33 @@ class TestBilinearUpsampling(unittest.TestCase):
        f_2D = theano.function([], mat_2D, mode=self.compile_mode)
        utt.assert_allclose(f_1D(), f_2D(), rtol=1e-06)
+    def test_fractional_bilinear_upsampling(self):
+        """Test bilinear upsampling with nonsimilar fractional
+        row and col ratios
+        """
+        input_x = np.array([[[1, 2], [3, 4]],
+                            [[5, 6], [7, 8]],
+                            [[9, 10], [11, 12]]],
+                           ndmin=4).astype(theano.config.floatX)
+        up_x = frac_bilinear_upsampling(input=input_x,
+                                        frac_ratio=((7, 4), (5, 3)))
+        num_up_x = np.array(
+            [[[[1., 1.2, 1.8, 2.],
+              [1.28571429, 1.48571429, 2.08571429, 2.28571429],
+              [2.42857143, 2.62857143, 3.22857143, 3.42857143],
+              [3., 3.2, 3.8, 4.]],
+             [[5., 5.2, 5.8, 6.],
+              [5.28571429, 5.48571429, 6.08571429, 6.28571429],
+              [6.42857143, 6.62857143, 7.22857143, 7.42857143],
+              [7., 7.2, 7.8, 8.]],
+             [[9., 9.2, 9.8, 10.],
+              [9.28571429, 9.48571429, 10.08571429, 10.28571429],
+              [10.42857143, 10.62857143, 11.22857143, 11.42857143],
+              [11., 11.2, 11.8, 12.]]]]
+            ).astype(theano.config.floatX)
+        f_up_x = theano.function([], up_x, mode=self.compile_mode)
+        utt.assert_allclose(f_up_x(), num_up_x, rtol=1e-6)
 class TestConv2dTranspose(unittest.TestCase):
    mode = None