Commit 0e35a6bc
authored Jan 29, 2016 by Sina Honari

bilinear interpolation

Parent: e09272f5
Showing 2 changed files with 448 additions and 44 deletions:

theano/tensor/nnet/abstract_conv.py: +267 -44
theano/tensor/nnet/tests/test_abstract_conv.py: +181 -0
theano/tensor/nnet/abstract_conv.py

@@ -2,6 +2,7 @@
 Abstract conv interface
 """
+import numpy as np
 import logging
 from six import reraise
 import sys
@@ -125,6 +126,8 @@ def conv2d(input,
     Refer to :func:`nnet.conv2d <theano.tensor.nnet.conv2d>` for a more detailed documentation.
     """
+    input = as_tensor_variable(input)
+    filters = as_tensor_variable(filters)
     conv_op = AbstractConv2d(imshp=input_shape,
                              kshp=filter_shape,
                              border_mode=border_mode,
@@ -135,17 +138,18 @@ def conv2d(input,
 def conv2d_grad_wrt_inputs(output_grad,
                            filters,
-                           output_grad_shape=None,
-                           input_shape=None,
+                           input_shape,
                            filter_shape=None,
                            border_mode='valid',
                            subsample=(1, 1),
                            filter_flip=True):
-    """This function builds the symbolic graph for getting the
+    """Compute conv output gradient w.r.t its inputs
+
+    This function builds the symbolic graph for getting the
     gradient of the output of a convolution (namely output_grad)
     w.r.t the input of the convolution, given a set of 2D filters
     used by the convolution, such that the output_grad is upsampled
-    to the input shape.
+    to the input_shape.

     Parameters
     ----------
@@ -154,27 +158,24 @@ def conv2d_grad_wrt_inputs(output_grad,
         channels, input rows, input columns). This is the tensor that
         will be upsampled or the output gradient of the convolution
         whose gradient will be taken with respect to the input of the
-        convolution. See the optional parameter ``output_grad_shape``.
+        convolution.
     filters : symbolic 4D tensor
         set of filters used in CNN layer of shape (output channels,
         input channels, filter rows, filter columns). See the
         optional parameter ``filter_shape``.
-    output_grad_shape : list of 4 symbolic or real ints
-        The shape of the output_grad parameter. Optional, possibly
-        used to choose an optimal implementation. You can give
-        ``None`` for any element of the list to specify that this
-        element is not known at compile time.
-    input_shape : list of 2 symbolic or real ints
-        The shape (row and column size) of the input (upsampled)
-        parameter. Not Optional, since given the output_grad_shape
+    input_shape : [None/int/Constant] * 2 + [Tensor/int/Constant] * 2
+        The shape of the input (upsampled) parameter.
+        A tuple/list of len 4, with the first two dimensions
+        being None or int or Constant and the last two dimensions being
+        Tensor or int or Constant.
+        Not Optional, since given the output_grad shape
         and the subsample values, multiple input_shape may be
         plausible.
-    filter_shape : list of 4 symbolic or real ints
-        The shape of the filters parameter. Optional, possibly used
-        to choose an optimal implementation. You can give ``None``
-        for any element of the list to specify that this element is
-        not known at compile time.
+    filter_shape : None or [None/int/Constant] * 4
+        The shape of the filters parameter.
+        None or a tuple/list of len 4.
+        Optional, possibly used to choose an optimal implementation.
+        You can give ``None`` for any element of the list to specify that
+        this element is not known at compile time.
     border_mode : str, int or tuple of two int
         Either of the following:
@@ -192,7 +193,7 @@ def conv2d_grad_wrt_inputs(output_grad,
         rows and ``filter columns // 2`` columns, then perform a
         valid convolution. For filters with an odd number of rows
         and columns, this leads to the output shape being equal to
         the input shape.
+        It is known as 'same' elsewhere.
     ``int``
         pad input with a symmetric border of zeros of the given
@@ -231,24 +232,50 @@ def conv2d_grad_wrt_inputs(output_grad,
     """
-    grad_input_op = AbstractConv2d_gradInputs(imshp=input_shape,
+    filters = as_tensor_variable(filters)
+    output_grad = as_tensor_variable(output_grad)
+
+    # checking the type of input_shape
+    for dim in [0, 1]:
+        assert isinstance(input_shape[dim], (theano.tensor.TensorConstant,
+                                             int, type(None)))
+    for dim in [2, 3]:
+        assert isinstance(input_shape[dim], (theano.tensor.TensorVariable,
+                                             theano.tensor.TensorConstant,
+                                             int))
+
+    # checking the type of filter_shape
+    if filter_shape is not None:
+        for dim in [0, 1, 2, 3]:
+            assert isinstance(filter_shape[dim],
+                              (theano.tensor.TensorConstant, int, type(None)))
+
+    # setting the last two dimensions of input_shape to None, if
+    # the type of these dimensions is TensorVariable.
+    numerical_input_shape = list(input_shape)
+    for dim in [2, 3]:
+        if isinstance(input_shape[dim], theano.tensor.TensorVariable):
+            numerical_input_shape[dim] = None
+
+    grad_input_op = AbstractConv2d_gradInputs(imshp=numerical_input_shape,
                                               kshp=filter_shape,
                                               border_mode=border_mode,
                                               subsample=subsample,
                                               filter_flip=filter_flip)
-    return grad_input_op(filters, output_grad, input_shape)
+
+    return grad_input_op(filters, output_grad, input_shape[-2:])
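The new required `input_shape` argument exists because a strided convolution is many-to-one in shape: several input widths yield the same output width, so the transposed conv cannot recover the input size from `output_grad` alone. This small arithmetic sketch (the helper is ours, not part of the commit) illustrates the docstring's claim:

```python
# Hypothetical helper (not part of the diff): forward conv output length
# along one dimension, using floor division as Theano does.
def conv_output_size(input_size, kernel_size, stride, pad=0):
    return (input_size + 2 * pad - kernel_size) // stride + 1

# With stride 2 and a 3-wide kernel, inputs of width 7 and 8 both produce
# 3 outputs, so an output gradient of width 3 does not determine the input.
sizes = [n for n in range(5, 10) if conv_output_size(n, 3, 2) == 3]
print(sizes)
```

This is why the docstring says `input_shape` is "Not Optional" whenever `subsample` differs from `(1, 1)`.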
 def conv2d_grad_wrt_weights(input,
                             output_grad,
+                            filter_shape,
                             input_shape=None,
-                            output_grad_shape=None,
-                            filter_shape=None,
                             border_mode='valid',
                             subsample=(1, 1),
                             filter_flip=True):
-    """This function will build the symbolic graph for getting the
+    """Compute conv output gradient w.r.t its weights
+
+    This function will build the symbolic graph for getting the
     gradient of the output of a convolution (output_grad) w.r.t its weights.

     Parameters
@@ -261,25 +288,17 @@ def conv2d_grad_wrt_weights(input,
         mini-batch of feature map stacks, of shape (batch size, input
         channels, input rows, input columns). This is the gradient of
         the output of convolution.
-    filters : symbolic 4D tensor.
-        set of filters used in CNN layer of shape (output channels,
-        input channels, filter rows, filter columns). See the
-        optional parameter ``filter_shape``.
-    output_grad_shape : list of 4 ints or Constant variables
-        The shape of the input parameter. Optional, possibly used to
-        choose an optimal implementation. You can give ``None`` for
-        any element of the list to specify that this element is not
-        known at compile time.
-    input_shape : list of 2 ints or Constant variables
-        The shape of the input parameter. This parameter indicates
-        the row and column size of the input in the forward pass.
+    filter_shape : [None/int/Constant] * 2 + [Tensor/int/Constant] * 2
+        The shape of the filter parameter. A tuple/list of len 4, with the
+        first two dimensions being None or int or Constant and the last two
+        dimensions being Tensor or int or Constant.
+        Not Optional, since given the output_grad shape and
+        the input_shape, multiple filter_shape may be plausible.
+    input_shape : None or [None/int/Constant] * 4
+        The shape of the input parameter. None or a tuple/list of len 4.
         Optional, possibly used to choose an optimal implementation.
         You can give ``None`` for any element of the list to specify
         that this element is not known at compile time.
-    filter_shape : list of 4 ints or Constant variables
-        The shape of the filters parameter. Not Optional, since given
-        the output_grad_shape and the input_shape, multiple
-        filter_shape may be plausible.
     border_mode : str, int or tuple of two ints
         Either of the following:
@@ -297,7 +316,7 @@ def conv2d_grad_wrt_weights(input,
         rows and ``filter columns // 2`` columns, then perform a
         valid convolution. For filters with an odd number of rows
         and columns, this leads to the output shape being equal to
         the input shape.
+        It is known as 'same' elsewhere.
     ``int``
         pad input with a symmetric border of zeros of the given
@@ -335,13 +354,217 @@ def conv2d_grad_wrt_weights(input,
     version until it is released.

     """
+    input = as_tensor_variable(input)
+    output_grad = as_tensor_variable(output_grad)
+
+    # checking the type of filter_shape
+    for dim in [0, 1]:
+        assert isinstance(filter_shape[dim], (theano.tensor.TensorConstant,
+                                              int, type(None)))
+    for dim in [2, 3]:
+        assert isinstance(filter_shape[dim], (theano.tensor.TensorVariable,
+                                              theano.tensor.TensorConstant,
+                                              int))
+
+    # checking the type of input_shape
+    if input_shape is not None:
+        for dim in [0, 1, 2, 3]:
+            assert isinstance(input_shape[dim],
+                              (theano.tensor.TensorConstant, int, type(None)))
+
+    # setting the last two dimensions of filter_shape to None, if
+    # the type of these dimensions is TensorVariable.
+    numerical_filter_shape = list(filter_shape)
+    for dim in [2, 3]:
+        if isinstance(filter_shape[dim], theano.tensor.TensorVariable):
+            numerical_filter_shape[dim] = None
+
     gradWeight_op = AbstractConv2d_gradWeights(imshp=input_shape,
-                                               kshp=filter_shape,
+                                               kshp=numerical_filter_shape,
                                                border_mode=border_mode,
                                                subsample=subsample,
                                                filter_flip=filter_flip)
-    return gradWeight_op(input, output_grad, filter_shape)
+
+    return gradWeight_op(input, output_grad, filter_shape[-2:])
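`filter_shape` becomes a required argument for the same shape-ambiguity reason as `input_shape` in `conv2d_grad_wrt_inputs`: with a stride greater than one, the forward output size does not pin down the kernel size. A hypothetical check (the helper is ours, not part of the commit):

```python
# Forward conv output length along one dimension (hypothetical helper,
# mirroring Theano's floor-division shape rule).
def conv_output_size(input_size, kernel_size, stride, pad=0):
    return (input_size + 2 * pad - kernel_size) // stride + 1

# On a 9-wide input with stride 2, kernels of width 2 and 3 both yield
# 4 outputs, so output_grad and input shapes alone cannot determine the
# filter shape.
plausible_kernels = [k for k in range(1, 6) if conv_output_size(9, k, 2) == 4]
print(plausible_kernels)
```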
+def bilinear_kernel_2D(ratio, normalize=True):
+    """Compute 2D kernel for bilinear upsampling
+
+    This function builds the 2D kernel that can be used to upsample
+    a tensor by the given ratio using bilinear interpolation.
+
+    Parameters
+    ----------
+    ratio: int or Constant/Scalar Theano tensor of int* dtype
+        the ratio by which an image will be upsampled by the returned filter
+        in the 2D space.
+
+    normalize: bool
+        indicates whether to normalize the kernel or not.
+        Default is True.
+
+    Returns
+    -------
+    symbolic 2D tensor
+        the 2D kernels that can be applied to any given image to upsample it
+        by the indicated ratio using bilinear interpolation in two dimensions.
+    """
+    hkern = bilinear_kernel_1D(ratio=ratio,
+                               normalize=normalize).dimshuffle('x', 0)
+    vkern = bilinear_kernel_1D(ratio=ratio,
+                               normalize=normalize).dimshuffle(0, 'x')
+    kern = hkern * vkern
+    return kern
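The `dimshuffle`/broadcast product above is just an outer product of the 1D triangular kernel with itself. A NumPy sketch of the same construction (the helper name and standalone form are ours, not part of the commit):

```python
import numpy as np

# NumPy equivalent of bilinear_kernel_2D: the 2D bilinear kernel is the
# outer product of the 1D triangular kernel [1, ..., ratio, ..., 1].
def np_bilinear_kernel_2d(ratio, normalize=True):
    k1 = np.concatenate([np.arange(1, ratio + 1),
                         np.arange(ratio - 1, 0, -1)]).astype(float)
    if normalize:
        k1 /= ratio
    return np.outer(k1, k1)  # shape (2*ratio - 1, 2*ratio - 1)

print(np_bilinear_kernel_2d(2, normalize=False))
```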
+def bilinear_kernel_1D(ratio, normalize=True):
+    """Compute 1D kernel for bilinear upsampling
+
+    This function builds the 1D kernel that can be used to upsample
+    a tensor by the given ratio using bilinear interpolation.
+
+    Parameters
+    ----------
+    ratio: int or Constant/Scalar Theano tensor of int* dtype
+        the ratio by which an image will be upsampled by the returned filter
+        in the 2D space.
+
+    normalize: bool
+        indicates whether to normalize the kernel or not.
+        Default is True.
+
+    Returns
+    -------
+    symbolic 1D tensor
+        the 1D kernels that can be applied to any given image to upsample it
+        by the indicated ratio using bilinear interpolation in one dimension.
+    """
+    T = theano.tensor
+    half_kern = T.arange(1, ratio + 1, dtype=theano.config.floatX)
+    kern = T.concatenate([half_kern, half_kern[-2::-1]])
+
+    if normalize:
+        kern /= ratio
+    return kern
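A NumPy transcription of the symbolic graph above (the function name is ours, not part of the commit): `arange(1, ratio + 1)` followed by its reflection `[-2::-1]` gives a triangular filter of length `2*ratio - 1`.

```python
import numpy as np

# NumPy sketch of bilinear_kernel_1D: the triangular interpolation filter.
def np_bilinear_kernel_1d(ratio, normalize=True):
    half_kern = np.arange(1, ratio + 1, dtype=float)
    kern = np.concatenate([half_kern, half_kern[-2::-1]])  # e.g. [1, 2, 3, 2, 1]
    if normalize:
        kern /= ratio
    return kern

print(np_bilinear_kernel_1d(3, normalize=False))
```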
+def bilinear_upsampling(input, ratio, batch_size=None,
+                        num_input_channels=None, use_1D_kernel=True):
+    """Compute bilinear upsampling
+
+    This function will build the symbolic graph for upsampling
+    a tensor by the given ratio using bilinear interpolation.
+
+    Parameters
+    ----------
+    input: symbolic 4D tensor
+        mini-batch of feature map stacks, of shape (batch size,
+        input channels, input rows, input columns) that will be upsampled.
+
+    ratio: int or Constant or Scalar Tensor of int* dtype
+        the ratio by which the input is upsampled in the 2D space (row and
+        col size).
+
+    batch_size: None, int or Constant variable
+        The size of the first dimension of the input variable.
+        Optional, possibly used to choose an optimal implementation.
+        batch_size will be used only if num_input_channels is not None.
+
+    num_input_channels: None, int or Constant variable
+        The size of the second dimension of the input variable.
+        Optional, possibly used to choose an optimal implementation.
+        num_input_channels will be used only if batch_size is not None.
+
+    use_1D_kernel: bool
+        if set to true, row and column will be upsampled separately by 1D
+        kernels, otherwise they are upsampled together using a 2D kernel. The
+        final result is the same, only the speed can differ, given factors such
+        as upsampling ratio.
+
+    Returns
+    -------
+    symbolic 4D tensor
+        set of feature maps generated by bilinear upsampling. Tensor
+        is of shape (batch size, num_input_channels, input row size * ratio,
+        input column size * ratio)
+
+    Notes
+    -----
+    :note: The kernel used for bilinear interpolation is fixed (not learned).
+
+    :note: When the upsampling ratio is even, the last row and column is
+        repeated one extra time compared to the first row and column which
+        makes the upsampled tensor asymmetrical on both sides. This does not
+        happen when the upsampling ratio is odd.
+    """
+    T = theano.tensor
+    try:
+        up_bs = batch_size * num_input_channels
+    except TypeError:
+        up_bs = None
+    row, col = input.shape[2:]
+    up_input = input.reshape((-1, 1, row, col))
+
+    # concatenating the first and last row and column
+    # first and last row
+    concat_mat = T.concatenate((up_input[:, :, :1, :], up_input,
+                                up_input[:, :, -1:, :]), axis=2)
+    # first and last col
+    concat_mat = T.concatenate((concat_mat[:, :, :, :1], concat_mat,
+                                concat_mat[:, :, :, -1:]), axis=3)
+
+    pad = 2 * ratio - (ratio - 1) // 2 - 1
+    if use_1D_kernel:
+        kern = bilinear_kernel_1D(ratio=ratio, normalize=True)
+        # upsampling rows
+        upsampled_row = conv2d_grad_wrt_inputs(
+            output_grad=concat_mat,
+            filters=kern[np.newaxis, np.newaxis, :, np.newaxis],
+            input_shape=(up_bs, 1, row * ratio, col + 2),
+            filter_shape=(1, 1, None, 1),
+            border_mode=(pad, 0),
+            subsample=(ratio, 1),
+            filter_flip=True)
+        # upsampling cols
+        upsampled_mat = conv2d_grad_wrt_inputs(
+            output_grad=upsampled_row,
+            filters=kern[np.newaxis, np.newaxis, np.newaxis, :],
+            input_shape=(up_bs, 1, row * ratio, col * ratio),
+            filter_shape=(1, 1, 1, None),
+            border_mode=(0, pad),
+            subsample=(1, ratio),
+            filter_flip=True)
+    else:
+        kern = bilinear_kernel_2D(ratio=ratio, normalize=True)
+        upsampled_mat = conv2d_grad_wrt_inputs(
+            output_grad=concat_mat,
+            filters=kern[np.newaxis, np.newaxis, :, :],
+            input_shape=(up_bs, 1, row * ratio, col * ratio),
+            filter_shape=(1, 1, None, None),
+            border_mode=(pad, pad),
+            subsample=(ratio, ratio),
+            filter_flip=True)
+    return upsampled_mat.reshape((batch_size, num_input_channels,
+                                  row * ratio, col * ratio))
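The padding in `bilinear_upsampling` can be sanity-checked with plain arithmetic (the helper is ours, not part of the commit): with kernel length `2*ratio - 1`, stride `ratio`, and `pad = 2*ratio - (ratio - 1)//2 - 1`, an upsampled length of `row * ratio` maps back through the forward-conv shape formula to `row + 2`, which matches the input whose first and last rows were duplicated by the concatenations.

```python
# Forward conv output length (hypothetical helper, floor-division rule).
def forward_out_len(in_len, kern, stride, pad):
    return (in_len + 2 * pad - kern) // stride + 1

# Verify that the upsampled size row*ratio is consistent with an
# output_grad of size row + 2 for a range of ratios and sizes.
checks = []
for ratio in range(2, 10):
    pad = 2 * ratio - (ratio - 1) // 2 - 1
    for row in range(2, 6):
        checks.append(
            forward_out_len(row * ratio, 2 * ratio - 1, ratio, pad) == row + 2)
print(all(checks))
```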
+
+
 class BaseAbstractConv2d(Op):
theano/tensor/nnet/tests/test_abstract_conv.py

 import numpy
+import numpy as np
 import unittest
 from nose.plugins.skip import SkipTest
@@ -12,6 +13,9 @@ from theano.tensor.nnet.abstract_conv import get_conv_output_shape
 from theano.tensor.nnet.abstract_conv import AbstractConv2d
 from theano.tensor.nnet.abstract_conv import AbstractConv2d_gradInputs
 from theano.tensor.nnet.abstract_conv import AbstractConv2d_gradWeights
+from theano.tensor.nnet.abstract_conv import bilinear_kernel_1D
+from theano.tensor.nnet.abstract_conv import bilinear_kernel_2D
+from theano.tensor.nnet.abstract_conv import bilinear_upsampling
 from theano.tensor.nnet.conv import ConvOp
 from theano.tensor.nnet.corr import (CorrMM, CorrMM_gradWeights,
                                      CorrMM_gradInputs)
@@ -529,3 +533,180 @@ class TestConvTypes(unittest.TestCase):
         grad_filters = theano.grad(grad_input.sum(), wrt=filters)
         assert grad_filters.type == filters.type, (
             grad_filters, grad_filters.type, filters, filters.type)
+
+
+class TestBilinearUpsampling(unittest.TestCase):
+    def numerical_kernel_1D(self, ratio):
+        """Gets numerical 1D kernel for bilinear upsampling"""
+        return np.array(list(range(1, ratio + 1)) +
+                        list(range(ratio - 1, 0, -1)))
+
+    def numerical_kernel_2D(self, ratio):
+        """Gets numerical 2D kernel for bilinear upsampling"""
+        return np.array([i * j for i in self.numerical_kernel_1D(ratio)
+                         for j in self.numerical_kernel_1D(ratio)]).\
+            reshape(2 * ratio - 1, 2 * ratio - 1)
+    def test_bilinear_kernel_2D(self):
+        """Test 2D kernels used in bilinear upsampling
+
+        This method tests the correctness of the
+        2D kernel values used in bilinear upsampling
+        for some upsampling ratios.
+        """
+        for ratio in [2, 3, 4, 5, 6, 7, 8, 9]:
+            # getting the un-normalized kernel
+            kernel = bilinear_kernel_2D(ratio=ratio, normalize=False)
+            f = theano.function([], kernel)
+            kernel_2D = self.numerical_kernel_2D(ratio)
+            np.testing.assert_allclose(kernel_2D, f())
+
+            # getting the normalized kernel
+            kernel = bilinear_kernel_2D(ratio=ratio, normalize=True)
+            f = theano.function([], kernel)
+            kernel_2D = kernel_2D / float(ratio ** 2)
+            np.testing.assert_allclose(kernel_2D, f())
+    def test_bilinear_kernel_1D(self):
+        """Test 1D kernels used in bilinear upsampling
+
+        This method tests the correctness of the
+        1D kernel values used in bilinear upsampling
+        for some upsampling ratios.
+        """
+        rat = tensor.iscalar()
+        kernel_ten = bilinear_kernel_1D(ratio=rat, normalize=False)
+        f_ten = theano.function([rat], kernel_ten)
+
+        kernel_ten_norm = bilinear_kernel_1D(ratio=rat, normalize=True)
+        f_ten_norm = theano.function([rat], kernel_ten_norm)
+
+        for ratio in [2, 3, 4, 5, 6, 7, 8, 9]:
+            # getting the un-normalized kernel
+            kernel = bilinear_kernel_1D(ratio=ratio, normalize=False)
+            f = theano.function([], kernel)
+            kernel_1D = self.numerical_kernel_1D(ratio)
+            np.testing.assert_allclose(kernel_1D, f())
+            np.testing.assert_allclose(kernel_1D, f_ten(ratio))
+
+            # getting the normalized kernel
+            kernel = bilinear_kernel_1D(ratio=ratio, normalize=True)
+            f = theano.function([], kernel)
+            kernel_1D = kernel_1D / float(ratio)
+            np.testing.assert_allclose(kernel_1D, f())
+            np.testing.assert_allclose(kernel_1D, f_ten_norm(ratio))
+    def numerical_upsampling_multiplier(self, ratio):
+        """Compute upsampling multiplier
+
+        This method computes the multipliers of an array
+        that will be upsampled using bilinear interpolation.
+
+        Parameters
+        ----------
+        ratio: int
+            the ratio by which the array will be upsampled.
+
+        Returns
+        -------
+        1D numpy array
+            The multipliers that can be used in bilinear interpolation
+            to upsample an array.
+        int
+            The size of the multipliers array
+        """
+        kern = np.arange(ratio + 1)
+        return kern, kern.shape[0]
+
+    def get_upsampled_twobytwo_mat(self, two_by_two, ratio):
+        """Upsample 4D array with two rows and two columns
+
+        This method gets a 4D numpy array with two rows and two columns
+        and computes its upsampled array by using bilinear interpolation
+
+        Parameters
+        ----------
+        two_by_two: numpy 4D array
+            The array that will be upsampled by bilinear interpolation.
+            Array is of shape (batch size, num channels, 2, 2)
+
+        ratio: int
+            The ratio by which two_by_two's last
+            two dimensions (row and col) will be upsampled.
+
+        Returns
+        -------
+        4D numpy array
+            The array upsampled by using bilinear interpolation. Array
+            is of shape (batch size, num channels, 2*ratio, 2*ratio).
+        """
+        kern, shp = self.numerical_upsampling_multiplier(ratio)
+        up_1D = two_by_two[:, :, :, :1] * kern[::-1] + \
+            two_by_two[:, :, :, 1:] * kern
+        up_2D = up_1D[:, :, :1, :] * kern[::-1][:, np.newaxis] + \
+            up_1D[:, :, 1:, :] * kern[:, np.newaxis]
+        num_concat = (ratio - 1) // 2
+        for i in range(num_concat):
+            up_2D = np.concatenate([up_2D[:, :, :1, :], up_2D], axis=2)
+            up_2D = np.concatenate([up_2D, up_2D[:, :, -1:, :]], axis=2)
+            up_2D = np.concatenate([up_2D[:, :, :, :1], up_2D], axis=3)
+            up_2D = np.concatenate([up_2D, up_2D[:, :, :, -1:]], axis=3)
+
+        if ratio % 2 == 0:
+            up_2D = np.concatenate([up_2D, up_2D[:, :, -1:, :]], axis=2)
+            up_2D = np.concatenate([up_2D, up_2D[:, :, :, -1:]], axis=3)
+        return up_2D / float(ratio) ** 2
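A worked instance of this reference procedure (standalone NumPy, following the same steps as the test helper) for upsampling `[[1, 2], [3, 4]]` by ratio 2: the multipliers are `arange(3) = [0, 1, 2]`, and since the ratio is even the last row and column are repeated once, which is exactly the asymmetry noted in `bilinear_upsampling`'s docstring.

```python
import numpy as np

# Upsample a 2x2 map by ratio 2 with bilinear interpolation multipliers.
x = np.array([[[[1., 2.], [3., 4.]]]])   # shape (1, 1, 2, 2)
kern = np.arange(3.)                     # [0, 1, 2]
# interpolate along columns, then rows
up_1d = x[:, :, :, :1] * kern[::-1] + x[:, :, :, 1:] * kern
up_2d = (up_1d[:, :, :1, :] * kern[::-1][:, np.newaxis] +
         up_1d[:, :, 1:, :] * kern[:, np.newaxis])
# even ratio: repeat the last row and column once
up_2d = np.concatenate([up_2d, up_2d[:, :, -1:, :]], axis=2)
up_2d = np.concatenate([up_2d, up_2d[:, :, :, -1:]], axis=3)
result = up_2d / 2.0 ** 2
print(result[0, 0])
```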
+    def test_bilinear_upsampling_1D(self):
+        """Test bilinear upsampling using 1D kernels
+
+        This method tests the bilinear_upsampling method
+        when using 1D kernels for some upsampling ratios.
+        """
+        input_x = np.array([[[[1, 2], [3, 4]]]], dtype=theano.config.floatX)
+
+        for ratio in [2, 3, 4, 5, 6, 7, 8, 9]:
+            bilin_mat = bilinear_upsampling(input=input_x, ratio=ratio,
+                                            batch_size=1,
+                                            num_input_channels=1,
+                                            use_1D_kernel=True)
+            f = theano.function([], bilin_mat)
+            up_mat_2d = self.get_upsampled_twobytwo_mat(input_x, ratio)
+            np.testing.assert_allclose(f(), up_mat_2d, rtol=1e-06)
+    def test_compare_1D_and_2D_upsampling_values(self):
+        """Compare 1D and 2D upsampling
+
+        This method verifies that the bilinear upsampling done by using
+        1D and 2D kernels will generate the same result.
+        """
+        # checking upsampling with ratio 5
+        input_x = np.random.rand(5, 4, 6, 7).astype(theano.config.floatX)
+        mat_1D = bilinear_upsampling(input=input_x, ratio=5,
+                                     batch_size=5, num_input_channels=4,
+                                     use_1D_kernel=True)
+        mat_2D = bilinear_upsampling(input=input_x, ratio=5,
+                                     batch_size=5, num_input_channels=4,
+                                     use_1D_kernel=False)
+        f_1D = theano.function([], mat_1D)
+        f_2D = theano.function([], mat_2D)
+        np.testing.assert_allclose(f_1D(), f_2D(), rtol=1e-06)
+
+        # checking upsampling with ratio 8
+        input_x = np.random.rand(12, 11, 10, 7).astype(theano.config.floatX)
+        mat_1D = bilinear_upsampling(input=input_x, ratio=8,
+                                     batch_size=12, num_input_channels=11,
+                                     use_1D_kernel=True)
+        mat_2D = bilinear_upsampling(input=input_x, ratio=8,
+                                     batch_size=12, num_input_channels=11,
+                                     use_1D_kernel=False)
+        f_1D = theano.function([], mat_1D)
+        f_2D = theano.function([], mat_2D)
+        np.testing.assert_allclose(f_1D(), f_2D(), rtol=1e-06)