Change spatialtf to use scaling factors for width and height

上级 044f1ac4
......@@ -2961,7 +2961,8 @@ class GpuDnnGridSampler(DnnBase):
pass
def dnn_spatialtf(inp, theta, downsampling_factor=1, alpha=None, beta=None, dtype=theano.config.floatX):
def dnn_spatialtf(inp, theta, scale_width=1, scale_height=1, alpha=None, beta=None,
dtype=theano.config.floatX):
"""
GPU spatial transformer using cuDNN from NVIDIA.
......@@ -2972,17 +2973,22 @@ def dnn_spatialtf(inp, theta, downsampling_factor=1, alpha=None, beta=None, dtyp
(number of inputs, number of channels, height, width)
theta : matrix
Affine transformation matrix generated by the localization network.
downsample_factor : float
A float specifying the downsample factor for the output image (in both
spatial dimensions). A value of 1 will keep the original size of the
input. Values larger than 1 will downsample the input. Values below 1
will upsample the input.
scale_height : float
A float specifying the scaling factor for the height of the output
image. A value of 1 will keep the original height of the input. Values
larger than 1 will upsample the input. Values below 1 will downsample
the input.
scale_width : float
A float specifying the scaling factor for the width of the output
image. A value of 1 will keep the original width of the input. Values
larger than 1 will upsample the input. Values below 1 will downsample
the input.
Returns
-------
out : tensor
Transformed inputs with the shape
``(number of inputs, number of channels, floor(height / downsampling_factor), floor(width / downsampling_factor))``.
``(number of inputs, number of channels, round(height * scale_height), round(width * scale_width))``.
Notes
-----
......@@ -2991,19 +2997,18 @@ def dnn_spatialtf(inp, theta, downsampling_factor=1, alpha=None, beta=None, dtyp
bilinear interpolation.
"""
# inp is a 4D tensor with shape: (num_inputs, num_channels, width, height)
# inp is a 4D tensor with shape: (num_inputs, num_channels, height, width)
assert inp.ndim == 4
# Theta is an array of transformation matrices and must have shape: (num_images, 2, 3)
assert theta.ndim == 3
grid_dims = (as_scalar(inp.shape[0]).astype('int32'),
as_scalar(inp.shape[1]).astype('int32'),
as_scalar(inp.shape[2] // downsampling_factor).astype('int32'),
as_scalar(inp.shape[3] // downsampling_factor).astype('int32'))
as_scalar(inp.shape[2] * scale_height).astype('int32'),
as_scalar(inp.shape[3] * scale_width).astype('int32'))
inp = gpu_contiguous(inp)
theta = gpu_contiguous(theta)
downsampling_factor = float(downsampling_factor)
# Create spatial transformer descriptor
desc = GpuDnnSpatialTfDesc(dtype)(grid_dims)
......
......@@ -2310,13 +2310,13 @@ def test_dnn_spatialtf():
Spatial Transformer implementation using Theano from Lasagne
Original author: skaae (https://github.com/skaae)
"""
def spatialtf_cpu(theta, inp, downsample_factor, border_mode='nearest'):
def spatialtf_cpu(theta, inp, scale_height, scale_width, border_mode='nearest'):
num_batch, num_channels, height, width = inp.shape
theta = T.reshape(theta, (-1, 2, 3))
# grid of (x_t, y_t, 1), eq (1) in ref [1]
out_height = T.cast(height // downsample_factor, 'int64')
out_width = T.cast(width // downsample_factor, 'int64')
out_height = T.cast(T.ceil(height * scale_height), 'int64')
out_width = T.cast(T.ceil(width * scale_width), 'int64')
grid = _meshgrid(out_height, out_width)
# transform a x (x_t, y_t, 1)^t -> (x_s, y_s)
t_g = T.dot(theta, grid)
......@@ -2443,7 +2443,8 @@ def test_dnn_spatialtf():
gpu_img = gpuarray_shared_constructor(img)
# Scale the image dimensions: our output tensor will have shape
# (n, c, h * scale_height, w * scale_width)
downsample_factor = 2
scale_height = 0.25
scale_width = 0.75
# Transformation matrix
theta = [[-1, 0, 0],
......@@ -2452,7 +2453,8 @@ def test_dnn_spatialtf():
transform = np.asarray(img_dims[0] * [theta], dtype=theano.config.floatX)
gpu_transform = gpuarray_shared_constructor(transform)
st_dnn = dnn.dnn_spatialtf(gpu_img, gpu_transform, downsample_factor)
st_dnn = dnn.dnn_spatialtf(gpu_img, gpu_transform, scale_height=scale_height,
scale_width=scale_width)
st_dnn_func = theano.function([], [st_dnn])
# Check if function graph contains the spatial transformer Ops
......@@ -2463,7 +2465,7 @@ def test_dnn_spatialtf():
# Setup CPU Op
t_img = T.tensor4('img')
t_theta = T.tensor3('theta')
st_cpu = spatialtf_cpu(t_theta, t_img, downsample_factor, 'nearest')
st_cpu = spatialtf_cpu(t_theta, t_img, scale_height, scale_width, 'nearest')
st_cpu_func = theano.function([t_theta, t_img], [st_cpu], mode=mode_without_gpu)
res, = st_cpu_func(transform, img)
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论