Change spatialtf to use scaling factors for width and height

上级 044f1ac4
......@@ -2961,7 +2961,8 @@ class GpuDnnGridSampler(DnnBase):
pass
def dnn_spatialtf(inp, theta, downsampling_factor=1, alpha=None, beta=None, dtype=theano.config.floatX):
def dnn_spatialtf(inp, theta, scale_width=1, scale_height=1, alpha=None, beta=None,
dtype=theano.config.floatX):
"""
GPU spatial transformer using cuDNN from NVIDIA.
......@@ -2972,17 +2973,22 @@ def dnn_spatialtf(inp, theta, downsampling_factor=1, alpha=None, beta=None, dtyp
(number of inputs, number of channels, height, width)
theta : matrix
Affine transformation matrix generated by the localization network.
downsample_factor : float
A float specifying the downsample factor for the output image (in both
spatial dimensions). A value of 1 will keep the original size of the
input. Values larger than 1 will downsample the input. Values below 1
will upsample the input.
scale_height : float
A float specifying the scaling factor for the height of the output
image. A value of 1 will keep the original height of the input. Values
larger than 1 will upsample the input. Values below 1 will downsample
the input.
scale_width : float
A float specifying the scaling factor for the width of the output
image. A value of 1 will keep the original width of the input. Values
larger than 1 will upsample the input. Values below 1 will downsample
the input.
Returns
-------
out : tensor
Transformed inputs with the shape
``(number of inputs, number of channels, floor(height / downsampling_factor), floor(width / downsampling_factor))``.
``(number of inputs, number of channels, round(height * scale_height), round(width * scale_width))``.
Notes
-----
......@@ -2991,19 +2997,18 @@ def dnn_spatialtf(inp, theta, downsampling_factor=1, alpha=None, beta=None, dtyp
bilinear interpolation.
"""
# inp is a 4D tensor with shape: (num_inputs, num_channels, width, height)
# inp is a 4D tensor with shape: (num_inputs, num_channels, height, width)
assert inp.ndim == 4
# Theta is an array of transformation matrices and must have shape: (num_images, 2, 3)
assert theta.ndim == 3
grid_dims = (as_scalar(inp.shape[0]).astype('int32'),
as_scalar(inp.shape[1]).astype('int32'),
as_scalar(inp.shape[2] // downsampling_factor).astype('int32'),
as_scalar(inp.shape[3] // downsampling_factor).astype('int32'))
as_scalar(inp.shape[2] * scale_height).astype('int32'),
as_scalar(inp.shape[3] * scale_width).astype('int32'))
inp = gpu_contiguous(inp)
theta = gpu_contiguous(theta)
downsampling_factor = float(downsampling_factor)
# Create spatial transformer descriptor
desc = GpuDnnSpatialTfDesc(dtype)(grid_dims)
......
......@@ -2310,13 +2310,13 @@ def test_dnn_spatialtf():
Spatial Transformer implementation using Theano from Lasagne
Original author: skaae (https://github.com/skaae)
"""
def spatialtf_cpu(theta, inp, downsample_factor, border_mode='nearest'):
def spatialtf_cpu(theta, inp, scale_height, scale_width, border_mode='nearest'):
num_batch, num_channels, height, width = inp.shape
theta = T.reshape(theta, (-1, 2, 3))
# grid of (x_t, y_t, 1), eq (1) in ref [1]
out_height = T.cast(height // downsample_factor, 'int64')
out_width = T.cast(width // downsample_factor, 'int64')
out_height = T.cast(T.ceil(height * scale_height), 'int64')
out_width = T.cast(T.ceil(width * scale_width), 'int64')
grid = _meshgrid(out_height, out_width)
# transform a x (x_t, y_t, 1)^t -> (x_s, y_s)
t_g = T.dot(theta, grid)
......@@ -2443,7 +2443,8 @@ def test_dnn_spatialtf():
gpu_img = gpuarray_shared_constructor(img)
# Scale the image dimensions: our output tensor will have shape
# (n, c, h * scale_height, w * scale_width)
downsample_factor = 2
scale_height = 0.25
scale_width = 0.75
# Transformation matrix
theta = [[-1, 0, 0],
......@@ -2452,7 +2453,8 @@ def test_dnn_spatialtf():
transform = np.asarray(img_dims[0] * [theta], dtype=theano.config.floatX)
gpu_transform = gpuarray_shared_constructor(transform)
st_dnn = dnn.dnn_spatialtf(gpu_img, gpu_transform, downsample_factor)
st_dnn = dnn.dnn_spatialtf(gpu_img, gpu_transform, scale_height=scale_height,
scale_width=scale_width)
st_dnn_func = theano.function([], [st_dnn])
# Check if function graph contains the spatial transformer Ops
......@@ -2463,7 +2465,7 @@ def test_dnn_spatialtf():
# Setup CPU Op
t_img = T.tensor4('img')
t_theta = T.tensor3('theta')
st_cpu = spatialtf_cpu(t_theta, t_img, downsample_factor, 'nearest')
st_cpu = spatialtf_cpu(t_theta, t_img, scale_height, scale_width, 'nearest')
st_cpu_func = theano.function([t_theta, t_img], [st_cpu], mode=mode_without_gpu)
res, = st_cpu_func(transform, img)
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论