Correct docstring for spatial transformer op

Parent commit: bc7218f8
......@@ -2888,9 +2888,10 @@ class GpuDnnTransformerDescriptor(COp):
class GpuDnnTransformer(DnnBase):
"""
This Op builds a spatial transformer that can be used in spatial transformer networks.
Spatial transformer that can be used in spatial transformer networks, it
implements the grid generator and sampler. The localization network can
be built using neural net components of Theano.
"""
__props__ = ('dtype',)
_cop_num_inputs = 6
_cop_num_outputs = 2
......@@ -3025,18 +3026,19 @@ class GpuDnnTransformerGradT(DnnBase):
return [[1], [0]]
def dnn_spatialtf(img, theta, scale_width=1, scale_height=1, alpha=None, beta=None,
                  dtype=theano.config.floatX):
    """
    GPU spatial transformer using cuDNN from NVIDIA.

    Parameters
    ----------
    img : tensor
        Images to which the transformations will be applied. Must be a 4D
        tensor in NCHW format (number of images, number of channels,
        height, width).
    theta : tensor
        Affine transformation tensor containing one affine transformation
        matrix per image, with shape ``(number of images, 2, 3)``. ``theta``
        is usually generated by the localization network.
    scale_width : float
        Scaling factor for the width of the output image. A value of 1
        keeps the original width of the input; values above 1 upsample,
        values below 1 downsample.
    scale_height : float
        Scaling factor for the height of the output image. A value of 1
        keeps the original height of the input; values above 1 upsample,
        values below 1 downsample.
    alpha : float, optional
        Scaling factor forwarded to the cuDNN sampler (cuDNN's ``alpha``
        blend parameter).
    beta : float, optional
        Scaling factor forwarded to the cuDNN sampler (cuDNN's ``beta``
        blend parameter).
    dtype : str
        dtype used for the transformer descriptor and op; defaults to
        ``theano.config.floatX``.

    Returns
    -------
    out : tensor
        Transformed images with width and height properly scaled.

    Notes
    -----
    Currently, cuDNN only supports 2D transformations with 2x3 affine
    transformation matrices.

    Also, the only grid sampler method available is the bilinear
    interpolation.
    """
    # img is a 4D tensor with shape: (num_images, num_channels, height, width)
    assert img.ndim == 4
    # theta is an array of transformation matrices and must have
    # shape: (num_images, 2, 3)
    assert theta.ndim == 3

    # The sampling grid keeps the batch and channel dimensions and scales
    # the spatial dimensions by the requested factors.
    grid_dims = (img.shape[0], img.shape[1],
                 img.shape[2] * scale_height,
                 img.shape[3] * scale_width)
    # cuDNN expects the grid dimensions as int32 scalars.
    grid_dims = tuple(map(lambda v: as_scalar(v).astype('int32'), list(grid_dims)))

    # cuDNN requires contiguous inputs.
    img = gpu_contiguous(img)
    theta = gpu_contiguous(theta)
    # Pre-allocate the output buffer on the same GPU context as the input.
    output = GpuAllocEmpty(img.dtype, infer_context_name(img))(*grid_dims)
    # Create spatial transformer descriptor
    desc = GpuDnnTransformerDescriptor(dtype)(grid_dims)
    # Create grid dimensions variable
    grid_dims_var = as_tensor_variable(grid_dims)
    # Setup spatial transformer
    transformer = GpuDnnTransformer(dtype)(img, theta, output, grid_dims_var, desc, alpha, beta)
    return transformer
......
Markdown formatting is supported
0%
You are about to add 0 people to this discussion. Please proceed with caution.
Please finish editing this comment first!
Register or sign in to post a comment