Add docstring and refactor spatial transformer to use a downsampling factor instead of explicit grid dimensions

Parent 14766a3d
#section support_code_apply
int APPLY_SPECIFIC(spatialtf_desc)(cudnnSpatialTransformerDescriptor_t * desc,
int APPLY_SPECIFIC(spatialtf_desc)(npy_int32 dim_nimages,
npy_int32 dim_nchannels,
npy_int32 dim_height,
npy_int32 dim_width,
cudnnSpatialTransformerDescriptor_t * desc,
PARAMS_TYPE * params)
{
cudnnStatus_t err;
if ( params->nimages == 0 || params->nchannels == 0 ||
params->height == 0 || params->width == 0 )
const int nimages = (int) dim_nimages;
const int nchannels = (int) dim_nchannels;
const int height = (int) dim_height;
const int width = (int) dim_width;
if ( nimages == 0 || nchannels == 0 || height == 0 || width == 0 )
{
PyErr_SetString( PyExc_RuntimeError, "Invalid grid dimensions" );
return -1;
}
// num_images, num_channels, height, width
const int out_tensor_dims[4] = { params->nimages, params->nchannels, params->height, params->width };
const int out_tensor_dims[4] = { nimages, nchannels, height, width };
err = cudnnCreateSpatialTransformerDescriptor( desc );
if ( CUDNN_STATUS_SUCCESS != err )
......@@ -30,7 +38,7 @@ int APPLY_SPECIFIC(spatialtf_desc)(cudnnSpatialTransformerDescriptor_t * desc,
params->dtype, 4, out_tensor_dims );
if ( CUDNN_STATUS_SUCCESS != err )
{
PyErr_Format( PyExc_MemoryError,
PyErr_Format( PyExc_MemoryError,
"Failed to initialize spatial transformer descriptor: %s",
cudnnGetErrorString( err ) );
return -1;
......
......@@ -2840,9 +2840,8 @@ class GpuDnnSpatialTfDesc(COp):
operations.
"""
__props__ = ('dimensions', 'dtype')
params_type = ParamsType(nimages=int_t, nchannels=int_t, height=int_t, width=int_t,
dtype=cudnn.cudnnDataType_t)
__props__ = ('dtype',)
params_type = ParamsType(dtype=cudnn.cudnnDataType_t)
def c_headers(self):
return ['cudnn.h', 'cudnn_helper.h']
......@@ -2859,19 +2858,20 @@ class GpuDnnSpatialTfDesc(COp):
def do_constant_folding(self, node):
return False
def __init__(self, dimensions, dtype="float32"):
def __init__(self, dtype=theano.config.floatX):
COp.__init__(self, ["c_code/spatialtf_desc.c"], "APPLY_SPECIFIC(spatialtf_desc)")
# cuDNN supports only 2D transformations, therefore output tensor must
# have exactly 4 dimensions: (width, height, num_channels, num_images)
assert len(dimensions) == 4
self.dimensions = tuple(dimensions)
assert cudnn.cudnnDataType_t.has_alias(dtype)
self.dtype = dtype
def make_node(self):
node = Apply(self, [],
def make_node(self, dimensions):
# cuDNN supports only 2D transformations, therefore output tensor must
# have exactly 4 dimensions: (num_images, num_channels, height, width)
assert len(dimensions) == 4
dimensions = tuple(dimensions)
nimages, nchannels, height, width = dimensions
node = Apply(self, [nimages, nchannels, height, width],
[CDataType("cudnnSpatialTransformerDescriptor_t",
freefunc="cudnnDestroySpatialTransformerDescriptor")()])
# DebugMode cannot compare the values of CDataType variables, so by
......@@ -2882,15 +2882,6 @@ class GpuDnnSpatialTfDesc(COp):
out.tag.values_eq_approx = tensor.type.values_eq_approx_always_true
return node
# Number of images
nimages = property(lambda self: self.dimensions[0])
# Number of channels
nchannels = property(lambda self: self.dimensions[1])
# Grid height
height = property(lambda self: self.dimensions[2])
# Grid width
width = property(lambda self: self.dimensions[3])
def c_code_cache_version(self):
return (super(GpuDnnSpatialTfDesc, self).c_code_cache_version(), version())
......@@ -2917,7 +2908,6 @@ class GpuDnnGridGenerator(DnnBase):
theta = gpu_contiguous(as_gpuarray_variable(theta, context_name))
assert theta.dtype in ('float16', 'float32', 'float64')
assert cudnn.cudnnDataType_t.has_alias(theta.dtype)
# Allocate GPU memory for grid of coordinates
grid = GpuArrayType(dtype=self.dtype,
......@@ -2971,35 +2961,57 @@ class GpuDnnGridSampler(DnnBase):
pass
def dnn_spatialtf(inp, theta, downsampling_factor=1, alpha=None, beta=None, dtype=theano.config.floatX):
    """
    GPU spatial transformer using cuDNN from NVIDIA.

    Parameters
    ----------
    inp : tensor
        Input feature maps in format NCHW
        (number of inputs, number of channels, height, width).
    theta : matrix
        Affine transformation matrices generated by the localization
        network, one 2x3 matrix per input image.
    downsampling_factor : float
        Downsampling factor applied to both spatial dimensions of the
        output. A value of 1 keeps the original size of the input;
        values larger than 1 downsample the input and values below 1
        upsample it.
    alpha, beta : optional
        Forwarded unchanged to the grid sampler Op
        (presumably cuDNN's usual alpha/beta output scaling — confirm
        against GpuDnnGridSampler).
    dtype : str
        Data type used for the transformation
        (defaults to ``theano.config.floatX``).

    Returns
    -------
    out : tensor
        Transformed inputs with shape
        ``(number of inputs, number of channels,
        floor(height / downsampling_factor),
        floor(width / downsampling_factor))``.

    Notes
    -----
    cuDNN currently only supports 2D transformations with a 2x3 affine
    transformation matrix. Also, the only sampler available is the
    bilinear interpolation.
    """
    # inp is a 4D tensor with shape: (num_inputs, num_channels, height, width)
    assert inp.ndim == 4
    # theta is an array of transformation matrices and must have
    # shape: (num_inputs, 2, 3)
    assert theta.ndim == 3

    # Normalize the factor before it is used: the grid dimensions below must
    # be computed with the exact (possibly fractional) factor the caller
    # requested, so the float() conversion has to happen first.
    downsampling_factor = float(downsampling_factor)

    # Output grid keeps the batch size and channel count of the input; the
    # spatial dimensions are scaled by the downsampling factor and floored.
    grid_dims = (as_scalar(inp.shape[0]).astype('int32'),
                 as_scalar(inp.shape[1]).astype('int32'),
                 as_scalar(inp.shape[2] // downsampling_factor).astype('int32'),
                 as_scalar(inp.shape[3] // downsampling_factor).astype('int32'))

    inp = gpu_contiguous(inp)
    theta = gpu_contiguous(theta)

    # Create spatial transformer descriptor
    desc = GpuDnnSpatialTfDesc(dtype)(grid_dims)
    # Create grid dimensions variable
    grid_dims_var = as_tensor_variable(grid_dims)
    # Set up the grid of sampling coordinates, then sample the input
    # through it to produce the transformed output.
    grid_coord = GpuDnnGridGenerator(dtype)(grid_dims_var, theta, desc)
    grid_sampler = GpuDnnGridSampler(dtype)(inp, grid_coord, desc, alpha, beta)
    return grid_sampler
......
......@@ -2441,11 +2441,9 @@ def test_dnn_spatialtf():
# Convert from NHWC to NCHW
img = np.transpose(img, axes=(0, 3, 1, 2)).astype(theano.config.floatX)
gpu_img = gpuarray_shared_constructor(img)
# Downsample image dimensions by a factor of 2, i.e. our output tensor will
# have shape (n, c, h / 2, w / 2)
downsample_factor = 2
grid_h = img_dims[1] // downsample_factor
grid_w = img_dims[2] // downsample_factor
grid_dims = (img_dims[0], img_dims[3], grid_h, grid_w)
# Transformation matrix
rotation = [[1, 0, 0],
......@@ -2454,7 +2452,7 @@ def test_dnn_spatialtf():
transform = np.asarray(img_dims[0] * [rotation], dtype=theano.config.floatX)
gpu_transform = gpuarray_shared_constructor(transform)
st_dnn = dnn.dnn_spatialtf(gpu_img, gpu_transform, grid_dims)
st_dnn = dnn.dnn_spatialtf(gpu_img, gpu_transform, downsample_factor)
st_dnn_func = theano.function([], [st_dnn])
# Check if function graph contains the spatial transformer Ops
......
Markdown format
0%
You are adding 0 people to this discussion. Please proceed with caution.
Please finish editing this comment first!
Register or sign in to post a comment