Add docstring and refactor spatial transformer to use downsample_factor instead of grid dims

上级 14766a3d
#section support_code_apply #section support_code_apply
int APPLY_SPECIFIC(spatialtf_desc)(cudnnSpatialTransformerDescriptor_t * desc, int APPLY_SPECIFIC(spatialtf_desc)(npy_int32 dim_nimages,
npy_int32 dim_nchannels,
npy_int32 dim_height,
npy_int32 dim_width,
cudnnSpatialTransformerDescriptor_t * desc,
PARAMS_TYPE * params) PARAMS_TYPE * params)
{ {
cudnnStatus_t err; cudnnStatus_t err;
if ( params->nimages == 0 || params->nchannels == 0 || const int nimages = (int) dim_nimages;
params->height == 0 || params->width == 0 ) const int nchannels = (int) dim_nchannels;
const int height = (int) dim_height;
const int width = (int) dim_width;
if ( nimages == 0 || nchannels == 0 || height == 0 || width == 0 )
{ {
PyErr_SetString( PyExc_RuntimeError, "Invalid grid dimensions" ); PyErr_SetString( PyExc_RuntimeError, "Invalid grid dimensions" );
return -1; return -1;
} }
// num_images, num_channels, height, width // num_images, num_channels, height, width
const int out_tensor_dims[4] = { params->nimages, params->nchannels, params->height, params->width }; const int out_tensor_dims[4] = { nimages, nchannels, height, width };
err = cudnnCreateSpatialTransformerDescriptor( desc ); err = cudnnCreateSpatialTransformerDescriptor( desc );
if ( CUDNN_STATUS_SUCCESS != err ) if ( CUDNN_STATUS_SUCCESS != err )
...@@ -30,7 +38,7 @@ int APPLY_SPECIFIC(spatialtf_desc)(cudnnSpatialTransformerDescriptor_t * desc, ...@@ -30,7 +38,7 @@ int APPLY_SPECIFIC(spatialtf_desc)(cudnnSpatialTransformerDescriptor_t * desc,
params->dtype, 4, out_tensor_dims ); params->dtype, 4, out_tensor_dims );
if ( CUDNN_STATUS_SUCCESS != err ) if ( CUDNN_STATUS_SUCCESS != err )
{ {
PyErr_Format( PyExc_MemoryError, PyErr_Format( PyExc_MemoryError,
"Failed to initialize spatial transformer descriptor: %s", "Failed to initialize spatial transformer descriptor: %s",
cudnnGetErrorString( err ) ); cudnnGetErrorString( err ) );
return -1; return -1;
......
...@@ -2840,9 +2840,8 @@ class GpuDnnSpatialTfDesc(COp): ...@@ -2840,9 +2840,8 @@ class GpuDnnSpatialTfDesc(COp):
operations. operations.
""" """
__props__ = ('dimensions', 'dtype') __props__ = ('dtype',)
params_type = ParamsType(nimages=int_t, nchannels=int_t, height=int_t, width=int_t, params_type = ParamsType(dtype=cudnn.cudnnDataType_t)
dtype=cudnn.cudnnDataType_t)
def c_headers(self): def c_headers(self):
return ['cudnn.h', 'cudnn_helper.h'] return ['cudnn.h', 'cudnn_helper.h']
...@@ -2859,19 +2858,20 @@ class GpuDnnSpatialTfDesc(COp): ...@@ -2859,19 +2858,20 @@ class GpuDnnSpatialTfDesc(COp):
def do_constant_folding(self, node): def do_constant_folding(self, node):
return False return False
def __init__(self, dimensions, dtype="float32"): def __init__(self, dtype=theano.config.floatX):
COp.__init__(self, ["c_code/spatialtf_desc.c"], "APPLY_SPECIFIC(spatialtf_desc)") COp.__init__(self, ["c_code/spatialtf_desc.c"], "APPLY_SPECIFIC(spatialtf_desc)")
# cuDNN supports only 2D transformations, therefore output tensor must
# have exactly 4 dimensions: (width, height, num_channels, num_images)
assert len(dimensions) == 4
self.dimensions = tuple(dimensions)
assert cudnn.cudnnDataType_t.has_alias(dtype) assert cudnn.cudnnDataType_t.has_alias(dtype)
self.dtype = dtype self.dtype = dtype
def make_node(self): def make_node(self, dimensions):
node = Apply(self, [], # cuDNN supports only 2D transformations, therefore output tensor must
# have exactly 4 dimensions: (num_images, num_channels, height, width)
assert len(dimensions) == 4
dimensions = tuple(dimensions)
nimages, nchannels, height, width = dimensions
node = Apply(self, [nimages, nchannels, height, width],
[CDataType("cudnnSpatialTransformerDescriptor_t", [CDataType("cudnnSpatialTransformerDescriptor_t",
freefunc="cudnnDestroySpatialTransformerDescriptor")()]) freefunc="cudnnDestroySpatialTransformerDescriptor")()])
# DebugMode cannot compare the values of CDataType variables, so by # DebugMode cannot compare the values of CDataType variables, so by
...@@ -2882,15 +2882,6 @@ class GpuDnnSpatialTfDesc(COp): ...@@ -2882,15 +2882,6 @@ class GpuDnnSpatialTfDesc(COp):
out.tag.values_eq_approx = tensor.type.values_eq_approx_always_true out.tag.values_eq_approx = tensor.type.values_eq_approx_always_true
return node return node
# Number of images
nimages = property(lambda self: self.dimensions[0])
# Number of channels
nchannels = property(lambda self: self.dimensions[1])
# Grid height
height = property(lambda self: self.dimensions[2])
# Grid width
width = property(lambda self: self.dimensions[3])
def c_code_cache_version(self): def c_code_cache_version(self):
return (super(GpuDnnSpatialTfDesc, self).c_code_cache_version(), version()) return (super(GpuDnnSpatialTfDesc, self).c_code_cache_version(), version())
...@@ -2917,7 +2908,6 @@ class GpuDnnGridGenerator(DnnBase): ...@@ -2917,7 +2908,6 @@ class GpuDnnGridGenerator(DnnBase):
theta = gpu_contiguous(as_gpuarray_variable(theta, context_name)) theta = gpu_contiguous(as_gpuarray_variable(theta, context_name))
assert theta.dtype in ('float16', 'float32', 'float64') assert theta.dtype in ('float16', 'float32', 'float64')
assert cudnn.cudnnDataType_t.has_alias(theta.dtype)
# Allocate GPU memory for grid of coordinates # Allocate GPU memory for grid of coordinates
grid = GpuArrayType(dtype=self.dtype, grid = GpuArrayType(dtype=self.dtype,
...@@ -2971,35 +2961,57 @@ class GpuDnnGridSampler(DnnBase): ...@@ -2971,35 +2961,57 @@ class GpuDnnGridSampler(DnnBase):
pass pass
def dnn_spatialtf(img, theta, grid_dims, alpha=None, beta=None, dtype=None): def dnn_spatialtf(inp, theta, downsampling_factor=1, alpha=None, beta=None, dtype=theano.config.floatX):
""" """
GPU spatial transformer using cuDNN from NVIDIA. GPU spatial transformer using cuDNN from NVIDIA.
Parameters
----------
inp : tensor
Input feature maps in format NCHW
(number of inputs, number of channels, height, width)
theta : tensor
Affine transformation matrices, with shape (num_images, 2, 3), generated by the localization network.
downsampling_factor : float
A float specifying the downsample factor for the output image (in both
spatial dimensions). A value of 1 will keep the original size of the
input. Values larger than 1 will downsample the input. Values below 1
will upsample the input.
Returns
-------
out : tensor
Transformed inputs with the shape
``(number of inputs, number of channels, floor(height / downsampling_factor), floor(width / downsampling_factor))``.
Notes
-----
cuDNN currently only supports 2D transformations with 2x3 affine
transformation matrix. Also, the only sampler available is the
bilinear interpolation.
""" """
# img is a 4D tensor with shape: (num_images, num_channels, width, height) # inp is a 4D tensor with shape: (num_inputs, num_channels, height, width)
assert img.ndim == 4 assert inp.ndim == 4
# Grid dimensions must be a 4-dimensional tuple
assert isinstance(grid_dims, tuple)
assert len(grid_dims) == 4
# Theta is an array of transformation matrices and must have shape: (num_images, 2, 3) # Theta is an array of transformation matrices and must have shape: (num_images, 2, 3)
assert theta.ndim == 3 assert theta.ndim == 3
img = gpu_contiguous(img) grid_dims = (as_scalar(inp.shape[0]).astype('int32'),
theta = gpu_contiguous(theta) as_scalar(inp.shape[1]).astype('int32'),
as_scalar(inp.shape[2] // downsampling_factor).astype('int32'),
as_scalar(inp.shape[3] // downsampling_factor).astype('int32'))
dtype = img.dtype if dtype is None else dtype inp = gpu_contiguous(inp)
theta = gpu_contiguous(theta)
downsampling_factor = float(downsampling_factor)
# Create spatial transformer descriptor # Create spatial transformer descriptor
desc = GpuDnnSpatialTfDesc(grid_dims, dtype)() desc = GpuDnnSpatialTfDesc(dtype)(grid_dims)
# Create grid dimensions variable # Create grid dimensions variable
grid_dims_var = as_tensor_variable(grid_dims) grid_dims_var = as_tensor_variable(grid_dims)
# Setup and return sampling grid
# Setup grid of coordinates
grid_coord = GpuDnnGridGenerator(dtype)(grid_dims_var, theta, desc) grid_coord = GpuDnnGridGenerator(dtype)(grid_dims_var, theta, desc)
grid_sampler = GpuDnnGridSampler(dtype)(inp, grid_coord, desc, alpha, beta)
grid_sampler = GpuDnnGridSampler(dtype)(img, grid_coord, desc, alpha, beta)
return grid_sampler return grid_sampler
......
...@@ -2441,11 +2441,9 @@ def test_dnn_spatialtf(): ...@@ -2441,11 +2441,9 @@ def test_dnn_spatialtf():
# Convert from NHWC to NCHW # Convert from NHWC to NCHW
img = np.transpose(img, axes=(0, 3, 1, 2)).astype(theano.config.floatX) img = np.transpose(img, axes=(0, 3, 1, 2)).astype(theano.config.floatX)
gpu_img = gpuarray_shared_constructor(img) gpu_img = gpuarray_shared_constructor(img)
# Downsample image dimensions by a factor of 2, i.e. our output tensor will
# have shape (n, c, h / 2, w / 2)
downsample_factor = 2 downsample_factor = 2
grid_h = img_dims[1] // downsample_factor
grid_w = img_dims[2] // downsample_factor
grid_dims = (img_dims[0], img_dims[3], grid_h, grid_w)
# Transformation matrix # Transformation matrix
rotation = [[1, 0, 0], rotation = [[1, 0, 0],
...@@ -2454,7 +2452,7 @@ def test_dnn_spatialtf(): ...@@ -2454,7 +2452,7 @@ def test_dnn_spatialtf():
transform = np.asarray(img_dims[0] * [rotation], dtype=theano.config.floatX) transform = np.asarray(img_dims[0] * [rotation], dtype=theano.config.floatX)
gpu_transform = gpuarray_shared_constructor(transform) gpu_transform = gpuarray_shared_constructor(transform)
st_dnn = dnn.dnn_spatialtf(gpu_img, gpu_transform, grid_dims) st_dnn = dnn.dnn_spatialtf(gpu_img, gpu_transform, downsample_factor)
st_dnn_func = theano.function([], [st_dnn]) st_dnn_func = theano.function([], [st_dnn])
# Check if function graph contains the spatial transformer Ops # Check if function graph contains the spatial transformer Ops
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论