Add additional checks of array dimensions and remove dependency on grid dimensions in sampler

上级 7cce8524
...@@ -25,7 +25,6 @@ void spatialtf_context_destroy( spatialtf_context_t * ctx ) ...@@ -25,7 +25,6 @@ void spatialtf_context_destroy( spatialtf_context_t * ctx )
int int
spatialtf_sampler(PyGpuArrayObject * input, spatialtf_sampler(PyGpuArrayObject * input,
PyGpuArrayObject * grid, PyGpuArrayObject * grid,
PyArrayObject * grid_dimensions,
cudnnSpatialTransformerDescriptor_t desc, cudnnSpatialTransformerDescriptor_t desc,
double alpha, double beta, double alpha, double beta,
PyGpuArrayObject ** output, PyGpuArrayObject ** output,
...@@ -37,11 +36,10 @@ spatialtf_sampler(PyGpuArrayObject * input, ...@@ -37,11 +36,10 @@ spatialtf_sampler(PyGpuArrayObject * input,
float af = alpha, bf = beta; float af = alpha, bf = beta;
spatialtf_context_t spatialtf_ctx; spatialtf_context_t spatialtf_ctx;
cudnnDataType_t dt; cudnnDataType_t dt;
// Number of color channels (feature maps) is the innermost dimension
cudnnTensorFormat_t tf = CUDNN_TENSOR_NCHW; cudnnTensorFormat_t tf = CUDNN_TENSOR_NCHW;
cudnnStatus_t err = CUDNN_STATUS_SUCCESS; cudnnStatus_t err = CUDNN_STATUS_SUCCESS;
if ( PyArray_DIM( grid_dimensions, 0 ) != 4 ) if ( PyGpuArray_NDIM( grid ) != 4 )
{ {
PyErr_SetString( PyExc_RuntimeError, PyErr_SetString( PyExc_RuntimeError,
"grid_dimensions must have 4 dimensions" ); "grid_dimensions must have 4 dimensions" );
...@@ -49,10 +47,9 @@ spatialtf_sampler(PyGpuArrayObject * input, ...@@ -49,10 +47,9 @@ spatialtf_sampler(PyGpuArrayObject * input,
} }
// Obtain grid dimensions // Obtain grid dimensions
const int num_images = (int) *( (npy_int *) PyArray_GETPTR1( grid_dimensions, 0 ) ); const int num_images = (int) PyGpuArray_DIM( grid, 0 );
const int num_channels = (int) *( (npy_int *) PyArray_GETPTR1( grid_dimensions, 1 ) ); const int height = (int) PyGpuArray_DIM( grid, 1 );
const int height = (int) *( (npy_int *) PyArray_GETPTR1( grid_dimensions, 2 ) ); const int width = (int) PyGpuArray_DIM( grid, 2 );
const int width = (int) *( (npy_int *) PyArray_GETPTR1( grid_dimensions, 3 ) );
switch (input->ga.typecode) switch (input->ga.typecode)
{ {
...@@ -102,17 +99,16 @@ spatialtf_sampler(PyGpuArrayObject * input, ...@@ -102,17 +99,16 @@ spatialtf_sampler(PyGpuArrayObject * input,
const int input_height = (int) PyGpuArray_DIM( input, 2 ); const int input_height = (int) PyGpuArray_DIM( input, 2 );
const int input_width = (int) PyGpuArray_DIM( input, 3 ); const int input_width = (int) PyGpuArray_DIM( input, 3 );
if ( input_num_images != num_images || if ( input_num_images != num_images )
input_num_channels != num_channels )
{ {
PyErr_Format( PyExc_RuntimeError, PyErr_Format( PyExc_RuntimeError,
"Input should have %d images and %d channels, got %d images and %d channels.", "Input should have %d images, got %d images.",
num_images, num_channels, input_num_images, input_num_channels ); num_images, input_num_images );
return -1; return -1;
} }
err = cudnnSetTensor4dDescriptor( spatialtf_ctx.xdesc, tf, dt, num_images, err = cudnnSetTensor4dDescriptor( spatialtf_ctx.xdesc, tf, dt, num_images,
num_channels, input_height, input_width ); input_num_channels, input_height, input_width );
if ( err != CUDNN_STATUS_SUCCESS ) if ( err != CUDNN_STATUS_SUCCESS )
{ {
...@@ -139,7 +135,7 @@ spatialtf_sampler(PyGpuArrayObject * input, ...@@ -139,7 +135,7 @@ spatialtf_sampler(PyGpuArrayObject * input,
} }
err = cudnnSetTensor4dDescriptor( spatialtf_ctx.ydesc, tf, dt, num_images, err = cudnnSetTensor4dDescriptor( spatialtf_ctx.ydesc, tf, dt, num_images,
num_channels, height, width ); input_num_channels, height, width );
if ( err != CUDNN_STATUS_SUCCESS ) if ( err != CUDNN_STATUS_SUCCESS )
{ {
...@@ -152,14 +148,14 @@ spatialtf_sampler(PyGpuArrayObject * input, ...@@ -152,14 +148,14 @@ spatialtf_sampler(PyGpuArrayObject * input,
return -1; return -1;
} }
const size_t out_dims[4] = { num_images, num_channels, height, width }; const size_t out_dims[4] = { num_images, input_num_channels, height, width };
if ( NULL == *output || if ( NULL == *output ||
! theano_size_check( *output, 4, &(out_dims[0]), (*output)->ga.typecode ) ) ! theano_size_check( *output, 4, out_dims, (*output)->ga.typecode ) )
{ {
Py_XDECREF( *output ); Py_XDECREF( *output );
*output = pygpu_zeros( 4, &(out_dims[0]), input->ga.typecode, GA_C_ORDER, *output = pygpu_empty( 4, out_dims, input->ga.typecode, GA_C_ORDER,
gpu_ctx, Py_None ); gpu_ctx, Py_None );
if ( NULL == *output ) if ( NULL == *output )
...@@ -172,10 +168,6 @@ spatialtf_sampler(PyGpuArrayObject * input, ...@@ -172,10 +168,6 @@ spatialtf_sampler(PyGpuArrayObject * input,
return -1; return -1;
} }
} }
else
{
GpuArray_memset( &( (*output)->ga ), 0 );
}
if ( ! GpuArray_IS_C_CONTIGUOUS( &(input->ga) ) ) if ( ! GpuArray_IS_C_CONTIGUOUS( &(input->ga) ) )
{ {
......
...@@ -2842,7 +2842,7 @@ class GpuDnnSpatialTfDesc(COp): ...@@ -2842,7 +2842,7 @@ class GpuDnnSpatialTfDesc(COp):
__props__ = ('dimensions', 'dtype') __props__ = ('dimensions', 'dtype')
params_type = ParamsType(nimages=int_t, nchannels=int_t, height=int_t, width=int_t, params_type = ParamsType(nimages=int_t, nchannels=int_t, height=int_t, width=int_t,
nb_dims=int_t, dtype=cudnn.cudnnDataType_t) dtype=cudnn.cudnnDataType_t)
def c_headers(self): def c_headers(self):
return ['cudnn.h', 'cudnn_helper.h'] return ['cudnn.h', 'cudnn_helper.h']
...@@ -2862,14 +2862,10 @@ class GpuDnnSpatialTfDesc(COp): ...@@ -2862,14 +2862,10 @@ class GpuDnnSpatialTfDesc(COp):
def __init__(self, dimensions, dtype="float32"): def __init__(self, dimensions, dtype="float32"):
COp.__init__(self, ["c_code/spatialtf_desc.c"], "APPLY_SPECIFIC(spatialtf_desc)") COp.__init__(self, ["c_code/spatialtf_desc.c"], "APPLY_SPECIFIC(spatialtf_desc)")
# dimensions must have at least width and height
assert len(dimensions) >= 2
self.dimensions = tuple(dimensions)
# cuDNN supports only 2D transformations, therefore output tensor must # cuDNN supports only 2D transformations, therefore output tensor must
# not exceed 4 dimensions (width, height, num_feature_maps, num_images) # have exactly 4 dimensions: (width, height, num_channels, num_images)
assert len(self.dimensions) <= 4 assert len(dimensions) == 4
self.dimensions = tuple(dimensions)
assert cudnn.cudnnDataType_t.has_alias(dtype) assert cudnn.cudnnDataType_t.has_alias(dtype)
self.dtype = dtype self.dtype = dtype
...@@ -2894,8 +2890,6 @@ class GpuDnnSpatialTfDesc(COp): ...@@ -2894,8 +2890,6 @@ class GpuDnnSpatialTfDesc(COp):
height = property(lambda self: self.dimensions[2]) height = property(lambda self: self.dimensions[2])
# Grid width # Grid width
width = property(lambda self: self.dimensions[3]) width = property(lambda self: self.dimensions[3])
# Number of dimensions in the output tensor
nb_dims = property(lambda self: len(self.dimensions))
def c_code_cache_version(self): def c_code_cache_version(self):
return (super(GpuDnnSpatialTfDesc, self).c_code_cache_version(), version()) return (super(GpuDnnSpatialTfDesc, self).c_code_cache_version(), version())
...@@ -2914,20 +2908,16 @@ class GpuDnnGridGenerator(DnnBase): ...@@ -2914,20 +2908,16 @@ class GpuDnnGridGenerator(DnnBase):
def __init__(self, dtype): def __init__(self, dtype):
DnnBase.__init__(self, ["c_code/spatialtf_grid.c"], "spatialtf_grid") DnnBase.__init__(self, ["c_code/spatialtf_grid.c"], "spatialtf_grid")
self.dtype = dtype self.dtype = dtype
def dnn_context(self, node):
return node.outputs[0].type.context_name
def make_node(self, grid_dimensions, theta, desc): def make_node(self, grid_dimensions, theta, desc):
context_name = infer_context_name(desc, theta) context_name = infer_context_name(desc, theta)
grid_dimensions = as_tensor_variable(grid_dimensions) grid_dimensions = as_tensor_variable(grid_dimensions)
theta = gpu_contiguous(as_gpuarray_variable(theta, context_name)) theta = gpu_contiguous(as_gpuarray_variable(theta, context_name))
assert theta.dtype in ('float16', 'float32', 'float64')
assert cudnn.cudnnDataType_t.has_alias(theta.dtype) assert cudnn.cudnnDataType_t.has_alias(theta.dtype)
assert theta.ndim == 3
# Allocate GPU memory for grid of coordinates # Allocate GPU memory for grid of coordinates
grid = GpuArrayType(dtype=self.dtype, grid = GpuArrayType(dtype=self.dtype,
...@@ -2948,24 +2938,18 @@ class GpuDnnGridSampler(DnnBase): ...@@ -2948,24 +2938,18 @@ class GpuDnnGridSampler(DnnBase):
""" """
__props__ = ('dtype',) __props__ = ('dtype',)
_cop_num_inputs = 6 _cop_num_inputs = 5
_cop_num_outputs = 1 _cop_num_outputs = 1
def __init__(self, dtype): def __init__(self, dtype):
DnnBase.__init__(self, ["c_code/spatialtf_sampler.c"], "spatialtf_sampler") DnnBase.__init__(self, ["c_code/spatialtf_sampler.c"], "spatialtf_sampler")
self.dtype = dtype self.dtype = dtype
def dnn_context(self, node): def make_node(self, img, grid, desc, alpha=None, beta=None):
return node.outputs[0].type.context_name
def make_node(self, img, grid, grid_dimensions, desc,
alpha=None, beta=None):
context_name = infer_context_name(img, grid) context_name = infer_context_name(img, grid)
img = as_gpuarray_variable(img, context_name) img = as_gpuarray_variable(img, context_name)
grid = as_gpuarray_variable(grid, context_name) grid = as_gpuarray_variable(grid, context_name)
grid_dimensions = as_tensor_variable(grid_dimensions)
output = GpuArrayType(dtype=self.dtype, output = GpuArrayType(dtype=self.dtype,
broadcastable=img.type.ndim * (False,), broadcastable=img.type.ndim * (False,),
...@@ -2973,11 +2957,6 @@ class GpuDnnGridSampler(DnnBase): ...@@ -2973,11 +2957,6 @@ class GpuDnnGridSampler(DnnBase):
if img.type.ndim != 4: if img.type.ndim != 4:
raise TypeError('img must be a 4D tensor') raise TypeError('img must be a 4D tensor')
if output.type.ndim != 4:
raise TypeError('output must be a 4D tensor')
if img.type.ndim != output.type.ndim:
raise TypeError('The number of dimensions of img and output must match')
if (not isinstance(desc.type, CDataType) or if (not isinstance(desc.type, CDataType) or
desc.type.ctype != 'cudnnSpatialTransformerDescriptor_t'): desc.type.ctype != 'cudnnSpatialTransformerDescriptor_t'):
...@@ -2986,8 +2965,7 @@ class GpuDnnGridSampler(DnnBase): ...@@ -2986,8 +2965,7 @@ class GpuDnnGridSampler(DnnBase):
alpha = ensure_dt(alpha, _one, 'alpha', img.dtype) alpha = ensure_dt(alpha, _one, 'alpha', img.dtype)
beta = ensure_dt(beta, _zero, 'beta', img.dtype) beta = ensure_dt(beta, _zero, 'beta', img.dtype)
return Apply(self, [img, grid, grid_dimensions, desc, alpha, beta], return Apply(self, [img, grid, desc, alpha, beta], [output])
[output])
def L_op(self, inputs, outputs, output_grads): def L_op(self, inputs, outputs, output_grads):
pass pass
...@@ -2998,10 +2976,18 @@ def dnn_spatialtf(img, theta, grid_dims, alpha=None, beta=None, dtype=None): ...@@ -2998,10 +2976,18 @@ def dnn_spatialtf(img, theta, grid_dims, alpha=None, beta=None, dtype=None):
GPU spatial transformer using cuDNN from NVIDIA. GPU spatial transformer using cuDNN from NVIDIA.
""" """
# img is a 4D tensor with shape: (num_images, num_channels, height, width)
assert img.ndim == 4
# Grid dimensions must be a 4-dimensional tuple
assert isinstance(grid_dims, tuple)
assert len(grid_dims) == 4
# Theta is an array of transformation matrices and must have shape: (num_images, 2, 3)
assert theta.ndim == 3
img = gpu_contiguous(img) img = gpu_contiguous(img)
theta = gpu_contiguous(theta) theta = gpu_contiguous(theta)
dtype = get_precision(dtype, [img, theta]) dtype = img.dtype if dtype is None else dtype
# Create spatial transformer descriptor # Create spatial transformer descriptor
desc = GpuDnnSpatialTfDesc(grid_dims, dtype)() desc = GpuDnnSpatialTfDesc(grid_dims, dtype)()
...@@ -3012,8 +2998,7 @@ def dnn_spatialtf(img, theta, grid_dims, alpha=None, beta=None, dtype=None): ...@@ -3012,8 +2998,7 @@ def dnn_spatialtf(img, theta, grid_dims, alpha=None, beta=None, dtype=None):
# Setup grid of coordinates # Setup grid of coordinates
grid_coord = GpuDnnGridGenerator(dtype)(grid_dims_var, theta, desc) grid_coord = GpuDnnGridGenerator(dtype)(grid_dims_var, theta, desc)
grid_sampler = GpuDnnGridSampler(dtype)(img, grid_coord, grid_dims_var, desc, grid_sampler = GpuDnnGridSampler(dtype)(img, grid_coord, desc, alpha, beta)
alpha, beta)
return grid_sampler return grid_sampler
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论