Commit 388f057b authored by João Victor Risso

Remove spatial transformer descriptor from grid and sampler Ops

Parent: ed89dc9e
#section support_code_apply

/**
 * Create and configure a cuDNN spatial transformer descriptor from the
 * requested output dimensions.
 *
 * out_dims : 1-D int64 array holding (num_images, num_channels, height, width).
 * desc     : on success, receives the newly created descriptor; the caller
 *            owns it and must destroy it with
 *            cudnnDestroySpatialTransformerDescriptor().
 * params   : Op parameters; params->precision selects the cuDNN data type.
 *
 * Returns 0 on success; returns 1 and sets a Python exception on failure.
 * On failure no descriptor is leaked.
 */
int APPLY_SPECIFIC(dnn_sptf_desc)(PyArrayObject * out_dims,
                                  cudnnSpatialTransformerDescriptor_t * desc,
                                  PyArrayObject * /* unused */ _reserved,
                                  PARAMS_TYPE * params);

int APPLY_SPECIFIC(dnn_sptf_desc)(PyArrayObject * out_dims,
                                  cudnnSpatialTransformerDescriptor_t * desc,
                                  PARAMS_TYPE * params)
{
    cudnnStatus_t err;

    const int nimages   = (int) *((npy_int64 *) PyArray_GETPTR1(out_dims, 0));
    const int nchannels = (int) *((npy_int64 *) PyArray_GETPTR1(out_dims, 1));
    const int height    = (int) *((npy_int64 *) PyArray_GETPTR1(out_dims, 2));
    const int width     = (int) *((npy_int64 *) PyArray_GETPTR1(out_dims, 3));

    // Reject degenerate dimensions. Use <= 0 rather than == 0 so that
    // negative (corrupt) dimensions are also caught before reaching cuDNN.
    if ( nimages <= 0 || nchannels <= 0 || height <= 0 || width <= 0 )
    {
        PyErr_SetString( PyExc_RuntimeError,
            "GpuDnnTransformerDesc: invalid grid dimensions" );
        return 1;
    }

    // num_images, num_channels, height, width
    const int out_tensor_dims[4] = { nimages, nchannels, height, width };

    err = cudnnCreateSpatialTransformerDescriptor( desc );
    if ( CUDNN_STATUS_SUCCESS != err )
    {
        PyErr_Format( PyExc_MemoryError,
            "GpuDnnTransformerDesc: could not allocate descriptor: %s",
            cudnnGetErrorString( err ) );
        return 1;
    }

    // Currently, only the bilinear sampler is supported by cuDNN,
    // so it is not available as a parameter
    err = cudnnSetSpatialTransformerNdDescriptor( *desc, CUDNN_SAMPLER_BILINEAR,
                                                  params->precision, 4, out_tensor_dims );
    if ( CUDNN_STATUS_SUCCESS != err )
    {
        // Do not leak the descriptor allocated above on the failure path.
        cudnnDestroySpatialTransformerDescriptor( *desc );
        *desc = NULL;
        PyErr_Format( PyExc_MemoryError,
            "GpuDnnTransformerDesc: could not initialize descriptor: %s",
            cudnnGetErrorString( err ) );
        return 1;
    }

    return 0;
}
#section support_code_struct
// Per-apply cuDNN spatial transformer descriptor, configured lazily at
// execution time (see dnn_sptf_grid) and reused across calls.
cudnnSpatialTransformerDescriptor_t APPLY_SPECIFIC(sptf);
#section init_code_struct
// Allocate the descriptor once when the Op's C struct is initialized.
// On allocation failure, raise MemoryError and abort struct construction.
cudnnStatus_t APPLY_SPECIFIC(err) = CUDNN_STATUS_SUCCESS;
APPLY_SPECIFIC(sptf) = NULL;
if ((APPLY_SPECIFIC(err) = cudnnCreateSpatialTransformerDescriptor(&APPLY_SPECIFIC(sptf))) != CUDNN_STATUS_SUCCESS)
{
PyErr_Format(PyExc_MemoryError,
"GpuDnnTransformerGrid: could not allocate spatial transformer descriptor (sptf): %s",
cudnnGetErrorString(APPLY_SPECIFIC(err)));
FAIL;
}
#section cleanup_code_struct
// Release the descriptor when the Op's C struct is torn down; the NULL
// check guards against init having failed before allocation succeeded.
if (APPLY_SPECIFIC(sptf) != NULL) { cudnnDestroySpatialTransformerDescriptor(APPLY_SPECIFIC(sptf)); }
#section support_code_struct
int
APPLY_SPECIFIC(dnn_sptf_grid)(PyGpuArrayObject * theta,
PyArrayObject * out_dims,
cudnnSpatialTransformerDescriptor_t desc,
PyGpuArrayObject ** grid,
cudnnHandle_t _handle)
{
PyGpuContextObject * gpu_ctx = theta->context;
size_t grid_dims[4];
int num_images, num_channels, height, width;
int desc_dims[4];
cudnnDataType_t dt;
cudnnStatus_t err = CUDNN_STATUS_SUCCESS;
if ( theta->ga.typecode != GA_FLOAT &&
theta->ga.typecode != GA_DOUBLE &&
theta->ga.typecode != GA_HALF )
switch(theta->ga.typecode)
{
case GA_DOUBLE:
dt = CUDNN_DATA_DOUBLE;
break;
case GA_FLOAT:
dt = CUDNN_DATA_FLOAT;
break;
case GA_HALF:
dt = CUDNN_DATA_HALF;
break;
default:
PyErr_SetString( PyExc_TypeError,
"GpuDnnTransformerGrid: unsupported data type for theta in spatial transformer." );
return 1;
}
else if ( PyGpuArray_DIM( theta, 1 ) != 2 || PyGpuArray_DIM( theta, 2 ) != 3 )
if ( PyGpuArray_DIM( theta, 1 ) != 2 || PyGpuArray_DIM( theta, 2 ) != 3 )
{
PyErr_Format( PyExc_RuntimeError,
"GpuDnnTransformerGrid: incorrect dimensions for theta, expected (%d, %d, %d), got (%d, %d, %d)",
......@@ -38,14 +69,33 @@ APPLY_SPECIFIC(dnn_sptf_grid)(PyGpuArrayObject * theta,
// Obtain output dimensions
num_images = (int) *( (npy_int64 *) PyArray_GETPTR1( out_dims, 0 ) );
num_channels = (int) *( (npy_int64 *) PyArray_GETPTR1( out_dims, 1 ) );
height = (int) *( (npy_int64 *) PyArray_GETPTR1( out_dims, 2 ) );
width = (int) *( (npy_int64 *) PyArray_GETPTR1( out_dims, 3 ) );
// Set grid dimensions
// Set transformed output dimensions to setup the descriptor
desc_dims[0] = num_images;
desc_dims[1] = num_channels;
desc_dims[2] = height;
desc_dims[3] = width;
// Set sampling grid dimensions
grid_dims[0] = num_images;
grid_dims[1] = height;
grid_dims[2] = width;
grid_dims[3] = 2;
// Currently, only the bilinear sampler is supported by cuDNN,
// so the sampler method is currently not available as a parameter
err = cudnnSetSpatialTransformerNdDescriptor(APPLY_SPECIFIC(sptf), CUDNN_SAMPLER_BILINEAR,
dt, 4, desc_dims );
if ( CUDNN_STATUS_SUCCESS != err )
{
PyErr_Format( PyExc_MemoryError,
"GpuDnnTransformerGrid: could not initialize descriptor (sptf): %s",
cudnnGetErrorString( err ) );
return 1;
}
if ( theano_prep_output( grid, 4, grid_dims, theta->ga.typecode,
GA_C_ORDER, gpu_ctx ) != 0 )
{
......@@ -59,8 +109,8 @@ APPLY_SPECIFIC(dnn_sptf_grid)(PyGpuArrayObject * theta,
cuda_wait( theta->ga.data, GPUARRAY_CUDA_WAIT_READ );
cuda_wait( (*grid)->ga.data, GPUARRAY_CUDA_WAIT_WRITE );
err = cudnnSpatialTfGridGeneratorForward( _handle, desc, PyGpuArray_DEV_DATA( theta ),
PyGpuArray_DEV_DATA( *grid ) );
err = cudnnSpatialTfGridGeneratorForward( _handle, APPLY_SPECIFIC(sptf),
PyGpuArray_DEV_DATA( theta ), PyGpuArray_DEV_DATA( *grid ) );
cuda_record( theta->ga.data, GPUARRAY_CUDA_WAIT_READ );
cuda_record( (*grid)->ga.data, GPUARRAY_CUDA_WAIT_WRITE );
......
#section support_code_struct
cudnnSpatialTransformerDescriptor_t APPLY_SPECIFIC(sptf);
cudnnTensorDescriptor_t APPLY_SPECIFIC(xdesc);
cudnnTensorDescriptor_t APPLY_SPECIFIC(ydesc);
#section init_code_struct
APPLY_SPECIFIC(sptf) = NULL;
APPLY_SPECIFIC(xdesc) = NULL;
APPLY_SPECIFIC(ydesc) = NULL;
{
cudnnStatus_t err = CUDNN_STATUS_SUCCESS;
err = cudnnCreateSpatialTransformerDescriptor(&APPLY_SPECIFIC(sptf));
if (err != CUDNN_STATUS_SUCCESS)
{
PyErr_Format(PyExc_MemoryError,
"GpuDnnTransformerSampler: could not allocate spatial transformer descriptor (sptf): %s",
cudnnGetErrorString( err ));
FAIL;
}
err = cudnnCreateTensorDescriptor( &APPLY_SPECIFIC(xdesc) );
if ( err != CUDNN_STATUS_SUCCESS )
{
......@@ -31,6 +43,9 @@ APPLY_SPECIFIC(ydesc) = NULL;
#section cleanup_code_struct
if (APPLY_SPECIFIC(sptf) != NULL)
cudnnDestroySpatialTransformerDescriptor(APPLY_SPECIFIC(sptf));
if ( APPLY_SPECIFIC(xdesc) != NULL )
cudnnDestroyTensorDescriptor( APPLY_SPECIFIC(xdesc) );
......@@ -42,7 +57,6 @@ if ( APPLY_SPECIFIC(ydesc) != NULL )
int
APPLY_SPECIFIC(dnn_sptf_sampler)(PyGpuArrayObject * input,
PyGpuArrayObject * grid,
cudnnSpatialTransformerDescriptor_t desc,
PyGpuArrayObject ** output,
cudnnHandle_t _handle)
{
......@@ -52,6 +66,8 @@ APPLY_SPECIFIC(dnn_sptf_sampler)(PyGpuArrayObject * input,
double alpha = 1.0, beta = 0.0;
float af = alpha, bf = beta;
size_t out_dims[4];
int desc_dims[4];
cudnnDataType_t dt;
cudnnStatus_t err = CUDNN_STATUS_SUCCESS;
switch (input->ga.typecode)
......@@ -59,14 +75,17 @@ APPLY_SPECIFIC(dnn_sptf_sampler)(PyGpuArrayObject * input,
case GA_DOUBLE:
alpha_p = (void *)α
beta_p = (void *)β
dt = CUDNN_DATA_DOUBLE;
break;
case GA_FLOAT:
alpha_p = (void *)⁡
beta_p = (void *)&bf;
dt = CUDNN_DATA_FLOAT;
break;
case GA_HALF:
alpha_p = (void *)⁡
beta_p = (void *)&bf;
dt = CUDNN_DATA_HALF;
break;
default:
PyErr_SetString( PyExc_TypeError,
......@@ -78,6 +97,11 @@ APPLY_SPECIFIC(dnn_sptf_sampler)(PyGpuArrayObject * input,
out_dims[1] = (size_t) PyGpuArray_DIM(input, 1); // num_channels
out_dims[2] = (size_t) PyGpuArray_DIM(grid, 1); // grid height
out_dims[3] = (size_t) PyGpuArray_DIM(grid, 2); // grid width
// Set output dimensions for the descriptor setup
desc_dims[0] = (int) out_dims[0];
desc_dims[1] = (int) out_dims[1];
desc_dims[2] = (int) out_dims[2];
desc_dims[3] = (int) out_dims[3];
if ( out_dims[0] == 0 || out_dims[1] == 0 || out_dims[2] == 0 || out_dims[3] == 0 )
{
......@@ -94,6 +118,18 @@ APPLY_SPECIFIC(dnn_sptf_sampler)(PyGpuArrayObject * input,
return 1;
}
// Currently, only the bilinear sampler is supported by cuDNN,
// so the sampler method is currently not available as a parameter
err = cudnnSetSpatialTransformerNdDescriptor(APPLY_SPECIFIC(sptf), CUDNN_SAMPLER_BILINEAR,
dt, 4, desc_dims );
if ( CUDNN_STATUS_SUCCESS != err )
{
PyErr_Format( PyExc_MemoryError,
"GpuDnnTransformerSampler: could not initialize descriptor: %s",
cudnnGetErrorString( err ) );
return 1;
}
if ( c_set_tensorNd( input, APPLY_SPECIFIC(xdesc) ) != 0 )
return 1;
......@@ -106,9 +142,9 @@ APPLY_SPECIFIC(dnn_sptf_sampler)(PyGpuArrayObject * input,
cuda_wait( grid->ga.data, GPUARRAY_CUDA_WAIT_READ );
cuda_wait( (*output)->ga.data, GPUARRAY_CUDA_WAIT_WRITE );
err = cudnnSpatialTfSamplerForward( _handle, desc, alpha_p, APPLY_SPECIFIC(xdesc),
PyGpuArray_DEV_DATA( input ), PyGpuArray_DEV_DATA( grid ), beta_p,
APPLY_SPECIFIC(ydesc), PyGpuArray_DEV_DATA( *output ) );
err = cudnnSpatialTfSamplerForward( _handle, APPLY_SPECIFIC(sptf), alpha_p,
APPLY_SPECIFIC(xdesc), PyGpuArray_DEV_DATA( input ), PyGpuArray_DEV_DATA( grid ),
beta_p, APPLY_SPECIFIC(ydesc), PyGpuArray_DEV_DATA( *output ) );
cuda_record( input->ga.data, GPUARRAY_CUDA_WAIT_READ );
cuda_record( grid->ga.data, GPUARRAY_CUDA_WAIT_READ );
......
......@@ -13,6 +13,7 @@ from theano import Op, Apply, tensor, config, Variable
from theano.scalar import (as_scalar, constant, Log, get_scalar_type,
int32 as int_t, bool as bool_t, uint32 as uint32_t)
from theano.tensor import as_tensor_variable, Argmax
from theano.tensor.extra_ops import cpu_contiguous
from theano.gradient import DisconnectedType, grad_not_implemented
from theano.gof import Optimizer, local_optimizer, COp, ParamsType, EnumList
from theano.gof.cmodule import GCC_compiler
......@@ -2738,66 +2739,12 @@ def dnn_batch_normalization_test(inputs, gamma, beta, mean, var,
return result
class GpuDnnTransformerDesc(COp):
"""
Descriptor Op for cuDNN Spatial Transformer.
"""
__props__ = ('precision',)
params_type = ParamsType(precision=cudnn.cudnnDataType_t)
def c_headers(self):
return ['cudnn.h', 'cudnn_helper.h']
def c_header_dirs(self):
header_dirs = [os.path.dirname(__file__)]
if config.dnn.include_path:
header_dirs += [config.dnn.include_path]
return header_dirs
def c_libraries(self):
return ['cudnn']
def c_lib_dirs(self):
lib_dirs = []
if config.dnn.library_path:
lib_dirs += [config.dnn.library_path]
return lib_dirs
def do_constant_folding(self, node):
return False
def __init__(self, precision=theano.config.floatX):
COp.__init__(self, ["c_code/dnn_sptf_desc.c"], "APPLY_SPECIFIC(dnn_sptf_desc)")
assert cudnn.cudnnDataType_t.has_alias(precision)
self.precision = precision
def make_node(self, out_dims):
out_dims = as_tensor_variable(out_dims)
assert out_dims.dtype in theano.tensor.basic.integer_dtypes
assert out_dims.ndim == 1
out_dims = theano.tensor.basic.cast(out_dims, 'int64')
node = Apply(self, [out_dims],
[CDataType("cudnnSpatialTransformerDescriptor_t",
freefunc="cudnnDestroySpatialTransformerDescriptor")()])
# DebugMode cannot compare the values of CDataType variables, so by
# default it returns False all the time. To prevent DebugMode from
# complaining because of the MergeOptimizer, we make this variable
# always compare to True.
out = node.outputs[0]
out.tag.values_eq_approx = tensor.type.values_eq_approx_always_true
return node
def c_code_cache_version(self):
return (super(GpuDnnTransformerDesc, self).c_code_cache_version(), version())
class GpuDnnTransformerGrid(DnnBase):
"""
Grid generator Op for cuDNN Spatial Transformer.
"""
__props__ = ()
_cop_num_inputs = 3
_cop_num_inputs = 2
_cop_num_outputs = 1
_f16_ok = True
check_input = False
......@@ -2805,7 +2752,7 @@ class GpuDnnTransformerGrid(DnnBase):
def __init__(self):
DnnBase.__init__(self, ["c_code/dnn_sptf_grid.c"], "APPLY_SPECIFIC(dnn_sptf_grid)")
def make_node(self, theta, desc):
def make_node(self, theta, out_dims):
"""
Create a grid generator node for a cuDNN Spatial Transformer
......@@ -2815,22 +2762,14 @@ class GpuDnnTransformerGrid(DnnBase):
Affine transformation tensor containing one affine transformation
matrix per image. ``theta`` is usually generated by the localization
network.
desc : GpuDnnTransformerDesc
Spatial transformer descriptor
"""
context_name = infer_context_name(desc)
if (not isinstance(desc.type, CDataType) or
desc.type.ctype != 'cudnnSpatialTransformerDescriptor_t'):
raise ValueError('desc must be cudnnSpatialTransformerDescriptor_t')
context_name = infer_context_name(theta)
theta = gpu_contiguous(as_gpuarray_variable(theta, context_name))
assert theta.dtype in ('float16', 'float32', 'float64')
assert theta.ndim == 3
# Setup output dimensions using input from descriptor
out_dims = as_tensor_variable(desc.owner.inputs[0])
out_dims = cpu_contiguous(as_tensor_variable(out_dims))
assert out_dims.dtype in theano.tensor.basic.integer_dtypes
assert out_dims.ndim == 1
# Ensure 64-bit ints are passed to the C code
......@@ -2839,20 +2778,16 @@ class GpuDnnTransformerGrid(DnnBase):
broadcastable=(theta.type.ndim + 1) * (False,),
context_name=context_name)()
inputs = [theta, out_dims, desc]
inputs = [theta, out_dims]
outputs = [grid]
return Apply(self, inputs, outputs)
def grad(self, inputs, grads):
theta, out_dims, desc = inputs
theta, out_dims = inputs
dgrid = grads[0]
dtheta = GpuDnnTransformerGradT()(dgrid, desc)
return [dtheta, grad_not_implemented(self, 1, out_dims), DisconnectedType()()]
def connection_pattern(self, node):
# not connected to desc
return [[1], [1], [0]]
dtheta = GpuDnnTransformerGradT()(dgrid)
return [dtheta, grad_not_implemented(self, 1, out_dims)]
class GpuDnnTransformerSampler(DnnBase):
......@@ -2860,7 +2795,7 @@ class GpuDnnTransformerSampler(DnnBase):
Grid sampler Op for cuDNN Spatial Transformer.
"""
__props__ = ()
_cop_num_inputs = 3
_cop_num_inputs = 2
_cop_num_outputs = 1
_f16_ok = True
check_input = False
......@@ -2868,7 +2803,7 @@ class GpuDnnTransformerSampler(DnnBase):
def __init__(self):
DnnBase.__init__(self, ["c_code/dnn_sptf_sampler.c"], "APPLY_SPECIFIC(dnn_sptf_sampler)")
def make_node(self, img, grid, desc):
def make_node(self, img, grid):
"""
Create a grid sampler node for a cuDNN Spatial Transformer
......@@ -2883,15 +2818,8 @@ class GpuDnnTransformerSampler(DnnBase):
grid : GpuDnnTransformerGrid
Grid that contains the coordinates of the pixels to be sampled from
the inputs images.
desc : GpuDnnTransformerDesc
Spatial transformer descriptor
"""
context_name = infer_context_name(desc)
if (not isinstance(desc.type, CDataType) or
desc.type.ctype != 'cudnnSpatialTransformerDescriptor_t'):
raise ValueError('desc must be cudnnSpatialTransformerDescriptor_t')
context_name = infer_context_name(img, grid)
img = gpu_contiguous(as_gpuarray_variable(img, context_name))
if img.type.ndim != 4:
......@@ -2909,20 +2837,16 @@ class GpuDnnTransformerSampler(DnnBase):
broadcastable=img.type.ndim * (False,),
context_name=context_name)()
inputs = [img, grid, desc]
inputs = [img, grid]
outputs = [out]
return Apply(self, inputs, outputs)
def grad(self, inputs, grads):
img, grid, desc = inputs
img, grid = inputs
dy = grads[0]
dimg, dgrid = GpuDnnTransformerGradI()(img, grid, dy, desc)
return [dimg, dgrid, DisconnectedType()()]
def connection_pattern(self, node):
# not connected to desc
return [[1], [1], [0]]
dimg, dgrid = GpuDnnTransformerGradI()(img, grid, dy)
return [dimg, dgrid]
class GpuDnnTransformerGradI(DnnBase):
......@@ -2930,7 +2854,7 @@ class GpuDnnTransformerGradI(DnnBase):
Gradient of inputs Op for cuDNN Spatial Transformer.
"""
__props__ = ()
_cop_num_inputs = 4
_cop_num_inputs = 3
_cop_num_outputs = 2
_f16_ok = True
check_input = False
......@@ -2938,12 +2862,8 @@ class GpuDnnTransformerGradI(DnnBase):
def __init__(self):
DnnBase.__init__(self, ["c_code/dnn_sptf_gi.c"], "APPLY_SPECIFIC(dnn_sptf_gi)")
def make_node(self, img, grid, dy, desc):
context_name = infer_context_name(img, grid, dy, desc)
if (not isinstance(desc.type, CDataType) or
desc.type.ctype != 'cudnnSpatialTransformerDescriptor_t'):
raise ValueError('desc must be cudnnSpatialTransformerDescriptor_t')
def make_node(self, img, grid, dy):
context_name = infer_context_name(img, grid, dy)
img = as_gpuarray_variable(gpu_contiguous(img), context_name)
if img.ndim != 4:
......@@ -2960,22 +2880,18 @@ class GpuDnnTransformerGradI(DnnBase):
dimg = img.type()
dgrid = grid.type()
inputs = [img, grid, dy, desc]
inputs = [img, grid, dy]
outputs = [dimg, dgrid]
return Apply(self, inputs, outputs)
def connection_pattern(self, node):
# not connected to desc
return [[1, 1], [1, 1], [1, 1], [0, 0]]
class GpuDnnTransformerGradT(DnnBase):
"""
Gradient of affine transformations Op for cuDNN Spatial Transformer.
"""
__props__ = ()
_cop_num_inputs = 2
_cop_num_inputs = 1
_cop_num_outputs = 1
_f16_ok = True
check_input = False
......@@ -2983,12 +2899,8 @@ class GpuDnnTransformerGradT(DnnBase):
def __init__(self):
DnnBase.__init__(self, ["c_code/dnn_sptf_gt.c"], "APPLY_SPECIFIC(dnn_sptf_gt)")
def make_node(self, dgrid, desc):
context_name = infer_context_name(desc)
if (not isinstance(desc.type, CDataType) or
desc.type.ctype != 'cudnnSpatialTransformerDescriptor_t'):
raise ValueError('desc must be cudnnSpatialTransformerDescriptor_t')
def make_node(self, dgrid):
context_name = infer_context_name(dgrid)
dgrid = as_gpuarray_variable(dgrid, context_name)
assert dgrid.dtype in ('float16', 'float32', 'float64')
......@@ -2997,15 +2909,11 @@ class GpuDnnTransformerGradT(DnnBase):
dtheta = GpuArrayType(dtype=dgrid.dtype,
broadcastable=(dgrid.type.ndim - 1) * (False,),
context_name=context_name)()
inputs = [dgrid, desc]
inputs = [dgrid]
outputs = [dtheta]
return Apply(self, inputs, outputs)
def connection_pattern(self, node):
# not connected to desc
return [[1], [0]]
def dnn_spatialtf(img, theta, scale_width=1, scale_height=1, precision=theano.config.floatX):
"""
......@@ -3050,10 +2958,7 @@ def dnn_spatialtf(img, theta, scale_width=1, scale_height=1, precision=theano.co
theano.tensor.ceil(img.shape[3] * scale_width))
out_dims = tuple([as_scalar(v).astype('int64') for v in out_dims])
# Create spatial transformer descriptor
desc = GpuDnnTransformerDesc(precision)(out_dims)
context_name = infer_context_name(desc)
context_name = infer_context_name(img, theta)
img = gpu_contiguous(as_gpuarray_variable(img, context_name))
theta = gpu_contiguous(as_gpuarray_variable(theta, context_name))
......@@ -3063,8 +2968,8 @@ def dnn_spatialtf(img, theta, scale_width=1, scale_height=1, precision=theano.co
assert theta.ndim == 3
# Setup spatial transformer
grid = GpuDnnTransformerGrid()(theta, desc)
sampler = GpuDnnTransformerSampler()(img, grid, desc)
grid = GpuDnnTransformerGrid()(theta, out_dims)
sampler = GpuDnnTransformerSampler()(img, grid)
return sampler
......
Markdown is supported
0%
You are about to add 0 people to this discussion. Please proceed with caution.
Finish editing this message first!
Register or sign in to comment