Commit 388f057b authored by João Victor Risso

Remove spatial transformer descriptor from grid and sampler Ops

Parent ed89dc9e
#section support_code_apply

/* Create and configure a cuDNN spatial transformer descriptor for an output
 * of shape (num_images, num_channels, height, width).
 *
 * out_dims : 1-D array holding 4 npy_int64 values — the output dimensions.
 * desc     : receives the newly created descriptor. The caller owns *desc
 *            only when this function returns 0; on failure nothing is leaked.
 * params   : Op parameters; params->precision selects the cuDNN data type.
 *
 * Returns 0 on success, 1 on failure with a Python exception set.
 */
int APPLY_SPECIFIC(dnn_sptf_desc)(PyArrayObject * out_dims,
                                  cudnnSpatialTransformerDescriptor_t * desc,
                                  PARAMS_TYPE * params)
{
    cudnnStatus_t err;

    const int nimages   = (int) *((npy_int64 *) PyArray_GETPTR1(out_dims, 0));
    const int nchannels = (int) *((npy_int64 *) PyArray_GETPTR1(out_dims, 1));
    const int height    = (int) *((npy_int64 *) PyArray_GETPTR1(out_dims, 2));
    const int width     = (int) *((npy_int64 *) PyArray_GETPTR1(out_dims, 3));

    if ( nimages == 0 || nchannels == 0 || height == 0 || width == 0 )
    {
        PyErr_SetString( PyExc_RuntimeError,
                         "GpuDnnTransformerDesc: invalid grid dimensions" );
        return 1;
    }

    // num_images, num_channels, height, width
    const int out_tensor_dims[4] = { nimages, nchannels, height, width };

    err = cudnnCreateSpatialTransformerDescriptor( desc );
    if ( CUDNN_STATUS_SUCCESS != err )
    {
        PyErr_Format( PyExc_MemoryError,
                      "GpuDnnTransformerDesc: could not allocate descriptor: %s",
                      cudnnGetErrorString( err ) );
        return 1;
    }

    // Currently, only the bilinear sampler is supported by cuDNN,
    // so it is not available as a parameter
    err = cudnnSetSpatialTransformerNdDescriptor( *desc, CUDNN_SAMPLER_BILINEAR,
                                                  params->precision, 4,
                                                  out_tensor_dims );
    if ( CUDNN_STATUS_SUCCESS != err )
    {
        // Fix: destroy the descriptor created above so a failed setup does
        // not leak it — the caller only owns *desc on success.
        cudnnDestroySpatialTransformerDescriptor( *desc );
        *desc = NULL;
        PyErr_Format( PyExc_MemoryError,
                      "GpuDnnTransformerDesc: could not initialize descriptor: %s",
                      cudnnGetErrorString( err ) );
        return 1;
    }

    return 0;
}
#section support_code_struct
// Spatial transformer descriptor owned by this Op's apply-specific struct;
// allocated once in init_code_struct and released in cleanup_code_struct.
cudnnSpatialTransformerDescriptor_t APPLY_SPECIFIC(sptf);
#section init_code_struct
// Allocate the descriptor eagerly so the apply code can assume it exists.
// FAIL aborts struct construction with the Python exception set below.
cudnnStatus_t APPLY_SPECIFIC(err) = CUDNN_STATUS_SUCCESS;
APPLY_SPECIFIC(sptf) = NULL;
if ((APPLY_SPECIFIC(err) = cudnnCreateSpatialTransformerDescriptor(&APPLY_SPECIFIC(sptf))) != CUDNN_STATUS_SUCCESS)
{
PyErr_Format(PyExc_MemoryError,
"GpuDnnTransformerGrid: could not allocate spatial transformer descriptor (sptf): %s",
cudnnGetErrorString(APPLY_SPECIFIC(err)));
FAIL;
}
#section cleanup_code_struct
// NULL check guards against cleanup running after a failed/partial init.
if (APPLY_SPECIFIC(sptf) != NULL) { cudnnDestroySpatialTransformerDescriptor(APPLY_SPECIFIC(sptf)); }
#section support_code_struct
int
APPLY_SPECIFIC(dnn_sptf_grid)(PyGpuArrayObject * theta,
PyArrayObject * out_dims,
cudnnSpatialTransformerDescriptor_t desc,
PyGpuArrayObject ** grid,
cudnnHandle_t _handle)
{
PyGpuContextObject * gpu_ctx = theta->context;
size_t grid_dims[4];
int num_images, num_channels, height, width;
int desc_dims[4];
cudnnDataType_t dt;
cudnnStatus_t err = CUDNN_STATUS_SUCCESS;
if ( theta->ga.typecode != GA_FLOAT &&
theta->ga.typecode != GA_DOUBLE &&
theta->ga.typecode != GA_HALF )
switch(theta->ga.typecode)
{
case GA_DOUBLE:
dt = CUDNN_DATA_DOUBLE;
break;
case GA_FLOAT:
dt = CUDNN_DATA_FLOAT;
break;
case GA_HALF:
dt = CUDNN_DATA_HALF;
break;
default:
PyErr_SetString( PyExc_TypeError,
"GpuDnnTransformerGrid: unsupported data type for theta in spatial transformer." );
return 1;
}
else if ( PyGpuArray_DIM( theta, 1 ) != 2 || PyGpuArray_DIM( theta, 2 ) != 3 )
if ( PyGpuArray_DIM( theta, 1 ) != 2 || PyGpuArray_DIM( theta, 2 ) != 3 )
{
PyErr_Format( PyExc_RuntimeError,
"GpuDnnTransformerGrid: incorrect dimensions for theta, expected (%d, %d, %d), got (%d, %d, %d)",
......@@ -38,14 +69,33 @@ APPLY_SPECIFIC(dnn_sptf_grid)(PyGpuArrayObject * theta,
// Obtain output dimensions
num_images = (int) *( (npy_int64 *) PyArray_GETPTR1( out_dims, 0 ) );
num_channels = (int) *( (npy_int64 *) PyArray_GETPTR1( out_dims, 1 ) );
height = (int) *( (npy_int64 *) PyArray_GETPTR1( out_dims, 2 ) );
width = (int) *( (npy_int64 *) PyArray_GETPTR1( out_dims, 3 ) );
// Set grid dimensions
// Set transformed output dimensions to setup the descriptor
desc_dims[0] = num_images;
desc_dims[1] = num_channels;
desc_dims[2] = height;
desc_dims[3] = width;
// Set sampling grid dimensions
grid_dims[0] = num_images;
grid_dims[1] = height;
grid_dims[2] = width;
grid_dims[3] = 2;
// Currently, only the bilinear sampler is supported by cuDNN,
// so the sampler method is currently not available as a parameter
err = cudnnSetSpatialTransformerNdDescriptor(APPLY_SPECIFIC(sptf), CUDNN_SAMPLER_BILINEAR,
dt, 4, desc_dims );
if ( CUDNN_STATUS_SUCCESS != err )
{
PyErr_Format( PyExc_MemoryError,
"GpuDnnTransformerGrid: could not initialize descriptor (sptf): %s",
cudnnGetErrorString( err ) );
return 1;
}
if ( theano_prep_output( grid, 4, grid_dims, theta->ga.typecode,
GA_C_ORDER, gpu_ctx ) != 0 )
{
......@@ -59,8 +109,8 @@ APPLY_SPECIFIC(dnn_sptf_grid)(PyGpuArrayObject * theta,
cuda_wait( theta->ga.data, GPUARRAY_CUDA_WAIT_READ );
cuda_wait( (*grid)->ga.data, GPUARRAY_CUDA_WAIT_WRITE );
err = cudnnSpatialTfGridGeneratorForward( _handle, desc, PyGpuArray_DEV_DATA( theta ),
PyGpuArray_DEV_DATA( *grid ) );
err = cudnnSpatialTfGridGeneratorForward( _handle, APPLY_SPECIFIC(sptf),
PyGpuArray_DEV_DATA( theta ), PyGpuArray_DEV_DATA( *grid ) );
cuda_record( theta->ga.data, GPUARRAY_CUDA_WAIT_READ );
cuda_record( (*grid)->ga.data, GPUARRAY_CUDA_WAIT_WRITE );
......
#section support_code_struct
// Descriptors owned by the sampler Op's struct: the spatial transformer
// descriptor plus the input (x) and output (y) tensor descriptors.
cudnnSpatialTransformerDescriptor_t APPLY_SPECIFIC(sptf);
cudnnTensorDescriptor_t APPLY_SPECIFIC(xdesc);
cudnnTensorDescriptor_t APPLY_SPECIFIC(ydesc);
#section init_code_struct
// Start from NULL so cleanup_code_struct can safely destroy only the
// descriptors that were actually created if initialization fails partway.
APPLY_SPECIFIC(sptf) = NULL;
APPLY_SPECIFIC(xdesc) = NULL;
APPLY_SPECIFIC(ydesc) = NULL;
{
cudnnStatus_t err = CUDNN_STATUS_SUCCESS;
err = cudnnCreateSpatialTransformerDescriptor(&APPLY_SPECIFIC(sptf));
if (err != CUDNN_STATUS_SUCCESS)
{
PyErr_Format(PyExc_MemoryError,
"GpuDnnTransformerSampler: could not allocate spatial transformer descriptor (sptf): %s",
cudnnGetErrorString( err ));
FAIL;
}
err = cudnnCreateTensorDescriptor( &APPLY_SPECIFIC(xdesc) );
if ( err != CUDNN_STATUS_SUCCESS )
{
......@@ -31,6 +43,9 @@ APPLY_SPECIFIC(ydesc) = NULL;
#section cleanup_code_struct
if (APPLY_SPECIFIC(sptf) != NULL)
cudnnDestroySpatialTransformerDescriptor(APPLY_SPECIFIC(sptf));
if ( APPLY_SPECIFIC(xdesc) != NULL )
cudnnDestroyTensorDescriptor( APPLY_SPECIFIC(xdesc) );
......@@ -42,7 +57,6 @@ if ( APPLY_SPECIFIC(ydesc) != NULL )
int
APPLY_SPECIFIC(dnn_sptf_sampler)(PyGpuArrayObject * input,
PyGpuArrayObject * grid,
cudnnSpatialTransformerDescriptor_t desc,
PyGpuArrayObject ** output,
cudnnHandle_t _handle)
{
......@@ -52,6 +66,8 @@ APPLY_SPECIFIC(dnn_sptf_sampler)(PyGpuArrayObject * input,
double alpha = 1.0, beta = 0.0;
float af = alpha, bf = beta;
size_t out_dims[4];
int desc_dims[4];
cudnnDataType_t dt;
cudnnStatus_t err = CUDNN_STATUS_SUCCESS;
switch (input->ga.typecode)
......@@ -59,14 +75,17 @@ APPLY_SPECIFIC(dnn_sptf_sampler)(PyGpuArrayObject * input,
case GA_DOUBLE:
alpha_p = (void *)α
beta_p = (void *)β
dt = CUDNN_DATA_DOUBLE;
break;
case GA_FLOAT:
alpha_p = (void *)⁡
beta_p = (void *)&bf;
dt = CUDNN_DATA_FLOAT;
break;
case GA_HALF:
alpha_p = (void *)⁡
beta_p = (void *)&bf;
dt = CUDNN_DATA_HALF;
break;
default:
PyErr_SetString( PyExc_TypeError,
......@@ -78,6 +97,11 @@ APPLY_SPECIFIC(dnn_sptf_sampler)(PyGpuArrayObject * input,
out_dims[1] = (size_t) PyGpuArray_DIM(input, 1); // num_channels
out_dims[2] = (size_t) PyGpuArray_DIM(grid, 1); // grid height
out_dims[3] = (size_t) PyGpuArray_DIM(grid, 2); // grid width
// Set output dimensions for the descriptor setup
desc_dims[0] = (int) out_dims[0];
desc_dims[1] = (int) out_dims[1];
desc_dims[2] = (int) out_dims[2];
desc_dims[3] = (int) out_dims[3];
if ( out_dims[0] == 0 || out_dims[1] == 0 || out_dims[2] == 0 || out_dims[3] == 0 )
{
......@@ -94,6 +118,18 @@ APPLY_SPECIFIC(dnn_sptf_sampler)(PyGpuArrayObject * input,
return 1;
}
// Currently, only the bilinear sampler is supported by cuDNN,
// so the sampler method is currently not available as a parameter
err = cudnnSetSpatialTransformerNdDescriptor(APPLY_SPECIFIC(sptf), CUDNN_SAMPLER_BILINEAR,
dt, 4, desc_dims );
if ( CUDNN_STATUS_SUCCESS != err )
{
PyErr_Format( PyExc_MemoryError,
"GpuDnnTransformerSampler: could not initialize descriptor: %s",
cudnnGetErrorString( err ) );
return 1;
}
if ( c_set_tensorNd( input, APPLY_SPECIFIC(xdesc) ) != 0 )
return 1;
......@@ -106,9 +142,9 @@ APPLY_SPECIFIC(dnn_sptf_sampler)(PyGpuArrayObject * input,
cuda_wait( grid->ga.data, GPUARRAY_CUDA_WAIT_READ );
cuda_wait( (*output)->ga.data, GPUARRAY_CUDA_WAIT_WRITE );
err = cudnnSpatialTfSamplerForward( _handle, desc, alpha_p, APPLY_SPECIFIC(xdesc),
PyGpuArray_DEV_DATA( input ), PyGpuArray_DEV_DATA( grid ), beta_p,
APPLY_SPECIFIC(ydesc), PyGpuArray_DEV_DATA( *output ) );
err = cudnnSpatialTfSamplerForward( _handle, APPLY_SPECIFIC(sptf), alpha_p,
APPLY_SPECIFIC(xdesc), PyGpuArray_DEV_DATA( input ), PyGpuArray_DEV_DATA( grid ),
beta_p, APPLY_SPECIFIC(ydesc), PyGpuArray_DEV_DATA( *output ) );
cuda_record( input->ga.data, GPUARRAY_CUDA_WAIT_READ );
cuda_record( grid->ga.data, GPUARRAY_CUDA_WAIT_READ );
......
Diff is collapsed.
Markdown format
0%
You added 0 people to this discussion. Please proceed with caution.
Please finish editing this comment first!
Register or sign in to comment