Commit 388f057b authored by João Victor Risso

Remove spatial transformer descriptor from grid and sampler Ops

Parent ed89dc9e
#section support_code_apply

/**
 * Create and configure a cuDNN spatial transformer descriptor from the
 * requested output dimensions.
 *
 * out_dims: 1-D array holding 4 npy_int64 values interpreted as
 *           (num_images, num_channels, height, width) of the transformer
 *           output tensor.
 * desc:     receives the newly created descriptor on success.
 * params:   op parameters; params->precision selects the cuDNN data type.
 *
 * Returns 0 on success. Returns 1 on failure with a Python exception set;
 * on the initialization-failure path the descriptor created here is
 * destroyed and *desc is reset to NULL so the caller never sees a
 * half-initialized handle (previously this path leaked the descriptor).
 */
int APPLY_SPECIFIC(dnn_sptf_desc)(PyArrayObject * out_dims,
                                  cudnnSpatialTransformerDescriptor_t * desc,
                                  PARAMS_TYPE * params)
{
    cudnnStatus_t err;

    const int nimages   = (int) *((npy_int64 *) PyArray_GETPTR1(out_dims, 0));
    const int nchannels = (int) *((npy_int64 *) PyArray_GETPTR1(out_dims, 1));
    const int height    = (int) *((npy_int64 *) PyArray_GETPTR1(out_dims, 2));
    const int width     = (int) *((npy_int64 *) PyArray_GETPTR1(out_dims, 3));

    // cuDNN rejects zero-sized transformer outputs; fail early with a
    // clearer message than the library would give.
    if ( nimages == 0 || nchannels == 0 || height == 0 || width == 0 )
    {
        PyErr_SetString( PyExc_RuntimeError,
                         "GpuDnnTransformerDesc: invalid grid dimensions" );
        return 1;
    }

    // num_images, num_channels, height, width
    const int out_tensor_dims[4] = { nimages, nchannels, height, width };

    err = cudnnCreateSpatialTransformerDescriptor( desc );
    if ( CUDNN_STATUS_SUCCESS != err )
    {
        PyErr_Format( PyExc_MemoryError,
                      "GpuDnnTransformerDesc: could not allocate descriptor: %s",
                      cudnnGetErrorString( err ) );
        return 1;
    }

    // Currently, only the bilinear sampler is supported by cuDNN,
    // so it is not available as a parameter
    err = cudnnSetSpatialTransformerNdDescriptor( *desc, CUDNN_SAMPLER_BILINEAR,
                                                  params->precision, 4,
                                                  out_tensor_dims );
    if ( CUDNN_STATUS_SUCCESS != err )
    {
        // Bug fix: destroy the descriptor we just created so a failed
        // initialization does not leak it, and NULL it out so the caller
        // cannot use (or double-free) a stale handle.
        cudnnDestroySpatialTransformerDescriptor( *desc );
        *desc = NULL;
        PyErr_Format( PyExc_MemoryError,
                      "GpuDnnTransformerDesc: could not initialize descriptor: %s",
                      cudnnGetErrorString( err ) );
        return 1;
    }

    return 0;
}
#section support_code_struct #section support_code_struct
cudnnSpatialTransformerDescriptor_t APPLY_SPECIFIC(sptf);
#section init_code_struct
cudnnStatus_t APPLY_SPECIFIC(err) = CUDNN_STATUS_SUCCESS;
APPLY_SPECIFIC(sptf) = NULL;
if ((APPLY_SPECIFIC(err) = cudnnCreateSpatialTransformerDescriptor(&APPLY_SPECIFIC(sptf))) != CUDNN_STATUS_SUCCESS)
{
PyErr_Format(PyExc_MemoryError,
"GpuDnnTransformerGrid: could not allocate spatial transformer descriptor (sptf): %s",
cudnnGetErrorString(APPLY_SPECIFIC(err)));
FAIL;
}
#section cleanup_code_struct
if (APPLY_SPECIFIC(sptf) != NULL) { cudnnDestroySpatialTransformerDescriptor(APPLY_SPECIFIC(sptf)); }
#section support_code_struct
int int
APPLY_SPECIFIC(dnn_sptf_grid)(PyGpuArrayObject * theta, APPLY_SPECIFIC(dnn_sptf_grid)(PyGpuArrayObject * theta,
PyArrayObject * out_dims, PyArrayObject * out_dims,
cudnnSpatialTransformerDescriptor_t desc,
PyGpuArrayObject ** grid, PyGpuArrayObject ** grid,
cudnnHandle_t _handle) cudnnHandle_t _handle)
{ {
PyGpuContextObject * gpu_ctx = theta->context; PyGpuContextObject * gpu_ctx = theta->context;
size_t grid_dims[4]; size_t grid_dims[4];
int num_images, num_channels, height, width; int num_images, num_channels, height, width;
int desc_dims[4];
cudnnDataType_t dt;
cudnnStatus_t err = CUDNN_STATUS_SUCCESS; cudnnStatus_t err = CUDNN_STATUS_SUCCESS;
if ( theta->ga.typecode != GA_FLOAT && switch(theta->ga.typecode)
theta->ga.typecode != GA_DOUBLE &&
theta->ga.typecode != GA_HALF )
{ {
case GA_DOUBLE:
dt = CUDNN_DATA_DOUBLE;
break;
case GA_FLOAT:
dt = CUDNN_DATA_FLOAT;
break;
case GA_HALF:
dt = CUDNN_DATA_HALF;
break;
default:
PyErr_SetString( PyExc_TypeError, PyErr_SetString( PyExc_TypeError,
"GpuDnnTransformerGrid: unsupported data type for theta in spatial transformer." ); "GpuDnnTransformerGrid: unsupported data type for theta in spatial transformer." );
return 1; return 1;
} }
else if ( PyGpuArray_DIM( theta, 1 ) != 2 || PyGpuArray_DIM( theta, 2 ) != 3 )
if ( PyGpuArray_DIM( theta, 1 ) != 2 || PyGpuArray_DIM( theta, 2 ) != 3 )
{ {
PyErr_Format( PyExc_RuntimeError, PyErr_Format( PyExc_RuntimeError,
"GpuDnnTransformerGrid: incorrect dimensions for theta, expected (%d, %d, %d), got (%d, %d, %d)", "GpuDnnTransformerGrid: incorrect dimensions for theta, expected (%d, %d, %d), got (%d, %d, %d)",
...@@ -38,14 +69,33 @@ APPLY_SPECIFIC(dnn_sptf_grid)(PyGpuArrayObject * theta, ...@@ -38,14 +69,33 @@ APPLY_SPECIFIC(dnn_sptf_grid)(PyGpuArrayObject * theta,
// Obtain output dimensions // Obtain output dimensions
num_images = (int) *( (npy_int64 *) PyArray_GETPTR1( out_dims, 0 ) ); num_images = (int) *( (npy_int64 *) PyArray_GETPTR1( out_dims, 0 ) );
num_channels = (int) *( (npy_int64 *) PyArray_GETPTR1( out_dims, 1 ) );
height = (int) *( (npy_int64 *) PyArray_GETPTR1( out_dims, 2 ) ); height = (int) *( (npy_int64 *) PyArray_GETPTR1( out_dims, 2 ) );
width = (int) *( (npy_int64 *) PyArray_GETPTR1( out_dims, 3 ) ); width = (int) *( (npy_int64 *) PyArray_GETPTR1( out_dims, 3 ) );
// Set grid dimensions
// Set transformed output dimensions to setup the descriptor
desc_dims[0] = num_images;
desc_dims[1] = num_channels;
desc_dims[2] = height;
desc_dims[3] = width;
// Set sampling grid dimensions
grid_dims[0] = num_images; grid_dims[0] = num_images;
grid_dims[1] = height; grid_dims[1] = height;
grid_dims[2] = width; grid_dims[2] = width;
grid_dims[3] = 2; grid_dims[3] = 2;
// Currently, only the bilinear sampler is supported by cuDNN,
// so the sampler method is currently not available as a parameter
err = cudnnSetSpatialTransformerNdDescriptor(APPLY_SPECIFIC(sptf), CUDNN_SAMPLER_BILINEAR,
dt, 4, desc_dims );
if ( CUDNN_STATUS_SUCCESS != err )
{
PyErr_Format( PyExc_MemoryError,
"GpuDnnTransformerGrid: could not initialize descriptor (sptf): %s",
cudnnGetErrorString( err ) );
return 1;
}
if ( theano_prep_output( grid, 4, grid_dims, theta->ga.typecode, if ( theano_prep_output( grid, 4, grid_dims, theta->ga.typecode,
GA_C_ORDER, gpu_ctx ) != 0 ) GA_C_ORDER, gpu_ctx ) != 0 )
{ {
...@@ -59,8 +109,8 @@ APPLY_SPECIFIC(dnn_sptf_grid)(PyGpuArrayObject * theta, ...@@ -59,8 +109,8 @@ APPLY_SPECIFIC(dnn_sptf_grid)(PyGpuArrayObject * theta,
cuda_wait( theta->ga.data, GPUARRAY_CUDA_WAIT_READ ); cuda_wait( theta->ga.data, GPUARRAY_CUDA_WAIT_READ );
cuda_wait( (*grid)->ga.data, GPUARRAY_CUDA_WAIT_WRITE ); cuda_wait( (*grid)->ga.data, GPUARRAY_CUDA_WAIT_WRITE );
err = cudnnSpatialTfGridGeneratorForward( _handle, desc, PyGpuArray_DEV_DATA( theta ), err = cudnnSpatialTfGridGeneratorForward( _handle, APPLY_SPECIFIC(sptf),
PyGpuArray_DEV_DATA( *grid ) ); PyGpuArray_DEV_DATA( theta ), PyGpuArray_DEV_DATA( *grid ) );
cuda_record( theta->ga.data, GPUARRAY_CUDA_WAIT_READ ); cuda_record( theta->ga.data, GPUARRAY_CUDA_WAIT_READ );
cuda_record( (*grid)->ga.data, GPUARRAY_CUDA_WAIT_WRITE ); cuda_record( (*grid)->ga.data, GPUARRAY_CUDA_WAIT_WRITE );
......
#section support_code_struct #section support_code_struct
cudnnSpatialTransformerDescriptor_t APPLY_SPECIFIC(sptf);
cudnnTensorDescriptor_t APPLY_SPECIFIC(xdesc); cudnnTensorDescriptor_t APPLY_SPECIFIC(xdesc);
cudnnTensorDescriptor_t APPLY_SPECIFIC(ydesc); cudnnTensorDescriptor_t APPLY_SPECIFIC(ydesc);
#section init_code_struct #section init_code_struct
APPLY_SPECIFIC(sptf) = NULL;
APPLY_SPECIFIC(xdesc) = NULL; APPLY_SPECIFIC(xdesc) = NULL;
APPLY_SPECIFIC(ydesc) = NULL; APPLY_SPECIFIC(ydesc) = NULL;
{ {
cudnnStatus_t err = CUDNN_STATUS_SUCCESS; cudnnStatus_t err = CUDNN_STATUS_SUCCESS;
err = cudnnCreateSpatialTransformerDescriptor(&APPLY_SPECIFIC(sptf));
if (err != CUDNN_STATUS_SUCCESS)
{
PyErr_Format(PyExc_MemoryError,
"GpuDnnTransformerSampler: could not allocate spatial transformer descriptor (sptf): %s",
cudnnGetErrorString( err ));
FAIL;
}
err = cudnnCreateTensorDescriptor( &APPLY_SPECIFIC(xdesc) ); err = cudnnCreateTensorDescriptor( &APPLY_SPECIFIC(xdesc) );
if ( err != CUDNN_STATUS_SUCCESS ) if ( err != CUDNN_STATUS_SUCCESS )
{ {
...@@ -31,6 +43,9 @@ APPLY_SPECIFIC(ydesc) = NULL; ...@@ -31,6 +43,9 @@ APPLY_SPECIFIC(ydesc) = NULL;
#section cleanup_code_struct #section cleanup_code_struct
if (APPLY_SPECIFIC(sptf) != NULL)
cudnnDestroySpatialTransformerDescriptor(APPLY_SPECIFIC(sptf));
if ( APPLY_SPECIFIC(xdesc) != NULL ) if ( APPLY_SPECIFIC(xdesc) != NULL )
cudnnDestroyTensorDescriptor( APPLY_SPECIFIC(xdesc) ); cudnnDestroyTensorDescriptor( APPLY_SPECIFIC(xdesc) );
...@@ -42,7 +57,6 @@ if ( APPLY_SPECIFIC(ydesc) != NULL ) ...@@ -42,7 +57,6 @@ if ( APPLY_SPECIFIC(ydesc) != NULL )
int int
APPLY_SPECIFIC(dnn_sptf_sampler)(PyGpuArrayObject * input, APPLY_SPECIFIC(dnn_sptf_sampler)(PyGpuArrayObject * input,
PyGpuArrayObject * grid, PyGpuArrayObject * grid,
cudnnSpatialTransformerDescriptor_t desc,
PyGpuArrayObject ** output, PyGpuArrayObject ** output,
cudnnHandle_t _handle) cudnnHandle_t _handle)
{ {
...@@ -52,6 +66,8 @@ APPLY_SPECIFIC(dnn_sptf_sampler)(PyGpuArrayObject * input, ...@@ -52,6 +66,8 @@ APPLY_SPECIFIC(dnn_sptf_sampler)(PyGpuArrayObject * input,
double alpha = 1.0, beta = 0.0; double alpha = 1.0, beta = 0.0;
float af = alpha, bf = beta; float af = alpha, bf = beta;
size_t out_dims[4]; size_t out_dims[4];
int desc_dims[4];
cudnnDataType_t dt;
cudnnStatus_t err = CUDNN_STATUS_SUCCESS; cudnnStatus_t err = CUDNN_STATUS_SUCCESS;
switch (input->ga.typecode) switch (input->ga.typecode)
...@@ -59,14 +75,17 @@ APPLY_SPECIFIC(dnn_sptf_sampler)(PyGpuArrayObject * input, ...@@ -59,14 +75,17 @@ APPLY_SPECIFIC(dnn_sptf_sampler)(PyGpuArrayObject * input,
case GA_DOUBLE: case GA_DOUBLE:
alpha_p = (void *)α alpha_p = (void *)α
beta_p = (void *)β beta_p = (void *)β
dt = CUDNN_DATA_DOUBLE;
break; break;
case GA_FLOAT: case GA_FLOAT:
alpha_p = (void *)⁡ alpha_p = (void *)⁡
beta_p = (void *)&bf; beta_p = (void *)&bf;
dt = CUDNN_DATA_FLOAT;
break; break;
case GA_HALF: case GA_HALF:
alpha_p = (void *)⁡ alpha_p = (void *)⁡
beta_p = (void *)&bf; beta_p = (void *)&bf;
dt = CUDNN_DATA_HALF;
break; break;
default: default:
PyErr_SetString( PyExc_TypeError, PyErr_SetString( PyExc_TypeError,
...@@ -78,6 +97,11 @@ APPLY_SPECIFIC(dnn_sptf_sampler)(PyGpuArrayObject * input, ...@@ -78,6 +97,11 @@ APPLY_SPECIFIC(dnn_sptf_sampler)(PyGpuArrayObject * input,
out_dims[1] = (size_t) PyGpuArray_DIM(input, 1); // num_channels out_dims[1] = (size_t) PyGpuArray_DIM(input, 1); // num_channels
out_dims[2] = (size_t) PyGpuArray_DIM(grid, 1); // grid height out_dims[2] = (size_t) PyGpuArray_DIM(grid, 1); // grid height
out_dims[3] = (size_t) PyGpuArray_DIM(grid, 2); // grid width out_dims[3] = (size_t) PyGpuArray_DIM(grid, 2); // grid width
// Set output dimensions for the descriptor setup
desc_dims[0] = (int) out_dims[0];
desc_dims[1] = (int) out_dims[1];
desc_dims[2] = (int) out_dims[2];
desc_dims[3] = (int) out_dims[3];
if ( out_dims[0] == 0 || out_dims[1] == 0 || out_dims[2] == 0 || out_dims[3] == 0 ) if ( out_dims[0] == 0 || out_dims[1] == 0 || out_dims[2] == 0 || out_dims[3] == 0 )
{ {
...@@ -94,6 +118,18 @@ APPLY_SPECIFIC(dnn_sptf_sampler)(PyGpuArrayObject * input, ...@@ -94,6 +118,18 @@ APPLY_SPECIFIC(dnn_sptf_sampler)(PyGpuArrayObject * input,
return 1; return 1;
} }
// Currently, only the bilinear sampler is supported by cuDNN,
// so the sampler method is currently not available as a parameter
err = cudnnSetSpatialTransformerNdDescriptor(APPLY_SPECIFIC(sptf), CUDNN_SAMPLER_BILINEAR,
dt, 4, desc_dims );
if ( CUDNN_STATUS_SUCCESS != err )
{
PyErr_Format( PyExc_MemoryError,
"GpuDnnTransformerSampler: could not initialize descriptor: %s",
cudnnGetErrorString( err ) );
return 1;
}
if ( c_set_tensorNd( input, APPLY_SPECIFIC(xdesc) ) != 0 ) if ( c_set_tensorNd( input, APPLY_SPECIFIC(xdesc) ) != 0 )
return 1; return 1;
...@@ -106,9 +142,9 @@ APPLY_SPECIFIC(dnn_sptf_sampler)(PyGpuArrayObject * input, ...@@ -106,9 +142,9 @@ APPLY_SPECIFIC(dnn_sptf_sampler)(PyGpuArrayObject * input,
cuda_wait( grid->ga.data, GPUARRAY_CUDA_WAIT_READ ); cuda_wait( grid->ga.data, GPUARRAY_CUDA_WAIT_READ );
cuda_wait( (*output)->ga.data, GPUARRAY_CUDA_WAIT_WRITE ); cuda_wait( (*output)->ga.data, GPUARRAY_CUDA_WAIT_WRITE );
err = cudnnSpatialTfSamplerForward( _handle, desc, alpha_p, APPLY_SPECIFIC(xdesc), err = cudnnSpatialTfSamplerForward( _handle, APPLY_SPECIFIC(sptf), alpha_p,
PyGpuArray_DEV_DATA( input ), PyGpuArray_DEV_DATA( grid ), beta_p, APPLY_SPECIFIC(xdesc), PyGpuArray_DEV_DATA( input ), PyGpuArray_DEV_DATA( grid ),
APPLY_SPECIFIC(ydesc), PyGpuArray_DEV_DATA( *output ) ); beta_p, APPLY_SPECIFIC(ydesc), PyGpuArray_DEV_DATA( *output ) );
cuda_record( input->ga.data, GPUARRAY_CUDA_WAIT_READ ); cuda_record( input->ga.data, GPUARRAY_CUDA_WAIT_READ );
cuda_record( grid->ga.data, GPUARRAY_CUDA_WAIT_READ ); cuda_record( grid->ga.data, GPUARRAY_CUDA_WAIT_READ );
......
Diff is collapsed.
Markdown is supported
0%
You are about to add 0 people to the discussion. Proceed with caution.
Please finish editing this comment first!
Register or sign in to comment