Commit 9f6b5886 authored by João Victor Risso

Remove desc parameter and manage descriptor within gradient Ops in spatial transformer

Parent 388f057b
#section support_code_struct

/* Descriptors for the spatial transformer input-gradient (GradI) Op.
 * The spatial transformer descriptor (sptf) is now owned by this Op,
 * so callers no longer pass one in. */
cudnnSpatialTransformerDescriptor_t APPLY_SPECIFIC(sptf);
cudnnTensorDescriptor_t APPLY_SPECIFIC(xdesc);
cudnnTensorDescriptor_t APPLY_SPECIFIC(dxdesc);
cudnnTensorDescriptor_t APPLY_SPECIFIC(dydesc);

#section init_code_struct

/* NULL-initialize every descriptor so cleanup_code_struct can tell
 * which ones were actually allocated. */
APPLY_SPECIFIC(sptf) = NULL;
APPLY_SPECIFIC(xdesc) = NULL;
APPLY_SPECIFIC(dxdesc) = NULL;
APPLY_SPECIFIC(dydesc) = NULL;
{
  cudnnStatus_t err = CUDNN_STATUS_SUCCESS;

  /* Allocate the Op-owned spatial transformer descriptor; it is
   * configured per call in dnn_sptf_gi() below. */
  err = cudnnCreateSpatialTransformerDescriptor(&APPLY_SPECIFIC(sptf));
  if (err != CUDNN_STATUS_SUCCESS)
  {
    PyErr_Format(PyExc_MemoryError,
                 "GpuDnnTransformerGradI: could not allocate spatial transformer descriptor (sptf): %s",
                 cudnnGetErrorString(err));
    FAIL;
  }

  err = cudnnCreateTensorDescriptor( &APPLY_SPECIFIC(xdesc) );
  if ( err != CUDNN_STATUS_SUCCESS )
  {
@@ -42,6 +54,9 @@ APPLY_SPECIFIC(dydesc) = NULL;
#section cleanup_code_struct

/* Destroy only the descriptors that were successfully allocated
 * (init_code_struct NULL-initializes all of them first). */
if (APPLY_SPECIFIC(sptf) != NULL)
  cudnnDestroySpatialTransformerDescriptor( APPLY_SPECIFIC(sptf) );

if ( APPLY_SPECIFIC(xdesc) != NULL )
  cudnnDestroyTensorDescriptor( APPLY_SPECIFIC(xdesc) );
@@ -57,7 +72,6 @@ int
APPLY_SPECIFIC(dnn_sptf_gi)(PyGpuArrayObject * input, APPLY_SPECIFIC(dnn_sptf_gi)(PyGpuArrayObject * input,
PyGpuArrayObject * grid, PyGpuArrayObject * grid,
PyGpuArrayObject * dy, PyGpuArrayObject * dy,
cudnnSpatialTransformerDescriptor_t desc,
PyGpuArrayObject ** input_grad, PyGpuArrayObject ** input_grad,
PyGpuArrayObject ** grid_grad, PyGpuArrayObject ** grid_grad,
cudnnHandle_t _handle) cudnnHandle_t _handle)
@@ -67,6 +81,8 @@ APPLY_SPECIFIC(dnn_sptf_gi)(PyGpuArrayObject * input,
void * beta_p;
double alpha = 1.0, beta = 0.0;
float af = alpha, bf = beta;
int out_dims[4];        /* dimensions used to configure the sptf descriptor */
cudnnDataType_t dt;     /* cuDNN data type matching input->ga.typecode */
cudnnStatus_t err = CUDNN_STATUS_SUCCESS;
switch (input->ga.typecode)
@@ -74,14 +90,17 @@ APPLY_SPECIFIC(dnn_sptf_gi)(PyGpuArrayObject * input,
case GA_DOUBLE: case GA_DOUBLE:
alpha_p = (void *)α alpha_p = (void *)α
beta_p = (void *)β beta_p = (void *)β
dt = CUDNN_DATA_DOUBLE;
break; break;
case GA_FLOAT: case GA_FLOAT:
alpha_p = (void *)⁡ alpha_p = (void *)⁡
beta_p = (void *)&bf; beta_p = (void *)&bf;
dt = CUDNN_DATA_FLOAT;
break; break;
case GA_HALF: case GA_HALF:
alpha_p = (void *)⁡ alpha_p = (void *)⁡
beta_p = (void *)&bf; beta_p = (void *)&bf;
dt = CUDNN_DATA_HALF;
break; break;
default: default:
PyErr_SetString( PyExc_TypeError, PyErr_SetString( PyExc_TypeError,
@@ -108,6 +127,24 @@ APPLY_SPECIFIC(dnn_sptf_gi)(PyGpuArrayObject * input,
GA_C_ORDER, gpu_ctx ) != 0 )
    return 1;
// Obtain output dimensions to setup descriptor
out_dims[0] = (int) PyGpuArray_DIM(input, 0); // num_images
out_dims[1] = (int) PyGpuArray_DIM(input, 1); // num_channels
out_dims[2] = (int) PyGpuArray_DIM(grid, 1); // grid height
out_dims[3] = (int) PyGpuArray_DIM(grid, 2); // grid width

// Currently, only the bilinear sampler is supported by cuDNN,
// so the sampler method is currently not available as a parameter
err = cudnnSetSpatialTransformerNdDescriptor(APPLY_SPECIFIC(sptf), CUDNN_SAMPLER_BILINEAR,
                                             dt, 4, out_dims );
if ( CUDNN_STATUS_SUCCESS != err )
{
    PyErr_Format( PyExc_MemoryError,
                  "GpuDnnTransformerGradI: could not initialize descriptor (sptf): %s",
                  cudnnGetErrorString( err ) );
    return 1;
}

if ( c_set_tensorNd( input, APPLY_SPECIFIC(xdesc) ) != 0 )
    return 1;
@@ -130,11 +167,11 @@ APPLY_SPECIFIC(dnn_sptf_gi)(PyGpuArrayObject * input,
cuda_wait( (*input_grad)->ga.data, GPUARRAY_CUDA_WAIT_WRITE );
cuda_wait( (*grid_grad)->ga.data, GPUARRAY_CUDA_WAIT_WRITE );

// Compute both gradients (w.r.t. the input and w.r.t. the sampling
// grid) in a single cuDNN call, using the Op-managed descriptor.
err = cudnnSpatialTfSamplerBackward( _handle, APPLY_SPECIFIC(sptf), alpha_p,
                                     APPLY_SPECIFIC(xdesc), PyGpuArray_DEV_DATA( input ), beta_p,
                                     APPLY_SPECIFIC(dxdesc), PyGpuArray_DEV_DATA( *input_grad ), alpha_p,
                                     APPLY_SPECIFIC(dydesc), PyGpuArray_DEV_DATA( dy ), PyGpuArray_DEV_DATA( grid ),
                                     beta_p, PyGpuArray_DEV_DATA( *grid_grad ) );

cuda_record( input->ga.data, GPUARRAY_CUDA_WAIT_READ );
cuda_record( grid->ga.data, GPUARRAY_CUDA_WAIT_READ );
......
#section support_code_struct

/* Spatial transformer descriptor owned by the theta-gradient (GradT) Op. */
cudnnSpatialTransformerDescriptor_t APPLY_SPECIFIC(sptf);

#section init_code_struct

cudnnStatus_t APPLY_SPECIFIC(err) = CUDNN_STATUS_SUCCESS;
APPLY_SPECIFIC(sptf) = NULL;

/* Allocate the descriptor once at Op initialization; it is configured
 * per call in dnn_sptf_gt() and destroyed in cleanup_code_struct. */
if ((APPLY_SPECIFIC(err) = cudnnCreateSpatialTransformerDescriptor(&APPLY_SPECIFIC(sptf))) != CUDNN_STATUS_SUCCESS)
{
  PyErr_Format(PyExc_MemoryError,
               "GpuDnnTransformerGradT: could not allocate spatial transformer descriptor (sptf): %s",
               cudnnGetErrorString(APPLY_SPECIFIC(err)));
  FAIL;
}

#section cleanup_code_struct

if (APPLY_SPECIFIC(sptf) != NULL)
  cudnnDestroySpatialTransformerDescriptor(APPLY_SPECIFIC(sptf));
#section support_code_struct
int int
APPLY_SPECIFIC(dnn_sptf_gt)(PyGpuArrayObject * dgrid, APPLY_SPECIFIC(dnn_sptf_gt)(PyGpuArrayObject * dgrid,
cudnnSpatialTransformerDescriptor_t desc,
PyGpuArrayObject ** dtheta, PyGpuArrayObject ** dtheta,
cudnnHandle_t _handle) cudnnHandle_t _handle)
{ {
PyGpuContextObject * gpu_ctx = dgrid->context; PyGpuContextObject * gpu_ctx = dgrid->context;
int num_images, height, width;
int desc_dims[4];
size_t dtheta_dims[3];
cudnnDataType_t dt;
cudnnStatus_t err = CUDNN_STATUS_SUCCESS; cudnnStatus_t err = CUDNN_STATUS_SUCCESS;
int num_images = (int) PyGpuArray_DIM( dgrid, 0 ); switch(dgrid->ga.typecode)
{
case GA_DOUBLE:
dt = CUDNN_DATA_DOUBLE;
break;
case GA_FLOAT:
dt = CUDNN_DATA_FLOAT;
break;
case GA_HALF:
dt = CUDNN_DATA_HALF;
break;
default:
PyErr_SetString( PyExc_TypeError,
"GpuDnnTransformerGradT: unsupported data type for dgrid in spatial transformer." );
return 1;
}
const size_t dtheta_dims[3] = { num_images, 2, 3 }; num_images = (int) PyGpuArray_DIM( dgrid, 0 );
height = (int) PyGpuArray_DIM( dgrid, 1 );
width = (int) PyGpuArray_DIM( dgrid, 2 );
dtheta_dims[0] = num_images;
dtheta_dims[1] = 2;
dtheta_dims[2] = 3;
if ( theano_prep_output( dtheta, 3, dtheta_dims, dgrid->ga.typecode, if ( theano_prep_output( dtheta, 3, dtheta_dims, dgrid->ga.typecode,
GA_C_ORDER, gpu_ctx ) != 0 ) GA_C_ORDER, gpu_ctx ) != 0 )
return 1; return 1;
desc_dims[0] = num_images;
// Assume number of channels is 1, because the information is not
// available or relevant here
desc_dims[1] = 1;
desc_dims[2] = height;
desc_dims[3] = width;
// Currently, only the bilinear sampler is supported by cuDNN,
// so the sampler method is currently not available as a parameter
err = cudnnSetSpatialTransformerNdDescriptor(APPLY_SPECIFIC(sptf), CUDNN_SAMPLER_BILINEAR,
dt, 4, desc_dims );
if ( CUDNN_STATUS_SUCCESS != err )
{
PyErr_Format( PyExc_MemoryError,
"GpuDnnTransformerGrid: could not initialize descriptor (sptf): %s",
cudnnGetErrorString( err ) );
return 1;
}
cuda_enter( gpu_ctx->ctx ); cuda_enter( gpu_ctx->ctx );
cuda_wait( dgrid->ga.data, GPUARRAY_CUDA_WAIT_READ ); cuda_wait( dgrid->ga.data, GPUARRAY_CUDA_WAIT_READ );
cuda_wait( (*dtheta)->ga.data, GPUARRAY_CUDA_WAIT_WRITE ); cuda_wait( (*dtheta)->ga.data, GPUARRAY_CUDA_WAIT_WRITE );
err = cudnnSpatialTfGridGeneratorBackward( _handle, desc, err = cudnnSpatialTfGridGeneratorBackward( _handle, APPLY_SPECIFIC(sptf),
PyGpuArray_DEV_DATA( dgrid ), PyGpuArray_DEV_DATA( *dtheta ) ); PyGpuArray_DEV_DATA( dgrid ), PyGpuArray_DEV_DATA( *dtheta ) );
cuda_record( dgrid->ga.data, GPUARRAY_CUDA_WAIT_READ ); cuda_record( dgrid->ga.data, GPUARRAY_CUDA_WAIT_READ );
......
Markdown formatting is supported
0%
You are adding 0 people to this discussion. Please proceed with caution.
Please finish editing this comment first!
Register or sign in to post a comment