Add tensor descriptor initialization and fix memory alloc for output in spatialtf_sampler

上级 7bc63958
...@@ -23,12 +23,13 @@ void spatialtf_context_destroy( spatialtf_context_t * ctx ) ...@@ -23,12 +23,13 @@ void spatialtf_context_destroy( spatialtf_context_t * ctx )
#section support_code_struct #section support_code_struct
int int
spatialtf_sampler(PyGpuArrayObject *input, spatialtf_sampler(PyGpuArrayObject * input,
PyGpuArrayObject *om, PyGpuArrayObject * om,
PyGpuArrayObject *grid, PyGpuArrayObject * grid,
PyArrayObject * grid_dimensions,
cudnnSpatialTransformerDescriptor_t desc, cudnnSpatialTransformerDescriptor_t desc,
double alpha, double beta, double alpha, double beta,
PyGpuArrayObject **output, PyGpuArrayObject ** output,
cudnnHandle_t _handle) cudnnHandle_t _handle)
{ {
PyGpuContextObject * gpu_ctx = input->context; PyGpuContextObject * gpu_ctx = input->context;
...@@ -36,18 +37,32 @@ spatialtf_sampler(PyGpuArrayObject *input, ...@@ -36,18 +37,32 @@ spatialtf_sampler(PyGpuArrayObject *input,
void * beta_p; void * beta_p;
float af = alpha, bf = beta; float af = alpha, bf = beta;
spatialtf_context_t spatialtf_ctx; spatialtf_context_t spatialtf_ctx;
cudnnDataType_t dt;
cudnnStatus_t err = CUDNN_STATUS_SUCCESS; cudnnStatus_t err = CUDNN_STATUS_SUCCESS;
switch (input->ga.typecode) // Obtain grid dimensions
npy_int * dimensions_data = (npy_int *)PyArray_DATA( grid_dimensions );
const int width = dimensions_data[0];
const int height = dimensions_data[1];
const int num_channels = dimensions_data[2];
const int num_images = dimensions_data[3];
switch (grid->ga.typecode)
{ {
case GA_DOUBLE: case GA_DOUBLE:
alpha_p = (void *)α alpha_p = (void *)α
beta_p = (void *)β beta_p = (void *)β
dt = CUDNN_DATA_DOUBLE;
break; break;
case GA_FLOAT: case GA_FLOAT:
alpha_p = (void *)⁡
beta_p = (void *)&bf;
dt = CUDNN_DATA_FLOAT;
break;
case GA_HALF: case GA_HALF:
alpha_p = (void *)⁡ alpha_p = (void *)⁡
beta_p = (void *)&bf; beta_p = (void *)&bf;
dt = CUDNN_DATA_HALF;
break; break;
default: default:
PyErr_SetString(PyExc_TypeError, PyErr_SetString(PyExc_TypeError,
...@@ -55,14 +70,6 @@ spatialtf_sampler(PyGpuArrayObject *input, ...@@ -55,14 +70,6 @@ spatialtf_sampler(PyGpuArrayObject *input,
return -1; return -1;
} }
if ( grid->ga.typecode != GA_FLOAT &&
grid->ga.typecode != GA_DOUBLE &&
grid->ga.typecode != GA_HALF )
{
PyErr_SetString( PyExc_TypeError, "Unsupported data type for grid" );
return -1;
}
spatialtf_context_init( &spatialtf_ctx ); spatialtf_context_init( &spatialtf_ctx );
cuda_enter( gpu_ctx->ctx ); cuda_enter( gpu_ctx->ctx );
...@@ -80,12 +87,17 @@ spatialtf_sampler(PyGpuArrayObject *input, ...@@ -80,12 +87,17 @@ spatialtf_sampler(PyGpuArrayObject *input,
return -1; return -1;
} }
if ( theano_prep_output( output, PyGpuArray_NDIM(om), PyGpuArray_DIMS(om), grid->ga.typecode, err = cudnnSetTensor4dDescriptor( spatialtf_ctx.xdesc, CUDNN_TENSOR_NCHW, dt,
GA_C_ORDER, gpu_ctx ) != 0 ) num_images, num_channels, height, width );
if ( err != CUDNN_STATUS_SUCCESS )
{ {
spatialtf_context_destroy( &spatialtf_ctx ); spatialtf_context_destroy( &spatialtf_ctx );
cuda_exit( gpu_ctx->ctx ); cuda_exit( gpu_ctx->ctx );
PyErr_Format( PyExc_RuntimeError,
"Could not initialize xdesc: %s",
cudnnGetErrorString(err) );
return -1; return -1;
} }
...@@ -102,9 +114,42 @@ spatialtf_sampler(PyGpuArrayObject *input, ...@@ -102,9 +114,42 @@ spatialtf_sampler(PyGpuArrayObject *input,
return -1; return -1;
} }
err = cudnnSetTensor4dDescriptor( spatialtf_ctx.ydesc, CUDNN_TENSOR_NCHW, dt,
num_images, num_channels, height, width );
if ( err != CUDNN_STATUS_SUCCESS )
{
spatialtf_context_destroy( &spatialtf_ctx );
cuda_exit( gpu_ctx->ctx );
PyErr_Format( PyExc_RuntimeError,
"Could not initialize ydesc: %s",
cudnnGetErrorString(err) );
return -1;
}
if ( NULL == *output )
{
*output = pygpu_zeros( PyGpuArray_NDIM(om), PyGpuArray_DIMS(om), input->ga.typecode,
GA_C_ORDER, gpu_ctx, Py_None );
if ( NULL == *output )
{
spatialtf_context_destroy( &spatialtf_ctx );
cuda_exit( gpu_ctx->ctx );
PyErr_SetString( PyExc_MemoryError,
"Could allocate memory for spatial transformer's grid sampler" );
return -1;
}
}
const void * input_data = PyGpuArray_DEV_DATA( input );
const void * grid_data = PyGpuArray_DEV_DATA( grid );
void * out_data = PyGpuArray_DEV_DATA( *output );
err = cudnnSpatialTfSamplerForward( _handle, desc, alpha_p, spatialtf_ctx.xdesc, err = cudnnSpatialTfSamplerForward( _handle, desc, alpha_p, spatialtf_ctx.xdesc,
PyGpuArray_DEV_DATA( input ), PyGpuArray_DEV_DATA( grid ), beta_p, input_data, grid_data, beta_p, spatialtf_ctx.ydesc, out_data );
spatialtf_ctx.ydesc, PyGpuArray_DEV_DATA( *output ) );
if ( CUDNN_STATUS_SUCCESS != err ) if ( CUDNN_STATUS_SUCCESS != err )
{ {
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论