Refactor spatial transformer C implementation to use helper functions

Parent commit: a203ad71
#section support_code #section support_code_struct
typedef struct __spatialtf_context { cudnnTensorDescriptor_t APPLY_SPECIFIC(xdesc);
cudnnTensorDescriptor_t xdesc; cudnnTensorDescriptor_t APPLY_SPECIFIC(ydesc);
cudnnTensorDescriptor_t ydesc;
} spatialtf_context_t;
void spatialtf_context_init( spatialtf_context_t * ctx ) #section init_code_struct
{
if ( ctx == NULL )
return;
ctx->xdesc = NULL; APPLY_SPECIFIC(xdesc) = NULL;
ctx->ydesc = NULL; APPLY_SPECIFIC(ydesc) = NULL;
}
void spatialtf_context_destroy( spatialtf_context_t * ctx )
{ {
if ( NULL != ctx->xdesc ) cudnnStatus_t err = CUDNN_STATUS_SUCCESS;
cudnnDestroyTensorDescriptor( ctx->xdesc ); err = cudnnCreateTensorDescriptor( &APPLY_SPECIFIC(xdesc) );
if ( err != CUDNN_STATUS_SUCCESS )
{
PyErr_Format( PyExc_MemoryError,
"GpuDnnTransformerGradI: failed to allocate cuDNN tensor descriptor xdesc: %s",
cudnnGetErrorString( err ) );
FAIL;
}
if ( NULL != ctx->ydesc ) err = cudnnCreateTensorDescriptor( &APPLY_SPECIFIC(ydesc) );
cudnnDestroyTensorDescriptor( ctx->ydesc ); if ( err != CUDNN_STATUS_SUCCESS )
{
PyErr_Format( PyExc_MemoryError,
"GpuDnnTransformerGradI: failed to allocate cuDNN tensor descriptor ydesc: %s",
cudnnGetErrorString( err ) );
FAIL;
}
} }
#section cleanup_code_struct
if ( APPLY_SPECIFIC(xdesc) != NULL )
cudnnDestroyTensorDescriptor( APPLY_SPECIFIC(xdesc) );
if ( APPLY_SPECIFIC(ydesc) != NULL )
cudnnDestroyTensorDescriptor( APPLY_SPECIFIC(ydesc) );
#section support_code_struct #section support_code_struct
int int
dnn_sptf(PyGpuArrayObject * input, APPLY_SPECIFIC(dnn_sptf)(PyGpuArrayObject * input,
PyGpuArrayObject * theta, PyGpuArrayObject * theta,
PyArrayObject * grid_dims, PyArrayObject * grid_dims,
cudnnSpatialTransformerDescriptor_t desc, cudnnSpatialTransformerDescriptor_t desc,
double alpha, double beta, double alpha, double beta,
PyGpuArrayObject ** output, PyGpuArrayObject ** output,
PyGpuArrayObject ** grid, PyGpuArrayObject ** grid,
cudnnHandle_t _handle) cudnnHandle_t _handle)
{ {
PyGpuContextObject * gpu_ctx = input->context; PyGpuContextObject * gpu_ctx = input->context;
void * alpha_p; void * alpha_p;
void * beta_p; void * beta_p;
float af = alpha, bf = beta; float af = alpha, bf = beta;
spatialtf_context_t spatialtf_ctx;
cudnnDataType_t dt;
cudnnTensorFormat_t tf = CUDNN_TENSOR_NCHW;
cudnnStatus_t err = CUDNN_STATUS_SUCCESS; cudnnStatus_t err = CUDNN_STATUS_SUCCESS;
int num_images, num_channels, height, width;
size_t gpu_grid_dims[4], out_dims[4];
switch (input->ga.typecode) switch (input->ga.typecode)
{ {
case GA_DOUBLE: case GA_DOUBLE:
alpha_p = (void *)α alpha_p = (void *)α
beta_p = (void *)β beta_p = (void *)β
dt = CUDNN_DATA_DOUBLE;
break; break;
case GA_FLOAT: case GA_FLOAT:
alpha_p = (void *)⁡ alpha_p = (void *)⁡
beta_p = (void *)&bf; beta_p = (void *)&bf;
dt = CUDNN_DATA_FLOAT;
break; break;
case GA_HALF: case GA_HALF:
alpha_p = (void *)⁡ alpha_p = (void *)⁡
beta_p = (void *)&bf; beta_p = (void *)&bf;
dt = CUDNN_DATA_HALF;
break; break;
default: default:
PyErr_SetString( PyExc_TypeError, PyErr_SetString( PyExc_TypeError,
"GpuDnnTransformer: unsupported type for input in spatial transformer." ); "GpuDnnTransformer: unsupported type for input in spatial transformer." );
return -1; return 1;
}
if ( ! GpuArray_IS_C_CONTIGUOUS( &(input->ga) ) )
{
PyErr_SetString( PyExc_MemoryError,
"GpuDnnTransformer: input data is not C-contiguous." );
return -1;
} }
if ( theta->ga.typecode != GA_FLOAT && if ( theta->ga.typecode != GA_FLOAT &&
...@@ -80,13 +83,7 @@ dnn_sptf(PyGpuArrayObject * input, ...@@ -80,13 +83,7 @@ dnn_sptf(PyGpuArrayObject * input,
{ {
PyErr_SetString( PyExc_TypeError, PyErr_SetString( PyExc_TypeError,
"GpuDnnTransformer: unsupported data type for theta in spatial transformer." ); "GpuDnnTransformer: unsupported data type for theta in spatial transformer." );
return -1; return 1;
}
else if ( PyGpuArray_NDIM( theta ) != 3 )
{
PyErr_Format( PyExc_RuntimeError,
"GpuDnnTransformer: theta must have three dimensions!" );
return -1;
} }
else if ( PyGpuArray_DIM( theta, 1 ) != 2 && PyGpuArray_DIM( theta, 2 ) != 3 ) else if ( PyGpuArray_DIM( theta, 1 ) != 2 && PyGpuArray_DIM( theta, 2 ) != 3 )
{ {
...@@ -94,131 +91,78 @@ dnn_sptf(PyGpuArrayObject * input, ...@@ -94,131 +91,78 @@ dnn_sptf(PyGpuArrayObject * input,
"GpuDnnTransformer: incorrect dimensions for theta, expected (%d, %d, %d), got (%d, %d, %d)", "GpuDnnTransformer: incorrect dimensions for theta, expected (%d, %d, %d), got (%d, %d, %d)",
PyGpuArray_DIMS( theta )[0], 2, 3, PyGpuArray_DIMS( theta )[0], PyGpuArray_DIMS( theta )[0], 2, 3, PyGpuArray_DIMS( theta )[0],
PyGpuArray_DIMS( theta )[1], PyGpuArray_DIMS( theta )[2] ); PyGpuArray_DIMS( theta )[1], PyGpuArray_DIMS( theta )[2] );
return -1; return 1;
}
else if ( ! GpuArray_IS_C_CONTIGUOUS( &(theta->ga) ) )
{
PyErr_SetString( PyExc_MemoryError,
"GpuDnnTransformer: theta is not C-contiguous" );
return -1;
} }
if ( PyArray_NDIM( grid_dims ) != 1 || PyArray_SIZE( grid_dims ) != 4 ) if ( PyArray_NDIM( grid_dims ) != 1 || PyArray_SIZE( grid_dims ) != 4 )
{ {
PyErr_SetString( PyExc_RuntimeError, PyErr_SetString( PyExc_MemoryError,
"GpuDnnTransformer: grid_dims must have 4 elements." ); "GpuDnnTransformer: grid_dims must have 4 elements." );
return -1; return 1;
} }
// Obtain grid dimensions // Obtain grid dimensions
const int num_images = (int) *( (npy_int *) PyArray_GETPTR1( grid_dims, 0 ) ); num_images = (int) *( (npy_int *) PyArray_GETPTR1( grid_dims, 0 ) );
const int num_channels = (int) *( (npy_int *) PyArray_GETPTR1( grid_dims, 1 ) ); num_channels = (int) *( (npy_int *) PyArray_GETPTR1( grid_dims, 1 ) );
const int height = (int) *( (npy_int *) PyArray_GETPTR1( grid_dims, 2 ) ); height = (int) *( (npy_int *) PyArray_GETPTR1( grid_dims, 2 ) );
const int width = (int) *( (npy_int *) PyArray_GETPTR1( grid_dims, 3 ) ); width = (int) *( (npy_int *) PyArray_GETPTR1( grid_dims, 3 ) );
const size_t gpu_grid_dims[4] = { num_images, height, width, 2 };
if ( width == 0 || height == 0 || num_images == 0 ) gpu_grid_dims[0] = num_images;
{ gpu_grid_dims[1] = height;
PyErr_SetString( PyExc_RuntimeError, gpu_grid_dims[2] = width;
"GpuDnnTransformer: grid_dims has a dimension with value zero" ); gpu_grid_dims[3] = 2;
return -1;
}
spatialtf_context_init( &spatialtf_ctx ); out_dims[0] = num_images;
out_dims[1] = num_channels;
out_dims[2] = height;
out_dims[3] = width;
cuda_enter( gpu_ctx->ctx ); if ( width == 0 || height == 0 || num_images == 0 )
if ( theano_prep_output( grid, 4, gpu_grid_dims, input->ga.typecode,
GA_C_ORDER, gpu_ctx ) != 0 )
{ {
PyErr_SetString( PyExc_RuntimeError, PyErr_SetString( PyExc_RuntimeError,
"GpuDnnTransformer: could not allocate memory for grid of coordinates" ); "GpuDnnTransformer: grid_dims has a dimension with value zero" );
return -1; return 1;
}
err = cudnnCreateTensorDescriptor( &(spatialtf_ctx.xdesc) );
if ( err != CUDNN_STATUS_SUCCESS )
{
spatialtf_context_destroy( &spatialtf_ctx );
cuda_exit( gpu_ctx->ctx );
PyErr_Format( PyExc_RuntimeError,
"GpuDnnTransformer: could not create xdesc: %s",
cudnnGetErrorString(err) );
return -1;
} }
// In the input tensor, we must use its width and height, instead if ( PyGpuArray_DIM( input, 0 ) != num_images )
// of the grid's width and height. The number of images and channels
// should be the same as the grid dimensions
const int input_num_images = (int) PyGpuArray_DIM( input, 0 );
const int input_num_channels = (int) PyGpuArray_DIM( input, 1 );
const int input_height = (int) PyGpuArray_DIM( input, 2 );
const int input_width = (int) PyGpuArray_DIM( input, 3 );
if ( input_num_images != num_images || input_num_channels != num_channels )
{ {
PyErr_Format( PyExc_RuntimeError, PyErr_Format( PyExc_RuntimeError,
"GpuDnnTransformer: expected input to have %d inputs, got %d inputs.", "GpuDnnTransformer: expected input to have %d inputs, got %d inputs.",
num_images, input_num_images ); num_images, PyGpuArray_DIM( input, 0 ) );
return -1; return 1;
} }
else if ( PyGpuArray_DIM( input, 1 ) != num_channels )
err = cudnnSetTensor4dDescriptor( spatialtf_ctx.xdesc, tf, dt, num_images,
input_num_channels, input_height, input_width );
if ( err != CUDNN_STATUS_SUCCESS )
{ {
spatialtf_context_destroy( &spatialtf_ctx );
cuda_exit( gpu_ctx->ctx );
PyErr_Format( PyExc_RuntimeError, PyErr_Format( PyExc_RuntimeError,
"GpuDnnTransformer: failed to initialize xdesc: %s", "GpuDnnTransformer: expected input to have %d channels, got %d channels.",
cudnnGetErrorString(err) ); num_channels, PyGpuArray_DIM( input, 1 ) );
return -1; return 1;
} }
err = cudnnCreateTensorDescriptor( &(spatialtf_ctx.ydesc) ); if ( theano_prep_output( grid, 4, gpu_grid_dims, input->ga.typecode,
GA_C_ORDER, gpu_ctx ) != 0 )
if ( err != CUDNN_STATUS_SUCCESS )
{
spatialtf_context_destroy( &spatialtf_ctx );
cuda_exit( gpu_ctx->ctx );
PyErr_Format( PyExc_RuntimeError,
"GpuDnnTransformer: failed to create ydesc: %s",
cudnnGetErrorString(err) );
return -1;
}
err = cudnnSetTensor4dDescriptor( spatialtf_ctx.ydesc, tf, dt, num_images,
input_num_channels, height, width );
if ( err != CUDNN_STATUS_SUCCESS )
{ {
spatialtf_context_destroy( &spatialtf_ctx ); PyErr_SetString( PyExc_RuntimeError,
cuda_exit( gpu_ctx->ctx ); "GpuDnnTransformer: could not allocate memory for grid of coordinates" );
return 1;
PyErr_Format( PyExc_RuntimeError,
"GpuDnnTransformer: failed to initialize ydesc: %s",
cudnnGetErrorString(err) );
return -1;
} }
const size_t out_dims[4] = { num_images, input_num_channels, height, width };
if ( theano_prep_output( output, 4, out_dims, input->ga.typecode, if ( theano_prep_output( output, 4, out_dims, input->ga.typecode,
GA_C_ORDER, gpu_ctx ) != 0 ) GA_C_ORDER, gpu_ctx ) != 0 )
{ {
spatialtf_context_destroy( &spatialtf_ctx );
cuda_exit( gpu_ctx->ctx );
PyErr_SetString( PyExc_MemoryError, PyErr_SetString( PyExc_MemoryError,
"GpuDnnTransformer: could not allocate memory for grid sampler" ); "GpuDnnTransformer: could not allocate memory for grid sampler" );
return -1; return 1;
} }
if ( c_set_tensorNd( input, APPLY_SPECIFIC(xdesc) ) != 0 )
return 1;
if ( c_set_tensorNd( *output, APPLY_SPECIFIC(ydesc) ) != 0 )
return 1;
cuda_enter( gpu_ctx->ctx );
cuda_wait( input->ga.data, GPUARRAY_CUDA_WAIT_READ ); cuda_wait( input->ga.data, GPUARRAY_CUDA_WAIT_READ );
cuda_wait( theta->ga.data, GPUARRAY_CUDA_WAIT_READ ); cuda_wait( theta->ga.data, GPUARRAY_CUDA_WAIT_READ );
cuda_wait( (*grid)->ga.data, GPUARRAY_CUDA_WAIT_WRITE ); cuda_wait( (*grid)->ga.data, GPUARRAY_CUDA_WAIT_WRITE );
...@@ -230,32 +174,30 @@ dnn_sptf(PyGpuArrayObject * input, ...@@ -230,32 +174,30 @@ dnn_sptf(PyGpuArrayObject * input,
if ( CUDNN_STATUS_SUCCESS != err ) if ( CUDNN_STATUS_SUCCESS != err )
{ {
PyErr_Format( PyExc_RuntimeError, PyErr_Format( PyExc_RuntimeError,
"GpuDnnTransformer: failed to create grid of coordinates: %s", "GpuDnnTransformer: could not create grid of coordinates: %s",
cudnnGetErrorString( err ) ); cudnnGetErrorString( err ) );
return -1; cuda_exit( gpu_ctx->ctx );
return 1;
} }
err = cudnnSpatialTfSamplerForward( _handle, desc, alpha_p, spatialtf_ctx.xdesc, err = cudnnSpatialTfSamplerForward( _handle, desc, alpha_p, APPLY_SPECIFIC(xdesc),
PyGpuArray_DEV_DATA( input ), PyGpuArray_DEV_DATA( *grid ), beta_p, PyGpuArray_DEV_DATA( input ), PyGpuArray_DEV_DATA( *grid ), beta_p,
spatialtf_ctx.ydesc, PyGpuArray_DEV_DATA( *output ) ); APPLY_SPECIFIC(ydesc), PyGpuArray_DEV_DATA( *output ) );
cuda_record( input->ga.data, GPUARRAY_CUDA_WAIT_READ ); cuda_record( input->ga.data, GPUARRAY_CUDA_WAIT_READ );
cuda_record( theta->ga.data, GPUARRAY_CUDA_WAIT_READ ); cuda_record( theta->ga.data, GPUARRAY_CUDA_WAIT_READ );
cuda_record( (*grid)->ga.data, GPUARRAY_CUDA_WAIT_WRITE ); cuda_record( (*grid)->ga.data, GPUARRAY_CUDA_WAIT_WRITE );
cuda_record( (*output)->ga.data, GPUARRAY_CUDA_WAIT_WRITE ); cuda_record( (*output)->ga.data, GPUARRAY_CUDA_WAIT_WRITE );
cuda_exit( gpu_ctx->ctx );
if ( CUDNN_STATUS_SUCCESS != err ) if ( CUDNN_STATUS_SUCCESS != err )
{ {
PyErr_Format( PyExc_RuntimeError, PyErr_Format( PyExc_RuntimeError,
"GpuDnnTransformer: failed to create grid sampler: %s", "GpuDnnTransformer: could not create grid sampler: %s",
cudnnGetErrorString( err ) ); cudnnGetErrorString( err ) );
spatialtf_context_destroy( &spatialtf_ctx ); return 1;
cuda_exit( gpu_ctx->ctx );
return -1;
} }
spatialtf_context_destroy( &spatialtf_ctx );
cuda_exit( gpu_ctx->ctx );
return 0; return 0;
} }
...@@ -2898,7 +2898,7 @@ class GpuDnnTransformer(DnnBase): ...@@ -2898,7 +2898,7 @@ class GpuDnnTransformer(DnnBase):
default_output = 0 default_output = 0
def __init__(self, dtype): def __init__(self, dtype):
DnnBase.__init__(self, ["c_code/dnn_sptf.c"], "dnn_sptf") DnnBase.__init__(self, ["c_code/dnn_sptf.c"], "APPLY_SPECIFIC(dnn_sptf)")
self.dtype = dtype self.dtype = dtype
def make_node(self, img, theta, output, grid_dims, desc, alpha=None, beta=None): def make_node(self, img, theta, output, grid_dims, desc, alpha=None, beta=None):
......
Markdown formatting is supported
0%
You are adding 0 people to this discussion. Please proceed with caution.
Please finish editing this comment first!
Register or sign in to post a comment