Change spatialtf implementation to use NCHW tensors

Parent commit: 0c5b9306
......@@ -5,8 +5,15 @@ int APPLY_SPECIFIC(spatialtf_desc)(cudnnSpatialTransformerDescriptor_t * desc,
{
cudnnStatus_t err;
// num_channels, width, height, num_images
const int out_tensor_dims[4] = { params->nimages, params->height, params->width, params->nchannels };
if ( params->nimages == 0 || params->nchannels == 0 ||
params->height == 0 || params->width == 0 )
{
PyErr_SetString( PyExc_RuntimeError, "Invalid grid dimensions" );
return -1;
}
// num_images, num_channels, height, width
const int out_tensor_dims[4] = { params->nimages, params->nchannels, params->height, params->width };
err = cudnnCreateSpatialTransformerDescriptor( desc );
if ( CUDNN_STATUS_SUCCESS != err )
......
......@@ -43,8 +43,9 @@ spatialtf_grid(PyArrayObject * grid_dimensions,
// Obtain grid dimensions
const size_t num_images = (size_t) *( (npy_int *) PyArray_GETPTR1( grid_dimensions, 0 ) );
const size_t height = (size_t) *( (npy_int *) PyArray_GETPTR1( grid_dimensions, 1 ) );
const size_t width = (size_t) *( (npy_int *) PyArray_GETPTR1( grid_dimensions, 2 ) );
// Dimension 1 is the number of image channels
const size_t height = (size_t) *( (npy_int *) PyArray_GETPTR1( grid_dimensions, 2 ) );
const size_t width = (size_t) *( (npy_int *) PyArray_GETPTR1( grid_dimensions, 3 ) );
// Grid of coordinates is of size num_images * height * width * 2 for a 2D transformation
const size_t grid_dims[4] = { num_images, height, width, 2 };
......
......@@ -38,7 +38,7 @@ spatialtf_sampler(PyGpuArrayObject * input,
spatialtf_context_t spatialtf_ctx;
cudnnDataType_t dt;
// Number of color channels (feature maps) is the innermost dimension
cudnnTensorFormat_t tf = CUDNN_TENSOR_NHWC;
cudnnTensorFormat_t tf = CUDNN_TENSOR_NCHW;
cudnnStatus_t err = CUDNN_STATUS_SUCCESS;
if ( PyArray_DIM( grid_dimensions, 0 ) != 4 )
......@@ -50,11 +50,11 @@ spatialtf_sampler(PyGpuArrayObject * input,
// Obtain grid dimensions
const int num_images = (int) *( (npy_int *) PyArray_GETPTR1( grid_dimensions, 0 ) );
const int height = (int) *( (npy_int *) PyArray_GETPTR1( grid_dimensions, 1 ) );
const int width = (int) *( (npy_int *) PyArray_GETPTR1( grid_dimensions, 2 ) );
const int num_channels = (int) *( (npy_int *) PyArray_GETPTR1( grid_dimensions, 3 ) );
const int num_channels = (int) *( (npy_int *) PyArray_GETPTR1( grid_dimensions, 1 ) );
const int height = (int) *( (npy_int *) PyArray_GETPTR1( grid_dimensions, 2 ) );
const int width = (int) *( (npy_int *) PyArray_GETPTR1( grid_dimensions, 3 ) );
switch (grid->ga.typecode)
switch (input->ga.typecode)
{
case GA_DOUBLE:
alpha_p = (void *)α
......@@ -98,9 +98,9 @@ spatialtf_sampler(PyGpuArrayObject * input,
// of the grid's width and height. The number of images and channels
// should be the same as the grid dimensions
const int input_num_images = (int) PyGpuArray_DIM( input, 0 );
const int input_height = (int) PyGpuArray_DIM( input, 1 );
const int input_width = (int) PyGpuArray_DIM( input, 2 );
const int input_num_channels = (int) PyGpuArray_DIM( input, 3 );
const int input_num_channels = (int) PyGpuArray_DIM( input, 1 );
const int input_height = (int) PyGpuArray_DIM( input, 2 );
const int input_width = (int) PyGpuArray_DIM( input, 3 );
if ( input_num_images != num_images ||
input_num_channels != num_channels )
......@@ -154,8 +154,7 @@ spatialtf_sampler(PyGpuArrayObject * input,
if ( NULL == *output )
{
// (num_images, height, width, num_channels )
const size_t out_dims[4] = { num_images, height, width, num_channels };
const size_t out_dims[4] = { num_images, num_channels, height, width };
*output = pygpu_zeros( 4, &(out_dims[0]), input->ga.typecode, GA_C_ORDER,
gpu_ctx, Py_None );
......
......@@ -2841,7 +2841,7 @@ class GpuDnnSpatialTfDesc(COp):
"""
__props__ = ('dimensions', 'precision')
params_type = ParamsType(nimages=int_t, height=int_t, width=int_t, nchannels=int_t,
params_type = ParamsType(nimages=int_t, nchannels=int_t, height=int_t, width=int_t,
nb_dims=int_t, precision=cudnn.cudnnDataType_t)
def c_headers(self):
......@@ -2886,14 +2886,14 @@ class GpuDnnSpatialTfDesc(COp):
out.tag.values_eq_approx = tensor.type.values_eq_approx_always_true
return node
# Grid width
# Number of images
nimages = property(lambda self: self.dimensions[0])
# Number of channels
nchannels = property(lambda self: self.dimensions[1])
# Grid height
height = property(lambda self: self.dimensions[1])
# Number of feature maps
width = property(lambda self: self.dimensions[2])
# Number of images
nchannels = property(lambda self: self.dimensions[3])
height = property(lambda self: self.dimensions[2])
# Grid width
width = property(lambda self: self.dimensions[3])
# Number of dimensions in the output tensor
nb_dims = property(lambda self: len(self.dimensions))
......
Markdown is supported
0%
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Register or sign in to comment