Add additional checks of array dimensions and remove dependency on grid dimensions in sampler

Signed-off-by: João Victor Tozatti Risso <joaovictor.risso@gmail.com>

Add additional checks of array dimensions and remove dependency on grid dimensions in sampler
fda82536 · João Victor Tozatti Risso · 7cce8524 · fda82536 · fda82536
--- a/theano/gpuarray/c_code/spatialtf_sampler.c
+++ b/theano/gpuarray/c_code/spatialtf_sampler.c
@@ -25,7 +25,6 @@ void spatialtf_context_destroy( spatialtf_context_t * ctx )
 int
 spatialtf_sampler(PyGpuArrayObject * input,
                  PyGpuArrayObject * grid,
-                  PyArrayObject * grid_dimensions,
                  cudnnSpatialTransformerDescriptor_t desc,
                  double alpha, double beta,
                  PyGpuArrayObject ** output,
@@ -37,11 +36,10 @@ spatialtf_sampler(PyGpuArrayObject * input,
    float af = alpha, bf = beta;
    spatialtf_context_t spatialtf_ctx;
    cudnnDataType_t dt;
-    // Number of color channels (feature maps) is the innermost dimension
    cudnnTensorFormat_t tf = CUDNN_TENSOR_NCHW;
    cudnnStatus_t err = CUDNN_STATUS_SUCCESS;
-    if ( PyArray_DIM( grid_dimensions, 0 ) != 4 )
+    if ( PyGpuArray_NDIM( grid ) != 4 )
    {
        PyErr_SetString( PyExc_RuntimeError,
                         "grid_dimensions must have 4 dimensions" );
@@ -49,10 +47,9 @@ spatialtf_sampler(PyGpuArrayObject * input,
    }
    // Obtain grid dimensions
-    const int num_images = (int) *( (npy_int *) PyArray_GETPTR1( grid_dimensions, 0 ) );
+    const int num_images = (int) PyGpuArray_DIM( grid, 0 );
-    const int num_channels = (int) *( (npy_int *) PyArray_GETPTR1( grid_dimensions, 1 ) );
+    const int height = (int) PyGpuArray_DIM( grid, 1 );
-    const int height = (int) *( (npy_int *) PyArray_GETPTR1( grid_dimensions, 2 ) );
+    const int width = (int) PyGpuArray_DIM( grid, 2 );
-    const int width = (int) *( (npy_int *) PyArray_GETPTR1( grid_dimensions, 3 ) );
    switch (input->ga.typecode)
    {
@@ -102,17 +99,16 @@ spatialtf_sampler(PyGpuArrayObject * input,
    const int input_height = (int) PyGpuArray_DIM( input, 2 );
    const int input_width = (int) PyGpuArray_DIM( input, 3 );
-    if ( input_num_images != num_images ||
+    if ( input_num_images != num_images )
-         input_num_channels != num_channels )
    {
        PyErr_Format( PyExc_RuntimeError,
-                      "Input should have %d images and %d channels, got %d images and %d channels.",
+                      "Input should have %d images, got %d images.",
-                       num_images, num_channels, input_num_images, input_num_channels );
+                      num_images, input_num_images );
        return -1;
    }
    err = cudnnSetTensor4dDescriptor( spatialtf_ctx.xdesc, tf, dt, num_images,
-        num_channels, input_height, input_width );
+        input_num_channels, input_height, input_width );
    if ( err != CUDNN_STATUS_SUCCESS )
    {
@@ -139,7 +135,7 @@ spatialtf_sampler(PyGpuArrayObject * input,
    }
    err = cudnnSetTensor4dDescriptor( spatialtf_ctx.ydesc, tf, dt, num_images,
-        num_channels, height, width );
+        input_num_channels, height, width );
    if ( err != CUDNN_STATUS_SUCCESS )
    {
@@ -152,14 +148,14 @@ spatialtf_sampler(PyGpuArrayObject * input,
        return -1;
    }
-    const size_t out_dims[4] = { num_images, num_channels, height, width };
+    const size_t out_dims[4] = { num_images, input_num_channels, height, width };
    if ( NULL == *output ||
-         ! theano_size_check( *output, 4, &(out_dims[0]), (*output)->ga.typecode ) )
+         ! theano_size_check( *output, 4, out_dims, (*output)->ga.typecode ) )
    {
        Py_XDECREF( *output );
-        *output = pygpu_zeros( 4, &(out_dims[0]), input->ga.typecode, GA_C_ORDER,
+        *output = pygpu_empty( 4, out_dims, input->ga.typecode, GA_C_ORDER,
            gpu_ctx, Py_None );
        if ( NULL == *output )
@@ -172,10 +168,6 @@ spatialtf_sampler(PyGpuArrayObject * input,
            return -1;
        }
    }
-    else
-    {
-        GpuArray_memset( &( (*output)->ga ), 0 );
-    }
    if ( ! GpuArray_IS_C_CONTIGUOUS( &(input->ga) ) )
    {

--- a/theano/gpuarray/dnn.py
+++ b/theano/gpuarray/dnn.py
@@ -2842,7 +2842,7 @@ class GpuDnnSpatialTfDesc(COp):
    __props__ = ('dimensions', 'dtype')
    params_type = ParamsType(nimages=int_t, nchannels=int_t, height=int_t, width=int_t,
-                             nb_dims=int_t, dtype=cudnn.cudnnDataType_t)
+                             dtype=cudnn.cudnnDataType_t)
    def c_headers(self):
        return ['cudnn.h', 'cudnn_helper.h']
@@ -2862,14 +2862,10 @@ class GpuDnnSpatialTfDesc(COp):
    def __init__(self, dimensions, dtype="float32"):
        COp.__init__(self, ["c_code/spatialtf_desc.c"], "APPLY_SPECIFIC(spatialtf_desc)")
-        # dimensions must have at least width and height
-        assert len(dimensions) >= 2
-        self.dimensions = tuple(dimensions)
        # cuDNN supports only 2D transformations, therefore output tensor must
-        # not exceed 4 dimensions (width, height, num_feature_maps, num_images)
+        # have exactly 4 dimensions: (num_images, num_channels, height, width)
-        assert len(self.dimensions) <= 4
+        assert len(dimensions) == 4
+        self.dimensions = tuple(dimensions)
        assert cudnn.cudnnDataType_t.has_alias(dtype)
        self.dtype = dtype
@@ -2894,8 +2890,6 @@ class GpuDnnSpatialTfDesc(COp):
    height = property(lambda self: self.dimensions[2])
    # Grid width
    width = property(lambda self: self.dimensions[3])
-    # Number of dimensions in the output tensor
-    nb_dims = property(lambda self: len(self.dimensions))
    def c_code_cache_version(self):
        return (super(GpuDnnSpatialTfDesc, self).c_code_cache_version(), version())
@@ -2914,20 +2908,16 @@ class GpuDnnGridGenerator(DnnBase):
    def __init__(self, dtype):
        DnnBase.__init__(self, ["c_code/spatialtf_grid.c"], "spatialtf_grid")
        self.dtype = dtype
-    def dnn_context(self, node):
-        return node.outputs[0].type.context_name
    def make_node(self, grid_dimensions, theta, desc):
        context_name = infer_context_name(desc, theta)
        grid_dimensions = as_tensor_variable(grid_dimensions)
        theta = gpu_contiguous(as_gpuarray_variable(theta, context_name))
+        assert theta.dtype in ('float16', 'float32', 'float64')
        assert cudnn.cudnnDataType_t.has_alias(theta.dtype)
-        assert theta.ndim == 3
        # Allocate GPU memory for grid of coordinates
        grid = GpuArrayType(dtype=self.dtype,
@@ -2948,24 +2938,18 @@ class GpuDnnGridSampler(DnnBase):
    """
    __props__ = ('dtype',)
-    _cop_num_inputs = 6
+    _cop_num_inputs = 5
    _cop_num_outputs = 1
    def __init__(self, dtype):
        DnnBase.__init__(self, ["c_code/spatialtf_sampler.c"], "spatialtf_sampler")
        self.dtype = dtype
-    def dnn_context(self, node):
+    def make_node(self, img, grid, desc, alpha=None, beta=None):
-        return node.outputs[0].type.context_name
-    def make_node(self, img, grid, grid_dimensions, desc,
-                  alpha=None, beta=None):
        context_name = infer_context_name(img, grid)
        img = as_gpuarray_variable(img, context_name)
        grid = as_gpuarray_variable(grid, context_name)
-        grid_dimensions = as_tensor_variable(grid_dimensions)
        output = GpuArrayType(dtype=self.dtype,
                              broadcastable=img.type.ndim * (False,),
@@ -2973,11 +2957,6 @@ class GpuDnnGridSampler(DnnBase):
        if img.type.ndim != 4:
            raise TypeError('img must be a 4D tensor')
-        if output.type.ndim != 4:
-            raise TypeError('output must be a 4D tensor')
-        if img.type.ndim != output.type.ndim:
-            raise TypeError('The number of dimensions of img and output must match')
        if (not isinstance(desc.type, CDataType) or
                desc.type.ctype != 'cudnnSpatialTransformerDescriptor_t'):
@@ -2986,8 +2965,7 @@ class GpuDnnGridSampler(DnnBase):
        alpha = ensure_dt(alpha, _one, 'alpha', img.dtype)
        beta = ensure_dt(beta, _zero, 'beta', img.dtype)
-        return Apply(self, [img, grid, grid_dimensions, desc, alpha, beta],
+        return Apply(self, [img, grid, desc, alpha, beta], [output])
-                     [output])
    def L_op(self, inputs, outputs, output_grads):
        pass
@@ -2998,10 +2976,18 @@ def dnn_spatialtf(img, theta, grid_dims, alpha=None, beta=None, dtype=None):
        GPU spatial transformer using cuDNN from NVIDIA.
    """
+    # img is a 4D tensor with shape: (num_images, num_channels, height, width)
+    assert img.ndim == 4
+    # Grid dimensions must be given as a tuple of exactly 4 elements
+    assert isinstance(grid_dims, tuple)
+    assert len(grid_dims) == 4
+    # Theta is an array of transformation matrices and must have shape: (num_images, 2, 3)
+    assert theta.ndim == 3
    img = gpu_contiguous(img)
    theta = gpu_contiguous(theta)
-    dtype = get_precision(dtype, [img, theta])
+    dtype = img.dtype if dtype is None else dtype
    # Create spatial transformer descriptor
    desc = GpuDnnSpatialTfDesc(grid_dims, dtype)()
@@ -3012,8 +2998,7 @@ def dnn_spatialtf(img, theta, grid_dims, alpha=None, beta=None, dtype=None):
    # Setup grid of coordinates
    grid_coord = GpuDnnGridGenerator(dtype)(grid_dims_var, theta, desc)
-    grid_sampler = GpuDnnGridSampler(dtype)(img, grid_coord, grid_dims_var, desc,
+    grid_sampler = GpuDnnGridSampler(dtype)(img, grid_coord, desc, alpha, beta)
-                                                alpha, beta)
    return grid_sampler