Merge spatial transformer implementation into a single Op, GpuDnnTransformer

Parent commit: 09e362f2
#section support_code #section support_code
typedef struct __spatialtf_context { typedef struct __spatialtf_context {
PyGpuArrayObject * grid;
cudnnTensorDescriptor_t xdesc; cudnnTensorDescriptor_t xdesc;
cudnnTensorDescriptor_t ydesc; cudnnTensorDescriptor_t ydesc;
} spatialtf_context_t; } spatialtf_context_t;
void spatialtf_context_init( spatialtf_context_t * ctx ) void spatialtf_context_init( spatialtf_context_t * ctx )
{ {
if ( ctx == NULL )
return;
ctx->grid = NULL;
ctx->xdesc = NULL; ctx->xdesc = NULL;
ctx->ydesc = NULL; ctx->ydesc = NULL;
} }
void spatialtf_context_destroy( spatialtf_context_t * ctx ) void spatialtf_context_destroy( spatialtf_context_t * ctx )
{ {
Py_XDECREF( ctx->grid );
if ( NULL != ctx->xdesc ) if ( NULL != ctx->xdesc )
cudnnDestroyTensorDescriptor( ctx->xdesc ); cudnnDestroyTensorDescriptor( ctx->xdesc );
...@@ -23,8 +30,9 @@ void spatialtf_context_destroy( spatialtf_context_t * ctx ) ...@@ -23,8 +30,9 @@ void spatialtf_context_destroy( spatialtf_context_t * ctx )
#section support_code_struct #section support_code_struct
int int
spatialtf_sampler(PyGpuArrayObject * input, dnn_sptf(PyGpuArrayObject * input,
PyGpuArrayObject * grid, PyGpuArrayObject * theta,
PyArrayObject * grid_dims,
cudnnSpatialTransformerDescriptor_t desc, cudnnSpatialTransformerDescriptor_t desc,
double alpha, double beta, double alpha, double beta,
PyGpuArrayObject ** output, PyGpuArrayObject ** output,
...@@ -39,18 +47,6 @@ spatialtf_sampler(PyGpuArrayObject * input, ...@@ -39,18 +47,6 @@ spatialtf_sampler(PyGpuArrayObject * input,
cudnnTensorFormat_t tf = CUDNN_TENSOR_NCHW; cudnnTensorFormat_t tf = CUDNN_TENSOR_NCHW;
cudnnStatus_t err = CUDNN_STATUS_SUCCESS; cudnnStatus_t err = CUDNN_STATUS_SUCCESS;
if ( PyGpuArray_NDIM( grid ) != 4 )
{
PyErr_SetString( PyExc_RuntimeError,
"grid_dimensions must have 4 dimensions" );
return -1;
}
// Obtain grid dimensions
const int num_images = (int) PyGpuArray_DIM( grid, 0 );
const int height = (int) PyGpuArray_DIM( grid, 1 );
const int width = (int) PyGpuArray_DIM( grid, 2 );
switch (input->ga.typecode) switch (input->ga.typecode)
{ {
case GA_DOUBLE: case GA_DOUBLE:
...@@ -70,7 +66,63 @@ spatialtf_sampler(PyGpuArrayObject * input, ...@@ -70,7 +66,63 @@ spatialtf_sampler(PyGpuArrayObject * input,
break; break;
default: default:
PyErr_SetString( PyExc_TypeError, PyErr_SetString( PyExc_TypeError,
"Unsupported type in spatial transformer sampler" ); "GpuDnnTransformer: unsupported type in spatial transformer sampler" );
return -1;
}
if ( ! GpuArray_IS_C_CONTIGUOUS( &(input->ga) ) )
{
PyErr_SetString( PyExc_MemoryError,
"GpuDnnTransformer: input data is not C-contiguous" );
return -1;
}
if ( theta->ga.typecode != GA_FLOAT &&
theta->ga.typecode != GA_DOUBLE &&
theta->ga.typecode != GA_HALF )
{
PyErr_SetString( PyExc_TypeError, "GpuDnnTransformer: unsupported data type for theta" );
return -1;
}
else if ( PyGpuArray_NDIM( theta ) != 3 )
{
PyErr_Format( PyExc_RuntimeError,
"GpuDnnTransformer: theta must have three dimensions!" );
return -1;
}
else if ( PyGpuArray_DIM( theta, 1 ) != 2 && PyGpuArray_DIM( theta, 2 ) != 3 )
{
PyErr_Format( PyExc_RuntimeError,
"GpuDnnTransformer: incorrect dimensions for theta, expected (%d, %d, %d), got (%d, %d, %d)",
PyGpuArray_DIMS( theta )[0], 2, 3, PyGpuArray_DIMS( theta )[0],
PyGpuArray_DIMS( theta )[1], PyGpuArray_DIMS( theta )[2] );
return -1;
}
else if ( ! GpuArray_IS_C_CONTIGUOUS( &(theta->ga) ) )
{
PyErr_SetString( PyExc_MemoryError,
"GpuDnnTransformer: theta is not C-contiguous" );
return -1;
}
if ( PyArray_NDIM( grid_dims ) != 1 || PyArray_SIZE( grid_dims ) != 4 )
{
PyErr_SetString( PyExc_RuntimeError,
"GpuDnnTransformer: grid_dims must have 4 elements." );
return -1;
}
// Obtain grid dimensions
const int num_images = (int) *( (npy_int *) PyArray_GETPTR1( grid_dims, 0 ) );
const int num_channels = (int) *( (npy_int *) PyArray_GETPTR1( grid_dims, 1 ) );
const int height = (int) *( (npy_int *) PyArray_GETPTR1( grid_dims, 2 ) );
const int width = (int) *( (npy_int *) PyArray_GETPTR1( grid_dims, 3 ) );
const size_t gpu_grid_dims[4] = { num_images, height, width, 2 };
if ( width == 0 || height == 0 || num_images == 0 )
{
PyErr_SetString( PyExc_RuntimeError,
"GpuDnnTransformer: grid_dims has a dimension with value zero" );
return -1; return -1;
} }
...@@ -78,6 +130,16 @@ spatialtf_sampler(PyGpuArrayObject * input, ...@@ -78,6 +130,16 @@ spatialtf_sampler(PyGpuArrayObject * input,
cuda_enter( gpu_ctx->ctx ); cuda_enter( gpu_ctx->ctx );
spatialtf_ctx.grid = pygpu_empty(4, &(gpu_grid_dims[0]), input->ga.typecode, GA_C_ORDER,
gpu_ctx, Py_None);
if ( spatialtf_ctx.grid == NULL )
{
PyErr_SetString( PyExc_RuntimeError,
"GpuDnnTransformer: could not allocate memory for grid of coordinates" );
return -1;
}
err = cudnnCreateTensorDescriptor( &(spatialtf_ctx.xdesc) ); err = cudnnCreateTensorDescriptor( &(spatialtf_ctx.xdesc) );
if ( err != CUDNN_STATUS_SUCCESS ) if ( err != CUDNN_STATUS_SUCCESS )
...@@ -86,7 +148,7 @@ spatialtf_sampler(PyGpuArrayObject * input, ...@@ -86,7 +148,7 @@ spatialtf_sampler(PyGpuArrayObject * input,
cuda_exit( gpu_ctx->ctx ); cuda_exit( gpu_ctx->ctx );
PyErr_Format( PyExc_RuntimeError, PyErr_Format( PyExc_RuntimeError,
"Could not create xdesc: %s", "GpuDnnTransformer: could not create xdesc: %s",
cudnnGetErrorString(err) ); cudnnGetErrorString(err) );
return -1; return -1;
} }
...@@ -99,10 +161,10 @@ spatialtf_sampler(PyGpuArrayObject * input, ...@@ -99,10 +161,10 @@ spatialtf_sampler(PyGpuArrayObject * input,
const int input_height = (int) PyGpuArray_DIM( input, 2 ); const int input_height = (int) PyGpuArray_DIM( input, 2 );
const int input_width = (int) PyGpuArray_DIM( input, 3 ); const int input_width = (int) PyGpuArray_DIM( input, 3 );
if ( input_num_images != num_images ) if ( input_num_images != num_images || input_num_channels != num_channels )
{ {
PyErr_Format( PyExc_RuntimeError, PyErr_Format( PyExc_RuntimeError,
"Input should have %d images, got %d images.", "GpuDnnTransformer: expected input to have %d inputs, got %d inputs.",
num_images, input_num_images ); num_images, input_num_images );
return -1; return -1;
} }
...@@ -116,7 +178,7 @@ spatialtf_sampler(PyGpuArrayObject * input, ...@@ -116,7 +178,7 @@ spatialtf_sampler(PyGpuArrayObject * input,
cuda_exit( gpu_ctx->ctx ); cuda_exit( gpu_ctx->ctx );
PyErr_Format( PyExc_RuntimeError, PyErr_Format( PyExc_RuntimeError,
"Could not initialize xdesc: %s", "GpuDnnTransformer: failed to initialize xdesc: %s",
cudnnGetErrorString(err) ); cudnnGetErrorString(err) );
return -1; return -1;
} }
...@@ -129,7 +191,7 @@ spatialtf_sampler(PyGpuArrayObject * input, ...@@ -129,7 +191,7 @@ spatialtf_sampler(PyGpuArrayObject * input,
cuda_exit( gpu_ctx->ctx ); cuda_exit( gpu_ctx->ctx );
PyErr_Format( PyExc_RuntimeError, PyErr_Format( PyExc_RuntimeError,
"Could not create ydesc: %s", "GpuDnnTransformer: failed to create ydesc: %s",
cudnnGetErrorString(err) ); cudnnGetErrorString(err) );
return -1; return -1;
} }
...@@ -143,70 +205,51 @@ spatialtf_sampler(PyGpuArrayObject * input, ...@@ -143,70 +205,51 @@ spatialtf_sampler(PyGpuArrayObject * input,
cuda_exit( gpu_ctx->ctx ); cuda_exit( gpu_ctx->ctx );
PyErr_Format( PyExc_RuntimeError, PyErr_Format( PyExc_RuntimeError,
"Could not initialize ydesc: %s", "GpuDnnTransformer: failed to initialize ydesc: %s",
cudnnGetErrorString(err) ); cudnnGetErrorString(err) );
return -1; return -1;
} }
const size_t out_dims[4] = { num_images, input_num_channels, height, width }; const size_t out_dims[4] = { num_images, input_num_channels, height, width };
if ( NULL == *output || if ( theano_prep_output( output, 4, out_dims, input->ga.typecode,
! theano_size_check( *output, 4, out_dims, (*output)->ga.typecode ) ) GA_C_ORDER, gpu_ctx ) != 0 )
{
Py_XDECREF( *output );
*output = pygpu_empty( 4, out_dims, input->ga.typecode, GA_C_ORDER,
gpu_ctx, Py_None );
if ( NULL == *output )
{ {
spatialtf_context_destroy( &spatialtf_ctx ); spatialtf_context_destroy( &spatialtf_ctx );
cuda_exit( gpu_ctx->ctx ); cuda_exit( gpu_ctx->ctx );
PyErr_SetString( PyExc_MemoryError, PyErr_SetString( PyExc_MemoryError,
"Could allocate memory for spatial transformer's grid sampler" ); "GpuDnnTransformer: could not allocate memory for grid sampler" );
return -1; return -1;
} }
}
if ( ! GpuArray_IS_C_CONTIGUOUS( &(input->ga) ) ) cuda_wait( input->ga.data, GPUARRAY_CUDA_WAIT_READ );
{ cuda_wait( theta->ga.data, GPUARRAY_CUDA_WAIT_READ );
PyErr_SetString( PyExc_MemoryError, cuda_wait( (*output)->ga.data, GPUARRAY_CUDA_WAIT_WRITE );
"input data is not C-contiguous" );
return -1;
}
if ( ! GpuArray_IS_C_CONTIGUOUS( &(grid->ga) ) ) err = cudnnSpatialTfGridGeneratorForward( _handle, desc, PyGpuArray_DEV_DATA( theta ),
{ PyGpuArray_DEV_DATA( spatialtf_ctx.grid ) );
PyErr_SetString( PyExc_MemoryError,
"grid data is not C-contiguous" );
return -1;
}
if ( ! GpuArray_IS_C_CONTIGUOUS( &((*output)->ga) ) ) if ( CUDNN_STATUS_SUCCESS != err )
{ {
PyErr_SetString( PyExc_MemoryError, PyErr_Format( PyExc_RuntimeError,
"theta data is not C-contiguous" ); "GpuDnnTransformer: failed to create grid of coordinates: %s",
cudnnGetErrorString( err ) );
return -1; return -1;
} }
cuda_wait( input->ga.data, GPUARRAY_CUDA_WAIT_READ );
cuda_wait( grid->ga.data, GPUARRAY_CUDA_WAIT_READ );
cuda_wait( (*output)->ga.data, GPUARRAY_CUDA_WAIT_WRITE );
const void * input_data = PyGpuArray_DEV_DATA( input );
const void * grid_data = PyGpuArray_DEV_DATA( grid );
void * out_data = PyGpuArray_DEV_DATA( *output );
err = cudnnSpatialTfSamplerForward( _handle, desc, alpha_p, spatialtf_ctx.xdesc, err = cudnnSpatialTfSamplerForward( _handle, desc, alpha_p, spatialtf_ctx.xdesc,
input_data, grid_data, beta_p, spatialtf_ctx.ydesc, out_data ); PyGpuArray_DEV_DATA( input ), PyGpuArray_DEV_DATA( spatialtf_ctx.grid ),
beta_p, spatialtf_ctx.ydesc, PyGpuArray_DEV_DATA( *output ) );
cuda_record( input->ga.data, GPUARRAY_CUDA_WAIT_READ ); cuda_record( input->ga.data, GPUARRAY_CUDA_WAIT_READ );
cuda_record( grid->ga.data, GPUARRAY_CUDA_WAIT_READ ); cuda_record( theta->ga.data, GPUARRAY_CUDA_WAIT_READ );
cuda_record( (*output)->ga.data, GPUARRAY_CUDA_WAIT_WRITE ); cuda_record( (*output)->ga.data, GPUARRAY_CUDA_WAIT_WRITE );
if ( CUDNN_STATUS_SUCCESS != err ) if ( CUDNN_STATUS_SUCCESS != err )
{ {
PyErr_SetString( PyExc_RuntimeError,
"GpuDnnTransformer: failed to create grid sampler" );
spatialtf_context_destroy( &spatialtf_ctx ); spatialtf_context_destroy( &spatialtf_ctx );
cuda_exit( gpu_ctx->ctx ); cuda_exit( gpu_ctx->ctx );
return -1; return -1;
......
#section support_code

/**
 * Generate the grid of sampling coordinates for a cuDNN 2D spatial
 * transformer from the affine transformation parameters `theta`.
 *
 * grid_dimensions : 1-D numpy array of 4 npy_int values
 *                   (num_images, num_channels, height, width).
 * theta           : C-contiguous GPU array of shape (num_images, 2, 3)
 *                   holding one affine transform per image.
 * desc            : previously initialized spatial transformer descriptor.
 * grid            : in/out; (re)allocated here as a C-contiguous GPU array
 *                   of shape (num_images, height, width, 2).  The caller
 *                   owns the reference.
 * _handle         : cuDNN handle.
 *
 * Returns 0 on success, -1 on failure with a Python exception set.
 */
int
spatialtf_grid(PyArrayObject * grid_dimensions,
               PyGpuArrayObject * theta,
               cudnnSpatialTransformerDescriptor_t desc,
               PyGpuArrayObject ** grid,
               cudnnHandle_t _handle)
{
    PyGpuContextObject * gpu_ctx = theta->context;
    cudnnStatus_t err;

    if ( theta->ga.typecode != GA_FLOAT &&
         theta->ga.typecode != GA_DOUBLE &&
         theta->ga.typecode != GA_HALF )
    {
        PyErr_SetString( PyExc_TypeError, "Unsupported data type for theta" );
        return -1;
    }

    if ( PyGpuArray_NDIM( theta ) != 3 )
    {
        PyErr_SetString( PyExc_RuntimeError,
                         "theta must have three dimensions!" );
        return -1;
    }

    // BUG FIX: this test used &&, so theta was rejected only when *both*
    // trailing dimensions were wrong; e.g. a (n, 2, 5) tensor slipped
    // through.  Either mismatch must be an error.
    // Also cast the size_t dims for the varargs call: passing size_t
    // through "%d" is a format/argument type mismatch.
    if ( PyGpuArray_DIM( theta, 1 ) != 2 || PyGpuArray_DIM( theta, 2 ) != 3 )
    {
        PyErr_Format( PyExc_RuntimeError,
                      "Incorrect dimensions for theta, should be (%ld, %ld, %ld), got (%ld, %ld, %ld)",
                      (long) PyGpuArray_DIMS( theta )[0], 2L, 3L,
                      (long) PyGpuArray_DIMS( theta )[0],
                      (long) PyGpuArray_DIMS( theta )[1],
                      (long) PyGpuArray_DIMS( theta )[2] );
        return -1;
    }

    // Check contiguity before touching *grid so the error path does not
    // perform a useless allocation.
    if ( ! GpuArray_IS_C_CONTIGUOUS( &(theta->ga) ) )
    {
        PyErr_SetString( PyExc_MemoryError,
                         "theta data is not C-contiguous" );
        return -1;
    }

    // grid_dimensions must be a vector of exactly 4 elements.  Checking the
    // rank first keeps PyArray_DIM( ..., 0 ) well-defined, and the message
    // now says "elements" (the old one said "dimensions", which is what the
    // rank check is about, not the length).
    if ( PyArray_NDIM( grid_dimensions ) != 1 ||
         PyArray_DIM( grid_dimensions, 0 ) != 4 )
    {
        PyErr_SetString( PyExc_RuntimeError,
                         "grid_dimensions must be a vector of 4 elements!" );
        return -1;
    }

    // Obtain grid dimensions
    const size_t num_images = (size_t) *( (npy_int *) PyArray_GETPTR1( grid_dimensions, 0 ) );
    // Element 1 is the number of image channels; the grid does not need it.
    const size_t height = (size_t) *( (npy_int *) PyArray_GETPTR1( grid_dimensions, 2 ) );
    const size_t width = (size_t) *( (npy_int *) PyArray_GETPTR1( grid_dimensions, 3 ) );
    // Grid of coordinates is of size num_images * height * width * 2
    // for a 2D transformation
    const size_t grid_dims[4] = { num_images, height, width, 2 };

    if ( width == 0 || height == 0 || num_images == 0 )
    {
        PyErr_SetString( PyExc_RuntimeError,
                         "One of the grid dimensions is zero" );
        return -1;
    }

    // Reuse *grid only when both its shape and its dtype match what we are
    // about to generate.  BUG FIX: the size check previously compared
    // against (*grid)->ga.typecode, so a stale grid with the wrong dtype
    // was silently reused.
    if ( NULL == *grid ||
         ! theano_size_check( *grid, 4, grid_dims, theta->ga.typecode ) )
    {
        Py_XDECREF( *grid );

        *grid = pygpu_empty( 4, grid_dims, theta->ga.typecode, GA_C_ORDER,
                             gpu_ctx, Py_None );
        if ( NULL == *grid )
        {
            PyErr_SetString( PyExc_MemoryError,
                             "Could not allocate memory for grid of coordinates" );
            return -1;
        }
    }

    // A freshly pygpu_empty()'d grid is C-contiguous; this guards only the
    // reuse path above.
    if ( ! GpuArray_IS_C_CONTIGUOUS( &((*grid)->ga) ) )
    {
        PyErr_SetString( PyExc_MemoryError,
                         "grid data is not C-contiguous" );
        return -1;
    }

    cuda_wait( theta->ga.data, GPUARRAY_CUDA_WAIT_READ );
    cuda_wait( (*grid)->ga.data, GPUARRAY_CUDA_WAIT_WRITE );

    err = cudnnSpatialTfGridGeneratorForward( _handle, desc,
                                              PyGpuArray_DEV_DATA( theta ),
                                              PyGpuArray_DEV_DATA( *grid ) );

    cuda_record( theta->ga.data, GPUARRAY_CUDA_WAIT_READ );
    cuda_record( (*grid)->ga.data, GPUARRAY_CUDA_WAIT_WRITE );

    if ( CUDNN_STATUS_SUCCESS != err )
    {
        PyErr_Format( PyExc_RuntimeError,
                      "Failed to create grid of coordinates: %s",
                      cudnnGetErrorString( err ) );
        return -1;
    }

    return 0;
}
...@@ -2833,7 +2833,7 @@ def local_abstractconv3d_cudnn_graph(op, context_name, inputs, outputs): ...@@ -2833,7 +2833,7 @@ def local_abstractconv3d_cudnn_graph(op, context_name, inputs, outputs):
return [rval] return [rval]
class GpuDnnSpatialTfDesc(COp): class _GpuDnnTransformerDescriptor(COp):
""" """
This Op builds a spatial transformer descriptor for use in spatial transformer network This Op builds a spatial transformer descriptor for use in spatial transformer network
...@@ -2859,13 +2859,13 @@ class GpuDnnSpatialTfDesc(COp): ...@@ -2859,13 +2859,13 @@ class GpuDnnSpatialTfDesc(COp):
return False return False
def __init__(self, dtype=theano.config.floatX): def __init__(self, dtype=theano.config.floatX):
COp.__init__(self, ["c_code/spatialtf_desc.c"], "APPLY_SPECIFIC(spatialtf_desc)") COp.__init__(self, ["c_code/dnn_sptf_desc.c"], "APPLY_SPECIFIC(dnn_sptf_desc)")
assert cudnn.cudnnDataType_t.has_alias(dtype) assert cudnn.cudnnDataType_t.has_alias(dtype)
self.dtype = dtype self.dtype = dtype
def make_node(self, dimensions): def make_node(self, dimensions):
# cuDNN supports only 2D transformations, therefore output tensor must # cuDNN supports only 2D transformations, and the output tensor must
# have exactly 4 dimensions: (num_images, num_channels, height, width) # have exactly 4 dimensions: (num_images, num_channels, height, width)
assert len(dimensions) == 4 assert len(dimensions) == 4
dimensions = tuple(dimensions) dimensions = tuple(dimensions)
...@@ -2883,63 +2883,31 @@ class GpuDnnSpatialTfDesc(COp): ...@@ -2883,63 +2883,31 @@ class GpuDnnSpatialTfDesc(COp):
return node return node
def c_code_cache_version(self): def c_code_cache_version(self):
return (super(GpuDnnSpatialTfDesc, self).c_code_cache_version(), version()) return (super(_GpuDnnTransformerDescriptor, self).c_code_cache_version(), version())
class GpuDnnGridGenerator(DnnBase): class GpuDnnTransformer(DnnBase):
""" """
This Op builds a spatial transformer grid generator for use in spatial transformer network This Op builds a spatial transformer that can be used in spatial transformer networks.
operations.
""" """
__props__ = ('dtype',) __props__ = ('dtype',)
_cop_num_inputs = 3 _cop_num_inputs = 6
_cop_num_outputs = 1 _cop_num_outputs = 1
_f16_ok = True
def __init__(self, dtype): def __init__(self, dtype):
DnnBase.__init__(self, ["c_code/spatialtf_grid.c"], "spatialtf_grid") DnnBase.__init__(self, ["c_code/dnn_sptf.c"], "dnn_sptf")
self.dtype = dtype self.dtype = dtype
def make_node(self, grid_dimensions, theta, desc): def make_node(self, img, theta, grid_dims, desc, alpha=None, beta=None):
context_name = infer_context_name(desc, theta)
grid_dimensions = as_tensor_variable(grid_dimensions)
theta = gpu_contiguous(as_gpuarray_variable(theta, context_name))
assert theta.dtype in ('float16', 'float32', 'float64') assert theta.dtype in ('float16', 'float32', 'float64')
# Allocate GPU memory for grid of coordinates context_name = infer_context_name(img)
grid = GpuArrayType(dtype=self.dtype,
broadcastable=(False, False, False, False,),
context_name=context_name)()
return Apply(self, [grid_dimensions, theta, desc], [grid])
def L_op(self, inputs, outputs, output_grads):
pass
class GpuDnnGridSampler(DnnBase):
"""
This Op builds a spatial transformer grid sampler for use in spatial transformer network
operations.
"""
__props__ = ('dtype',)
_cop_num_inputs = 5
_cop_num_outputs = 1
def __init__(self, dtype):
DnnBase.__init__(self, ["c_code/spatialtf_sampler.c"], "spatialtf_sampler")
self.dtype = dtype
def make_node(self, img, grid, desc, alpha=None, beta=None):
context_name = infer_context_name(img, grid)
theta = gpu_contiguous(as_gpuarray_variable(theta, context_name))
img = as_gpuarray_variable(img, context_name) img = as_gpuarray_variable(img, context_name)
grid = as_gpuarray_variable(grid, context_name) grid_dims = as_tensor_variable(grid_dims)
output = GpuArrayType(dtype=self.dtype, output = GpuArrayType(dtype=self.dtype,
broadcastable=img.type.ndim * (False,), broadcastable=img.type.ndim * (False,),
...@@ -2955,9 +2923,9 @@ class GpuDnnGridSampler(DnnBase): ...@@ -2955,9 +2923,9 @@ class GpuDnnGridSampler(DnnBase):
alpha = ensure_dt(alpha, _one, 'alpha', img.dtype) alpha = ensure_dt(alpha, _one, 'alpha', img.dtype)
beta = ensure_dt(beta, _zero, 'beta', img.dtype) beta = ensure_dt(beta, _zero, 'beta', img.dtype)
return Apply(self, [img, grid, desc, alpha, beta], [output]) return Apply(self, [img, theta, grid_dims, desc, alpha, beta], [output])
def L_op(self, inputs, outputs, output_grads): def L_op(self, inputs, outputs, grads):
pass pass
...@@ -3011,13 +2979,12 @@ def dnn_spatialtf(inp, theta, scale_width=1, scale_height=1, alpha=None, beta=No ...@@ -3011,13 +2979,12 @@ def dnn_spatialtf(inp, theta, scale_width=1, scale_height=1, alpha=None, beta=No
theta = gpu_contiguous(theta) theta = gpu_contiguous(theta)
# Create spatial transformer descriptor # Create spatial transformer descriptor
desc = GpuDnnSpatialTfDesc(dtype)(grid_dims) desc = _GpuDnnTransformerDescriptor(dtype)(grid_dims)
# Create grid dimensions variable # Create grid dimensions variable
grid_dims_var = as_tensor_variable(grid_dims) grid_dims_var = as_tensor_variable(grid_dims)
# Setup and return sampling grid # Setup spatial transformer
grid_coord = GpuDnnGridGenerator(dtype)(grid_dims_var, theta, desc) transformer = GpuDnnTransformer(dtype)(inp, theta, grid_dims_var, desc, alpha, beta)
grid_sampler = GpuDnnGridSampler(dtype)(inp, grid_coord, desc, alpha, beta) return transformer
return grid_sampler
@local_optimizer([AbstractConv2d, AbstractConv3d]) @local_optimizer([AbstractConv2d, AbstractConv3d])
......
...@@ -2440,7 +2440,6 @@ def test_dnn_spatialtf(): ...@@ -2440,7 +2440,6 @@ def test_dnn_spatialtf():
img = np.random.randint(low=0, high=256, size=img_dims) img = np.random.randint(low=0, high=256, size=img_dims)
# Convert from NHWC to NCHW # Convert from NHWC to NCHW
img = np.transpose(img, axes=(0, 3, 1, 2)).astype(theano.config.floatX) img = np.transpose(img, axes=(0, 3, 1, 2)).astype(theano.config.floatX)
gpu_img = gpuarray_shared_constructor(img)
# Downsample image dimensions by a factor of 2, i.e. our output tensor will # Downsample image dimensions by a factor of 2, i.e. our output tensor will
# have shape (n, c, h / 2, w / 2) # have shape (n, c, h / 2, w / 2)
scale_height = 0.25 scale_height = 0.25
...@@ -2451,25 +2450,25 @@ def test_dnn_spatialtf(): ...@@ -2451,25 +2450,25 @@ def test_dnn_spatialtf():
[0, -1, 0]] [0, -1, 0]]
transform = np.asarray(img_dims[0] * [theta], dtype=theano.config.floatX) transform = np.asarray(img_dims[0] * [theta], dtype=theano.config.floatX)
gpu_transform = gpuarray_shared_constructor(transform)
st_dnn = dnn.dnn_spatialtf(gpu_img, gpu_transform, scale_height=scale_height, # Create symbolic variables for inputs and transformations
t_img = T.tensor4('img')
t_theta = T.tensor3('theta')
st_dnn = dnn.dnn_spatialtf(t_img, t_theta, scale_height=scale_height,
scale_width=scale_width) scale_width=scale_width)
st_dnn_func = theano.function([], [st_dnn]) st_dnn_func = theano.function([t_img, t_theta], [st_dnn])
img_out_gpu, = st_dnn_func(img, transform)
img_out = np.asarray(img_out_gpu)
# Check if function graph contains the spatial transformer Ops # Check if function graph contains the spatial transformer Ops
topo = st_dnn_func.maker.fgraph.toposort() topo = st_dnn_func.maker.fgraph.toposort()
assert len([n for n in topo if isinstance(n.op, dnn.GpuDnnGridGenerator)]) == 1 assert len([n for n in topo if isinstance(n.op, dnn.GpuDnnTransformer)]) == 1
assert len([n for n in topo if isinstance(n.op, dnn.GpuDnnGridSampler)]) == 1
# Setup CPU Op # Setup CPU Op
t_img = T.tensor4('img')
t_theta = T.tensor3('theta')
st_cpu = spatialtf_cpu(t_theta, t_img, scale_height, scale_width, 'nearest') st_cpu = spatialtf_cpu(t_theta, t_img, scale_height, scale_width, 'nearest')
st_cpu_func = theano.function([t_theta, t_img], [st_cpu], mode=mode_without_gpu) st_cpu_func = theano.function([t_theta, t_img], [st_cpu], mode=mode_without_gpu)
res, = st_cpu_func(transform, img) res, = st_cpu_func(transform, img)
img_out_gpu = st_dnn_func()
img_out = np.asarray(img_out_gpu[0])
utt.assert_allclose(img_out, res, rtol=1e-2, atol=1e-2) utt.assert_allclose(img_out, res, rtol=1e-2, atol=1e-2)
Markdown is supported
0%
You are about to add 0 people to this discussion. Please proceed with caution.
Finish editing this comment first!
Register or sign in to comment