提交 c25df18f authored 作者: João Victor Risso's avatar João Victor Risso

Split spatial transformer implementation into grid and sampler Ops

上级 659b7c8f
#section support_code_struct
int
APPLY_SPECIFIC(dnn_sptf_grid)(PyGpuArrayObject * theta,
PyArrayObject * grid_dims,
cudnnSpatialTransformerDescriptor_t desc,
PyGpuArrayObject ** grid,
cudnnHandle_t _handle)
{
PyGpuContextObject * gpu_ctx = theta->context;
size_t gpu_grid_dims[4];
int num_images, num_channels, height, width;
cudnnStatus_t err = CUDNN_STATUS_SUCCESS;
if ( theta->ga.typecode != GA_FLOAT &&
theta->ga.typecode != GA_DOUBLE &&
theta->ga.typecode != GA_HALF )
{
PyErr_SetString( PyExc_TypeError,
"GpuDnnTransformerGrid: unsupported data type for theta in spatial transformer." );
return 1;
}
else if ( PyGpuArray_DIM( theta, 1 ) != 2 && PyGpuArray_DIM( theta, 2 ) != 3 )
{
PyErr_Format( PyExc_RuntimeError,
"GpuDnnTransformerGrid: incorrect dimensions for theta, expected (%d, %d, %d), got (%d, %d, %d)",
PyGpuArray_DIMS( theta )[0], 2, 3, PyGpuArray_DIMS( theta )[0],
PyGpuArray_DIMS( theta )[1], PyGpuArray_DIMS( theta )[2] );
return 1;
}
if ( PyArray_NDIM( grid_dims ) != 1 || PyArray_SIZE( grid_dims ) != 4 )
{
PyErr_SetString( PyExc_MemoryError,
"GpuDnnTransformerGrid: grid_dims must have 4 elements." );
return 1;
}
// Obtain grid dimensions
num_images = (int) *( (npy_int64 *) PyArray_GETPTR1( grid_dims, 0 ) );
height = (int) *( (npy_int64 *) PyArray_GETPTR1( grid_dims, 2 ) );
width = (int) *( (npy_int64 *) PyArray_GETPTR1( grid_dims, 3 ) );
gpu_grid_dims[0] = num_images;
gpu_grid_dims[1] = height;
gpu_grid_dims[2] = width;
gpu_grid_dims[3] = 2;
if ( theano_prep_output( grid, 4, gpu_grid_dims, theta->ga.typecode,
GA_C_ORDER, gpu_ctx ) != 0 )
{
PyErr_SetString( PyExc_RuntimeError,
"GpuDnnTransformerGrid: could not allocate memory for grid of coordinates" );
return 1;
}
cuda_enter( gpu_ctx->ctx );
cuda_wait( theta->ga.data, GPUARRAY_CUDA_WAIT_READ );
cuda_wait( (*grid)->ga.data, GPUARRAY_CUDA_WAIT_WRITE );
err = cudnnSpatialTfGridGeneratorForward( _handle, desc, PyGpuArray_DEV_DATA( theta ),
PyGpuArray_DEV_DATA( *grid ) );
cuda_record( theta->ga.data, GPUARRAY_CUDA_WAIT_READ );
cuda_record( (*grid)->ga.data, GPUARRAY_CUDA_WAIT_WRITE );
cuda_exit( gpu_ctx->ctx );
if ( CUDNN_STATUS_SUCCESS != err )
{
PyErr_Format( PyExc_RuntimeError,
"GpuDnnTransformerGrid: could not create grid of coordinates: %s",
cudnnGetErrorString( err ) );
return 1;
}
return 0;
}
\ No newline at end of file
......@@ -14,7 +14,7 @@ APPLY_SPECIFIC(ydesc) = NULL;
if ( err != CUDNN_STATUS_SUCCESS )
{
PyErr_Format( PyExc_MemoryError,
"GpuDnnTransformerGradI: failed to allocate cuDNN tensor descriptor xdesc: %s",
"GpuDnnTransformerSampler: failed to allocate cuDNN tensor descriptor xdesc: %s",
cudnnGetErrorString( err ) );
FAIL;
}
......@@ -23,7 +23,7 @@ APPLY_SPECIFIC(ydesc) = NULL;
if ( err != CUDNN_STATUS_SUCCESS )
{
PyErr_Format( PyExc_MemoryError,
"GpuDnnTransformerGradI: failed to allocate cuDNN tensor descriptor ydesc: %s",
"GpuDnnTransformerSampler: failed to allocate cuDNN tensor descriptor ydesc: %s",
cudnnGetErrorString( err ) );
FAIL;
}
......@@ -40,22 +40,19 @@ if ( APPLY_SPECIFIC(ydesc) != NULL )
#section support_code_struct
int
APPLY_SPECIFIC(dnn_sptf)(PyGpuArrayObject * input,
PyGpuArrayObject * theta,
PyArrayObject * grid_dims,
cudnnSpatialTransformerDescriptor_t desc,
PyGpuArrayObject ** output,
PyGpuArrayObject ** grid,
cudnnHandle_t _handle)
APPLY_SPECIFIC(dnn_sptf_sampler)(PyGpuArrayObject * input,
PyGpuArrayObject * grid,
cudnnSpatialTransformerDescriptor_t desc,
PyGpuArrayObject ** output,
cudnnHandle_t _handle)
{
PyGpuContextObject * gpu_ctx = input->context;
void * alpha_p;
void * beta_p;
double alpha = 1.0, beta = 0.0;
float af = alpha, bf = beta;
size_t out_dims[4];
cudnnStatus_t err = CUDNN_STATUS_SUCCESS;
int num_images, num_channels, height, width;
size_t gpu_grid_dims[4], out_dims[4];
switch (input->ga.typecode)
{
......@@ -77,81 +74,23 @@ APPLY_SPECIFIC(dnn_sptf)(PyGpuArrayObject * input,
return 1;
}
if ( theta->ga.typecode != GA_FLOAT &&
theta->ga.typecode != GA_DOUBLE &&
theta->ga.typecode != GA_HALF )
{
PyErr_SetString( PyExc_TypeError,
"GpuDnnTransformer: unsupported data type for theta in spatial transformer." );
return 1;
}
else if ( PyGpuArray_DIM( theta, 1 ) != 2 && PyGpuArray_DIM( theta, 2 ) != 3 )
{
PyErr_Format( PyExc_RuntimeError,
"GpuDnnTransformer: incorrect dimensions for theta, expected (%d, %d, %d), got (%d, %d, %d)",
PyGpuArray_DIMS( theta )[0], 2, 3, PyGpuArray_DIMS( theta )[0],
PyGpuArray_DIMS( theta )[1], PyGpuArray_DIMS( theta )[2] );
return 1;
}
out_dims[0] = (size_t) PyGpuArray_DIM(input, 0); // num_images
out_dims[1] = (size_t) PyGpuArray_DIM(input, 1); // num_channels
out_dims[2] = (size_t) PyGpuArray_DIM(grid, 1); // grid width
out_dims[3] = (size_t) PyGpuArray_DIM(grid, 2); // grid height
if ( PyArray_NDIM( grid_dims ) != 1 || PyArray_SIZE( grid_dims ) != 4 )
{
PyErr_SetString( PyExc_MemoryError,
"GpuDnnTransformer: grid_dims must have 4 elements." );
return 1;
}
// Obtain grid dimensions
num_images = (int) *( (npy_int64 *) PyArray_GETPTR1( grid_dims, 0 ) );
num_channels = (int) *( (npy_int64 *) PyArray_GETPTR1( grid_dims, 1 ) );
height = (int) *( (npy_int64 *) PyArray_GETPTR1( grid_dims, 2 ) );
width = (int) *( (npy_int64 *) PyArray_GETPTR1( grid_dims, 3 ) );
gpu_grid_dims[0] = num_images;
gpu_grid_dims[1] = height;
gpu_grid_dims[2] = width;
gpu_grid_dims[3] = 2;
out_dims[0] = num_images;
out_dims[1] = num_channels;
out_dims[2] = height;
out_dims[3] = width;
if ( width == 0 || height == 0 || num_images == 0 )
if ( out_dims[0] == 0 || out_dims[1] == 0 || out_dims[2] == 0 || out_dims[3] == 0 )
{
PyErr_SetString( PyExc_RuntimeError,
"GpuDnnTransformer: grid_dims has a dimension with value zero" );
return 1;
}
if ( PyGpuArray_DIM( input, 0 ) != num_images )
{
PyErr_Format( PyExc_RuntimeError,
"GpuDnnTransformer: expected batch size %d, got %d.",
num_images, PyGpuArray_DIM( input, 0 ) );
return 1;
}
else if ( PyGpuArray_DIM( input, 1 ) != num_channels )
{
PyErr_Format( PyExc_RuntimeError,
"GpuDnnTransformer: expected input to have %d channels, got %d channels.",
num_channels, PyGpuArray_DIM( input, 1 ) );
return 1;
}
if ( theano_prep_output( grid, 4, gpu_grid_dims, input->ga.typecode,
GA_C_ORDER, gpu_ctx ) != 0 )
{
PyErr_SetString( PyExc_RuntimeError,
"GpuDnnTransformer: could not allocate memory for grid of coordinates" );
return 1;
"GpuDnnTransformerSampler: one of the sampler dimensions is zero" );
return 1;
}
if ( theano_prep_output( output, 4, out_dims, input->ga.typecode,
GA_C_ORDER, gpu_ctx ) != 0 )
{
PyErr_SetString( PyExc_MemoryError,
"GpuDnnTransformer: could not allocate memory for grid sampler" );
"GpuDnnTransformerSampler: could not allocate memory for grid sampler" );
return 1;
}
......@@ -164,29 +103,15 @@ APPLY_SPECIFIC(dnn_sptf)(PyGpuArrayObject * input,
cuda_enter( gpu_ctx->ctx );
cuda_wait( input->ga.data, GPUARRAY_CUDA_WAIT_READ );
cuda_wait( theta->ga.data, GPUARRAY_CUDA_WAIT_READ );
cuda_wait( (*grid)->ga.data, GPUARRAY_CUDA_WAIT_WRITE );
cuda_wait( grid->ga.data, GPUARRAY_CUDA_WAIT_READ );
cuda_wait( (*output)->ga.data, GPUARRAY_CUDA_WAIT_WRITE );
err = cudnnSpatialTfGridGeneratorForward( _handle, desc, PyGpuArray_DEV_DATA( theta ),
PyGpuArray_DEV_DATA( *grid ) );
if ( CUDNN_STATUS_SUCCESS != err )
{
PyErr_Format( PyExc_RuntimeError,
"GpuDnnTransformer: could not create grid of coordinates: %s",
cudnnGetErrorString( err ) );
cuda_exit( gpu_ctx->ctx );
return 1;
}
err = cudnnSpatialTfSamplerForward( _handle, desc, alpha_p, APPLY_SPECIFIC(xdesc),
PyGpuArray_DEV_DATA( input ), PyGpuArray_DEV_DATA( *grid ), beta_p,
PyGpuArray_DEV_DATA( input ), PyGpuArray_DEV_DATA( grid ), beta_p,
APPLY_SPECIFIC(ydesc), PyGpuArray_DEV_DATA( *output ) );
cuda_record( input->ga.data, GPUARRAY_CUDA_WAIT_READ );
cuda_record( theta->ga.data, GPUARRAY_CUDA_WAIT_READ );
cuda_record( (*grid)->ga.data, GPUARRAY_CUDA_WAIT_WRITE );
cuda_record( grid->ga.data, GPUARRAY_CUDA_WAIT_READ );
cuda_record( (*output)->ga.data, GPUARRAY_CUDA_WAIT_WRITE );
cuda_exit( gpu_ctx->ctx );
......@@ -194,10 +119,10 @@ APPLY_SPECIFIC(dnn_sptf)(PyGpuArrayObject * input,
if ( CUDNN_STATUS_SUCCESS != err )
{
PyErr_Format( PyExc_RuntimeError,
"GpuDnnTransformer: could not create grid sampler: %s",
"GpuDnnTransformerSampler: could not create grid sampler: %s",
cudnnGetErrorString( err ) );
return 1;
}
return 0;
}
}
\ No newline at end of file
......@@ -2884,34 +2884,22 @@ class GpuDnnTransformerDesc(COp):
return (super(GpuDnnTransformerDesc, self).c_code_cache_version(), version())
class GpuDnnTransformer(DnnBase):
"""
Spatial transformer that can be used in spatial transformer networks, it
implements the grid generator and sampler. The localization network can
be built using neural net components of Theano.
"""
class GpuDnnTransformerGrid(DnnBase):
__props__ = ()
_cop_num_inputs = 4
_cop_num_outputs = 2
_cop_num_inputs = 3
_cop_num_outputs = 1
_f16_ok = True
default_output = 0
def __init__(self):
DnnBase.__init__(self, ["c_code/dnn_sptf.c"], "APPLY_SPECIFIC(dnn_sptf)")
DnnBase.__init__(self, ["c_code/dnn_sptf_grid.c"], "APPLY_SPECIFIC(dnn_sptf_grid)")
def make_node(self, img, theta, desc):
def make_node(self, theta, desc):
context_name = infer_context_name(desc)
if (not isinstance(desc.type, CDataType) or
desc.type.ctype != 'cudnnSpatialTransformerDescriptor_t'):
raise ValueError('desc must be cudnnSpatialTransformerDescriptor_t')
img = gpu_contiguous(as_gpuarray_variable(img, context_name))
if img.type.ndim != 4:
raise TypeError('img must be a 4D tensor')
elif img.dtype not in ('float16', 'float32', 'float64'):
raise TypeError('img type must be floating-point')
theta = gpu_contiguous(as_gpuarray_variable(theta, context_name))
assert theta.dtype in ('float16', 'float32', 'float64')
assert theta.ndim == 3
......@@ -2922,33 +2910,72 @@ class GpuDnnTransformer(DnnBase):
assert grid_dims.ndim == 1
# Ensure 64-bit ints are passed to the C code
grid_dims = theano.tensor.basic.cast(grid_dims, 'int64')
grid = GpuArrayType(dtype=theta.dtype,
broadcastable=(theta.type.ndim + 1) * (False,),
context_name=context_name)()
output = GpuArrayType(dtype=img.dtype,
broadcastable=img.type.ndim * (False,),
context_name=context_name)()
inputs = [theta, grid_dims, desc]
outputs = [grid]
return Apply(self, inputs, outputs)
grid = GpuArrayType(dtype=img.dtype,
broadcastable=img.type.ndim * (False,),
context_name=context_name)()
def grad(self, inputs, grads):
theta, grid_dims, desc = inputs
dgrid = grads[0]
dtheta = GpuDnnTransformerGradT()(dgrid, desc)
return [dtheta, grad_not_implemented(self, 1, grid_dims), DisconnectedType()()]
inputs = [img, theta, grid_dims, desc]
outputs = [output, grid]
def connection_pattern(self, node):
# not connected to desc
return [[1], [1], [0]]
class GpuDnnTransformerSampler(DnnBase):
__props__ = ()
_cop_num_inputs = 3
_cop_num_outputs = 1
_f16_ok = True
def __init__(self):
DnnBase.__init__(self, ["c_code/dnn_sptf_sampler.c"], "APPLY_SPECIFIC(dnn_sptf_sampler)")
def make_node(self, img, grid, desc):
context_name = infer_context_name(desc)
if (not isinstance(desc.type, CDataType) or
desc.type.ctype != 'cudnnSpatialTransformerDescriptor_t'):
raise ValueError('desc must be cudnnSpatialTransformerDescriptor_t')
img = gpu_contiguous(as_gpuarray_variable(img, context_name))
if img.type.ndim != 4:
raise TypeError('img must be a 4D tensor')
elif img.dtype not in ('float16', 'float32', 'float64'):
raise TypeError('img type must be floating-point')
grid = gpu_contiguous(as_gpuarray_variable(grid, context_name))
if grid.type.ndim != 4:
raise TypeError('grid must be a 4D tensor')
elif grid.dtype not in ('float16', 'float32', 'float64'):
raise TypeError('grid type must be floating-point')
out = GpuArrayType(dtype=img.dtype,
broadcastable=img.type.ndim * (False,),
context_name=context_name)()
inputs = [img, grid, desc]
outputs = [out]
return Apply(self, inputs, outputs)
def L_op(self, inputs, outputs, grads):
img, _, grid_dims, desc = inputs
_, grid = outputs
def grad(self, inputs, grads):
img, grid, desc = inputs
dy = grads[0]
dimg, dgrid = GpuDnnTransformerGradI()(img, grid, dy, desc)
dtheta = GpuDnnTransformerGradT()(dgrid, desc)
dgrid_dims = grad_not_implemented(self, grid_dims, 2)
return [dimg, dtheta, dgrid_dims, DisconnectedType()()]
return [dimg, dgrid, DisconnectedType()()]
def connection_pattern(self, node):
# not connected to desc
return [[1, 1], [1, 1], [1, 1], [0, 0]]
return [[1], [1], [0]]
class GpuDnnTransformerGradI(DnnBase):
......@@ -3096,8 +3123,9 @@ def dnn_spatialtf(img, theta, scale_width=1, scale_height=1, precision=theano.co
assert theta.ndim == 3
# Setup spatial transformer
transformer = GpuDnnTransformer()(img, theta, desc)
return transformer
grid = GpuDnnTransformerGrid()(theta, desc)
sampler = GpuDnnTransformerSampler()(img, grid, desc)
return sampler
@local_optimizer([AbstractConv2d, AbstractConv3d])
......
......@@ -2456,8 +2456,10 @@ def test_dnn_spatialtf():
st_dnn = dnn.dnn_spatialtf(t_img, t_theta, scale_height=scale_height,
scale_width=scale_width)
st_dnn_func = theano.function([t_img, t_theta], st_dnn)
# Check if function graph contains the spatial transformer Op
assert any([isinstance(node.op, dnn.GpuDnnTransformer)
# Check if function graph contains the spatial transformer's grid and sampler Ops
assert any([isinstance(node.op, dnn.GpuDnnTransformerGrid)
for node in st_dnn_func.maker.fgraph.toposort()])
assert any([isinstance(node.op, dnn.GpuDnnTransformerSampler)
for node in st_dnn_func.maker.fgraph.toposort()])
img_out_gpu = st_dnn_func(img, transform)
......@@ -2508,21 +2510,3 @@ def test_dnn_spatialtf_grad():
assert any([isinstance(node.op, dnn.GpuDnnTransformerGradT)
for node in grad_fn.maker.fgraph.toposort()])
# Verify grad wrt input
def functor_wrt_i(input):
desc = dnn.GpuDnnTransformerDesc(theano.config.floatX)(out_shp)
transformed_input = dnn.GpuDnnTransformer()(input, theta, desc)
grad = T.grad(T.mean(transformed_input), input)
return grad
# Verify grad wrt theta
def functor_wrt_t(theta):
desc = dnn.GpuDnnTransformerDesc(theano.config.floatX)(out_shp)
transformed_input = dnn.GpuDnnTransformer()(img, theta, out, desc)
grad = T.grad(T.mean(transformed_input), theta)
return grad
utt.verify_grad(functor_wrt_i, [img])
utt.verify_grad(functor_wrt_t, [theta])
\ No newline at end of file
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论