Merge spatial transformer implementation into a single Op, GpuDnnTransformer

上级 09e362f2
#section support_code
/*
 * Generate the sampling grid of a cuDNN 2D spatial transformer.
 *
 * grid_dimensions : 1-D numpy array of 4 integers (num_images, num_channels,
 *                   height, width) describing the output tensor.
 * theta           : GPU array of affine transformations, shape (num_images, 2, 3),
 *                   dtype float16/float32/float64, C-contiguous.
 * desc            : previously configured cuDNN spatial transformer descriptor.
 * grid            : in/out pointer to the coordinate grid; reallocated only when
 *                   missing or of the wrong shape/dtype. On success holds a
 *                   C-contiguous array of shape (num_images, height, width, 2).
 * _handle         : cuDNN handle.
 *
 * Returns 0 on success, -1 on failure with a Python exception set.
 */
int
spatialtf_grid(PyArrayObject * grid_dimensions,
               PyGpuArrayObject * theta,
               cudnnSpatialTransformerDescriptor_t desc,
               PyGpuArrayObject ** grid,
               cudnnHandle_t _handle)
{
    PyGpuContextObject * gpu_ctx = theta->context;
    cudnnStatus_t err;

    if ( theta->ga.typecode != GA_FLOAT &&
         theta->ga.typecode != GA_DOUBLE &&
         theta->ga.typecode != GA_HALF )
    {
        PyErr_SetString( PyExc_TypeError, "Unsupported data type for theta" );
        return -1;
    }

    if ( PyGpuArray_NDIM( theta ) != 3 )
    {
        PyErr_SetString( PyExc_RuntimeError,
                         "theta must have three dimensions!" );
        return -1;
    }

    // BUG FIX: the original test used &&, which only rejected theta when BOTH
    // trailing dimensions were wrong, letting e.g. (n, 2, 5) slip through.
    // theta must be exactly (num_images, 2, 3), so reject when EITHER is wrong.
    if ( PyGpuArray_DIM( theta, 1 ) != 2 || PyGpuArray_DIM( theta, 2 ) != 3 )
    {
        // Dimensions are size_t; cast to unsigned long for a portable format
        // specifier (the original passed size_t through %d: undefined behavior
        // on LP64 platforms).
        PyErr_Format( PyExc_RuntimeError,
            "Incorrect dimensions for theta, should be (%lu, %lu, %lu), got (%lu, %lu, %lu)",
            (unsigned long) PyGpuArray_DIMS( theta )[0], 2lu, 3lu,
            (unsigned long) PyGpuArray_DIMS( theta )[0],
            (unsigned long) PyGpuArray_DIMS( theta )[1],
            (unsigned long) PyGpuArray_DIMS( theta )[2] );
        return -1;
    }

    // grid_dimensions is a 1-D vector that must hold exactly 4 entries
    // (the original message wrongly said "4 dimensions").
    if ( PyArray_DIM( grid_dimensions, 0 ) != 4 )
    {
        PyErr_SetString( PyExc_RuntimeError,
                         "grid_dimensions must have 4 elements!" );
        return -1;
    }

    // Obtain grid dimensions. Entry 1 (number of image channels) is not
    // needed for the coordinate grid and is deliberately skipped.
    const size_t num_images = (size_t) *( (npy_int *) PyArray_GETPTR1( grid_dimensions, 0 ) );
    const size_t height = (size_t) *( (npy_int *) PyArray_GETPTR1( grid_dimensions, 2 ) );
    const size_t width = (size_t) *( (npy_int *) PyArray_GETPTR1( grid_dimensions, 3 ) );
    // Grid of coordinates is of size num_images * height * width * 2 for a 2D transformation
    const size_t grid_dims[4] = { num_images, height, width, 2 };

    if ( width == 0 || height == 0 || num_images == 0 )
    {
        PyErr_SetString( PyExc_RuntimeError,
                         "One of the grid dimensions is zero" );
        return -1;
    }

    // Reuse the previously allocated grid when it already has the right
    // shape and dtype; otherwise (re)allocate.
    // BUG FIX: the original passed (*grid)->ga.typecode as the expected
    // typecode, comparing the cached grid's dtype against itself (always
    // true), so a cached grid whose dtype differed from theta's was reused.
    // The grid must match theta's dtype.
    if ( NULL == *grid ||
         ! theano_size_check( *grid, 4, grid_dims, theta->ga.typecode ) )
    {
        Py_XDECREF( *grid );
        *grid = pygpu_empty( 4, grid_dims, theta->ga.typecode, GA_C_ORDER,
                             gpu_ctx, Py_None );
        if ( NULL == *grid )
        {
            PyErr_SetString( PyExc_MemoryError,
                             "Could not allocate memory for grid of coordinates" );
            return -1;
        }
    }

    // cuDNN requires densely packed (C-contiguous) buffers.
    if ( ! GpuArray_IS_C_CONTIGUOUS( &(theta->ga) ) )
    {
        PyErr_SetString( PyExc_MemoryError,
                         "theta data is not C-contiguous" );
        return -1;
    }

    if ( ! GpuArray_IS_C_CONTIGUOUS( &((*grid)->ga) ) )
    {
        PyErr_SetString( PyExc_MemoryError,
                         "grid data is not C-contiguous" );
        return -1;
    }

    // Synchronize with any pending GPU work on the buffers before the
    // cuDNN call, and record our usage afterwards.
    cuda_wait( theta->ga.data, GPUARRAY_CUDA_WAIT_READ );
    cuda_wait( (*grid)->ga.data, GPUARRAY_CUDA_WAIT_WRITE );

    const void * theta_data = PyGpuArray_DEV_DATA( theta );
    void * grid_data = PyGpuArray_DEV_DATA( *grid );

    err = cudnnSpatialTfGridGeneratorForward( _handle, desc, theta_data, grid_data );

    cuda_record( theta->ga.data, GPUARRAY_CUDA_WAIT_READ );
    cuda_record( (*grid)->ga.data, GPUARRAY_CUDA_WAIT_WRITE );

    if ( CUDNN_STATUS_SUCCESS != err )
    {
        PyErr_Format( PyExc_RuntimeError,
                      "Failed to create grid of coordinates: %s",
                      cudnnGetErrorString( err ) );
        return -1;
    }

    return 0;
}
...@@ -2833,7 +2833,7 @@ def local_abstractconv3d_cudnn_graph(op, context_name, inputs, outputs): ...@@ -2833,7 +2833,7 @@ def local_abstractconv3d_cudnn_graph(op, context_name, inputs, outputs):
return [rval] return [rval]
class GpuDnnSpatialTfDesc(COp): class _GpuDnnTransformerDescriptor(COp):
""" """
This Op builds a spatial transformer descriptor for use in spatial transformer network This Op builds a spatial transformer descriptor for use in spatial transformer network
...@@ -2859,13 +2859,13 @@ class GpuDnnSpatialTfDesc(COp): ...@@ -2859,13 +2859,13 @@ class GpuDnnSpatialTfDesc(COp):
return False return False
def __init__(self, dtype=theano.config.floatX): def __init__(self, dtype=theano.config.floatX):
COp.__init__(self, ["c_code/spatialtf_desc.c"], "APPLY_SPECIFIC(spatialtf_desc)") COp.__init__(self, ["c_code/dnn_sptf_desc.c"], "APPLY_SPECIFIC(dnn_sptf_desc)")
assert cudnn.cudnnDataType_t.has_alias(dtype) assert cudnn.cudnnDataType_t.has_alias(dtype)
self.dtype = dtype self.dtype = dtype
def make_node(self, dimensions): def make_node(self, dimensions):
# cuDNN supports only 2D transformations, therefore output tensor must # cuDNN supports only 2D transformations, and the output tensor must
# have exactly 4 dimensions: (num_images, num_channels, height, width) # have exactly 4 dimensions: (num_images, num_channels, height, width)
assert len(dimensions) == 4 assert len(dimensions) == 4
dimensions = tuple(dimensions) dimensions = tuple(dimensions)
...@@ -2883,63 +2883,31 @@ class GpuDnnSpatialTfDesc(COp): ...@@ -2883,63 +2883,31 @@ class GpuDnnSpatialTfDesc(COp):
return node return node
def c_code_cache_version(self): def c_code_cache_version(self):
return (super(GpuDnnSpatialTfDesc, self).c_code_cache_version(), version()) return (super(_GpuDnnTransformerDescriptor, self).c_code_cache_version(), version())
class GpuDnnGridGenerator(DnnBase): class GpuDnnTransformer(DnnBase):
""" """
This Op builds a spatial transformer grid generator for use in spatial transformer network This Op builds a spatial transformer that can be used in spatial transformer networks.
operations.
""" """
__props__ = ('dtype',) __props__ = ('dtype',)
_cop_num_inputs = 3 _cop_num_inputs = 6
_cop_num_outputs = 1 _cop_num_outputs = 1
_f16_ok = True
def __init__(self, dtype): def __init__(self, dtype):
DnnBase.__init__(self, ["c_code/spatialtf_grid.c"], "spatialtf_grid") DnnBase.__init__(self, ["c_code/dnn_sptf.c"], "dnn_sptf")
self.dtype = dtype self.dtype = dtype
def make_node(self, grid_dimensions, theta, desc): def make_node(self, img, theta, grid_dims, desc, alpha=None, beta=None):
context_name = infer_context_name(desc, theta)
grid_dimensions = as_tensor_variable(grid_dimensions)
theta = gpu_contiguous(as_gpuarray_variable(theta, context_name))
assert theta.dtype in ('float16', 'float32', 'float64') assert theta.dtype in ('float16', 'float32', 'float64')
# Allocate GPU memory for grid of coordinates context_name = infer_context_name(img)
grid = GpuArrayType(dtype=self.dtype,
broadcastable=(False, False, False, False,),
context_name=context_name)()
return Apply(self, [grid_dimensions, theta, desc], [grid])
def L_op(self, inputs, outputs, output_grads):
pass
class GpuDnnGridSampler(DnnBase):
"""
This Op builds a spatial transformer grid sampler for use in spatial transformer network
operations.
"""
__props__ = ('dtype',)
_cop_num_inputs = 5
_cop_num_outputs = 1
def __init__(self, dtype):
DnnBase.__init__(self, ["c_code/spatialtf_sampler.c"], "spatialtf_sampler")
self.dtype = dtype
def make_node(self, img, grid, desc, alpha=None, beta=None):
context_name = infer_context_name(img, grid)
theta = gpu_contiguous(as_gpuarray_variable(theta, context_name))
img = as_gpuarray_variable(img, context_name) img = as_gpuarray_variable(img, context_name)
grid = as_gpuarray_variable(grid, context_name) grid_dims = as_tensor_variable(grid_dims)
output = GpuArrayType(dtype=self.dtype, output = GpuArrayType(dtype=self.dtype,
broadcastable=img.type.ndim * (False,), broadcastable=img.type.ndim * (False,),
...@@ -2955,9 +2923,9 @@ class GpuDnnGridSampler(DnnBase): ...@@ -2955,9 +2923,9 @@ class GpuDnnGridSampler(DnnBase):
alpha = ensure_dt(alpha, _one, 'alpha', img.dtype) alpha = ensure_dt(alpha, _one, 'alpha', img.dtype)
beta = ensure_dt(beta, _zero, 'beta', img.dtype) beta = ensure_dt(beta, _zero, 'beta', img.dtype)
return Apply(self, [img, grid, desc, alpha, beta], [output]) return Apply(self, [img, theta, grid_dims, desc, alpha, beta], [output])
def L_op(self, inputs, outputs, output_grads): def L_op(self, inputs, outputs, grads):
pass pass
...@@ -3011,13 +2979,12 @@ def dnn_spatialtf(inp, theta, scale_width=1, scale_height=1, alpha=None, beta=No ...@@ -3011,13 +2979,12 @@ def dnn_spatialtf(inp, theta, scale_width=1, scale_height=1, alpha=None, beta=No
theta = gpu_contiguous(theta) theta = gpu_contiguous(theta)
# Create spatial transformer descriptor # Create spatial transformer descriptor
desc = GpuDnnSpatialTfDesc(dtype)(grid_dims) desc = _GpuDnnTransformerDescriptor(dtype)(grid_dims)
# Create grid dimensions variable # Create grid dimensions variable
grid_dims_var = as_tensor_variable(grid_dims) grid_dims_var = as_tensor_variable(grid_dims)
# Setup and return sampling grid # Setup spatial transformer
grid_coord = GpuDnnGridGenerator(dtype)(grid_dims_var, theta, desc) transformer = GpuDnnTransformer(dtype)(inp, theta, grid_dims_var, desc, alpha, beta)
grid_sampler = GpuDnnGridSampler(dtype)(inp, grid_coord, desc, alpha, beta) return transformer
return grid_sampler
@local_optimizer([AbstractConv2d, AbstractConv3d]) @local_optimizer([AbstractConv2d, AbstractConv3d])
......
...@@ -2440,7 +2440,6 @@ def test_dnn_spatialtf(): ...@@ -2440,7 +2440,6 @@ def test_dnn_spatialtf():
img = np.random.randint(low=0, high=256, size=img_dims) img = np.random.randint(low=0, high=256, size=img_dims)
# Convert from NHWC to NCHW # Convert from NHWC to NCHW
img = np.transpose(img, axes=(0, 3, 1, 2)).astype(theano.config.floatX) img = np.transpose(img, axes=(0, 3, 1, 2)).astype(theano.config.floatX)
gpu_img = gpuarray_shared_constructor(img)
# Downsample image dimensions by a factor of 2, i.e. our output tensor will # Downsample image dimensions by a factor of 2, i.e. our output tensor will
# have shape (n, c, h / 2, w / 2) # have shape (n, c, h / 2, w / 2)
scale_height = 0.25 scale_height = 0.25
...@@ -2451,25 +2450,25 @@ def test_dnn_spatialtf(): ...@@ -2451,25 +2450,25 @@ def test_dnn_spatialtf():
[0, -1, 0]] [0, -1, 0]]
transform = np.asarray(img_dims[0] * [theta], dtype=theano.config.floatX) transform = np.asarray(img_dims[0] * [theta], dtype=theano.config.floatX)
gpu_transform = gpuarray_shared_constructor(transform)
st_dnn = dnn.dnn_spatialtf(gpu_img, gpu_transform, scale_height=scale_height, # Create symbolic variables for inputs and transformations
t_img = T.tensor4('img')
t_theta = T.tensor3('theta')
st_dnn = dnn.dnn_spatialtf(t_img, t_theta, scale_height=scale_height,
scale_width=scale_width) scale_width=scale_width)
st_dnn_func = theano.function([], [st_dnn]) st_dnn_func = theano.function([t_img, t_theta], [st_dnn])
img_out_gpu, = st_dnn_func(img, transform)
img_out = np.asarray(img_out_gpu)
# Check if function graph contains the spatial transformer Ops # Check if function graph contains the spatial transformer Ops
topo = st_dnn_func.maker.fgraph.toposort() topo = st_dnn_func.maker.fgraph.toposort()
assert len([n for n in topo if isinstance(n.op, dnn.GpuDnnGridGenerator)]) == 1 assert len([n for n in topo if isinstance(n.op, dnn.GpuDnnTransformer)]) == 1
assert len([n for n in topo if isinstance(n.op, dnn.GpuDnnGridSampler)]) == 1
# Setup CPU Op # Setup CPU Op
t_img = T.tensor4('img')
t_theta = T.tensor3('theta')
st_cpu = spatialtf_cpu(t_theta, t_img, scale_height, scale_width, 'nearest') st_cpu = spatialtf_cpu(t_theta, t_img, scale_height, scale_width, 'nearest')
st_cpu_func = theano.function([t_theta, t_img], [st_cpu], mode=mode_without_gpu) st_cpu_func = theano.function([t_theta, t_img], [st_cpu], mode=mode_without_gpu)
res, = st_cpu_func(transform, img) res, = st_cpu_func(transform, img)
img_out_gpu = st_dnn_func()
img_out = np.asarray(img_out_gpu[0])
utt.assert_allclose(img_out, res, rtol=1e-2, atol=1e-2) utt.assert_allclose(img_out, res, rtol=1e-2, atol=1e-2)
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论