Commit 388f057b authored by João Victor Risso

Remove spatial transformer descriptor from grid and sampler Ops

Parent ed89dc9e
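The separate descriptor Op is gone: the grid and sampler Ops now each own a cudnnSpatialTransformerDescriptor_t in their struct and configure it on every call, and the grid generator takes the output dimensions directly. At graph-construction level the change amounts to the following sketch (variable names are illustrative; the calls mirror the updated dnn_spatialtf at the end of this diff):

    # New wiring after this commit (sketch); out_dims is an int64 vector (N, C, H, W)
    grid = GpuDnnTransformerGrid()(theta, out_dims)    # no descriptor input anymore
    sampler = GpuDnnTransformerSampler()(img, grid)    # descriptor is managed inside each Op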
c_code/dnn_sptf_desc.c (deleted)

#section support_code_apply

int APPLY_SPECIFIC(dnn_sptf_desc)(PyArrayObject * out_dims,
                                  cudnnSpatialTransformerDescriptor_t * desc,
                                  PARAMS_TYPE * params)
{
    cudnnStatus_t err;

    const int nimages = (int) *((npy_int64 *) PyArray_GETPTR1(out_dims, 0));
    const int nchannels = (int) *((npy_int64 *) PyArray_GETPTR1(out_dims, 1));
    const int height = (int) *((npy_int64 *) PyArray_GETPTR1(out_dims, 2));
    const int width = (int) *((npy_int64 *) PyArray_GETPTR1(out_dims, 3));

    if ( nimages == 0 || nchannels == 0 || height == 0 || width == 0 )
    {
        PyErr_SetString( PyExc_RuntimeError,
                         "GpuDnnTransformerDesc: invalid grid dimensions" );
        return 1;
    }

    // num_images, num_channels, height, width
    const int out_tensor_dims[4] = { nimages, nchannels, height, width };

    err = cudnnCreateSpatialTransformerDescriptor( desc );
    if ( CUDNN_STATUS_SUCCESS != err )
    {
        PyErr_Format( PyExc_MemoryError,
                      "GpuDnnTransformerDesc: could not allocate descriptor: %s",
                      cudnnGetErrorString( err ) );
        return 1;
    }

    // Currently, only the bilinear sampler is supported by cuDNN,
    // so it is not available as a parameter
    err = cudnnSetSpatialTransformerNdDescriptor( *desc, CUDNN_SAMPLER_BILINEAR,
                                                  params->precision, 4, out_tensor_dims );
    if ( CUDNN_STATUS_SUCCESS != err )
    {
        PyErr_Format( PyExc_MemoryError,
                      "GpuDnnTransformerDesc: could not initialize descriptor: %s",
                      cudnnGetErrorString( err ) );
        return 1;
    }

    return 0;
}
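This deleted helper backed the GpuDnnTransformerDesc Op that is removed from the Python module below. For reference, the descriptor-based wiring it supported looked roughly like this (a sketch assembled from the removed Python code further down; variable names are illustrative):

    # Old, descriptor-based wiring (removed by this commit)
    desc = GpuDnnTransformerDesc(precision)(out_dims)      # wrapped a cudnnSpatialTransformerDescriptor_t
    grid = GpuDnnTransformerGrid()(theta, desc)
    sampler = GpuDnnTransformerSampler()(img, grid, desc)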
c_code/dnn_sptf_grid.c

 #section support_code_struct
+
+cudnnSpatialTransformerDescriptor_t APPLY_SPECIFIC(sptf);
+
+#section init_code_struct
+
+cudnnStatus_t APPLY_SPECIFIC(err) = CUDNN_STATUS_SUCCESS;
+APPLY_SPECIFIC(sptf) = NULL;
+if ((APPLY_SPECIFIC(err) = cudnnCreateSpatialTransformerDescriptor(&APPLY_SPECIFIC(sptf))) != CUDNN_STATUS_SUCCESS)
+{
+    PyErr_Format(PyExc_MemoryError,
+                 "GpuDnnTransformerGrid: could not allocate spatial transformer descriptor (sptf): %s",
+                 cudnnGetErrorString(APPLY_SPECIFIC(err)));
+    FAIL;
+}
+
+#section cleanup_code_struct
+
+if (APPLY_SPECIFIC(sptf) != NULL) { cudnnDestroySpatialTransformerDescriptor(APPLY_SPECIFIC(sptf)); }
+
+#section support_code_struct
+
 int
 APPLY_SPECIFIC(dnn_sptf_grid)(PyGpuArrayObject * theta,
                               PyArrayObject * out_dims,
-                              cudnnSpatialTransformerDescriptor_t desc,
                               PyGpuArrayObject ** grid,
                               cudnnHandle_t _handle)
 {
     PyGpuContextObject * gpu_ctx = theta->context;
     size_t grid_dims[4];
     int num_images, num_channels, height, width;
+    int desc_dims[4];
+    cudnnDataType_t dt;
     cudnnStatus_t err = CUDNN_STATUS_SUCCESS;

-    if ( theta->ga.typecode != GA_FLOAT &&
-         theta->ga.typecode != GA_DOUBLE &&
-         theta->ga.typecode != GA_HALF )
+    switch ( theta->ga.typecode )
     {
+    case GA_DOUBLE:
+        dt = CUDNN_DATA_DOUBLE;
+        break;
+    case GA_FLOAT:
+        dt = CUDNN_DATA_FLOAT;
+        break;
+    case GA_HALF:
+        dt = CUDNN_DATA_HALF;
+        break;
+    default:
         PyErr_SetString( PyExc_TypeError,
                          "GpuDnnTransformerGrid: unsupported data type for theta in spatial transformer." );
         return 1;
     }
-    else if ( PyGpuArray_DIM( theta, 1 ) != 2 || PyGpuArray_DIM( theta, 2 ) != 3 )
+
+    if ( PyGpuArray_DIM( theta, 1 ) != 2 || PyGpuArray_DIM( theta, 2 ) != 3 )
     {
         PyErr_Format( PyExc_RuntimeError,
                       "GpuDnnTransformerGrid: incorrect dimensions for theta, expected (%d, %d, %d), got (%d, %d, %d)",

@@ -38,14 +69,33 @@ APPLY_SPECIFIC(dnn_sptf_grid)(PyGpuArrayObject * theta,
     // Obtain output dimensions
     num_images = (int) *( (npy_int64 *) PyArray_GETPTR1( out_dims, 0 ) );
+    num_channels = (int) *( (npy_int64 *) PyArray_GETPTR1( out_dims, 1 ) );
     height = (int) *( (npy_int64 *) PyArray_GETPTR1( out_dims, 2 ) );
     width = (int) *( (npy_int64 *) PyArray_GETPTR1( out_dims, 3 ) );

-    // Set grid dimensions
+    // Set transformed output dimensions to setup the descriptor
+    desc_dims[0] = num_images;
+    desc_dims[1] = num_channels;
+    desc_dims[2] = height;
+    desc_dims[3] = width;
+
+    // Set sampling grid dimensions
     grid_dims[0] = num_images;
     grid_dims[1] = height;
     grid_dims[2] = width;
     grid_dims[3] = 2;

+    // Currently, only the bilinear sampler is supported by cuDNN,
+    // so the sampler method is currently not available as a parameter
+    err = cudnnSetSpatialTransformerNdDescriptor( APPLY_SPECIFIC(sptf), CUDNN_SAMPLER_BILINEAR,
+                                                  dt, 4, desc_dims );
+    if ( CUDNN_STATUS_SUCCESS != err )
+    {
+        PyErr_Format( PyExc_MemoryError,
+                      "GpuDnnTransformerGrid: could not initialize descriptor (sptf): %s",
+                      cudnnGetErrorString( err ) );
+        return 1;
+    }
+
     if ( theano_prep_output( grid, 4, grid_dims, theta->ga.typecode,
                              GA_C_ORDER, gpu_ctx ) != 0 )
     {

@@ -59,8 +109,8 @@ APPLY_SPECIFIC(dnn_sptf_grid)(PyGpuArrayObject * theta,
     cuda_wait( theta->ga.data, GPUARRAY_CUDA_WAIT_READ );
     cuda_wait( (*grid)->ga.data, GPUARRAY_CUDA_WAIT_WRITE );

-    err = cudnnSpatialTfGridGeneratorForward( _handle, desc, PyGpuArray_DEV_DATA( theta ),
-                                              PyGpuArray_DEV_DATA( *grid ) );
+    err = cudnnSpatialTfGridGeneratorForward( _handle, APPLY_SPECIFIC(sptf),
+                                              PyGpuArray_DEV_DATA( theta ), PyGpuArray_DEV_DATA( *grid ) );

     cuda_record( theta->ga.data, GPUARRAY_CUDA_WAIT_READ );
     cuda_record( (*grid)->ga.data, GPUARRAY_CUDA_WAIT_WRITE );
...
c_code/dnn_sptf_sampler.c

 #section support_code_struct
+
+cudnnSpatialTransformerDescriptor_t APPLY_SPECIFIC(sptf);
 cudnnTensorDescriptor_t APPLY_SPECIFIC(xdesc);
 cudnnTensorDescriptor_t APPLY_SPECIFIC(ydesc);

 #section init_code_struct
+
+APPLY_SPECIFIC(sptf) = NULL;
 APPLY_SPECIFIC(xdesc) = NULL;
 APPLY_SPECIFIC(ydesc) = NULL;

 {
     cudnnStatus_t err = CUDNN_STATUS_SUCCESS;

+    err = cudnnCreateSpatialTransformerDescriptor( &APPLY_SPECIFIC(sptf) );
+    if ( err != CUDNN_STATUS_SUCCESS )
+    {
+        PyErr_Format( PyExc_MemoryError,
+                      "GpuDnnTransformerSampler: could not allocate spatial transformer descriptor (sptf): %s",
+                      cudnnGetErrorString( err ) );
+        FAIL;
+    }
+
     err = cudnnCreateTensorDescriptor( &APPLY_SPECIFIC(xdesc) );
     if ( err != CUDNN_STATUS_SUCCESS )
     {

@@ -31,6 +43,9 @@ APPLY_SPECIFIC(ydesc) = NULL;
 #section cleanup_code_struct
+
+if ( APPLY_SPECIFIC(sptf) != NULL )
+    cudnnDestroySpatialTransformerDescriptor( APPLY_SPECIFIC(sptf) );
 if ( APPLY_SPECIFIC(xdesc) != NULL )
     cudnnDestroyTensorDescriptor( APPLY_SPECIFIC(xdesc) );

@@ -42,7 +57,6 @@ if ( APPLY_SPECIFIC(ydesc) != NULL )
 int
 APPLY_SPECIFIC(dnn_sptf_sampler)(PyGpuArrayObject * input,
                                  PyGpuArrayObject * grid,
-                                 cudnnSpatialTransformerDescriptor_t desc,
                                  PyGpuArrayObject ** output,
                                  cudnnHandle_t _handle)
 {

@@ -52,6 +66,8 @@ APPLY_SPECIFIC(dnn_sptf_sampler)(PyGpuArrayObject * input,
     double alpha = 1.0, beta = 0.0;
     float af = alpha, bf = beta;
     size_t out_dims[4];
+    int desc_dims[4];
+    cudnnDataType_t dt;
     cudnnStatus_t err = CUDNN_STATUS_SUCCESS;

     switch (input->ga.typecode)

@@ -59,14 +75,17 @@ APPLY_SPECIFIC(dnn_sptf_sampler)(PyGpuArrayObject * input,
     case GA_DOUBLE:
         alpha_p = (void *)&alpha;
         beta_p = (void *)&beta;
+        dt = CUDNN_DATA_DOUBLE;
         break;
     case GA_FLOAT:
         alpha_p = (void *)&af;
         beta_p = (void *)&bf;
+        dt = CUDNN_DATA_FLOAT;
         break;
     case GA_HALF:
         alpha_p = (void *)&af;
         beta_p = (void *)&bf;
+        dt = CUDNN_DATA_HALF;
         break;
     default:
         PyErr_SetString( PyExc_TypeError,

@@ -78,6 +97,11 @@ APPLY_SPECIFIC(dnn_sptf_sampler)(PyGpuArrayObject * input,
     out_dims[1] = (size_t) PyGpuArray_DIM(input, 1); // num_channels
     out_dims[2] = (size_t) PyGpuArray_DIM(grid, 1);  // grid height
     out_dims[3] = (size_t) PyGpuArray_DIM(grid, 2);  // grid width
+
+    // Set output dimensions for the descriptor setup
+    desc_dims[0] = (int) out_dims[0];
+    desc_dims[1] = (int) out_dims[1];
+    desc_dims[2] = (int) out_dims[2];
+    desc_dims[3] = (int) out_dims[3];

     if ( out_dims[0] == 0 || out_dims[1] == 0 || out_dims[2] == 0 || out_dims[3] == 0 )
     {

@@ -94,6 +118,18 @@ APPLY_SPECIFIC(dnn_sptf_sampler)(PyGpuArrayObject * input,
         return 1;
     }

+    // Currently, only the bilinear sampler is supported by cuDNN,
+    // so the sampler method is currently not available as a parameter
+    err = cudnnSetSpatialTransformerNdDescriptor( APPLY_SPECIFIC(sptf), CUDNN_SAMPLER_BILINEAR,
+                                                  dt, 4, desc_dims );
+    if ( CUDNN_STATUS_SUCCESS != err )
+    {
+        PyErr_Format( PyExc_MemoryError,
+                      "GpuDnnTransformerSampler: could not initialize descriptor: %s",
+                      cudnnGetErrorString( err ) );
+        return 1;
+    }
+
     if ( c_set_tensorNd( input, APPLY_SPECIFIC(xdesc) ) != 0 )
         return 1;

@@ -106,9 +142,9 @@ APPLY_SPECIFIC(dnn_sptf_sampler)(PyGpuArrayObject * input,
     cuda_wait( grid->ga.data, GPUARRAY_CUDA_WAIT_READ );
     cuda_wait( (*output)->ga.data, GPUARRAY_CUDA_WAIT_WRITE );

-    err = cudnnSpatialTfSamplerForward( _handle, desc, alpha_p, APPLY_SPECIFIC(xdesc),
-                                        PyGpuArray_DEV_DATA( input ), PyGpuArray_DEV_DATA( grid ), beta_p,
-                                        APPLY_SPECIFIC(ydesc), PyGpuArray_DEV_DATA( *output ) );
+    err = cudnnSpatialTfSamplerForward( _handle, APPLY_SPECIFIC(sptf), alpha_p,
+                                        APPLY_SPECIFIC(xdesc), PyGpuArray_DEV_DATA( input ), PyGpuArray_DEV_DATA( grid ),
+                                        beta_p, APPLY_SPECIFIC(ydesc), PyGpuArray_DEV_DATA( *output ) );

     cuda_record( input->ga.data, GPUARRAY_CUDA_WAIT_READ );
     cuda_record( grid->ga.data, GPUARRAY_CUDA_WAIT_READ );
...
theano/gpuarray/dnn.py

@@ -13,6 +13,7 @@ from theano import Op, Apply, tensor, config, Variable
 from theano.scalar import (as_scalar, constant, Log, get_scalar_type,
                            int32 as int_t, bool as bool_t, uint32 as uint32_t)
 from theano.tensor import as_tensor_variable, Argmax
+from theano.tensor.extra_ops import cpu_contiguous
 from theano.gradient import DisconnectedType, grad_not_implemented
 from theano.gof import Optimizer, local_optimizer, COp, ParamsType, EnumList
 from theano.gof.cmodule import GCC_compiler

@@ -2738,66 +2739,12 @@ def dnn_batch_normalization_test(inputs, gamma, beta, mean, var,
     return result

-class GpuDnnTransformerDesc(COp):
-    """
-    Descriptor Op for cuDNN Spatial Transformer.
-    """
-    __props__ = ('precision',)
-    params_type = ParamsType(precision=cudnn.cudnnDataType_t)
-
-    def c_headers(self):
-        return ['cudnn.h', 'cudnn_helper.h']
-
-    def c_header_dirs(self):
-        header_dirs = [os.path.dirname(__file__)]
-        if config.dnn.include_path:
-            header_dirs += [config.dnn.include_path]
-        return header_dirs
-
-    def c_libraries(self):
-        return ['cudnn']
-
-    def c_lib_dirs(self):
-        lib_dirs = []
-        if config.dnn.library_path:
-            lib_dirs += [config.dnn.library_path]
-        return lib_dirs
-
-    def do_constant_folding(self, node):
-        return False
-
-    def __init__(self, precision=theano.config.floatX):
-        COp.__init__(self, ["c_code/dnn_sptf_desc.c"], "APPLY_SPECIFIC(dnn_sptf_desc)")
-        assert cudnn.cudnnDataType_t.has_alias(precision)
-        self.precision = precision
-
-    def make_node(self, out_dims):
-        out_dims = as_tensor_variable(out_dims)
-        assert out_dims.dtype in theano.tensor.basic.integer_dtypes
-        assert out_dims.ndim == 1
-        out_dims = theano.tensor.basic.cast(out_dims, 'int64')
-
-        node = Apply(self, [out_dims],
-                     [CDataType("cudnnSpatialTransformerDescriptor_t",
-                                freefunc="cudnnDestroySpatialTransformerDescriptor")()])
-        # DebugMode cannot compare the values of CDataType variables, so by
-        # default it returns False all the time. To prevent DebugMode from
-        # complaining because of the MergeOptimizer, we make this variable
-        # always compare to True.
-        out = node.outputs[0]
-        out.tag.values_eq_approx = tensor.type.values_eq_approx_always_true
-        return node
-
-    def c_code_cache_version(self):
-        return (super(GpuDnnTransformerDesc, self).c_code_cache_version(), version())
-
-
 class GpuDnnTransformerGrid(DnnBase):
     """
     Grid generator Op for cuDNN Spatial Transformer.
     """
     __props__ = ()
-    _cop_num_inputs = 3
+    _cop_num_inputs = 2
     _cop_num_outputs = 1
     _f16_ok = True
     check_input = False

@@ -2805,7 +2752,7 @@ class GpuDnnTransformerGrid(DnnBase):
     def __init__(self):
         DnnBase.__init__(self, ["c_code/dnn_sptf_grid.c"], "APPLY_SPECIFIC(dnn_sptf_grid)")

-    def make_node(self, theta, desc):
+    def make_node(self, theta, out_dims):
         """
         Create a grid generator node for a cuDNN Spatial Transformer

@@ -2815,22 +2762,14 @@ class GpuDnnTransformerGrid(DnnBase):
             Affine transformation tensor containing one affine transformation
             matrix per image. ``theta`` is usually generated by the localization
             network.
-        desc : GpuDnnTransformerDesc
-            Spatial transformer descriptor
         """
-        context_name = infer_context_name(desc)
-
-        if (not isinstance(desc.type, CDataType) or
-                desc.type.ctype != 'cudnnSpatialTransformerDescriptor_t'):
-            raise ValueError('desc must be cudnnSpatialTransformerDescriptor_t')
+        context_name = infer_context_name(theta)

         theta = gpu_contiguous(as_gpuarray_variable(theta, context_name))
         assert theta.dtype in ('float16', 'float32', 'float64')
         assert theta.ndim == 3

-        # Setup output dimensions using input from descriptor
-        out_dims = as_tensor_variable(desc.owner.inputs[0])
+        out_dims = cpu_contiguous(as_tensor_variable(out_dims))
         assert out_dims.dtype in theano.tensor.basic.integer_dtypes
         assert out_dims.ndim == 1
         # Ensure 64-bit ints are passed to the C code

@@ -2839,20 +2778,16 @@ class GpuDnnTransformerGrid(DnnBase):
                             broadcastable=(theta.type.ndim + 1) * (False,),
                             context_name=context_name)()

-        inputs = [theta, out_dims, desc]
+        inputs = [theta, out_dims]
         outputs = [grid]

         return Apply(self, inputs, outputs)

     def grad(self, inputs, grads):
-        theta, out_dims, desc = inputs
+        theta, out_dims = inputs
         dgrid = grads[0]

-        dtheta = GpuDnnTransformerGradT()(dgrid, desc)
-        return [dtheta, grad_not_implemented(self, 1, out_dims), DisconnectedType()()]
-
-    def connection_pattern(self, node):
-        # not connected to desc
-        return [[1], [1], [0]]
+        dtheta = GpuDnnTransformerGradT()(dgrid)
+        return [dtheta, grad_not_implemented(self, 1, out_dims)]

 class GpuDnnTransformerSampler(DnnBase):

@@ -2860,7 +2795,7 @@ class GpuDnnTransformerSampler(DnnBase):
     Grid sampler Op for cuDNN Spatial Transformer.
     """
     __props__ = ()
-    _cop_num_inputs = 3
+    _cop_num_inputs = 2
     _cop_num_outputs = 1
     _f16_ok = True
     check_input = False

@@ -2868,7 +2803,7 @@ class GpuDnnTransformerSampler(DnnBase):
     def __init__(self):
         DnnBase.__init__(self, ["c_code/dnn_sptf_sampler.c"], "APPLY_SPECIFIC(dnn_sptf_sampler)")

-    def make_node(self, img, grid, desc):
+    def make_node(self, img, grid):
         """
         Create a grid sampler node for a cuDNN Spatial Transformer

@@ -2883,15 +2818,8 @@ class GpuDnnTransformerSampler(DnnBase):
         grid : GpuDnnTransformerGrid
             Grid that contains the coordinates of the pixels to be sampled from
             the inputs images.
-        desc : GpuDnnTransformerDesc
-            Spatial transformer descriptor
         """
-        context_name = infer_context_name(desc)
-
-        if (not isinstance(desc.type, CDataType) or
-                desc.type.ctype != 'cudnnSpatialTransformerDescriptor_t'):
-            raise ValueError('desc must be cudnnSpatialTransformerDescriptor_t')
+        context_name = infer_context_name(img, grid)

         img = gpu_contiguous(as_gpuarray_variable(img, context_name))
         if img.type.ndim != 4:

@@ -2909,20 +2837,16 @@ class GpuDnnTransformerSampler(DnnBase):
                            broadcastable=img.type.ndim * (False,),
                            context_name=context_name)()

-        inputs = [img, grid, desc]
+        inputs = [img, grid]
         outputs = [out]

         return Apply(self, inputs, outputs)

     def grad(self, inputs, grads):
-        img, grid, desc = inputs
+        img, grid = inputs
         dy = grads[0]

-        dimg, dgrid = GpuDnnTransformerGradI()(img, grid, dy, desc)
-        return [dimg, dgrid, DisconnectedType()()]
-
-    def connection_pattern(self, node):
-        # not connected to desc
-        return [[1], [1], [0]]
+        dimg, dgrid = GpuDnnTransformerGradI()(img, grid, dy)
+        return [dimg, dgrid]

 class GpuDnnTransformerGradI(DnnBase):

@@ -2930,7 +2854,7 @@ class GpuDnnTransformerGradI(DnnBase):
     Gradient of inputs Op for cuDNN Spatial Transformer.
     """
     __props__ = ()
-    _cop_num_inputs = 4
+    _cop_num_inputs = 3
     _cop_num_outputs = 2
     _f16_ok = True
     check_input = False

@@ -2938,12 +2862,8 @@ class GpuDnnTransformerGradI(DnnBase):
     def __init__(self):
         DnnBase.__init__(self, ["c_code/dnn_sptf_gi.c"], "APPLY_SPECIFIC(dnn_sptf_gi)")

-    def make_node(self, img, grid, dy, desc):
-        context_name = infer_context_name(img, grid, dy, desc)
-
-        if (not isinstance(desc.type, CDataType) or
-                desc.type.ctype != 'cudnnSpatialTransformerDescriptor_t'):
-            raise ValueError('desc must be cudnnSpatialTransformerDescriptor_t')
+    def make_node(self, img, grid, dy):
+        context_name = infer_context_name(img, grid, dy)

         img = as_gpuarray_variable(gpu_contiguous(img), context_name)
         if img.ndim != 4:

@@ -2960,22 +2880,18 @@ class GpuDnnTransformerGradI(DnnBase):
         dimg = img.type()
         dgrid = grid.type()

-        inputs = [img, grid, dy, desc]
+        inputs = [img, grid, dy]
         outputs = [dimg, dgrid]

         return Apply(self, inputs, outputs)

-    def connection_pattern(self, node):
-        # not connected to desc
-        return [[1, 1], [1, 1], [1, 1], [0, 0]]
-

 class GpuDnnTransformerGradT(DnnBase):
     """
     Gradient of affine transformations Op for cuDNN Spatial Transformer.
     """
     __props__ = ()
-    _cop_num_inputs = 2
+    _cop_num_inputs = 1
     _cop_num_outputs = 1
     _f16_ok = True
     check_input = False

@@ -2983,12 +2899,8 @@ class GpuDnnTransformerGradT(DnnBase):
     def __init__(self):
         DnnBase.__init__(self, ["c_code/dnn_sptf_gt.c"], "APPLY_SPECIFIC(dnn_sptf_gt)")

-    def make_node(self, dgrid, desc):
-        context_name = infer_context_name(desc)
-
-        if (not isinstance(desc.type, CDataType) or
-                desc.type.ctype != 'cudnnSpatialTransformerDescriptor_t'):
-            raise ValueError('desc must be cudnnSpatialTransformerDescriptor_t')
+    def make_node(self, dgrid):
+        context_name = infer_context_name(dgrid)

         dgrid = as_gpuarray_variable(dgrid, context_name)
         assert dgrid.dtype in ('float16', 'float32', 'float64')

@@ -2997,15 +2909,11 @@ class GpuDnnTransformerGradT(DnnBase):
         dtheta = GpuArrayType(dtype=dgrid.dtype,
                               broadcastable=(dgrid.type.ndim - 1) * (False,),
                               context_name=context_name)()
-        inputs = [dgrid, desc]
+        inputs = [dgrid]
         outputs = [dtheta]

         return Apply(self, inputs, outputs)

-    def connection_pattern(self, node):
-        # not connected to desc
-        return [[1], [0]]
-

 def dnn_spatialtf(img, theta, scale_width=1, scale_height=1, precision=theano.config.floatX):
     """

@@ -3050,10 +2958,7 @@ def dnn_spatialtf(img, theta, scale_width=1, scale_height=1, precision=theano.config.floatX):
                 theano.tensor.ceil(img.shape[3] * scale_width))
     out_dims = tuple([as_scalar(v).astype('int64') for v in out_dims])

-    # Create spatial transformer descriptor
-    desc = GpuDnnTransformerDesc(precision)(out_dims)
-    context_name = infer_context_name(desc)
+    context_name = infer_context_name(img, theta)

     img = gpu_contiguous(as_gpuarray_variable(img, context_name))
     theta = gpu_contiguous(as_gpuarray_variable(theta, context_name))

@@ -3063,8 +2968,8 @@ def dnn_spatialtf(img, theta, scale_width=1, scale_height=1, precision=theano.config.floatX):
     assert theta.ndim == 3

     # Setup spatial transformer
-    grid = GpuDnnTransformerGrid()(theta, desc)
-    sampler = GpuDnnTransformerSampler()(img, grid, desc)
+    grid = GpuDnnTransformerGrid()(theta, out_dims)
+    sampler = GpuDnnTransformerSampler()(img, grid)

     return sampler
...
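For completeness, a minimal end-to-end usage sketch of the updated interface. The import path assumes the standard Theano layout for this module; the shapes, scale factors, and identity transform below are illustrative only, and a cuDNN-enabled GPU context is required.

    import numpy as np
    import theano
    import theano.tensor as T
    from theano.gpuarray.dnn import dnn_spatialtf

    # Hypothetical inputs: 8 images of 3x64x64; theta holds one 2x3 affine
    # matrix per image (here the identity transform), normally produced by a
    # localization network.
    img = T.tensor4('img')
    theta = T.tensor3('theta')
    out = dnn_spatialtf(img, theta, scale_width=0.5, scale_height=0.5)

    f = theano.function([img, theta], out)
    images = np.random.rand(8, 3, 64, 64).astype(theano.config.floatX)
    affines = np.repeat(np.array([[[1., 0., 0.],
                                   [0., 1., 0.]]], dtype=theano.config.floatX),
                        8, axis=0)
    warped = f(images, affines)   # shape (8, 3, 32, 32) with the 0.5 scale factors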