Add basic skeleton for Spatial Transformer Networks' Ops

Parent commit: 743f7aa9
#section support_code_apply
int APPLY_SPECIFIC(spatialtf_desc)(cudnnConvolutionDescriptor_t * desc,
PARAMS_TYPE * params)
{
cudnnStatus_t err;
// width, height, num_feature_maps, num_images
const int out_tensor_dims[4] = { params->dim0, params->dim1, params->dim2, params->dim3 };
err = cudnnCreateSpatialTransformerDescriptor( desc );
if ( CUDNN_STATUS_SUCCESS != err )
{
PyErr_Format( PyExc_MemoryError,
"Failed to allocate spatial transformer descriptor: %s",
cudnnGetErrorString( err ) );
return -1;
}
// Currently, only the bilinear sampler is supported by cuDNN,
// so it is not available as a parameter
err = cudnnSetSpatialTransformerNdDescriptor( *desc, CUDNN_SAMPLER_BILINEAR,
params->precision, params->nb_dims, out_tensor_dims );
if ( CUDNN_STATUS_SUCCESS != err )
{
PyErr_Format( PyExc_MemoryError,
"Failed to initialize spatial transformer descriptor: %s",
cudnnGetErrorString( err ) );
return -1;
}
return 0;
}
#section support_code

/* (Re)create and configure a spatial transformer descriptor for the given
 * theta tensor.  Returns 0 on success, -1 (with a Python exception set)
 * on failure.  Still a skeleton: see the TODOs below. */
int spatialtf_grid(cudnnSpatialTransformerDescriptor_t desc,
                   PyGpuArrayObject * theta,
                   PyGpuArrayObject * num_dimensions,
                   PyGpuArrayObject ** grid,
                   cudnnHandle_t _handle)
{
    cudnnDataType_t dt;
    cudnnStatus_t err;

    /* Map theta's GPU typecode to the corresponding cuDNN datatype. */
    switch( theta->ga.typecode )
    {
    case GA_FLOAT:
        dt = CUDNN_DATA_FLOAT;
        break;
    case GA_DOUBLE:
        dt = CUDNN_DATA_DOUBLE;
        break;
    case GA_HALF:
        dt = CUDNN_DATA_HALF;
        break;
    default:
        PyErr_SetString( PyExc_TypeError, "Unsupported data type for theta" );
        return -1;
    }

    /* Validate the datatype of the dimensions tensor. */
    switch( num_dimensions->ga.typecode )
    {
    case GA_INT:
        break;
    default:
        PyErr_SetString( PyExc_TypeError,
                         "Unsupported data type for the number of dimensions" );
        /* BUG FIX: the original fell through after setting the error and
         * kept running with an invalid input. */
        return -1;
    }

    if ( NULL == desc )
    {
        err = cudnnCreateSpatialTransformerDescriptor( &desc );
        if ( CUDNN_STATUS_SUCCESS != err )
        {
            PyErr_SetString( PyExc_MemoryError,
                             "Could not allocate spatial transformer descriptor" );
            return -1;
        }
        /* TODO: the number of output dimensions and their sizes must be
         * obtained (e.g. from ``num_dimensions`` or from the caller); the
         * original code left these two arguments empty, which did not
         * compile.  Placeholders keep the skeleton well-formed. */
        int nb_dims = 0;        /* TODO: actual number of dimensions */
        int * dim_sizes = NULL; /* TODO: actual output tensor sizes */
        err = cudnnSetSpatialTransformerNdDescriptor( desc,
                                                      CUDNN_SAMPLER_BILINEAR,
                                                      dt, nb_dims, dim_sizes );
        if ( CUDNN_STATUS_SUCCESS != err )
        {
            /* BUG FIX: avoid leaking the descriptor allocated above. */
            cudnnDestroySpatialTransformerDescriptor( desc );
            PyErr_Format( PyExc_RuntimeError,
                          "Could not set spatial transformer descriptor: %s",
                          cudnnGetErrorString( err ) );
            return -1;
        }
        /* NOTE(review): ``desc`` is passed by value, so a descriptor
         * created here is invisible to the caller; it likely needs to be
         * taken as cudnnSpatialTransformerDescriptor_t * — confirm against
         * the eventual caller. */
    }
    return 0;
}
@@ -2833,6 +2833,147 @@ def local_abstractconv3d_cudnn_graph(op, context_name, inputs, outputs):
        return [rval]
class GpuDnnSpatialTfDesc(COp):
    """
    Builds a cuDNN spatial transformer descriptor
    (``cudnnSpatialTransformerDescriptor_t``) for use in spatial
    transformer network operations.

    Parameters
    ----------
    dimensions : tuple of int
        Dimensions of the transformer output tensor (at most 4 entries).
    precision : str
        cuDNN data type alias for the descriptor (e.g. ``"float32"``).
    """

    __props__ = ('dimensions', 'precision')

    # BUG FIX: ``nb_dims`` added — the C code reads ``params->nb_dims``
    # but the original ParamsType did not declare it.
    params_type = ParamsType(dim0=int_t, dim1=int_t, dim2=int_t, dim3=int_t,
                             nb_dims=int_t,
                             precision=cudnn.cudnnDataType_t)

    def __init__(self, dimensions, precision="float32"):
        COp.__init__(self, ["c_code/spatialtf_desc.c"],
                     "APPLY_SPECIFIC(spatialtf_desc)")
        self.dimensions = (dimensions if isinstance(dimensions, tuple)
                           else tuple(dimensions))
        # cuDNN supports only 2D transformations, therefore the output
        # tensor must not exceed 4 dimensions
        # (num_images, num_feature_maps, height, width).
        assert len(self.dimensions) <= 4
        assert cudnn.cudnnDataType_t.has_alias(precision)
        self.precision = precision

    def c_headers(self):
        return ['cudnn.h', 'cudnn_helper.h']

    def c_header_dirs(self):
        return [os.path.dirname(__file__), config.dnn.include_path]

    def c_libraries(self):
        return ['cudnn']

    def c_lib_dirs(self):
        return [config.dnn.library_path]

    def do_constant_folding(self, node):
        # The output is an opaque CDataType descriptor; constant folding
        # it would be meaningless.
        return False

    def make_node(self):
        node = Apply(self, [],
                     [CDataType("cudnnSpatialTransformerDescriptor_t",
                                freefunc="cudnnDestroySpatialTransformerDescriptor")()])
        # DebugMode cannot compare the values of CDataType variables, so by
        # default it returns False all the time. To prevent DebugMode from
        # complaining because of the MergeOptimizer, we make this variable
        # always compare to True.
        out = node.outputs[0]
        out.tag.values_eq_approx = tensor.type.values_eq_approx_always_true
        return node

    # Grid width
    dim0 = property(lambda self: self.dimensions[0])
    # Grid height
    dim1 = property(lambda self: self.dimensions[1])
    # Number of feature maps (defaults to 1 when not provided).
    # BUG FIX: the original read ``self.subsample``, an attribute this Op
    # never defines; the intended attribute is ``self.dimensions``.
    dim2 = property(lambda self: self.dimensions[2]
                    if len(self.dimensions) > 2 else 1)
    # Number of images (defaults to 1 when not provided)
    dim3 = property(lambda self: self.dimensions[3]
                    if len(self.dimensions) > 3 else 1)
    # Number of dimensions in the output tensor
    nb_dims = property(lambda self: len(self.dimensions))

    def c_code_cache_version(self):
        return (super(GpuDnnSpatialTfDesc, self).c_code_cache_version(),
                version())
class GpuDnnGridGeneratorOp(DnnBase):
    """
    Skeleton Op wrapping the cuDNN spatial transformer grid generator
    for use in spatial transformer network operations.

    Takes a transformer descriptor and the affine transformation
    parameters ``theta``; will output the sampling grid (TODO).
    """

    __props__ = ()

    _cop_num_inputs = 3
    _cop_num_outputs = 1

    def __init__(self):
        DnnBase.__init__(self, ["c_code/spatialtf_grid.c"], "spatialtf_grid")

    def dnn_context(self, node):
        # BUG FIX: this Op declares a single output (_cop_num_outputs = 1),
        # so the context must come from outputs[0]; outputs[1] would raise
        # an IndexError.
        return node.outputs[0].type.context_name

    def make_node(self, desc, theta, cx=None):
        if cx is None:
            context_name = infer_context_name(theta)
        else:
            context_name = infer_context_name(theta, cx)
        # TODO: build the output grid variable (a GpuArrayType with the
        # proper dtype, broadcastable pattern and ``context_name``) and
        # list it in the Apply outputs.  The original instantiated a bare
        # ``GpuArrayType()`` and discarded it; removed here.
        inputs = [desc, theta]
        outputs = []
        return Apply(self, inputs, outputs)

    def L_op(self, inputs, outputs, output_grads):
        # TODO: implement the gradient (cuDNN grid generator backward).
        pass
class GpuDnnGridSamplerOp(DnnBase):
    """
    Skeleton Op wrapping the cuDNN spatial transformer sampler
    for use in spatial transformer network operations.
    """

    __props__ = ()

    _cop_num_inputs = 3
    _cop_num_outputs = 1

    def __init__(self):
        DnnBase.__init__(self, ["c_code/spatialtf_sampler.c"],
                         "spatialtf_sampler")

    def dnn_context(self, node):
        # BUG FIX: this Op declares a single output (_cop_num_outputs = 1),
        # so the context must come from outputs[0]; outputs[1] would raise
        # an IndexError.
        return node.outputs[0].type.context_name

    def make_node(self, desc, grid, inputs):
        # desc:   transformer net descriptor
        # grid:   sampling grid created by GpuDnnGridGeneratorOp
        # inputs: input tensor to be sampled
        # TODO:
        # - create the output tensor (y in the cuDNN documentation)
        #   and return the corresponding Apply node
        pass

    def L_op(self, inputs, outputs, output_grads):
        # TODO: implement the gradient (cuDNN sampler backward).
        pass
def dnn_spatialtf_context(dimensions, precision="float32"):
    """
    Convenience wrapper: instantiate a ``GpuDnnSpatialTfDesc`` Op for the
    given output ``dimensions`` and ``precision``, apply it, and return
    the resulting descriptor variable.
    """
    descriptor_op = GpuDnnSpatialTfDesc(dimensions, precision)
    return descriptor_op()
def dnn_spatialtf_grid():
    """
    Build the spatial transformer sampling grid.

    Not implemented yet — placeholder that returns ``None``.
    """
def dnn_spatialtf_sampler():
    """
    Sample an input tensor with a spatial transformer grid.

    Not implemented yet — placeholder that returns ``None``.
    """
@local_optimizer([AbstractConv2d, AbstractConv3d])
def local_abstractconv_cudnn(node):
    ctx = infer_context_name(*node.inputs)
@@ -2880,6 +3021,7 @@ def local_dnn_convgw_inplace(node, inputs):
def local_dnn_convgi_inplace(node, inputs):
    return [GpuDnnConvGradI(algo=node.op.algo, inplace=True, num_groups=node.op.num_groups)(*inputs)]
optdb.register('local_dnna_conv_inplace',
               tensor.opt.in2out(local_dnn_conv_inplace,
                                 local_dnn_convgw_inplace,
...
Markdown is supported
0%
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Register or sign in to comment