Add initial implementation of Spatial Transformer gradients

Parent 2f90f3b0
#section support_code_struct
cudnnTensorDescriptor_t APPLY_SPECIFIC(xdesc);
cudnnTensorDescriptor_t APPLY_SPECIFIC(dxdesc);
cudnnTensorDescriptor_t APPLY_SPECIFIC(dydesc);
#section init_code_struct
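/* The descriptors start out NULL so the cleanup section below can safely
   destroy only the ones that were actually created. */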
APPLY_SPECIFIC(xdesc) = NULL;
APPLY_SPECIFIC(dxdesc) = NULL;
APPLY_SPECIFIC(dydesc) = NULL;
{
  cudnnStatus_t err = CUDNN_STATUS_SUCCESS;

  err = cudnnCreateTensorDescriptor( &APPLY_SPECIFIC(xdesc) );
  if ( err != CUDNN_STATUS_SUCCESS )
  {
    PyErr_Format( PyExc_MemoryError,
                  "GpuDnnTransformerGradI: failed to allocate cuDNN tensor descriptor xdesc: %s",
                  cudnnGetErrorString( err ) );
    FAIL;
  }

  err = cudnnCreateTensorDescriptor( &APPLY_SPECIFIC(dxdesc) );
  if ( err != CUDNN_STATUS_SUCCESS )
  {
    PyErr_Format( PyExc_MemoryError,
                  "GpuDnnTransformerGradI: failed to allocate cuDNN tensor descriptor dxdesc: %s",
                  cudnnGetErrorString( err ) );
    FAIL;
  }

  err = cudnnCreateTensorDescriptor( &APPLY_SPECIFIC(dydesc) );
  if ( err != CUDNN_STATUS_SUCCESS )
  {
    PyErr_Format( PyExc_MemoryError,
                  "GpuDnnTransformerGradI: failed to allocate cuDNN tensor descriptor dydesc: %s",
                  cudnnGetErrorString( err ) );
    FAIL;
  }
}
#section cleanup_code_struct
if ( APPLY_SPECIFIC(xdesc) != NULL )
  cudnnDestroyTensorDescriptor( APPLY_SPECIFIC(xdesc) );
if ( APPLY_SPECIFIC(dxdesc) != NULL )
  cudnnDestroyTensorDescriptor( APPLY_SPECIFIC(dxdesc) );
if ( APPLY_SPECIFIC(dydesc) != NULL )
  cudnnDestroyTensorDescriptor( APPLY_SPECIFIC(dydesc) );
#section support_code_struct
int int
dnn_sptf_gi(PyGpuArrayObject * input, APPLY_SPECIFIC(dnn_sptf_gi)(PyGpuArrayObject * input,
PyGpuArrayObject * theta, PyGpuArrayObject * theta,
PyGpuArrayObject * grid, PyGpuArrayObject * grid,
PyArrayObject * grid_dims, PyArrayObject * grid_dims,
...@@ -12,6 +65,109 @@ dnn_sptf_gi(PyGpuArrayObject * input, ...@@ -12,6 +65,109 @@ dnn_sptf_gi(PyGpuArrayObject * input,
PyGpuArrayObject ** grid_grad, PyGpuArrayObject ** grid_grad,
cudnnHandle_t _handle) cudnnHandle_t _handle)
{ {
  PyGpuContextObject * gpu_ctx = input->context;
  void * alpha_p;
  void * beta_p;
  float af = alpha, bf = beta;
  cudnnStatus_t err = CUDNN_STATUS_SUCCESS;
  int input_num_images, input_num_channels,
      input_height, input_width;
  int num_images, num_channels, height, width;
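  /* cuDNN takes the scale factors by host pointer: double data uses
     &alpha/&beta directly, while float and half data use the float
     copies af and bf. */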
  switch ( input->ga.typecode )
  {
  case GA_DOUBLE:
    alpha_p = (void *)&alpha;
    beta_p = (void *)&beta;
    break;
  case GA_FLOAT:
    alpha_p = (void *)&af;
    beta_p = (void *)&bf;
    break;
  case GA_HALF:
    alpha_p = (void *)&af;
    beta_p = (void *)&bf;
    break;
  default:
    PyErr_SetString( PyExc_TypeError,
                     "GpuDnnTransformerGradI: unsupported type for input in spatial transformer gradients" );
    return -1;
  }
  if ( theta->ga.typecode != GA_FLOAT &&
       theta->ga.typecode != GA_DOUBLE &&
       theta->ga.typecode != GA_HALF )
  {
    PyErr_SetString( PyExc_TypeError,
                     "GpuDnnTransformerGradI: unsupported data type for theta in spatial transformer gradients." );
    return -1;
  }

  if ( grid->ga.typecode != GA_FLOAT &&
       grid->ga.typecode != GA_DOUBLE &&
       grid->ga.typecode != GA_HALF )
  {
    PyErr_SetString( PyExc_TypeError,
                     "GpuDnnTransformerGradI: unsupported data type for grid in spatial transformer gradients." );
    return -1;
  }
  if ( theano_prep_output( input_grad, PyGpuArray_NDIM( input ),
                           PyGpuArray_DIMS( input ), input->ga.typecode,
                           GA_C_ORDER, gpu_ctx ) != 0 )
    return 1;

  if ( theano_prep_output( grid_grad, PyGpuArray_NDIM( grid ),
                           PyGpuArray_DIMS( grid ), grid->ga.typecode,
                           GA_C_ORDER, gpu_ctx ) != 0 )
    return 1;

  if ( c_set_tensorNd( input, APPLY_SPECIFIC(xdesc) ) != 0 )
    return 1;

  if ( c_set_tensorNd( dy, APPLY_SPECIFIC(dydesc) ) != 0 )
    return 1;

  if ( c_set_tensorNd( *input_grad, APPLY_SPECIFIC(dxdesc) ) != 0 )
    return 1;

  // Directly return the outputs if any of the dimensions is 0.
  // (cuDNN does not support zero-length dimensions.)
  if ( PyGpuArray_SIZE( *input_grad ) == 0 || PyGpuArray_SIZE( *grid_grad ) == 0 )
    return 0;
  cuda_enter( gpu_ctx->ctx );

  cuda_wait( input->ga.data, GPUARRAY_CUDA_WAIT_READ );
  cuda_wait( theta->ga.data, GPUARRAY_CUDA_WAIT_READ );
  cuda_wait( grid->ga.data, GPUARRAY_CUDA_WAIT_READ );
  cuda_wait( dy->ga.data, GPUARRAY_CUDA_WAIT_READ );
  cuda_wait( (*input_grad)->ga.data, GPUARRAY_CUDA_WAIT_WRITE );
  cuda_wait( (*grid_grad)->ga.data, GPUARRAY_CUDA_WAIT_WRITE );
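  /* One cuDNN call computes both gradients at once: dx (w.r.t. the input
     image) and dgrid (w.r.t. the sampling grid). */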
  err = cudnnSpatialTfSamplerBackward( _handle, desc, alpha_p, APPLY_SPECIFIC(xdesc),
                                       PyGpuArray_DEV_DATA( input ), beta_p, APPLY_SPECIFIC(dxdesc),
                                       PyGpuArray_DEV_DATA( *input_grad ), alpha_p, APPLY_SPECIFIC(dydesc),
                                       PyGpuArray_DEV_DATA( dy ), PyGpuArray_DEV_DATA( grid ), beta_p,
                                       PyGpuArray_DEV_DATA( *grid_grad ) );

  cuda_record( input->ga.data, GPUARRAY_CUDA_WAIT_READ );
  cuda_record( theta->ga.data, GPUARRAY_CUDA_WAIT_READ );
  cuda_record( grid->ga.data, GPUARRAY_CUDA_WAIT_READ );
  cuda_record( dy->ga.data, GPUARRAY_CUDA_WAIT_READ );
  cuda_record( (*input_grad)->ga.data, GPUARRAY_CUDA_WAIT_WRITE );
  cuda_record( (*grid_grad)->ga.data, GPUARRAY_CUDA_WAIT_WRITE );

  cuda_exit( gpu_ctx->ctx );
  if ( CUDNN_STATUS_SUCCESS != err )
  {
    PyErr_Format( PyExc_RuntimeError,
                  "GpuDnnTransformerGradI: failed to compute gradients of the inputs: %s",
                  cudnnGetErrorString( err ) );
    return -1;
  }

  return 0;
}
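For reference, below is a minimal standalone sketch of the cuDNN call pattern that dnn_sptf_gi wraps. It assumes float data, identical input and output sizes, and elides all error checking and data initialization; the function name sampler_backward_sketch and the concrete sizes are illustrative, not part of this patch:

#include <cudnn.h>
#include <cuda_runtime.h>

/* Sketch: one cudnnSpatialTfSamplerBackward call fills both dx (gradient
 * w.r.t. the input image) and dgrid (gradient w.r.t. the sampling grid),
 * scaled as out = alpha * gradient + beta * out_prior. */
int sampler_backward_sketch(void)
{
  int dims[4] = { 2, 3, 8, 8 };  /* N, C, H, W of the sampler output */
  float alpha = 1.0f, beta = 0.0f;
  float *x, *dx, *dy, *grid, *dgrid;
  size_t img_bytes = 2 * 3 * 8 * 8 * sizeof(float);
  size_t grid_bytes = 2 * 8 * 8 * 2 * sizeof(float); /* grid is (N, H, W, 2) */
  cudnnHandle_t handle;
  cudnnSpatialTransformerDescriptor_t st;
  cudnnTensorDescriptor_t xdesc, dxdesc, dydesc;

  cudnnCreate( &handle );
  cudnnCreateSpatialTransformerDescriptor( &st );
  cudnnSetSpatialTransformerNdDescriptor( st, CUDNN_SAMPLER_BILINEAR,
                                          CUDNN_DATA_FLOAT, 4, dims );
  cudnnCreateTensorDescriptor( &xdesc );
  cudnnCreateTensorDescriptor( &dxdesc );
  cudnnCreateTensorDescriptor( &dydesc );
  cudnnSetTensor4dDescriptor( xdesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT,
                              2, 3, 8, 8 );
  cudnnSetTensor4dDescriptor( dxdesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT,
                              2, 3, 8, 8 );
  cudnnSetTensor4dDescriptor( dydesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT,
                              2, 3, 8, 8 );
  cudaMalloc( (void **) &x, img_bytes );
  cudaMalloc( (void **) &dx, img_bytes );
  cudaMalloc( (void **) &dy, img_bytes );
  cudaMalloc( (void **) &grid, grid_bytes );
  cudaMalloc( (void **) &dgrid, grid_bytes );
  /* ... fill x, dy and grid (e.g. via cudnnSpatialTfGridGeneratorForward) ... */
  cudnnSpatialTfSamplerBackward( handle, st,
                                 &alpha, xdesc, x,    /* input image      */
                                 &beta, dxdesc, dx,   /* input gradient   */
                                 &alpha, dydesc, dy,  /* output gradient  */
                                 grid,
                                 &beta, dgrid );      /* grid gradient    */
  cudaFree( x ); cudaFree( dx ); cudaFree( dy );
  cudaFree( grid ); cudaFree( dgrid );
  cudnnDestroyTensorDescriptor( xdesc );
  cudnnDestroyTensorDescriptor( dxdesc );
  cudnnDestroyTensorDescriptor( dydesc );
  cudnnDestroySpatialTransformerDescriptor( st );
  cudnnDestroy( handle );
  return 0;
}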
#section support_code_struct
int
APPLY_SPECIFIC(dnn_sptf_gt)(PyGpuArrayObject * dgrid,
                            cudnnSpatialTransformerDescriptor_t desc,
                            PyGpuArrayObject ** dtheta,
                            cudnnHandle_t _handle)
{
  PyGpuContextObject * gpu_ctx = dgrid->context;
  cudnnStatus_t err = CUDNN_STATUS_SUCCESS;
  int num_images = (int) PyGpuArray_DIM( dgrid, 0 );
  const size_t dtheta_dims[3] = { num_images, 2, 3 };
  if ( theano_prep_output( dtheta, 3, &(dtheta_dims[0]), dgrid->ga.typecode,
                           GA_C_ORDER, gpu_ctx ) != 0 )
    return 1;

  cuda_enter( gpu_ctx->ctx );

  cuda_wait( dgrid->ga.data, GPUARRAY_CUDA_WAIT_READ );
  cuda_wait( (*dtheta)->ga.data, GPUARRAY_CUDA_WAIT_WRITE );

  err = cudnnSpatialTfGridGeneratorBackward( _handle, desc,
                                             PyGpuArray_DEV_DATA( dgrid ),
                                             PyGpuArray_DEV_DATA( *dtheta ) );

  cuda_record( dgrid->ga.data, GPUARRAY_CUDA_WAIT_READ );
  cuda_record( (*dtheta)->ga.data, GPUARRAY_CUDA_WAIT_WRITE );

  cuda_exit( gpu_ctx->ctx );

  if ( err != CUDNN_STATUS_SUCCESS )
  {
    PyErr_Format( PyExc_RuntimeError,
                  "GpuDnnTransformerGradT: could not compute gradients of the affine transformation: %s",
                  cudnnGetErrorString( err ) );
    return 1;
  }

  return 0;
}
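Likewise, a minimal sketch of the grid-generator backward step that dnn_sptf_gt wraps (same assumptions and illustrative naming as the sketch above): it folds the (N, H, W, 2) grid gradient produced by the sampler backward pass into the (N, 2, 3) gradient of the affine parameters theta:

#include <cudnn.h>
#include <cuda_runtime.h>

int grid_generator_backward_sketch(void)
{
  int dims[4] = { 2, 3, 8, 8 };  /* N, C, H, W of the sampler output */
  float *dgrid, *dtheta;
  cudnnHandle_t handle;
  cudnnSpatialTransformerDescriptor_t st;

  cudnnCreate( &handle );
  cudnnCreateSpatialTransformerDescriptor( &st );
  cudnnSetSpatialTransformerNdDescriptor( st, CUDNN_SAMPLER_BILINEAR,
                                          CUDNN_DATA_FLOAT, 4, dims );
  cudaMalloc( (void **) &dgrid, 2 * 8 * 8 * 2 * sizeof(float) );  /* (N, H, W, 2) */
  cudaMalloc( (void **) &dtheta, 2 * 2 * 3 * sizeof(float) );     /* (N, 2, 3)    */
  /* ... fill dgrid, e.g. with the dgrid output of the sampler backward ... */
  cudnnSpatialTfGridGeneratorBackward( handle, st, dgrid, dtheta );
  cudaFree( dgrid ); cudaFree( dtheta );
  cudnnDestroySpatialTransformerDescriptor( st );
  cudnnDestroy( handle );
  return 0;
}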
@@ -2958,13 +2958,23 @@ class GpuDnnTransformerGradI(DnnBase):
    _f16_ok = True

    def __init__(self, dtype=theano.config.floatX):
        DnnBase.__init__(self, ["c_code/dnn_sptf_gi.c"], "APPLY_SPECIFIC(dnn_sptf_gi)")
        self.dtype = dtype

    def make_node(self, img, theta, grid, grid_dims, dy, desc, alpha, beta):
        context_name = infer_context_name(img)
        if img.ndim != 4:
            raise RuntimeError('img must have 4 dimensions.')
        if theta.ndim != 3:
            raise RuntimeError('theta must have 3 dimensions')
        img = as_gpuarray_variable(gpu_contiguous(img), context_name)
        theta = as_gpuarray_variable(gpu_contiguous(theta), context_name)
        grid = as_gpuarray_variable(gpu_contiguous(grid), context_name)
        grid_dims = as_tensor_variable(grid_dims)
        dy = as_gpuarray_variable(dy, context_name)
        dimg = GpuArrayType(dtype=self.dtype,
                            broadcastable=img.type.ndim * (False,),
                            context_name=context_name)()
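The gpu_contiguous wrappers added here matter for the C code above: cuDNN's spatial transformer routines expect fully packed tensors, so the inputs are made C-contiguous before c_set_tensorNd describes them to cuDNN (the grid, which is passed as a raw device pointer with no descriptor, especially so).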
@@ -2988,7 +2998,7 @@ class GpuDnnTransformerGradT(DnnBase):
    _f16_ok = True

    def __init__(self, dtype=theano.config.floatX):
        DnnBase.__init__(self, ["c_code/dnn_sptf_gt.c"], "APPLY_SPECIFIC(dnn_sptf_gt)")
        self.dtype = dtype

    def make_node(self, dgrid, desc):
...
@@ -2467,7 +2467,9 @@ def test_dnn_spatialtf():
    grad_fn = theano.function([t_img, t_theta, t_dy], img_grad)
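    # dy is the gradient w.r.t. the sampler output, whose spatial dimensions
    # are the input's scaled by (scale_height, scale_width); drawing it with
    # img.shape would not match the op's output shape.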
    dy_shp = (img.shape[0], img.shape[1], int(img.shape[2] * scale_height),
              int(img.shape[3] * scale_width))
    dy = -1 + 2 * np.random.randn(*dy_shp).astype(theano.config.floatX)
    spatialtf_grad = grad_fn(img, transform, dy)

    # Check if function graph contains the spatial transformer Ops
...