Update spatial transformer to pass grid dimensions only in the descriptor op

上级 489f9ccd
#section support_code_apply #section support_code_apply
int APPLY_SPECIFIC(dnn_sptf_desc)(npy_int32 dim_nimages, int APPLY_SPECIFIC(dnn_sptf_desc)(PyArrayObject * dims,
npy_int32 dim_nchannels,
npy_int32 dim_height,
npy_int32 dim_width,
cudnnSpatialTransformerDescriptor_t * desc, cudnnSpatialTransformerDescriptor_t * desc,
PARAMS_TYPE * params) PARAMS_TYPE * params)
{ {
cudnnStatus_t err; cudnnStatus_t err;
const int nimages = (int) dim_nimages; const int nimages = *((int *) PyArray_GETPTR1(dims, 0));
const int nchannels = (int) dim_nchannels; const int nchannels = *((int *) PyArray_GETPTR1(dims, 1));
const int height = (int) dim_height; const int height = *((int *) PyArray_GETPTR1(dims, 2));
const int width = (int) dim_width; const int width = *((int *) PyArray_GETPTR1(dims, 3));
if ( nimages == 0 || nchannels == 0 || height == 0 || width == 0 ) if ( nimages == 0 || nchannels == 0 || height == 0 || width == 0 )
{ {
PyErr_SetString( PyExc_RuntimeError, "Invalid grid dimensions" ); PyErr_SetString( PyExc_RuntimeError,
return -1; "GpuDnnTransformerDescriptor: invalid grid dimensions" );
return 1;
} }
// num_images, num_channels, height, width // num_images, num_channels, height, width
...@@ -27,9 +25,9 @@ int APPLY_SPECIFIC(dnn_sptf_desc)(npy_int32 dim_nimages, ...@@ -27,9 +25,9 @@ int APPLY_SPECIFIC(dnn_sptf_desc)(npy_int32 dim_nimages,
if ( CUDNN_STATUS_SUCCESS != err ) if ( CUDNN_STATUS_SUCCESS != err )
{ {
PyErr_Format( PyExc_MemoryError, PyErr_Format( PyExc_MemoryError,
"Failed to allocate spatial transformer descriptor: %s", "GpuDnnTransformerDescriptor: could not allocate descriptor: %s",
cudnnGetErrorString( err ) ); cudnnGetErrorString( err ) );
return -1; return 1;
} }
// Currently, only the bilinear sampler is supported by cuDNN, // Currently, only the bilinear sampler is supported by cuDNN,
...@@ -39,9 +37,9 @@ int APPLY_SPECIFIC(dnn_sptf_desc)(npy_int32 dim_nimages, ...@@ -39,9 +37,9 @@ int APPLY_SPECIFIC(dnn_sptf_desc)(npy_int32 dim_nimages,
if ( CUDNN_STATUS_SUCCESS != err ) if ( CUDNN_STATUS_SUCCESS != err )
{ {
PyErr_Format( PyExc_MemoryError, PyErr_Format( PyExc_MemoryError,
"Failed to initialize spatial transformer descriptor: %s", "GpuDnnTransformerDescriptor: could not initialize descriptor: %s",
cudnnGetErrorString( err ) ); cudnnGetErrorString( err ) );
return -1; return 1;
} }
return 0; return 0;
......
...@@ -2849,7 +2849,7 @@ class GpuDnnTransformerDescriptor(COp): ...@@ -2849,7 +2849,7 @@ class GpuDnnTransformerDescriptor(COp):
def c_header_dirs(self): def c_header_dirs(self):
header_dirs = [os.path.dirname(__file__)] header_dirs = [os.path.dirname(__file__)]
if config.dnn.include_path: if config.dnn.include_path:
headers_dirs += [config.dnn.include_path] header_dirs += [config.dnn.include_path]
return header_dirs return header_dirs
def c_libraries(self): def c_libraries(self):
...@@ -2866,18 +2866,12 @@ class GpuDnnTransformerDescriptor(COp): ...@@ -2866,18 +2866,12 @@ class GpuDnnTransformerDescriptor(COp):
def __init__(self, dtype=theano.config.floatX): def __init__(self, dtype=theano.config.floatX):
COp.__init__(self, ["c_code/dnn_sptf_desc.c"], "APPLY_SPECIFIC(dnn_sptf_desc)") COp.__init__(self, ["c_code/dnn_sptf_desc.c"], "APPLY_SPECIFIC(dnn_sptf_desc)")
assert cudnn.cudnnDataType_t.has_alias(dtype) assert cudnn.cudnnDataType_t.has_alias(dtype)
self.dtype = dtype self.dtype = dtype
def make_node(self, dimensions): def make_node(self, dimensions):
# cuDNN supports only 2D transformations, and the output tensor must dimensions = as_tensor_variable(dimensions)
# have exactly 4 dimensions: (num_images, num_channels, height, width) node = Apply(self, [dimensions],
assert len(dimensions) == 4
dimensions = tuple(dimensions)
nimages, nchannels, height, width = dimensions
node = Apply(self, [nimages, nchannels, height, width],
[CDataType("cudnnSpatialTransformerDescriptor_t", [CDataType("cudnnSpatialTransformerDescriptor_t",
freefunc="cudnnDestroySpatialTransformerDescriptor")()]) freefunc="cudnnDestroySpatialTransformerDescriptor")()])
# DebugMode cannot compare the values of CDataType variables, so by # DebugMode cannot compare the values of CDataType variables, so by
...@@ -2908,23 +2902,22 @@ class GpuDnnTransformer(DnnBase): ...@@ -2908,23 +2902,22 @@ class GpuDnnTransformer(DnnBase):
DnnBase.__init__(self, ["c_code/dnn_sptf.c"], "APPLY_SPECIFIC(dnn_sptf)") DnnBase.__init__(self, ["c_code/dnn_sptf.c"], "APPLY_SPECIFIC(dnn_sptf)")
self.dtype = dtype self.dtype = dtype
def make_node(self, img, theta, output, grid_dims, desc, alpha=None, beta=None): def make_node(self, img, theta, output, desc, alpha=None, beta=None):
assert theta.dtype in ('float16', 'float32', 'float64')
context_name = infer_context_name(img) context_name = infer_context_name(img)
theta = as_gpuarray_variable(theta, context_name) img = gpu_contiguous(as_gpuarray_variable(img, context_name))
img = as_gpuarray_variable(img, context_name)
grid_dims = as_tensor_variable(grid_dims)
output = as_gpuarray_variable(output, context_name)
grid = GpuArrayType(dtype=self.dtype,
broadcastable=img.type.ndim * (False,),
context_name=context_name)()
if img.type.ndim != 4: if img.type.ndim != 4:
raise TypeError('img must be a 4D tensor') raise TypeError('img must be a 4D tensor')
elif img.dtype not in ('float16', 'float32', 'float64'):
raise TypeError('img type must be floating-point')
theta = gpu_contiguous(as_gpuarray_variable(theta, context_name))
assert theta.dtype in ('float16', 'float32', 'float64')
# Setup grid dimensions using input from descriptor
grid_dims = as_tensor_variable(desc.owner.inputs[0])
output = gpu_contiguous(as_gpuarray_variable(output, context_name))
if output.type.ndim != 4: if output.type.ndim != 4:
raise TypeError('output must be a 4D tensor') raise TypeError('output must be a 4D tensor')
...@@ -2935,6 +2928,10 @@ class GpuDnnTransformer(DnnBase): ...@@ -2935,6 +2928,10 @@ class GpuDnnTransformer(DnnBase):
alpha = ensure_dt(alpha, _one, 'alpha', img.dtype) alpha = ensure_dt(alpha, _one, 'alpha', img.dtype)
beta = ensure_dt(beta, _zero, 'beta', img.dtype) beta = ensure_dt(beta, _zero, 'beta', img.dtype)
grid = GpuArrayType(dtype=self.dtype,
broadcastable=img.type.ndim * (False,),
context_name=context_name)()
inputs = [img, theta, grid_dims, desc, alpha, beta] inputs = [img, theta, grid_dims, desc, alpha, beta]
outputs = [output.type(), grid] outputs = [output.type(), grid]
return Apply(self, inputs, outputs) return Apply(self, inputs, outputs)
...@@ -2973,7 +2970,7 @@ class GpuDnnTransformerGradI(DnnBase): ...@@ -2973,7 +2970,7 @@ class GpuDnnTransformerGradI(DnnBase):
DnnBase.__init__(self, ["c_code/dnn_sptf_gi.c"], "APPLY_SPECIFIC(dnn_sptf_gi)") DnnBase.__init__(self, ["c_code/dnn_sptf_gi.c"], "APPLY_SPECIFIC(dnn_sptf_gi)")
self.dtype = dtype self.dtype = dtype
def make_node(self, img, theta, grid, grid_dims, dy, desc, alpha, beta): def make_node(self, img, theta, grid, dy, desc, alpha, beta):
context_name = infer_context_name(img) context_name = infer_context_name(img)
if img.ndim != 4: if img.ndim != 4:
...@@ -2984,7 +2981,10 @@ class GpuDnnTransformerGradI(DnnBase): ...@@ -2984,7 +2981,10 @@ class GpuDnnTransformerGradI(DnnBase):
img = as_gpuarray_variable(gpu_contiguous(img), context_name) img = as_gpuarray_variable(gpu_contiguous(img), context_name)
theta = as_gpuarray_variable(gpu_contiguous(theta), context_name) theta = as_gpuarray_variable(gpu_contiguous(theta), context_name)
grid = as_gpuarray_variable(gpu_contiguous(grid), context_name) grid = as_gpuarray_variable(gpu_contiguous(grid), context_name)
grid_dims = as_tensor_variable(grid_dims)
# Setup grid dimensions from descriptor's input
grid_dims = as_tensor_variable(desc.owner.inputs[0])
dy = as_gpuarray_variable(dy, context_name) dy = as_gpuarray_variable(dy, context_name)
alpha = as_scalar(alpha) alpha = as_scalar(alpha)
beta = as_scalar(beta) beta = as_scalar(beta)
...@@ -3070,29 +3070,26 @@ def dnn_spatialtf(img, theta, scale_width=1, scale_height=1, alpha=None, beta=No ...@@ -3070,29 +3070,26 @@ def dnn_spatialtf(img, theta, scale_width=1, scale_height=1, alpha=None, beta=No
Also, the only grid sampler method available is the bilinear interpolation. Also, the only grid sampler method available is the bilinear interpolation.
""" """
# inp is a 4D tensor with shape: (num_inputs, num_channels, height, width)
assert img.ndim == 4
# Theta is an array of transformation matrices and must have shape: (num_images, 2, 3)
assert theta.ndim == 3
grid_dims = (img.shape[0], img.shape[1], grid_dims = (img.shape[0], img.shape[1],
img.shape[2] * scale_height, img.shape[2] * scale_height,
img.shape[3] * scale_width) img.shape[3] * scale_width)
grid_dims = tuple(map(lambda v: as_scalar(v).astype('int32'), list(grid_dims))) grid_dims = tuple([as_scalar(v).astype('int32') for v in grid_dims])
# Create spatial transformer descriptor # Create spatial transformer descriptor
desc = GpuDnnTransformerDescriptor(dtype)(grid_dims) desc = GpuDnnTransformerDescriptor(dtype)(grid_dims)
# Create grid dimensions variable
grid_dims_var = as_tensor_variable(grid_dims)
context_name = infer_context_name(desc) context_name = infer_context_name(desc)
img = gpu_contiguous(as_gpuarray_variable(img, context_name)) img = gpu_contiguous(as_gpuarray_variable(img, context_name))
theta = gpu_contiguous(as_gpuarray_variable(theta, context_name)) theta = gpu_contiguous(as_gpuarray_variable(theta, context_name))
# inp is a 4D tensor with shape: (num_inputs, num_channels, height, width)
assert img.ndim == 4
# Theta is an array of transformation matrices and must have shape: (num_images, 2, 3)
assert theta.ndim == 3
output = GpuAllocEmpty(img.dtype, context_name)(*grid_dims) output = GpuAllocEmpty(img.dtype, context_name)(*grid_dims)
# Setup spatial transformer # Setup spatial transformer
transformer = GpuDnnTransformer(dtype)(img, theta, output, grid_dims_var, desc, alpha, beta) transformer = GpuDnnTransformer(dtype)(img, theta, output, desc, alpha, beta)
return transformer return transformer
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论