Remove output param from spatial transformer and use hardcoded values for alpha and beta

上级 deec6576
...@@ -44,7 +44,6 @@ APPLY_SPECIFIC(dnn_sptf)(PyGpuArrayObject * input, ...@@ -44,7 +44,6 @@ APPLY_SPECIFIC(dnn_sptf)(PyGpuArrayObject * input,
PyGpuArrayObject * theta, PyGpuArrayObject * theta,
PyArrayObject * grid_dims, PyArrayObject * grid_dims,
cudnnSpatialTransformerDescriptor_t desc, cudnnSpatialTransformerDescriptor_t desc,
double alpha, double beta,
PyGpuArrayObject ** output, PyGpuArrayObject ** output,
PyGpuArrayObject ** grid, PyGpuArrayObject ** grid,
cudnnHandle_t _handle) cudnnHandle_t _handle)
...@@ -52,6 +51,7 @@ APPLY_SPECIFIC(dnn_sptf)(PyGpuArrayObject * input, ...@@ -52,6 +51,7 @@ APPLY_SPECIFIC(dnn_sptf)(PyGpuArrayObject * input,
PyGpuContextObject * gpu_ctx = input->context; PyGpuContextObject * gpu_ctx = input->context;
void * alpha_p; void * alpha_p;
void * beta_p; void * beta_p;
double alpha = 1.0, beta = 0.0;
float af = alpha, bf = beta; float af = alpha, bf = beta;
cudnnStatus_t err = CUDNN_STATUS_SUCCESS; cudnnStatus_t err = CUDNN_STATUS_SUCCESS;
int num_images, num_channels, height, width; int num_images, num_channels, height, width;
......
...@@ -14,7 +14,7 @@ int APPLY_SPECIFIC(dnn_sptf_desc)(PyArrayObject * dims, ...@@ -14,7 +14,7 @@ int APPLY_SPECIFIC(dnn_sptf_desc)(PyArrayObject * dims,
if ( nimages == 0 || nchannels == 0 || height == 0 || width == 0 ) if ( nimages == 0 || nchannels == 0 || height == 0 || width == 0 )
{ {
PyErr_SetString( PyExc_RuntimeError, PyErr_SetString( PyExc_RuntimeError,
"GpuDnnTransformerDescriptor: invalid grid dimensions" ); "GpuDnnTransformerDesc: invalid grid dimensions" );
return 1; return 1;
} }
...@@ -25,7 +25,7 @@ int APPLY_SPECIFIC(dnn_sptf_desc)(PyArrayObject * dims, ...@@ -25,7 +25,7 @@ int APPLY_SPECIFIC(dnn_sptf_desc)(PyArrayObject * dims,
if ( CUDNN_STATUS_SUCCESS != err ) if ( CUDNN_STATUS_SUCCESS != err )
{ {
PyErr_Format( PyExc_MemoryError, PyErr_Format( PyExc_MemoryError,
"GpuDnnTransformerDescriptor: could not allocate descriptor: %s", "GpuDnnTransformerDesc: could not allocate descriptor: %s",
cudnnGetErrorString( err ) ); cudnnGetErrorString( err ) );
return 1; return 1;
} }
...@@ -33,11 +33,11 @@ int APPLY_SPECIFIC(dnn_sptf_desc)(PyArrayObject * dims, ...@@ -33,11 +33,11 @@ int APPLY_SPECIFIC(dnn_sptf_desc)(PyArrayObject * dims,
// Currently, only the bilinear sampler is supported by cuDNN, // Currently, only the bilinear sampler is supported by cuDNN,
// so it is not available as a parameter // so it is not available as a parameter
err = cudnnSetSpatialTransformerNdDescriptor( *desc, CUDNN_SAMPLER_BILINEAR, err = cudnnSetSpatialTransformerNdDescriptor( *desc, CUDNN_SAMPLER_BILINEAR,
params->dtype, 4, out_tensor_dims ); params->precision, 4, out_tensor_dims );
if ( CUDNN_STATUS_SUCCESS != err ) if ( CUDNN_STATUS_SUCCESS != err )
{ {
PyErr_Format( PyExc_MemoryError, PyErr_Format( PyExc_MemoryError,
"GpuDnnTransformerDescriptor: could not initialize descriptor: %s", "GpuDnnTransformerDesc: could not initialize descriptor: %s",
cudnnGetErrorString( err ) ); cudnnGetErrorString( err ) );
return 1; return 1;
} }
......
...@@ -60,7 +60,6 @@ APPLY_SPECIFIC(dnn_sptf_gi)(PyGpuArrayObject * input, ...@@ -60,7 +60,6 @@ APPLY_SPECIFIC(dnn_sptf_gi)(PyGpuArrayObject * input,
PyArrayObject * grid_dims, PyArrayObject * grid_dims,
PyGpuArrayObject * dy, PyGpuArrayObject * dy,
cudnnSpatialTransformerDescriptor_t desc, cudnnSpatialTransformerDescriptor_t desc,
double alpha, double beta,
PyGpuArrayObject ** input_grad, PyGpuArrayObject ** input_grad,
PyGpuArrayObject ** grid_grad, PyGpuArrayObject ** grid_grad,
cudnnHandle_t _handle) cudnnHandle_t _handle)
...@@ -68,6 +67,7 @@ APPLY_SPECIFIC(dnn_sptf_gi)(PyGpuArrayObject * input, ...@@ -68,6 +67,7 @@ APPLY_SPECIFIC(dnn_sptf_gi)(PyGpuArrayObject * input,
PyGpuContextObject * gpu_ctx = input->context; PyGpuContextObject * gpu_ctx = input->context;
void * alpha_p; void * alpha_p;
void * beta_p; void * beta_p;
double alpha = 1.0, beta = 0.0;
float af = alpha, bf = beta; float af = alpha, bf = beta;
cudnnStatus_t err = CUDNN_STATUS_SUCCESS; cudnnStatus_t err = CUDNN_STATUS_SUCCESS;
int input_num_images, input_num_channels, int input_num_images, input_num_channels,
......
...@@ -2833,15 +2833,13 @@ def local_abstractconv3d_cudnn_graph(op, context_name, inputs, outputs): ...@@ -2833,15 +2833,13 @@ def local_abstractconv3d_cudnn_graph(op, context_name, inputs, outputs):
return [rval] return [rval]
class GpuDnnTransformerDescriptor(COp): class GpuDnnTransformerDesc(COp):
""" """
This Op builds a spatial transformer descriptor for use in spatial transformer network This Op builds a spatial transformer descriptor for use in spatial transformer network
operations. operations.
""" """
__props__ = ('precision',)
__props__ = ('dtype',) params_type = ParamsType(precision=cudnn.cudnnDataType_t)
params_type = ParamsType(dtype=cudnn.cudnnDataType_t)
def c_headers(self): def c_headers(self):
return ['cudnn.h', 'cudnn_helper.h'] return ['cudnn.h', 'cudnn_helper.h']
...@@ -2864,10 +2862,10 @@ class GpuDnnTransformerDescriptor(COp): ...@@ -2864,10 +2862,10 @@ class GpuDnnTransformerDescriptor(COp):
def do_constant_folding(self, node): def do_constant_folding(self, node):
return False return False
def __init__(self, dtype=theano.config.floatX): def __init__(self, precision=theano.config.floatX):
COp.__init__(self, ["c_code/dnn_sptf_desc.c"], "APPLY_SPECIFIC(dnn_sptf_desc)") COp.__init__(self, ["c_code/dnn_sptf_desc.c"], "APPLY_SPECIFIC(dnn_sptf_desc)")
assert cudnn.cudnnDataType_t.has_alias(dtype) assert cudnn.cudnnDataType_t.has_alias(precision)
self.dtype = dtype self.precision = precision
def make_node(self, dimensions): def make_node(self, dimensions):
dimensions = as_tensor_variable(dimensions) dimensions = as_tensor_variable(dimensions)
...@@ -2883,7 +2881,7 @@ class GpuDnnTransformerDescriptor(COp): ...@@ -2883,7 +2881,7 @@ class GpuDnnTransformerDescriptor(COp):
return node return node
def c_code_cache_version(self): def c_code_cache_version(self):
return (super(GpuDnnTransformerDescriptor, self).c_code_cache_version(), version()) return (super(GpuDnnTransformerDesc, self).c_code_cache_version(), version())
class GpuDnnTransformer(DnnBase): class GpuDnnTransformer(DnnBase):
...@@ -2892,19 +2890,22 @@ class GpuDnnTransformer(DnnBase): ...@@ -2892,19 +2890,22 @@ class GpuDnnTransformer(DnnBase):
implements the grid generator and sampler. The localization network can implements the grid generator and sampler. The localization network can
be built using neural net components of Theano. be built using neural net components of Theano.
""" """
__props__ = ('dtype',) __props__ = ()
_cop_num_inputs = 6 _cop_num_inputs = 4
_cop_num_outputs = 2 _cop_num_outputs = 2
_f16_ok = True _f16_ok = True
default_output = 0 default_output = 0
def __init__(self, dtype): def __init__(self):
DnnBase.__init__(self, ["c_code/dnn_sptf.c"], "APPLY_SPECIFIC(dnn_sptf)") DnnBase.__init__(self, ["c_code/dnn_sptf.c"], "APPLY_SPECIFIC(dnn_sptf)")
self.dtype = dtype
def make_node(self, img, theta, output, desc, alpha=None, beta=None): def make_node(self, img, theta, desc):
context_name = infer_context_name(desc) context_name = infer_context_name(desc)
if (not isinstance(desc.type, CDataType) or
desc.type.ctype != 'cudnnSpatialTransformerDescriptor_t'):
raise ValueError('desc must be cudnnSpatialTransformerDescriptor_t')
img = gpu_contiguous(as_gpuarray_variable(img, context_name)) img = gpu_contiguous(as_gpuarray_variable(img, context_name))
if img.type.ndim != 4: if img.type.ndim != 4:
raise TypeError('img must be a 4D tensor') raise TypeError('img must be a 4D tensor')
...@@ -2913,61 +2914,52 @@ class GpuDnnTransformer(DnnBase): ...@@ -2913,61 +2914,52 @@ class GpuDnnTransformer(DnnBase):
theta = gpu_contiguous(as_gpuarray_variable(theta, context_name)) theta = gpu_contiguous(as_gpuarray_variable(theta, context_name))
assert theta.dtype in ('float16', 'float32', 'float64') assert theta.dtype in ('float16', 'float32', 'float64')
assert theta.ndim == 3
# Setup grid dimensions using input from descriptor # Setup grid dimensions using input from descriptor
grid_dims = as_tensor_variable(desc.owner.inputs[0]) grid_dims = as_tensor_variable(desc.owner.inputs[0])
output = gpu_contiguous(as_gpuarray_variable(output, context_name)) output = GpuArrayType(dtype=img.dtype,
if output.type.ndim != 4: broadcastable=img.type.ndim * (False,),
raise TypeError('output must be a 4D tensor') context_name=context_name)()
if (not isinstance(desc.type, CDataType) or
desc.type.ctype != 'cudnnSpatialTransformerDescriptor_t'):
raise ValueError('desc must be cudnnSpatialTransformerDescriptor_t')
alpha = ensure_dt(alpha, _one, 'alpha', img.dtype)
beta = ensure_dt(beta, _zero, 'beta', img.dtype)
grid = GpuArrayType(dtype=self.dtype, grid = GpuArrayType(dtype=img.dtype,
broadcastable=img.type.ndim * (False,), broadcastable=img.type.ndim * (False,),
context_name=context_name)() context_name=context_name)()
inputs = [img, theta, grid_dims, desc, alpha, beta] inputs = [img, theta, grid_dims, desc]
outputs = [output.type(), grid] outputs = [output, grid]
return Apply(self, inputs, outputs) return Apply(self, inputs, outputs)
def L_op(self, inputs, outputs, grads): def L_op(self, inputs, outputs, grads):
img, theta, output, desc, alpha, beta = inputs img, theta, grid_dims, desc = inputs
_, grid = outputs _, grid = outputs
dy = grads[0] dy = grads[0]
dimg, dgrid = GpuDnnTransformerGradI(self.dtype)(img, theta, grid, dy, dimg, dgrid = GpuDnnTransformerGradI()(img, theta, grid, dy, desc)
desc, alpha, beta) dtheta = GpuDnnTransformerGradT()(dgrid, desc)
dtheta = GpuDnnTransformerGradT(self.dtype)(dgrid, desc) dgrid_dims = grad_not_implemented(self, grid_dims, 2)
dalpha = theano.gradient.grad_not_implemented(self, 4, alpha)
dbeta = theano.gradient.grad_not_implemented(self, 5, beta)
return [dimg, dtheta, dy, DisconnectedType()(), dalpha, dbeta] return [dimg, dtheta, dgrid_dims, DisconnectedType()()]
def connection_pattern(self, node): def connection_pattern(self, node):
# not connected to desc # not connected to desc
return [[1, 1], [1, 1], [1, 1], [0, 0], [1, 1], [1, 1]] return [[1, 1], [1, 1], [1, 1], [0, 0]]
class GpuDnnTransformerGradI(DnnBase): class GpuDnnTransformerGradI(DnnBase):
""" """
Gradients of inputs of the spatial transformer Gradients of inputs of the spatial transformer
""" """
__props__ = ('dtype',) __props__ = ()
_cop_num_inputs = 8 _cop_num_inputs = 6
_cop_num_outputs = 2 _cop_num_outputs = 2
_f16_ok = True _f16_ok = True
def __init__(self, dtype=theano.config.floatX): def __init__(self, dtype=theano.config.floatX):
DnnBase.__init__(self, ["c_code/dnn_sptf_gi.c"], "APPLY_SPECIFIC(dnn_sptf_gi)") DnnBase.__init__(self, ["c_code/dnn_sptf_gi.c"], "APPLY_SPECIFIC(dnn_sptf_gi)")
self.dtype = dtype
def make_node(self, img, theta, grid, dy, desc, alpha, beta): def make_node(self, img, theta, grid, dy, desc):
context_name = infer_context_name(img, theta, grid, dy, desc) context_name = infer_context_name(img, theta, grid, dy, desc)
if (not isinstance(desc.type, CDataType) or if (not isinstance(desc.type, CDataType) or
...@@ -2993,58 +2985,50 @@ class GpuDnnTransformerGradI(DnnBase): ...@@ -2993,58 +2985,50 @@ class GpuDnnTransformerGradI(DnnBase):
if img.ndim != 4: if img.ndim != 4:
raise TypeError('img must have 4 dimensions.') raise TypeError('img must have 4 dimensions.')
alpha = as_scalar(alpha) dimg = GpuArrayType(dtype=img.dtype,
beta = as_scalar(beta)
dimg = GpuArrayType(dtype=self.dtype,
broadcastable=img.type.ndim * (False,), broadcastable=img.type.ndim * (False,),
context_name=context_name)() context_name=context_name)()
dgrid = GpuArrayType(dtype=self.dtype, dgrid = GpuArrayType(dtype=img.dtype,
broadcastable=img.type.ndim * (False,), broadcastable=img.type.ndim * (False,),
context_name=context_name)() context_name=context_name)()
inputs = [img, theta, grid, grid_dims, dy, desc, alpha, beta] inputs = [img, theta, grid, grid_dims, dy, desc]
outputs = [dimg, dgrid] outputs = [dimg, dgrid]
return Apply(self, inputs, outputs) return Apply(self, inputs, outputs)
def L_op(self, inputs, outputs, grads): def L_op(self, inputs, outputs, grads):
img, theta, grid, grid_dims, dy, desc, alpha, beta = inputs img, theta, grid, grid_dims, dy, desc = inputs
dimg_out, dgrid = outputs dimg_out, dgrid = outputs
grad_cost = grads[0] grad_cost = grads[0]
dimg = dimg_out * grad_cost dimg = dimg_out * grad_cost
dtheta = GpuDnnTransformerGradT(self.dtype)(dgrid, desc) dtheta = GpuDnnTransformerGradT()(dgrid, desc)
dgrid_dims = grad_not_implemented(self, grid_dims, 3) dgrid_dims = grad_not_implemented(self, grid_dims, 3)
d_dy = grad_not_implemented(self, dy, 4) d_dy = grad_not_implemented(self, dy, 4)
dalpha = grad_not_implemented(self, alpha, 5) return [dimg, dtheta, dgrid, dgrid_dims, d_dy, DisconnectedType()()]
dbeta = grad_not_implemented(self, beta, 6)
return [dimg, dtheta, dgrid, dgrid_dims, d_dy,
DisconnectedType()(), dalpha, dbeta]
def connection_pattern(self, node): def connection_pattern(self, node):
# not connected to desc # not connected to desc
return [[1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [0, 0], [1, 1], [1, 1]] return [[1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [0, 0]]
class GpuDnnTransformerGradT(DnnBase): class GpuDnnTransformerGradT(DnnBase):
""" """
Gradients of the affine transformation generated by the localisation network Gradients of the affine transformation generated by the localisation network
""" """
__props__ = ('dtype',) __props__ = ()
_cop_num_inputs = 2 _cop_num_inputs = 2
_cop_num_outputs = 1 _cop_num_outputs = 1
_f16_ok = True _f16_ok = True
def __init__(self, dtype=theano.config.floatX): def __init__(self):
DnnBase.__init__(self, ["c_code/dnn_sptf_gt.c"], "APPLY_SPECIFIC(dnn_sptf_gt)") DnnBase.__init__(self, ["c_code/dnn_sptf_gt.c"], "APPLY_SPECIFIC(dnn_sptf_gt)")
self.dtype = dtype
def make_node(self, dgrid, desc): def make_node(self, dgrid, desc):
context_name = infer_context_name(dgrid) context_name = infer_context_name(dgrid)
dtheta = GpuArrayType(dtype=self.dtype, dtheta = GpuArrayType(dtype=dgrid.dtype,
broadcastable=(dgrid.type.ndim - 1) * (False,), broadcastable=(dgrid.type.ndim - 1) * (False,),
context_name=context_name)() context_name=context_name)()
inputs = [dgrid, desc] inputs = [dgrid, desc]
...@@ -3063,8 +3047,7 @@ class GpuDnnTransformerGradT(DnnBase): ...@@ -3063,8 +3047,7 @@ class GpuDnnTransformerGradT(DnnBase):
return [[1], [0]] return [[1], [0]]
def dnn_spatialtf(img, theta, scale_width=1, scale_height=1, alpha=None, beta=None, def dnn_spatialtf(img, theta, scale_width=1, scale_height=1, precision=theano.config.floatX):
dtype=theano.config.floatX):
""" """
GPU spatial transformer using cuDNN from NVIDIA. GPU spatial transformer using cuDNN from NVIDIA.
...@@ -3105,7 +3088,7 @@ def dnn_spatialtf(img, theta, scale_width=1, scale_height=1, alpha=None, beta=No ...@@ -3105,7 +3088,7 @@ def dnn_spatialtf(img, theta, scale_width=1, scale_height=1, alpha=None, beta=No
grid_dims = tuple([as_scalar(v).astype('int32') for v in grid_dims]) grid_dims = tuple([as_scalar(v).astype('int32') for v in grid_dims])
# Create spatial transformer descriptor # Create spatial transformer descriptor
desc = GpuDnnTransformerDescriptor(dtype)(grid_dims) desc = GpuDnnTransformerDesc(precision)(grid_dims)
context_name = infer_context_name(desc) context_name = infer_context_name(desc)
img = gpu_contiguous(as_gpuarray_variable(img, context_name)) img = gpu_contiguous(as_gpuarray_variable(img, context_name))
...@@ -3116,9 +3099,8 @@ def dnn_spatialtf(img, theta, scale_width=1, scale_height=1, alpha=None, beta=No ...@@ -3116,9 +3099,8 @@ def dnn_spatialtf(img, theta, scale_width=1, scale_height=1, alpha=None, beta=No
# Theta is an array of transformation matrices and must have shape: (num_images, 2, 3) # Theta is an array of transformation matrices and must have shape: (num_images, 2, 3)
assert theta.ndim == 3 assert theta.ndim == 3
output = GpuAllocEmpty(img.dtype, context_name)(*grid_dims)
# Setup spatial transformer # Setup spatial transformer
transformer = GpuDnnTransformer(dtype)(img, theta, output, desc, alpha, beta) transformer = GpuDnnTransformer()(img, theta, desc)
return transformer return transformer
......
...@@ -2511,18 +2511,16 @@ def test_dnn_spatialtf_grad(): ...@@ -2511,18 +2511,16 @@ def test_dnn_spatialtf_grad():
# Verify grad wrt input # Verify grad wrt input
def functor_wrt_i(input): def functor_wrt_i(input):
out = GpuAllocEmpty(theano.config.floatX, context_name=test_ctx_name)(*out_shp) desc = dnn.GpuDnnTransformerDesc(theano.config.floatX)(out_shp)
desc = dnn.GpuDnnTransformerDescriptor(theano.config.floatX)(out_shp) transformed_input = dnn.GpuDnnTransformer()(input, theta, desc)
transformed_input = dnn.GpuDnnTransformer(theano.config.floatX)(input, theta, out, desc)
grad = T.grad(T.mean(transformed_input), input) grad = T.grad(T.mean(transformed_input), input)
return grad return grad
# Verify grad wrt theta # Verify grad wrt theta
def functor_wrt_t(theta): def functor_wrt_t(theta):
out = GpuAllocEmpty(theano.config.floatX, context_name=test_ctx_name)(*out_shp) desc = dnn.GpuDnnTransformerDesc(theano.config.floatX)(out_shp)
desc = dnn.GpuDnnTransformerDescriptor(theano.config.floatX)(out_shp) transformed_input = dnn.GpuDnnTransformer()(img, theta, out, desc)
transformed_input = dnn.GpuDnnTransformer(theano.config.floatX)(img, theta, out, desc)
grad = T.grad(T.mean(transformed_input), theta) grad = T.grad(T.mean(transformed_input), theta)
return grad return grad
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论