Commit a668c6c5 authored by Pascal Lamblin, committed by GitHub

Merge pull request #4587 from niasla/dilated_convolution

Implementation of 2D dilated convolution/correlation.
......@@ -1393,6 +1393,9 @@ def local_abstractconv_cudnn(node):
inp1 = node.inputs[0]
inp2 = node.inputs[1]
if (node.op.filter_dilation != (1, 1)):
return None
if not isinstance(inp1.type, GpuArrayType):
return None
......
......@@ -3116,6 +3116,8 @@ def local_abstractconv_cudnn(node):
AbstractConv2d_gradWeights,
AbstractConv2d_gradInputs))):
return None
if (node.op.filter_dilation != (1, 1)):
return None
inp1 = node.inputs[0]
inp2 = node.inputs[1]
......@@ -3123,6 +3125,7 @@ def local_abstractconv_cudnn(node):
if (not isinstance(inp1.type, CudaNdarrayType) or
not isinstance(inp2.type, CudaNdarrayType)):
return None
if not dnn_available():
return None
......
......@@ -1622,7 +1622,8 @@ def local_conv_gemm(node):
# because we are not allowed to replace a CudaNdarray with
# a DimShuffle instance in a graph optimization)
rval = theano.sandbox.cuda.as_cuda_ndarray_variable(
GpuCorrMM_gradWeights(border_mode, subsample)(
GpuCorrMM_gradWeights(border_mode,
subsample)(
gpu_contiguous(img.dimshuffle(1, 0, 2, 3)),
gpu_contiguous(kern.dimshuffle(1, 0, 2, 3))
).dimshuffle(1, 0, 2, 3))
......@@ -2769,20 +2770,25 @@ def local_abstractconv_gemm(node):
border_mode = node.op.border_mode
subsample = node.op.subsample
if (border_mode == 'full') and (subsample == (1, 1)):
filter_dilation = node.op.filter_dilation
if ((border_mode == 'full') and (subsample == (1, 1))):
if not node.op.filter_flip:
kern = kern[:, :, ::-1, ::-1]
# need to dimshuffle the kernel for full convolution
kern = kern.dimshuffle(1, 0, 2, 3)
# call GpuCorrMM_gradInputs
rval = GpuCorrMM_gradInputs('valid', subsample)(
rval = GpuCorrMM_gradInputs('valid',
subsample,
filter_dilation)(
gpu_contiguous(kern), gpu_contiguous(img))
else:
# need to flip the kernel if necessary
if node.op.filter_flip:
kern = kern[:, :, ::-1, ::-1]
# By default use GpuCorrMM
rval = GpuCorrMM(border_mode, subsample)(gpu_contiguous(img),
rval = GpuCorrMM(border_mode,
subsample,
filter_dilation)(gpu_contiguous(img),
gpu_contiguous(kern))
# call GpuCorrMM_gradWeights if good
......@@ -2790,7 +2796,7 @@ def local_abstractconv_gemm(node):
# is larger than inputChannels * outputHeight * outputWidth.
# GpuConv does not always store information on the batchsize and
# channels, though, so we only use what information we have.)
if ((subsample == (1, 1)) and
if ((subsample == (1, 1)) and (filter_dilation == (1, 1)) and
(node.op.imshp is not None) and
(None not in node.op.imshp[-2:]) and
(node.op.kshp is not None) and
......@@ -2810,7 +2816,9 @@ def local_abstractconv_gemm(node):
# because we are not allowed to replace a CudaNdarray with
# a DimShuffle instance in a graph optimization)
rval = theano.sandbox.cuda.as_cuda_ndarray_variable(
GpuCorrMM_gradWeights(border_mode, subsample)(
GpuCorrMM_gradWeights(border_mode,
subsample,
filter_dilation)(
gpu_contiguous(img.dimshuffle(1, 0, 2, 3)),
gpu_contiguous(kern.dimshuffle(1, 0, 2, 3))
).dimshuffle(1, 0, 2, 3))
......@@ -2827,7 +2835,8 @@ def local_abstractconv_gradweight_gemm(node):
return None
rval = GpuCorrMM_gradWeights(border_mode=node.op.border_mode,
subsample=node.op.subsample)(
subsample=node.op.subsample,
filter_dilation=node.op.filter_dilation)(
gpu_contiguous(img), gpu_contiguous(topgrad), shape)
if node.op.filter_flip:
rval = rval[:, :, ::-1, ::-1]
......@@ -2849,7 +2858,8 @@ def local_abstractconv_gradinputs_gemm(node):
kern = kern[:, :, ::-1, ::-1]
rval = GpuCorrMM_gradInputs(border_mode=node.op.border_mode,
subsample=node.op.subsample)(
subsample=node.op.subsample,
filter_dilation=node.op.filter_dilation)(
gpu_contiguous(kern), gpu_contiguous(topgrad), shape)
return [rval]
......@@ -2870,10 +2880,12 @@ conv_groupopt.register('local_abstractconv_dnn',
conv_groupopt.register('local_abstractconv_gemm', local_abstractconv_gemm, 30,
'conv_gemm',
'gpu', 'fast_compile', 'fast_run')
conv_groupopt.register('local_abstractconv_gradweight_gemm',
local_abstractconv_gradweight_gemm, 30,
'conv_gemm',
'gpu', 'fast_compile', 'fast_run')
conv_groupopt.register('local_abstractconv_gradinputs_gemm',
local_abstractconv_gradinputs_gemm, 30,
'conv_gemm',
......
......@@ -29,25 +29,30 @@ class TestDnnConv2d(test_abstract_conv.BaseTestConv2d):
self.provide_shape = [False]
self.shared = gpu_shared
def tcase(self, i, f, s, b, flip, provide_shape):
def tcase(self, i, f, s, b, flip, provide_shape, fd=(1, 1)):
if fd != (1, 1):
raise SkipTest("No dilation implementation for cuDNN ConvOp.")
if not dnn_available():
raise SkipTest(cuda.dnn.dnn_available.msg)
mode = mode_with_gpu
o = self.get_output_shape(i, f, s, b)
o = self.get_output_shape(i, f, s, b, fd)
self.run_fwd(inputs_shape=i, filters_shape=f, subsample=s,
verify_grad=True, mode=mode,
provide_shape=provide_shape, border_mode=b,
filter_flip=flip, target_op=GpuDnnConv)
filter_flip=flip, target_op=GpuDnnConv,
filter_dilation=fd)
self.run_gradweight(inputs_shape=i, filters_shape=f,
output_shape=o, subsample=s,
verify_grad=True, mode=mode,
provide_shape=provide_shape, border_mode=b,
filter_flip=flip, target_op=GpuDnnConvGradW)
filter_flip=flip, target_op=GpuDnnConvGradW,
filter_dilation=fd)
self.run_gradinput(inputs_shape=i, filters_shape=f,
output_shape=o, subsample=s,
verify_grad=True, mode=mode,
provide_shape=provide_shape, border_mode=b,
filter_flip=flip, target_op=GpuDnnConvGradI)
filter_flip=flip, target_op=GpuDnnConvGradI,
filter_dilation=fd)
class TestCorrMMConv2d(test_abstract_conv.BaseTestConv2d):
......@@ -56,28 +61,30 @@ class TestCorrMMConv2d(test_abstract_conv.BaseTestConv2d):
self.shared = gpu_shared
self.mode = mode_with_gpu.excluding('cudnn')
def tcase(self, i, f, s, b, flip, provide_shape):
def tcase(self, i, f, s, b, flip, provide_shape, fd=(1, 1)):
mode = self.mode
o = self.get_output_shape(i, f, s, b)
self.run_fwd(inputs_shape=i, filters_shape=f, subsample=s,
verify_grad=True, mode=mode,
o = self.get_output_shape(i, f, s, b, fd)
self.run_fwd(inputs_shape=i, filters_shape=f,
subsample=s, verify_grad=True, mode=mode,
provide_shape=provide_shape, border_mode=b,
filter_flip=flip,
target_op=(GpuCorrMM,
filter_flip=flip, target_op=(GpuCorrMM,
GpuCorrMM_gradWeights,
GpuCorrMM_gradInputs))
GpuCorrMM_gradInputs),
filter_dilation=fd)
self.run_gradweight(inputs_shape=i, filters_shape=f,
output_shape=o, subsample=s,
verify_grad=True, mode=mode,
provide_shape=provide_shape, border_mode=b,
filter_flip=flip,
target_op=GpuCorrMM_gradWeights)
target_op=GpuCorrMM_gradWeights,
filter_dilation=fd)
self.run_gradinput(inputs_shape=i, filters_shape=f,
output_shape=o, subsample=s,
verify_grad=True, mode=mode,
provide_shape=provide_shape, border_mode=b,
filter_flip=flip,
target_op=GpuCorrMM_gradInputs)
target_op=GpuCorrMM_gradInputs,
filter_dilation=fd)
class TestDnnConvTypes(test_abstract_conv.TestConvTypes):
......
......@@ -35,7 +35,7 @@ from .abstract_conv import conv2d as abstract_conv2d
def conv2d(input, filters, input_shape=None, filter_shape=None,
border_mode='valid', subsample=(1, 1), filter_flip=True,
image_shape=None, **kwargs):
image_shape=None, filter_dilation=(1, 1), **kwargs):
"""
This function will build the symbolic graph for convolving a mini-batch of a
stack of 2D inputs with a set of 2D filters. The implementation is modelled
......@@ -95,6 +95,10 @@ def conv2d(input, filters, input_shape=None, filter_shape=None,
image_shape: None, tuple/list of len 4 of int or Constant variable
Deprecated alias for input_shape.
filter_dilation: tuple of len 2
Factor by which to dilate the filters, by implicitly inserting
zeros between filter elements. Also called atrous convolution.
kwargs: Any other keyword arguments are accepted for backwards
compatibility, but will be ignored.
......@@ -140,4 +144,5 @@ def conv2d(input, filters, input_shape=None, filter_shape=None,
" be provided at the same time.")
return abstract_conv2d(input, filters, input_shape, filter_shape,
border_mode, subsample, filter_flip)
border_mode, subsample, filter_flip,
filter_dilation)
差异被折叠。
......@@ -31,20 +31,24 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
// Loops for fast unfold + copy
void im2col(const %(float_type)s* data_im, const int channels,
const int height, const int width, const int kernel_h, const int kernel_w,
const int dilation_h, const int dilation_w,
const int pad_h, const int pad_w,
const int stride_h, const int stride_w,
%(float_type)s* data_col) {
int height_col = (height + 2 * pad_h - kernel_h) / stride_h + 1;
int width_col = (width + 2 * pad_w - kernel_w) / stride_w + 1;
// Implicit dilated kernel size
int dil_kernel_h = (kernel_h - 1) * dilation_h + 1;
int dil_kernel_w = (kernel_w - 1) * dilation_w + 1;
int height_col = (height + 2 * pad_h - dil_kernel_h) / stride_h + 1;
int width_col = (width + 2 * pad_w - dil_kernel_w) / stride_w + 1;
int channels_col = channels * kernel_h * kernel_w;
for (int c = 0; c < channels_col; ++c) {
int w_offset = c %% kernel_w;
int h_offset = (c / kernel_w) %% kernel_h;
int c_im = c / kernel_h / kernel_w;
for (int h = 0; h < height_col; ++h) {
int h_pad = h * stride_h - pad_h + h_offset * dilation_h;
for (int w = 0; w < width_col; ++w) {
int h_pad = h * stride_h - pad_h + h_offset;
int w_pad = w * stride_w - pad_w + w_offset;
int w_pad = w * stride_w - pad_w + w_offset * dilation_w;
if (h_pad >= 0 && h_pad < height && w_pad >= 0 && w_pad < width)
data_col[(npy_intp)(c * height_col + h) * width_col + w] =
data_im[(npy_intp)(c_im * height + h_pad) * width + w_pad];
......@@ -60,10 +64,14 @@ void im2col(const %(float_type)s* data_im, const int channels,
// accumulated into data_im.
void col2im(const %(float_type)s* data_col, const int channels,
const int height, const int width, const int patch_h, const int patch_w,
const int dilation_h, const int dilation_w,
const int pad_h, const int pad_w, const int stride_h,
const int stride_w, %(float_type)s* data_im) {
int height_col = (height + 2 * pad_h - patch_h) / stride_h + 1;
int width_col = (width + 2 * pad_w - patch_w) / stride_w + 1;
// Implicit dilated patch
int dil_patch_h = (patch_h - 1) * dilation_h + 1;
int dil_patch_w = (patch_w - 1) * dilation_w + 1;
int height_col = (height + 2 * pad_h - dil_patch_h) / stride_h + 1;
int width_col = (width + 2 * pad_w - dil_patch_w) / stride_w + 1;
int num_kernels = channels * height * width;
int channels_col = channels * patch_h * patch_w;
for (int c = 0; c < channels_col; ++c) {
......@@ -71,9 +79,9 @@ void col2im(const %(float_type)s* data_col, const int channels,
int h_offset = (c / patch_w) %% patch_h;
int c_im = c / patch_h / patch_w;
for (int h = 0; h < height_col; ++h) {
int h_pad = h * stride_h - pad_h + h_offset * dilation_h;
for (int w = 0; w < width_col; ++w) {
int h_pad = h * stride_h - pad_h + h_offset;
int w_pad = w * stride_w - pad_w + w_offset;
int w_pad = w * stride_w - pad_w + w_offset * dilation_w;
if (h_pad >= 0 && h_pad < height && w_pad >= 0 && w_pad < width)
data_im[(npy_intp)(c_im * height + h_pad) * width + w_pad] +=
data_col[(npy_intp)(c * height_col + h) * width_col + w];
......@@ -96,6 +104,8 @@ PyArrayObject* corrMM(PyArrayObject* bottom,
const int direction,
const int dH = 1,
const int dW = 1,
const int dilH = 1,
const int dilW = 1,
const int padH = 0,
const int padW = 0)
{
......@@ -151,9 +161,12 @@ PyArrayObject* corrMM(PyArrayObject* bottom,
"CorrMM images and kernel must have the same stack size\n");
return NULL;
}
// implicit dilated filter
const int dil_kH = (kH - 1) * dilH + 1;
const int dil_kW = (kW - 1) * dilW + 1;
// top: (batchSize, nFilters, topHeight, topWidth)
const int topHeight = (bottomHeight + 2*padH - kH) / dH + 1;
const int topWidth = (bottomWidth + 2*padW - kW) / dW + 1;
const int topHeight = (bottomHeight + 2*padH - dil_kH) / dH + 1;
const int topWidth = (bottomWidth + 2*padW - dil_kW) / dW + 1;
if (batchSize != PyArray_DIMS(top)[0] ||
nFilters != PyArray_DIMS(top)[1] ||
topHeight != PyArray_DIMS(top)[2] ||
......@@ -206,7 +219,8 @@ PyArrayObject* corrMM(PyArrayObject* bottom,
for (int n = 0; n < batchSize; n++) {
// First, im2col
im2col((%(float_type)s*)PyArray_DATA(bottom) + n * bottom_stride, nChannels, bottomHeight,
bottomWidth, kH, kW, padH, padW, dH, dW, (%(float_type)s*)PyArray_DATA(col));
bottomWidth, kH, kW, dilH, dilW,
padH, padW, dH, dW, (%(float_type)s*)PyArray_DATA(col));
// Second, gemm
%(gemm)s(&NTrans, &NTrans,
&N_, &M_, &K_,
......@@ -255,7 +269,8 @@ PyArrayObject* corrMM(PyArrayObject* bottom,
for (int n = 0; n < batchSize; n++) {
// First, im2col
im2col((%(float_type)s*)PyArray_DATA(bottom) + n * bottom_stride, nChannels, bottomHeight,
bottomWidth, kH, kW, padH, padW, dH, dW, (%(float_type)s*)PyArray_DATA(col));
bottomWidth, kH, kW, dilH, dilW,
padH, padW, dH, dW, (%(float_type)s*)PyArray_DATA(col));
// Second, gemm
// Note that we accumulate into weight. We do so by setting beta = 0
// for the first iteration and beta = 1 for subsequent ones. (This
......@@ -314,7 +329,8 @@ PyArrayObject* corrMM(PyArrayObject* bottom,
(%(float_type)s*)PyArray_DATA(col), &N_);
// col2im back to the data
col2im((%(float_type)s*)PyArray_DATA(col), nChannels, bottomHeight, bottomWidth,
kH, kW, padH, padW, dH, dW, (%(float_type)s*)PyArray_DATA(bottom) + n * bottom_stride);
kH, kW, dilH, dilW, padH, padW,
dH, dW, (%(float_type)s*)PyArray_DATA(bottom) + n * bottom_stride);
}
/*
// Original caffe code for comparison
......
......@@ -79,7 +79,8 @@ def local_abstractconv_gemm(node):
if node.op.filter_flip:
kern = kern[:, :, ::-1, ::-1]
rval = CorrMM(border_mode=node.op.border_mode,
subsample=node.op.subsample)(img, kern)
subsample=node.op.subsample,
filter_dilation=node.op.filter_dilation)(img, kern)
copy_stack_trace(node.outputs[0], rval)
return [rval]
......@@ -97,7 +98,8 @@ def local_abstractconv_gradweight_gemm(node):
return None
rval = CorrMM_gradWeights(border_mode=node.op.border_mode,
subsample=node.op.subsample)(img, topgrad, shape)
subsample=node.op.subsample,
filter_dilation=node.op.filter_dilation)(img, topgrad, shape)
copy_stack_trace(node.outputs[0], rval)
# need to flip the kernel if necessary
......@@ -124,7 +126,8 @@ def local_abstractconv_gradinputs_gemm(node):
if node.op.filter_flip:
kern = kern[:, :, ::-1, ::-1]
rval = CorrMM_gradInputs(border_mode=node.op.border_mode,
subsample=node.op.subsample)(kern, topgrad,
subsample=node.op.subsample,
filter_dilation=node.op.filter_dilation)(kern, topgrad,
shape)
copy_stack_trace(node.outputs[0], rval)
......@@ -221,7 +224,9 @@ def local_conv2d_gradweight_cpu(node):
assert len(op_imshp) == 4 and len(op_kshp) == 4
outshp = get_conv_output_shape(op_imshp, op_kshp,
node.op.border_mode, node.op.subsample)[2:]
node.op.border_mode,
node.op.subsample,
node.op.filter_dilation)[2:]
fulloutshp = get_conv_output_shape(op_imshp, op_kshp,
node.op.border_mode, (1, 1))[2:]
......@@ -334,7 +339,9 @@ def local_conv2d_gradinputs_cpu(node):
filters = filters[:, :, ::-1, ::-1]
outshp = get_conv_output_shape(op_imshp, op_kshp,
node.op.border_mode, node.op.subsample)[2:]
node.op.border_mode,
node.op.subsample,
node.op.filter_dilation)[2:]
fulloutshp = get_conv_output_shape(op_imshp, op_kshp,
node.op.border_mode, (1, 1))[2:]
......
......@@ -32,8 +32,8 @@ class TestCorr2D(utt.InferShapeTester):
def validate(self, image_shape, filter_shape,
border_mode='valid', subsample=(1, 1),
input=None, filters=None,
verify_grad=True, non_contiguous=False):
input=None, filters=None, verify_grad=True,
non_contiguous=False, filter_dilation=(1, 1)):
"""
:param image_shape: The constant shape info passed to corrMM.
:param filter_shape: The constant shape info passed to corrMM.
......@@ -55,7 +55,8 @@ class TestCorr2D(utt.InferShapeTester):
# define theano graph and function
input.name = 'input'
filters.name = 'filters'
rval = corr.CorrMM(border_mode, subsample)(input, filters)
rval = corr.CorrMM(border_mode, subsample,
filter_dilation)(input, filters)
rval.name = 'corr_output'
return rval
......@@ -86,20 +87,22 @@ class TestCorr2D(utt.InferShapeTester):
orig_image_data = image_data
img_shape2d = numpy.array(N_image_shape[-2:])
fil_shape2d = numpy.array(N_filter_shape[-2:])
dil_shape2d = numpy.array(filter_dilation)
dil_fil_shape2d = (fil_shape2d - 1) * dil_shape2d + 1
subsample2d = numpy.array(subsample)
if border_mode == 'full':
padHW = (fil_shape2d - 1)
padHW = (dil_fil_shape2d - 1)
elif border_mode == 'valid':
padHW = numpy.array([0, 0])
elif border_mode == 'half':
padHW = numpy.floor(fil_shape2d / 2).astype('int32')
padHW = numpy.floor(dil_fil_shape2d / 2).astype('int32')
elif isinstance(border_mode, tuple):
padHW = numpy.array(border_mode)
elif isinstance(border_mode, integer_types):
padHW = numpy.array([border_mode, border_mode])
else:
raise NotImplementedError('Unsupported border_mode {}'.format(border_mode))
out_shape2d = numpy.floor((img_shape2d + 2 * (padHW) - fil_shape2d) / subsample2d) + 1
out_shape2d = numpy.floor((img_shape2d + 2 * (padHW) - dil_fil_shape2d) / subsample2d) + 1
# avoid numpy deprecation
out_shape2d = out_shape2d.astype('int32')
out_shape = (N_image_shape[0], N_filter_shape[0]) + tuple(out_shape2d)
......@@ -124,8 +127,8 @@ class TestCorr2D(utt.InferShapeTester):
for col in range(ref_output.shape[3]):
icol = col * subsample[1] # image col
ref_output[bb, nn, row, col] += (image2d[
irow:irow + N_filter_shape[2],
icol:icol + N_filter_shape[3]] * filter2d[::-1, ::-1]
irow:irow + dil_fil_shape2d[0]:filter_dilation[0],
icol:icol + dil_fil_shape2d[1]:filter_dilation[1]] * filter2d[::-1, ::-1]
).sum()
self.assertTrue(_allclose(theano_output, ref_output))
......@@ -186,6 +189,28 @@ class TestCorr2D(utt.InferShapeTester):
self.validate((1, 1, 6, 6), (1, 1, 3, 3), 1, subsample=(3, 3))
def test_filter_dilation(self):
"""
Tests correlation where filter dilation != (1,1)
"""
self.validate((3, 2, 7, 5), (5, 2, 2, 3), 'valid', filter_dilation=(2, 2))
self.validate((3, 2, 14, 10), (5, 2, 2, 3), 'valid', filter_dilation=(3, 1))
self.validate((1, 1, 14, 14), (1, 1, 3, 3), 'valid', filter_dilation=(2, 3))
self.validate((3, 2, 7, 5), (5, 2, 2, 3), 'full', filter_dilation=(2, 2))
self.validate((3, 2, 7, 5), (5, 2, 2, 3), 'full', filter_dilation=(3, 1))
self.validate((1, 1, 6, 6), (1, 1, 3, 3), 'full', filter_dilation=(2, 3))
self.validate((3, 2, 7, 5), (5, 2, 2, 3), 'half', filter_dilation=(2, 2))
self.validate((3, 2, 7, 5), (5, 2, 2, 3), 'half', filter_dilation=(3, 1))
self.validate((1, 1, 6, 6), (1, 1, 3, 3), 'half', filter_dilation=(2, 3))
self.validate((3, 2, 7, 5), (5, 2, 2, 3), (1, 1), filter_dilation=(2, 2))
self.validate((3, 2, 7, 5), (5, 2, 2, 3), (2, 1), filter_dilation=(2, 1))
self.validate((1, 1, 6, 6), (1, 1, 3, 3), (1, 2), filter_dilation=(1, 2))
self.validate((1, 1, 6, 6), (1, 1, 3, 3), 1, subsample=(3, 3), filter_dilation=(2, 2))
@attr('slow')
def test_shape_Constant_tensor(self):
"""
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论