提交 2dcf3753 authored 作者: Nizar Assaf's avatar Nizar Assaf

Implementation of 2D dilated convolution/correlation.

上级 c0b294ec
...@@ -1393,6 +1393,9 @@ def local_abstractconv_cudnn(node): ...@@ -1393,6 +1393,9 @@ def local_abstractconv_cudnn(node):
inp1 = node.inputs[0] inp1 = node.inputs[0]
inp2 = node.inputs[1] inp2 = node.inputs[1]
if (node.op.filter_dilation != (1, 1)):
return None
if not isinstance(inp1.type, GpuArrayType): if not isinstance(inp1.type, GpuArrayType):
return None return None
......
...@@ -2612,6 +2612,8 @@ def local_abstractconv_cudnn(node): ...@@ -2612,6 +2612,8 @@ def local_abstractconv_cudnn(node):
AbstractConv2d_gradWeights, AbstractConv2d_gradWeights,
AbstractConv2d_gradInputs))): AbstractConv2d_gradInputs))):
return None return None
if (node.op.filter_dilation != (1, 1)):
return None
inp1 = node.inputs[0] inp1 = node.inputs[0]
inp2 = node.inputs[1] inp2 = node.inputs[1]
...@@ -2619,6 +2621,7 @@ def local_abstractconv_cudnn(node): ...@@ -2619,6 +2621,7 @@ def local_abstractconv_cudnn(node):
if (not isinstance(inp1.type, CudaNdarrayType) or if (not isinstance(inp1.type, CudaNdarrayType) or
not isinstance(inp2.type, CudaNdarrayType)): not isinstance(inp2.type, CudaNdarrayType)):
return None return None
if not dnn_available(): if not dnn_available():
return None return None
......
...@@ -1622,7 +1622,8 @@ def local_conv_gemm(node): ...@@ -1622,7 +1622,8 @@ def local_conv_gemm(node):
# because we are not allowed to replace a CudaNdarray with # because we are not allowed to replace a CudaNdarray with
# a DimShuffle instance in a graph optimization) # a DimShuffle instance in a graph optimization)
rval = theano.sandbox.cuda.as_cuda_ndarray_variable( rval = theano.sandbox.cuda.as_cuda_ndarray_variable(
GpuCorrMM_gradWeights(border_mode, subsample)( GpuCorrMM_gradWeights(border_mode,
subsample)(
gpu_contiguous(img.dimshuffle(1, 0, 2, 3)), gpu_contiguous(img.dimshuffle(1, 0, 2, 3)),
gpu_contiguous(kern.dimshuffle(1, 0, 2, 3)) gpu_contiguous(kern.dimshuffle(1, 0, 2, 3))
).dimshuffle(1, 0, 2, 3)) ).dimshuffle(1, 0, 2, 3))
...@@ -2769,20 +2770,25 @@ def local_abstractconv_gemm(node): ...@@ -2769,20 +2770,25 @@ def local_abstractconv_gemm(node):
border_mode = node.op.border_mode border_mode = node.op.border_mode
subsample = node.op.subsample subsample = node.op.subsample
if (border_mode == 'full') and (subsample == (1, 1)): filter_dilation = node.op.filter_dilation
if ((border_mode == 'full') and (subsample == (1, 1))):
if not node.op.filter_flip: if not node.op.filter_flip:
kern = kern[:, :, ::-1, ::-1] kern = kern[:, :, ::-1, ::-1]
# need to dimshuffle the kernel for full convolution # need to dimshuffle the kernel for full convolution
kern = kern.dimshuffle(1, 0, 2, 3) kern = kern.dimshuffle(1, 0, 2, 3)
# call GpuCorrMM_gradInputs # call GpuCorrMM_gradInputs
rval = GpuCorrMM_gradInputs('valid', subsample)( rval = GpuCorrMM_gradInputs('valid',
subsample,
filter_dilation)(
gpu_contiguous(kern), gpu_contiguous(img)) gpu_contiguous(kern), gpu_contiguous(img))
else: else:
# need to flip the kernel if necessary # need to flip the kernel if necessary
if node.op.filter_flip: if node.op.filter_flip:
kern = kern[:, :, ::-1, ::-1] kern = kern[:, :, ::-1, ::-1]
# By default use GpuCorrMM # By default use GpuCorrMM
rval = GpuCorrMM(border_mode, subsample)(gpu_contiguous(img), rval = GpuCorrMM(border_mode,
subsample,
filter_dilation)(gpu_contiguous(img),
gpu_contiguous(kern)) gpu_contiguous(kern))
# call GpuCorrMM_gradWeights if good # call GpuCorrMM_gradWeights if good
...@@ -2790,7 +2796,7 @@ def local_abstractconv_gemm(node): ...@@ -2790,7 +2796,7 @@ def local_abstractconv_gemm(node):
# is larger than inputChannels * outputHeight * outputWidth. # is larger than inputChannels * outputHeight * outputWidth.
# GpuConv does not always store information on the batchsize and # GpuConv does not always store information on the batchsize and
# channels, though, so we only use what information we have.) # channels, though, so we only use what information we have.)
if ((subsample == (1, 1)) and if ((subsample == (1, 1)) and (filter_dilation == (1, 1)) and
(node.op.imshp is not None) and (node.op.imshp is not None) and
(None not in node.op.imshp[-2:]) and (None not in node.op.imshp[-2:]) and
(node.op.kshp is not None) and (node.op.kshp is not None) and
...@@ -2810,7 +2816,9 @@ def local_abstractconv_gemm(node): ...@@ -2810,7 +2816,9 @@ def local_abstractconv_gemm(node):
# because we are not allowed to replace a CudaNdarray with # because we are not allowed to replace a CudaNdarray with
# a DimShuffle instance in a graph optimization) # a DimShuffle instance in a graph optimization)
rval = theano.sandbox.cuda.as_cuda_ndarray_variable( rval = theano.sandbox.cuda.as_cuda_ndarray_variable(
GpuCorrMM_gradWeights(border_mode, subsample)( GpuCorrMM_gradWeights(border_mode,
subsample,
filter_dilation)(
gpu_contiguous(img.dimshuffle(1, 0, 2, 3)), gpu_contiguous(img.dimshuffle(1, 0, 2, 3)),
gpu_contiguous(kern.dimshuffle(1, 0, 2, 3)) gpu_contiguous(kern.dimshuffle(1, 0, 2, 3))
).dimshuffle(1, 0, 2, 3)) ).dimshuffle(1, 0, 2, 3))
...@@ -2827,7 +2835,8 @@ def local_abstractconv_gradweight_gemm(node): ...@@ -2827,7 +2835,8 @@ def local_abstractconv_gradweight_gemm(node):
return None return None
rval = GpuCorrMM_gradWeights(border_mode=node.op.border_mode, rval = GpuCorrMM_gradWeights(border_mode=node.op.border_mode,
subsample=node.op.subsample)( subsample=node.op.subsample,
filter_dilation=node.op.filter_dilation)(
gpu_contiguous(img), gpu_contiguous(topgrad), shape) gpu_contiguous(img), gpu_contiguous(topgrad), shape)
if node.op.filter_flip: if node.op.filter_flip:
rval = rval[:, :, ::-1, ::-1] rval = rval[:, :, ::-1, ::-1]
...@@ -2849,7 +2858,8 @@ def local_abstractconv_gradinputs_gemm(node): ...@@ -2849,7 +2858,8 @@ def local_abstractconv_gradinputs_gemm(node):
kern = kern[:, :, ::-1, ::-1] kern = kern[:, :, ::-1, ::-1]
rval = GpuCorrMM_gradInputs(border_mode=node.op.border_mode, rval = GpuCorrMM_gradInputs(border_mode=node.op.border_mode,
subsample=node.op.subsample)( subsample=node.op.subsample,
filter_dilation=node.op.filter_dilation)(
gpu_contiguous(kern), gpu_contiguous(topgrad), shape) gpu_contiguous(kern), gpu_contiguous(topgrad), shape)
return [rval] return [rval]
...@@ -2870,10 +2880,12 @@ conv_groupopt.register('local_abstractconv_dnn', ...@@ -2870,10 +2880,12 @@ conv_groupopt.register('local_abstractconv_dnn',
conv_groupopt.register('local_abstractconv_gemm', local_abstractconv_gemm, 30, conv_groupopt.register('local_abstractconv_gemm', local_abstractconv_gemm, 30,
'conv_gemm', 'conv_gemm',
'gpu', 'fast_compile', 'fast_run') 'gpu', 'fast_compile', 'fast_run')
conv_groupopt.register('local_abstractconv_gradweight_gemm', conv_groupopt.register('local_abstractconv_gradweight_gemm',
local_abstractconv_gradweight_gemm, 30, local_abstractconv_gradweight_gemm, 30,
'conv_gemm', 'conv_gemm',
'gpu', 'fast_compile', 'fast_run') 'gpu', 'fast_compile', 'fast_run')
conv_groupopt.register('local_abstractconv_gradinputs_gemm', conv_groupopt.register('local_abstractconv_gradinputs_gemm',
local_abstractconv_gradinputs_gemm, 30, local_abstractconv_gradinputs_gemm, 30,
'conv_gemm', 'conv_gemm',
......
...@@ -29,25 +29,30 @@ class TestDnnConv2d(test_abstract_conv.BaseTestConv2d): ...@@ -29,25 +29,30 @@ class TestDnnConv2d(test_abstract_conv.BaseTestConv2d):
self.provide_shape = [False] self.provide_shape = [False]
self.shared = gpu_shared self.shared = gpu_shared
def tcase(self, i, f, s, b, flip, provide_shape): def tcase(self, i, f, s, b, flip, provide_shape, fd=(1, 1)):
if fd != (1, 1):
raise SkipTest("No dilation implementation for cuDNN ConvOp.")
if not dnn_available(): if not dnn_available():
raise SkipTest(cuda.dnn.dnn_available.msg) raise SkipTest(cuda.dnn.dnn_available.msg)
mode = mode_with_gpu mode = mode_with_gpu
o = self.get_output_shape(i, f, s, b) o = self.get_output_shape(i, f, s, b, fd)
self.run_fwd(inputs_shape=i, filters_shape=f, subsample=s, self.run_fwd(inputs_shape=i, filters_shape=f, subsample=s,
verify_grad=True, mode=mode, verify_grad=True, mode=mode,
provide_shape=provide_shape, border_mode=b, provide_shape=provide_shape, border_mode=b,
filter_flip=flip, target_op=GpuDnnConv) filter_flip=flip, target_op=GpuDnnConv,
filter_dilation=fd)
self.run_gradweight(inputs_shape=i, filters_shape=f, self.run_gradweight(inputs_shape=i, filters_shape=f,
output_shape=o, subsample=s, output_shape=o, subsample=s,
verify_grad=True, mode=mode, verify_grad=True, mode=mode,
provide_shape=provide_shape, border_mode=b, provide_shape=provide_shape, border_mode=b,
filter_flip=flip, target_op=GpuDnnConvGradW) filter_flip=flip, target_op=GpuDnnConvGradW,
filter_dilation=fd)
self.run_gradinput(inputs_shape=i, filters_shape=f, self.run_gradinput(inputs_shape=i, filters_shape=f,
output_shape=o, subsample=s, output_shape=o, subsample=s,
verify_grad=True, mode=mode, verify_grad=True, mode=mode,
provide_shape=provide_shape, border_mode=b, provide_shape=provide_shape, border_mode=b,
filter_flip=flip, target_op=GpuDnnConvGradI) filter_flip=flip, target_op=GpuDnnConvGradI,
filter_dilation=fd)
class TestCorrMMConv2d(test_abstract_conv.BaseTestConv2d): class TestCorrMMConv2d(test_abstract_conv.BaseTestConv2d):
...@@ -56,28 +61,30 @@ class TestCorrMMConv2d(test_abstract_conv.BaseTestConv2d): ...@@ -56,28 +61,30 @@ class TestCorrMMConv2d(test_abstract_conv.BaseTestConv2d):
self.shared = gpu_shared self.shared = gpu_shared
self.mode = mode_with_gpu.excluding('cudnn') self.mode = mode_with_gpu.excluding('cudnn')
def tcase(self, i, f, s, b, flip, provide_shape): def tcase(self, i, f, s, b, flip, provide_shape, fd=(1, 1)):
mode = self.mode mode = self.mode
o = self.get_output_shape(i, f, s, b) o = self.get_output_shape(i, f, s, b, fd)
self.run_fwd(inputs_shape=i, filters_shape=f, subsample=s, self.run_fwd(inputs_shape=i, filters_shape=f,
verify_grad=True, mode=mode, subsample=s, verify_grad=True, mode=mode,
provide_shape=provide_shape, border_mode=b, provide_shape=provide_shape, border_mode=b,
filter_flip=flip, filter_flip=flip, target_op=(GpuCorrMM,
target_op=(GpuCorrMM,
GpuCorrMM_gradWeights, GpuCorrMM_gradWeights,
GpuCorrMM_gradInputs)) GpuCorrMM_gradInputs),
filter_dilation=fd)
self.run_gradweight(inputs_shape=i, filters_shape=f, self.run_gradweight(inputs_shape=i, filters_shape=f,
output_shape=o, subsample=s, output_shape=o, subsample=s,
verify_grad=True, mode=mode, verify_grad=True, mode=mode,
provide_shape=provide_shape, border_mode=b, provide_shape=provide_shape, border_mode=b,
filter_flip=flip, filter_flip=flip,
target_op=GpuCorrMM_gradWeights) target_op=GpuCorrMM_gradWeights,
filter_dilation=fd)
self.run_gradinput(inputs_shape=i, filters_shape=f, self.run_gradinput(inputs_shape=i, filters_shape=f,
output_shape=o, subsample=s, output_shape=o, subsample=s,
verify_grad=True, mode=mode, verify_grad=True, mode=mode,
provide_shape=provide_shape, border_mode=b, provide_shape=provide_shape, border_mode=b,
filter_flip=flip, filter_flip=flip,
target_op=GpuCorrMM_gradInputs) target_op=GpuCorrMM_gradInputs,
filter_dilation=fd)
class TestDnnConvTypes(test_abstract_conv.TestConvTypes): class TestDnnConvTypes(test_abstract_conv.TestConvTypes):
......
...@@ -35,7 +35,7 @@ from .abstract_conv import conv2d as abstract_conv2d ...@@ -35,7 +35,7 @@ from .abstract_conv import conv2d as abstract_conv2d
def conv2d(input, filters, input_shape=None, filter_shape=None, def conv2d(input, filters, input_shape=None, filter_shape=None,
border_mode='valid', subsample=(1, 1), filter_flip=True, border_mode='valid', subsample=(1, 1), filter_flip=True,
image_shape=None, **kwargs): image_shape=None, filter_dilation=(1, 1), **kwargs):
""" """
This function will build the symbolic graph for convolving a mini-batch of a This function will build the symbolic graph for convolving a mini-batch of a
stack of 2D inputs with a set of 2D filters. The implementation is modelled stack of 2D inputs with a set of 2D filters. The implementation is modelled
...@@ -95,6 +95,10 @@ def conv2d(input, filters, input_shape=None, filter_shape=None, ...@@ -95,6 +95,10 @@ def conv2d(input, filters, input_shape=None, filter_shape=None,
image_shape: None, tuple/list of len 4 of int or Constant variable image_shape: None, tuple/list of len 4 of int or Constant variable
Deprecated alias for input_shape. Deprecated alias for input_shape.
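filter_dilation: tuple of len 2
Factor by which to dilate the filters along each spatial axis:
consecutive filter taps are applied to input elements that are this
many positions apart (i.e. the input is subsampled as seen by the
filter), so a filter of size k covers (k - 1) * dilation + 1 input
elements. (1, 1) means no dilation. Not to be confused with
``subsample``, which strides the output.
Also called dilation elsewhere.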
kwargs: Any other keyword arguments are accepted for backwards kwargs: Any other keyword arguments are accepted for backwards
compatibility, but will be ignored. compatibility, but will be ignored.
...@@ -140,4 +144,5 @@ def conv2d(input, filters, input_shape=None, filter_shape=None, ...@@ -140,4 +144,5 @@ def conv2d(input, filters, input_shape=None, filter_shape=None,
" be provided at the same time.") " be provided at the same time.")
return abstract_conv2d(input, filters, input_shape, filter_shape, return abstract_conv2d(input, filters, input_shape, filter_shape,
border_mode, subsample, filter_flip) border_mode, subsample, filter_flip,
filter_dilation)
差异被折叠。
...@@ -31,20 +31,24 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ...@@ -31,20 +31,24 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
// Loops for fast unfold + copy // Loops for fast unfold + copy
void im2col(const %(float_type)s* data_im, const int channels, void im2col(const %(float_type)s* data_im, const int channels,
const int height, const int width, const int kernel_h, const int kernel_w, const int height, const int width, const int kernel_h, const int kernel_w,
const int dilation_h, const int dilation_w,
const int pad_h, const int pad_w, const int pad_h, const int pad_w,
const int stride_h, const int stride_w, const int stride_h, const int stride_w,
%(float_type)s* data_col) { %(float_type)s* data_col) {
int height_col = (height + 2 * pad_h - kernel_h) / stride_h + 1; // Implicit dilated kernel size
int width_col = (width + 2 * pad_w - kernel_w) / stride_w + 1; int dil_kernel_h = (kernel_h - 1) * dilation_h + 1;
int dil_kernel_w = (kernel_w - 1) * dilation_w + 1;
int height_col = (height + 2 * pad_h - dil_kernel_h) / stride_h + 1;
int width_col = (width + 2 * pad_w - dil_kernel_w) / stride_w + 1;
int channels_col = channels * kernel_h * kernel_w; int channels_col = channels * kernel_h * kernel_w;
for (int c = 0; c < channels_col; ++c) { for (int c = 0; c < channels_col; ++c) {
int w_offset = c %% kernel_w; int w_offset = c %% kernel_w;
int h_offset = (c / kernel_w) %% kernel_h; int h_offset = (c / kernel_w) %% kernel_h;
int c_im = c / kernel_h / kernel_w; int c_im = c / kernel_h / kernel_w;
for (int h = 0; h < height_col; ++h) { for (int h = 0; h < height_col; ++h) {
int h_pad = h * stride_h - pad_h + h_offset * dilation_h;
for (int w = 0; w < width_col; ++w) { for (int w = 0; w < width_col; ++w) {
int h_pad = h * stride_h - pad_h + h_offset; int w_pad = w * stride_w - pad_w + w_offset * dilation_w;
int w_pad = w * stride_w - pad_w + w_offset;
if (h_pad >= 0 && h_pad < height && w_pad >= 0 && w_pad < width) if (h_pad >= 0 && h_pad < height && w_pad >= 0 && w_pad < width)
data_col[(npy_intp)(c * height_col + h) * width_col + w] = data_col[(npy_intp)(c * height_col + h) * width_col + w] =
data_im[(npy_intp)(c_im * height + h_pad) * width + w_pad]; data_im[(npy_intp)(c_im * height + h_pad) * width + w_pad];
...@@ -60,10 +64,14 @@ void im2col(const %(float_type)s* data_im, const int channels, ...@@ -60,10 +64,14 @@ void im2col(const %(float_type)s* data_im, const int channels,
// accumulated into data_im. // accumulated into data_im.
void col2im(const %(float_type)s* data_col, const int channels, void col2im(const %(float_type)s* data_col, const int channels,
const int height, const int width, const int patch_h, const int patch_w, const int height, const int width, const int patch_h, const int patch_w,
const int dilation_h, const int dilation_w,
const int pad_h, const int pad_w, const int stride_h, const int pad_h, const int pad_w, const int stride_h,
const int stride_w, %(float_type)s* data_im) { const int stride_w, %(float_type)s* data_im) {
int height_col = (height + 2 * pad_h - patch_h) / stride_h + 1; // Implicit dilated patch
int width_col = (width + 2 * pad_w - patch_w) / stride_w + 1; int dil_patch_h = (patch_h - 1) * dilation_h + 1;
int dil_patch_w = (patch_w - 1) * dilation_w + 1;
int height_col = (height + 2 * pad_h - dil_patch_h) / stride_h + 1;
int width_col = (width + 2 * pad_w - dil_patch_w) / stride_w + 1;
int num_kernels = channels * height * width; int num_kernels = channels * height * width;
int channels_col = channels * patch_h * patch_w; int channels_col = channels * patch_h * patch_w;
for (int c = 0; c < channels_col; ++c) { for (int c = 0; c < channels_col; ++c) {
...@@ -71,9 +79,9 @@ void col2im(const %(float_type)s* data_col, const int channels, ...@@ -71,9 +79,9 @@ void col2im(const %(float_type)s* data_col, const int channels,
int h_offset = (c / patch_w) %% patch_h; int h_offset = (c / patch_w) %% patch_h;
int c_im = c / patch_h / patch_w; int c_im = c / patch_h / patch_w;
for (int h = 0; h < height_col; ++h) { for (int h = 0; h < height_col; ++h) {
int h_pad = h * stride_h - pad_h + h_offset * dilation_h;
for (int w = 0; w < width_col; ++w) { for (int w = 0; w < width_col; ++w) {
int h_pad = h * stride_h - pad_h + h_offset; int w_pad = w * stride_w - pad_w + w_offset * dilation_w;
int w_pad = w * stride_w - pad_w + w_offset;
if (h_pad >= 0 && h_pad < height && w_pad >= 0 && w_pad < width) if (h_pad >= 0 && h_pad < height && w_pad >= 0 && w_pad < width)
data_im[(npy_intp)(c_im * height + h_pad) * width + w_pad] += data_im[(npy_intp)(c_im * height + h_pad) * width + w_pad] +=
data_col[(npy_intp)(c * height_col + h) * width_col + w]; data_col[(npy_intp)(c * height_col + h) * width_col + w];
...@@ -96,6 +104,8 @@ PyArrayObject* corrMM(PyArrayObject* bottom, ...@@ -96,6 +104,8 @@ PyArrayObject* corrMM(PyArrayObject* bottom,
const int direction, const int direction,
const int dH = 1, const int dH = 1,
const int dW = 1, const int dW = 1,
const int dilH = 1,
const int dilW = 1,
const int padH = 0, const int padH = 0,
const int padW = 0) const int padW = 0)
{ {
...@@ -151,9 +161,12 @@ PyArrayObject* corrMM(PyArrayObject* bottom, ...@@ -151,9 +161,12 @@ PyArrayObject* corrMM(PyArrayObject* bottom,
"CorrMM images and kernel must have the same stack size\n"); "CorrMM images and kernel must have the same stack size\n");
return NULL; return NULL;
} }
// implicit dilated filter
const int dil_kH = (kH - 1) * dilH + 1;
const int dil_kW = (kW - 1) * dilW + 1;
// top: (batchSize, nFilters, topHeight, topWidth) // top: (batchSize, nFilters, topHeight, topWidth)
const int topHeight = (bottomHeight + 2*padH - kH) / dH + 1; const int topHeight = (bottomHeight + 2*padH - dil_kH) / dH + 1;
const int topWidth = (bottomWidth + 2*padW - kW) / dW + 1; const int topWidth = (bottomWidth + 2*padW - dil_kW) / dW + 1;
if (batchSize != PyArray_DIMS(top)[0] || if (batchSize != PyArray_DIMS(top)[0] ||
nFilters != PyArray_DIMS(top)[1] || nFilters != PyArray_DIMS(top)[1] ||
topHeight != PyArray_DIMS(top)[2] || topHeight != PyArray_DIMS(top)[2] ||
...@@ -206,7 +219,8 @@ PyArrayObject* corrMM(PyArrayObject* bottom, ...@@ -206,7 +219,8 @@ PyArrayObject* corrMM(PyArrayObject* bottom,
for (int n = 0; n < batchSize; n++) { for (int n = 0; n < batchSize; n++) {
// First, im2col // First, im2col
im2col((%(float_type)s*)PyArray_DATA(bottom) + n * bottom_stride, nChannels, bottomHeight, im2col((%(float_type)s*)PyArray_DATA(bottom) + n * bottom_stride, nChannels, bottomHeight,
bottomWidth, kH, kW, padH, padW, dH, dW, (%(float_type)s*)PyArray_DATA(col)); bottomWidth, kH, kW, dilH, dilW,
padH, padW, dH, dW, (%(float_type)s*)PyArray_DATA(col));
// Second, gemm // Second, gemm
%(gemm)s(&NTrans, &NTrans, %(gemm)s(&NTrans, &NTrans,
&N_, &M_, &K_, &N_, &M_, &K_,
...@@ -255,7 +269,8 @@ PyArrayObject* corrMM(PyArrayObject* bottom, ...@@ -255,7 +269,8 @@ PyArrayObject* corrMM(PyArrayObject* bottom,
for (int n = 0; n < batchSize; n++) { for (int n = 0; n < batchSize; n++) {
// First, im2col // First, im2col
im2col((%(float_type)s*)PyArray_DATA(bottom) + n * bottom_stride, nChannels, bottomHeight, im2col((%(float_type)s*)PyArray_DATA(bottom) + n * bottom_stride, nChannels, bottomHeight,
bottomWidth, kH, kW, padH, padW, dH, dW, (%(float_type)s*)PyArray_DATA(col)); bottomWidth, kH, kW, dilH, dilW,
padH, padW, dH, dW, (%(float_type)s*)PyArray_DATA(col));
// Second, gemm // Second, gemm
// Note that we accumulate into weight. We do so by setting beta = 0 // Note that we accumulate into weight. We do so by setting beta = 0
// for the first iteration and beta = 1 for subsequent ones. (This // for the first iteration and beta = 1 for subsequent ones. (This
...@@ -314,7 +329,8 @@ PyArrayObject* corrMM(PyArrayObject* bottom, ...@@ -314,7 +329,8 @@ PyArrayObject* corrMM(PyArrayObject* bottom,
(%(float_type)s*)PyArray_DATA(col), &N_); (%(float_type)s*)PyArray_DATA(col), &N_);
// col2im back to the data // col2im back to the data
col2im((%(float_type)s*)PyArray_DATA(col), nChannels, bottomHeight, bottomWidth, col2im((%(float_type)s*)PyArray_DATA(col), nChannels, bottomHeight, bottomWidth,
kH, kW, padH, padW, dH, dW, (%(float_type)s*)PyArray_DATA(bottom) + n * bottom_stride); kH, kW, dilH, dilW, padH, padW,
dH, dW, (%(float_type)s*)PyArray_DATA(bottom) + n * bottom_stride);
} }
/* /*
// Original caffe code for comparison // Original caffe code for comparison
......
...@@ -79,7 +79,8 @@ def local_abstractconv_gemm(node): ...@@ -79,7 +79,8 @@ def local_abstractconv_gemm(node):
if node.op.filter_flip: if node.op.filter_flip:
kern = kern[:, :, ::-1, ::-1] kern = kern[:, :, ::-1, ::-1]
rval = CorrMM(border_mode=node.op.border_mode, rval = CorrMM(border_mode=node.op.border_mode,
subsample=node.op.subsample)(img, kern) subsample=node.op.subsample,
filter_dilation=node.op.filter_dilation)(img, kern)
copy_stack_trace(node.outputs[0], rval) copy_stack_trace(node.outputs[0], rval)
return [rval] return [rval]
...@@ -97,7 +98,8 @@ def local_abstractconv_gradweight_gemm(node): ...@@ -97,7 +98,8 @@ def local_abstractconv_gradweight_gemm(node):
return None return None
rval = CorrMM_gradWeights(border_mode=node.op.border_mode, rval = CorrMM_gradWeights(border_mode=node.op.border_mode,
subsample=node.op.subsample)(img, topgrad, shape) subsample=node.op.subsample,
filter_dilation=node.op.filter_dilation)(img, topgrad, shape)
copy_stack_trace(node.outputs[0], rval) copy_stack_trace(node.outputs[0], rval)
# need to flip the kernel if necessary # need to flip the kernel if necessary
...@@ -124,7 +126,8 @@ def local_abstractconv_gradinputs_gemm(node): ...@@ -124,7 +126,8 @@ def local_abstractconv_gradinputs_gemm(node):
if node.op.filter_flip: if node.op.filter_flip:
kern = kern[:, :, ::-1, ::-1] kern = kern[:, :, ::-1, ::-1]
rval = CorrMM_gradInputs(border_mode=node.op.border_mode, rval = CorrMM_gradInputs(border_mode=node.op.border_mode,
subsample=node.op.subsample)(kern, topgrad, subsample=node.op.subsample,
filter_dilation=node.op.filter_dilation)(kern, topgrad,
shape) shape)
copy_stack_trace(node.outputs[0], rval) copy_stack_trace(node.outputs[0], rval)
...@@ -221,7 +224,9 @@ def local_conv2d_gradweight_cpu(node): ...@@ -221,7 +224,9 @@ def local_conv2d_gradweight_cpu(node):
assert len(op_imshp) == 4 and len(op_kshp) == 4 assert len(op_imshp) == 4 and len(op_kshp) == 4
outshp = get_conv_output_shape(op_imshp, op_kshp, outshp = get_conv_output_shape(op_imshp, op_kshp,
node.op.border_mode, node.op.subsample)[2:] node.op.border_mode,
node.op.subsample,
node.op.filter_dilation)[2:]
fulloutshp = get_conv_output_shape(op_imshp, op_kshp, fulloutshp = get_conv_output_shape(op_imshp, op_kshp,
node.op.border_mode, (1, 1))[2:] node.op.border_mode, (1, 1))[2:]
...@@ -334,7 +339,9 @@ def local_conv2d_gradinputs_cpu(node): ...@@ -334,7 +339,9 @@ def local_conv2d_gradinputs_cpu(node):
filters = filters[:, :, ::-1, ::-1] filters = filters[:, :, ::-1, ::-1]
outshp = get_conv_output_shape(op_imshp, op_kshp, outshp = get_conv_output_shape(op_imshp, op_kshp,
node.op.border_mode, node.op.subsample)[2:] node.op.border_mode,
node.op.subsample,
node.op.filter_dilation)[2:]
fulloutshp = get_conv_output_shape(op_imshp, op_kshp, fulloutshp = get_conv_output_shape(op_imshp, op_kshp,
node.op.border_mode, (1, 1))[2:] node.op.border_mode, (1, 1))[2:]
......
...@@ -32,8 +32,8 @@ class TestCorr2D(utt.InferShapeTester): ...@@ -32,8 +32,8 @@ class TestCorr2D(utt.InferShapeTester):
def validate(self, image_shape, filter_shape, def validate(self, image_shape, filter_shape,
border_mode='valid', subsample=(1, 1), border_mode='valid', subsample=(1, 1),
input=None, filters=None, input=None, filters=None, verify_grad=True,
verify_grad=True, non_contiguous=False): non_contiguous=False, filter_dilation=(1, 1)):
""" """
:param image_shape: The constant shape info passed to corrMM. :param image_shape: The constant shape info passed to corrMM.
:param filter_shape: The constant shape info passed to corrMM. :param filter_shape: The constant shape info passed to corrMM.
...@@ -55,7 +55,8 @@ class TestCorr2D(utt.InferShapeTester): ...@@ -55,7 +55,8 @@ class TestCorr2D(utt.InferShapeTester):
# define theano graph and function # define theano graph and function
input.name = 'input' input.name = 'input'
filters.name = 'filters' filters.name = 'filters'
rval = corr.CorrMM(border_mode, subsample)(input, filters) rval = corr.CorrMM(border_mode, subsample,
filter_dilation)(input, filters)
rval.name = 'corr_output' rval.name = 'corr_output'
return rval return rval
...@@ -86,20 +87,22 @@ class TestCorr2D(utt.InferShapeTester): ...@@ -86,20 +87,22 @@ class TestCorr2D(utt.InferShapeTester):
orig_image_data = image_data orig_image_data = image_data
img_shape2d = numpy.array(N_image_shape[-2:]) img_shape2d = numpy.array(N_image_shape[-2:])
fil_shape2d = numpy.array(N_filter_shape[-2:]) fil_shape2d = numpy.array(N_filter_shape[-2:])
dil_shape2d = numpy.array(filter_dilation)
dil_fil_shape2d = (fil_shape2d - 1) * dil_shape2d + 1
subsample2d = numpy.array(subsample) subsample2d = numpy.array(subsample)
if border_mode == 'full': if border_mode == 'full':
padHW = (fil_shape2d - 1) padHW = (dil_fil_shape2d - 1)
elif border_mode == 'valid': elif border_mode == 'valid':
padHW = numpy.array([0, 0]) padHW = numpy.array([0, 0])
elif border_mode == 'half': elif border_mode == 'half':
padHW = numpy.floor(fil_shape2d / 2).astype('int32') padHW = numpy.floor(dil_fil_shape2d / 2).astype('int32')
elif isinstance(border_mode, tuple): elif isinstance(border_mode, tuple):
padHW = numpy.array(border_mode) padHW = numpy.array(border_mode)
elif isinstance(border_mode, integer_types): elif isinstance(border_mode, integer_types):
padHW = numpy.array([border_mode, border_mode]) padHW = numpy.array([border_mode, border_mode])
else: else:
raise NotImplementedError('Unsupported border_mode {}'.format(border_mode)) raise NotImplementedError('Unsupported border_mode {}'.format(border_mode))
out_shape2d = numpy.floor((img_shape2d + 2 * (padHW) - fil_shape2d) / subsample2d) + 1 out_shape2d = numpy.floor((img_shape2d + 2 * (padHW) - dil_fil_shape2d) / subsample2d) + 1
# avoid numpy deprecation # avoid numpy deprecation
out_shape2d = out_shape2d.astype('int32') out_shape2d = out_shape2d.astype('int32')
out_shape = (N_image_shape[0], N_filter_shape[0]) + tuple(out_shape2d) out_shape = (N_image_shape[0], N_filter_shape[0]) + tuple(out_shape2d)
...@@ -124,8 +127,8 @@ class TestCorr2D(utt.InferShapeTester): ...@@ -124,8 +127,8 @@ class TestCorr2D(utt.InferShapeTester):
for col in range(ref_output.shape[3]): for col in range(ref_output.shape[3]):
icol = col * subsample[1] # image col icol = col * subsample[1] # image col
ref_output[bb, nn, row, col] += (image2d[ ref_output[bb, nn, row, col] += (image2d[
irow:irow + N_filter_shape[2], irow:irow + dil_fil_shape2d[0]:filter_dilation[0],
icol:icol + N_filter_shape[3]] * filter2d[::-1, ::-1] icol:icol + dil_fil_shape2d[1]:filter_dilation[1]] * filter2d[::-1, ::-1]
).sum() ).sum()
self.assertTrue(_allclose(theano_output, ref_output)) self.assertTrue(_allclose(theano_output, ref_output))
...@@ -186,6 +189,28 @@ class TestCorr2D(utt.InferShapeTester): ...@@ -186,6 +189,28 @@ class TestCorr2D(utt.InferShapeTester):
self.validate((1, 1, 6, 6), (1, 1, 3, 3), 1, subsample=(3, 3)) self.validate((1, 1, 6, 6), (1, 1, 3, 3), 1, subsample=(3, 3))
def test_filter_dilation(self):
"""
Tests correlation where filter dilation != (1,1)
"""
self.validate((3, 2, 7, 5), (5, 2, 2, 3), 'valid', filter_dilation=(2, 2))
self.validate((3, 2, 14, 10), (5, 2, 2, 3), 'valid', filter_dilation=(3, 1))
self.validate((1, 1, 14, 14), (1, 1, 3, 3), 'valid', filter_dilation=(2, 3))
self.validate((3, 2, 7, 5), (5, 2, 2, 3), 'full', filter_dilation=(2, 2))
self.validate((3, 2, 7, 5), (5, 2, 2, 3), 'full', filter_dilation=(3, 1))
self.validate((1, 1, 6, 6), (1, 1, 3, 3), 'full', filter_dilation=(2, 3))
self.validate((3, 2, 7, 5), (5, 2, 2, 3), 'half', filter_dilation=(2, 2))
self.validate((3, 2, 7, 5), (5, 2, 2, 3), 'half', filter_dilation=(3, 1))
self.validate((1, 1, 6, 6), (1, 1, 3, 3), 'half', filter_dilation=(2, 3))
self.validate((3, 2, 7, 5), (5, 2, 2, 3), (1, 1), filter_dilation=(2, 2))
self.validate((3, 2, 7, 5), (5, 2, 2, 3), (2, 1), filter_dilation=(2, 1))
self.validate((1, 1, 6, 6), (1, 1, 3, 3), (1, 2), filter_dilation=(1, 2))
self.validate((1, 1, 6, 6), (1, 1, 3, 3), 1, subsample=(3, 3), filter_dilation=(2, 2))
@attr('slow') @attr('slow')
def test_shape_Constant_tensor(self): def test_shape_Constant_tensor(self):
""" """
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论