提交 ea62004b authored 作者: Frédéric Bastien's avatar Frédéric Bastien 提交者: GitHub

Merge pull request #6331 from vikramnitin9/dilated_causal

Dilated causal convolution
...@@ -449,8 +449,8 @@ class BaseGpuCorrMM(CGpuKernelBase): ...@@ -449,8 +449,8 @@ class BaseGpuCorrMM(CGpuKernelBase):
Parameters Parameters
---------- ----------
border_mode : {'valid', 'full', 'half'} border_mode : {'valid', 'full', 'half'}
Additionally, the padding size could be directly specified by an integer Additionally, the padding size could be directly specified by an integer,
or a pair of integers a pair of integers, or two pairs of integers.
subsample subsample
Perform subsampling of the output (default: (1, 1)). Perform subsampling of the output (default: (1, 1)).
filter_dilation filter_dilation
...@@ -468,16 +468,33 @@ class BaseGpuCorrMM(CGpuKernelBase): ...@@ -468,16 +468,33 @@ class BaseGpuCorrMM(CGpuKernelBase):
def __init__(self, border_mode="valid", subsample=(1, 1), def __init__(self, border_mode="valid", subsample=(1, 1),
filter_dilation=(1, 1), num_groups=1, unshared=False): filter_dilation=(1, 1), num_groups=1, unshared=False):
if isinstance(border_mode, integer_types): if isinstance(border_mode, integer_types):
border_mode = (border_mode, border_mode) if border_mode < 0:
if isinstance(border_mode, tuple): raise ValueError(
pad_h, pad_w = map(int, border_mode) 'invalid border_mode {}, which must be a '
border_mode = (pad_h, pad_w) 'non-negative integer'.format(border_mode))
if not ((isinstance(border_mode, tuple) and min(border_mode) >= 0) or border_mode = ((border_mode, border_mode),) * 2
border_mode in ('valid', 'full', 'half')): elif isinstance(border_mode, tuple):
if len(border_mode) != 2:
raise ValueError(
'invalid border_mode {} which must be a '
'tuple of length 2'.format(border_mode))
border = ()
for mode in border_mode:
if isinstance(mode, tuple) and len(mode) == 2 and \
min(mode) >= 0:
border += ((int(mode[0]), int(mode[1])),)
elif mode >= 0:
border += ((int(mode), int(mode)),)
else:
raise ValueError(
'invalid border mode {}. The tuple can only contain '
'integers or tuples of length 2'.format(border_mode))
border_mode = border
elif border_mode not in ('valid', 'full', 'half'):
raise ValueError( raise ValueError(
'invalid border_mode {}, which must be either ' 'invalid border_mode {}, which must be either '
'"valid", "full", "half", an integer or a pair of' '"valid", "full", "half", an integer or a tuple '
' integers'.format(border_mode)) 'of length 2'.format(border_mode))
self.border_mode = border_mode self.border_mode = border_mode
if len(subsample) != 2: if len(subsample) != 2:
raise ValueError("subsample must have two elements") raise ValueError("subsample must have two elements")
...@@ -495,7 +512,7 @@ class BaseGpuCorrMM(CGpuKernelBase): ...@@ -495,7 +512,7 @@ class BaseGpuCorrMM(CGpuKernelBase):
def pad(self): def pad(self):
if self.border_mode != 'valid': if self.border_mode != 'valid':
return self.border_mode return self.border_mode
return (0, 0) return ((0, 0),) * 2
def __str__(self): def __str__(self):
return '%s{%s, %s, %s, %s, %s}' % ( return '%s{%s, %s, %s, %s, %s}' % (
...@@ -537,7 +554,7 @@ class BaseGpuCorrMM(CGpuKernelBase): ...@@ -537,7 +554,7 @@ class BaseGpuCorrMM(CGpuKernelBase):
def c_code_cache_version(self): def c_code_cache_version(self):
# Raise this whenever modifying the C code (including the file). # Raise this whenever modifying the C code (including the file).
return (11,) return (12,)
def c_code_helper(self, bottom, weights, top, direction, sub, height=None, width=None): def c_code_helper(self, bottom, weights, top, direction, sub, height=None, width=None):
""" """
...@@ -587,14 +604,14 @@ class BaseGpuCorrMM(CGpuKernelBase): ...@@ -587,14 +604,14 @@ class BaseGpuCorrMM(CGpuKernelBase):
numgroups = self.num_groups numgroups = self.num_groups
unshared = int(self.unshared) unshared = int(self.unshared)
if self.border_mode == "half": if self.border_mode == "half":
padH = padW = -1 padH_l = padH_r = padW_l = padW_r = -1
elif self.border_mode == "full": elif self.border_mode == "full":
padH = padW = -2 padH_l = padH_r = padW_l = padW_r = -2
elif isinstance(self.border_mode, tuple): elif isinstance(self.border_mode, tuple):
padH, padW = self.border_mode (padH_l, padH_r), (padW_l, padW_r) = self.border_mode
else: else:
assert self.border_mode == "valid" assert self.border_mode == "valid"
padH = padW = 0 padH_l = padH_r = padW_l = padW_r = 0
if direction == "forward": if direction == "forward":
direction = 0 direction = 0
out = top out = top
...@@ -613,13 +630,13 @@ class BaseGpuCorrMM(CGpuKernelBase): ...@@ -613,13 +630,13 @@ class BaseGpuCorrMM(CGpuKernelBase):
if height: if height:
height = '(*(npy_int*)(PyArray_DATA(%s)))' % height height = '(*(npy_int*)(PyArray_DATA(%s)))' % height
else: else:
if ((direction != 0) and (dH != 1)) or ((direction == 1) and (padH == -1)): if ((direction != 0) and (dH != 1)) or ((direction == 1) and (padH_l == -1 or padH_r == -1)):
raise ValueError("height must be given for backprop with vertical sampling or pad='half'") raise ValueError("height must be given for backprop with vertical sampling or pad='half'")
height = '-1' height = '-1'
if width: if width:
width = '(*(npy_int*)(PyArray_DATA(%s)))' % width width = '(*(npy_int*)(PyArray_DATA(%s)))' % width
else: else:
if ((direction != 0) and (dW != 1)) or ((direction == 1) and (padW == -1)): if ((direction != 0) and (dW != 1)) or ((direction == 1) and (padW_l == -1 or padW_r == -1)):
raise ValueError("width must be given for backprop with horizontal sampling or pad='half'") raise ValueError("width must be given for backprop with horizontal sampling or pad='half'")
width = '-1' width = '-1'
...@@ -635,8 +652,10 @@ class BaseGpuCorrMM(CGpuKernelBase): ...@@ -635,8 +652,10 @@ class BaseGpuCorrMM(CGpuKernelBase):
size_t dW = %(dW)s; size_t dW = %(dW)s;
size_t dilH = %(dilH)s; size_t dilH = %(dilH)s;
size_t dilW = %(dilW)s; size_t dilW = %(dilW)s;
int padH = %(padH)s; int padH_l = %(padH_l)s;
int padW = %(padW)s; int padH_r = %(padH_r)s;
int padW_l = %(padW_l)s;
int padW_r = %(padW_r)s;
int numgroups = %(numgroups)s; int numgroups = %(numgroups)s;
int unshared = %(unshared)s; int unshared = %(unshared)s;
...@@ -662,22 +681,22 @@ class BaseGpuCorrMM(CGpuKernelBase): ...@@ -662,22 +681,22 @@ class BaseGpuCorrMM(CGpuKernelBase):
// kernel height is specified (perhaps vertical subsampling or half padding) // kernel height is specified (perhaps vertical subsampling or half padding)
kH = %(height)s; kH = %(height)s;
} }
else if (padH == -2) { else if (padH_l == -2 || padH_r == -2) {
// vertical full padding, we can infer the kernel height // vertical full padding, we can infer the kernel height
kH = (2 - PyGpuArray_DIMS(bottom)[2] + (PyGpuArray_DIMS(top)[2] - 1) * dH - 1) / dilH + 1; kH = (2 - PyGpuArray_DIMS(bottom)[2] + (PyGpuArray_DIMS(top)[2] - 1) * dH - 1) / dilH + 1;
} }
else { else {
// explicit padding, we can infer the kernel height // explicit padding, we can infer the kernel height
kH = (PyGpuArray_DIMS(bottom)[2] + 2*padH - (PyGpuArray_DIMS(top)[2] - 1) * dH - 1) / dilH + 1 ; kH = (PyGpuArray_DIMS(bottom)[2] + padH_l + padH_r - (PyGpuArray_DIMS(top)[2] - 1) * dH - 1) / dilH + 1 ;
} }
if (%(width)s != -1) { if (%(width)s != -1) {
kW = %(width)s; kW = %(width)s;
} }
else if (padW == -2) { else if (padW_l == -2 || padW_r == -2) {
kW = (2 - PyGpuArray_DIMS(bottom)[3] + (PyGpuArray_DIMS(top)[3] - 1) * dW - 1) / dilW + 1; kW = (2 - PyGpuArray_DIMS(bottom)[3] + (PyGpuArray_DIMS(top)[3] - 1) * dW - 1) / dilW + 1;
} }
else { else {
kW = (PyGpuArray_DIMS(bottom)[3] + 2*padW - (PyGpuArray_DIMS(top)[3] - 1) * dW - 1) / dilW + 1; kW = (PyGpuArray_DIMS(bottom)[3] + padW_l + padW_r - (PyGpuArray_DIMS(top)[3] - 1) * dW - 1) / dilW + 1;
} }
} }
...@@ -686,23 +705,23 @@ class BaseGpuCorrMM(CGpuKernelBase): ...@@ -686,23 +705,23 @@ class BaseGpuCorrMM(CGpuKernelBase):
dil_kW = (kW - 1) * dilW + 1; dil_kW = (kW - 1) * dilW + 1;
// Auto-padding if requested // Auto-padding if requested
if (padH == -1) { // vertical half padding if (padH_l == -1 || padH_r == -1) { // vertical half padding
padH = dil_kH / 2; padH_l = padH_r = dil_kH / 2;
} }
else if (padH == -2) { // vertical full padding else if (padH_l == -2 || padH_r == -2) { // vertical full padding
padH = dil_kH - 1; padH_l = padH_r = dil_kH - 1;
} }
else if (padH < 0) { else if (padH_l < 0 || padH_r < 0) {
PyErr_SetString(PyExc_ValueError, "BaseGpuCorrMM: padH must be >= -2"); PyErr_SetString(PyExc_ValueError, "BaseGpuCorrMM: padH must be >= -2");
%(fail)s %(fail)s
} }
if (padW == -1) { // horizontal half padding if (padW_l == -1 || padW_r == -1) { // horizontal half padding
padW = dil_kW / 2; padW_l = padW_r = dil_kW / 2;
} }
else if (padW == -2) { // horizontal full padding else if (padW_l == -2 || padW_r == -2) { // horizontal full padding
padW = dil_kW - 1; padW_l = padW_r = dil_kW - 1;
} }
else if (padW < 0) { else if (padW_l < 0 || padW_r < 0) {
PyErr_SetString(PyExc_ValueError, "BaseGpuCorrMM: padW must be >= -2"); PyErr_SetString(PyExc_ValueError, "BaseGpuCorrMM: padW must be >= -2");
%(fail)s %(fail)s
} }
...@@ -718,11 +737,11 @@ class BaseGpuCorrMM(CGpuKernelBase): ...@@ -718,11 +737,11 @@ class BaseGpuCorrMM(CGpuKernelBase):
switch(direction) { switch(direction) {
case 0: // forward pass case 0: // forward pass
// output is top: (batchsize, num_filters, height, width) // output is top: (batchsize, num_filters, height, width)
// height and width: top = (bottom + 2*pad - ((weight-1)*dil + 1)) / sample + 1 // height and width: top = (bottom + pad_l + pad_r - ((weight-1)*dil + 1)) / sample + 1
out_dim[0] = PyGpuArray_DIMS(bottom)[0]; out_dim[0] = PyGpuArray_DIMS(bottom)[0];
out_dim[1] = PyGpuArray_DIMS(weights)[0]; out_dim[1] = PyGpuArray_DIMS(weights)[0];
out_dim[2] = (PyGpuArray_DIMS(bottom)[2] + 2*padH - ((PyGpuArray_DIMS(weights)[wdim-2]-1)*dilH + 1)) / dH + 1; out_dim[2] = (PyGpuArray_DIMS(bottom)[2] + padH_l + padH_r - ((PyGpuArray_DIMS(weights)[wdim-2]-1)*dilH + 1)) / dH + 1;
out_dim[3] = (PyGpuArray_DIMS(bottom)[3] + 2*padW - ((PyGpuArray_DIMS(weights)[wdim-1]-1)*dilW + 1)) / dW + 1; out_dim[3] = (PyGpuArray_DIMS(bottom)[3] + padW_l + padW_r - ((PyGpuArray_DIMS(weights)[wdim-1]-1)*dilW + 1)) / dW + 1;
out_typecode = bottom->ga.typecode; out_typecode = bottom->ga.typecode;
out_context = bottom->context; out_context = bottom->context;
if (out_dim[0] < 0 || out_dim[1] < 0 || out_dim[2] <= 0 || out_dim[3] <= 0) if (out_dim[0] < 0 || out_dim[1] < 0 || out_dim[2] <= 0 || out_dim[3] <= 0)
...@@ -810,8 +829,8 @@ class BaseGpuCorrMM(CGpuKernelBase): ...@@ -810,8 +829,8 @@ class BaseGpuCorrMM(CGpuKernelBase):
// height and width: bottom = (top - 1) * sample + (weights-1)*dil + 1 - 2*pad // height and width: bottom = (top - 1) * sample + (weights-1)*dil + 1 - 2*pad
out_dim[0] = PyGpuArray_DIMS(top)[0]; out_dim[0] = PyGpuArray_DIMS(top)[0];
out_dim[1] = PyGpuArray_DIMS(weights)[wdim-3] * numgroups; out_dim[1] = PyGpuArray_DIMS(weights)[wdim-3] * numgroups;
out_dim[2] = (%(height)s != -1) ? %(height)s : (PyGpuArray_DIMS(top)[2] - 1) * dH + (PyGpuArray_DIMS(weights)[wdim-2]-1)*dilH + 1 - 2*padH; out_dim[2] = (%(height)s != -1) ? %(height)s : (PyGpuArray_DIMS(top)[2] - 1) * dH + (PyGpuArray_DIMS(weights)[wdim-2]-1)*dilH + 1 - padH_l - padH_r;
out_dim[3] = (%(width)s != -1) ? %(width)s : (PyGpuArray_DIMS(top)[3] - 1) * dW + (PyGpuArray_DIMS(weights)[wdim-1]-1)*dilW + 1 - 2*padW; out_dim[3] = (%(width)s != -1) ? %(width)s : (PyGpuArray_DIMS(top)[3] - 1) * dW + (PyGpuArray_DIMS(weights)[wdim-1]-1)*dilW + 1 - padW_l - padW_r;
out_typecode = top->ga.typecode; out_typecode = top->ga.typecode;
out_context = top->context; out_context = top->context;
if (unshared) { if (unshared) {
...@@ -884,7 +903,8 @@ class BaseGpuCorrMM(CGpuKernelBase): ...@@ -884,7 +903,8 @@ class BaseGpuCorrMM(CGpuKernelBase):
} }
// Call GPU code // Call GPU code
out2 = corrMM(%(bottom)s, %(weights)s, %(top)s, direction, dH, dW, dilH, dilW, padH, padW, numgroups, unshared); out2 = corrMM(%(bottom)s, %(weights)s, %(top)s, direction, dH, dW, dilH, dilW,
padH_l, padH_r, padW_l, padW_r, numgroups, unshared);
if (out2==NULL){ if (out2==NULL){
%(fail)s %(fail)s
} }
...@@ -907,8 +927,11 @@ class GpuCorrMM(BaseGpuCorrMM): ...@@ -907,8 +927,11 @@ class GpuCorrMM(BaseGpuCorrMM):
``'valid'`` for ``(0, 0)`` (valid convolution, no padding), ``'full'`` ``'valid'`` for ``(0, 0)`` (valid convolution, no padding), ``'full'``
for ``(kernel_rows - 1, kernel_columns - 1)`` (full convolution), for ``(kernel_rows - 1, kernel_columns - 1)`` (full convolution),
``'half'`` for ``(kernel_rows // 2, kernel_columns // 2)`` (same ``'half'`` for ``(kernel_rows // 2, kernel_columns // 2)`` (same
convolution for odd-sized kernels). Note that the two widths are each convolution for odd-sized kernels).
applied twice, once per side (left and right, top and bottom). If it is a tuple containing 2 pairs of integers, then these specify
the padding to be applied on each side ((left, right), (top, bottom)).
Otherwise, each width is applied twice, once per side (left and right,
top and bottom).
subsample subsample
The subsample operation applied to each output image. The subsample operation applied to each output image.
Should be a tuple with 2 elements. Should be a tuple with 2 elements.
......
...@@ -43,7 +43,7 @@ KERNEL void dilated_im2col_kernel(const ga_size n, ...@@ -43,7 +43,7 @@ KERNEL void dilated_im2col_kernel(const ga_size n,
const ga_size height, const ga_size width, const ga_size height, const ga_size width,
const ga_size kernel_h, const ga_size kernel_w, const ga_size kernel_h, const ga_size kernel_w,
const ga_size dilation_h, const ga_size dilation_w, const ga_size dilation_h, const ga_size dilation_w,
const ga_size pad_h, const ga_size pad_w, const ga_size pad_hl, const ga_size pad_wl,
const ga_size stride_h, const ga_size stride_w, const ga_size stride_h, const ga_size stride_w,
const ga_size height_col, const ga_size width_col, const ga_size height_col, const ga_size width_col,
GLOBAL_MEM DTYPE_INPUT_0 * data_col, GLOBAL_MEM DTYPE_INPUT_0 * data_col,
...@@ -58,8 +58,8 @@ KERNEL void dilated_im2col_kernel(const ga_size n, ...@@ -58,8 +58,8 @@ KERNEL void dilated_im2col_kernel(const ga_size n,
const ga_size w_col = index % width_col; const ga_size w_col = index % width_col;
const ga_size c_im = h_index / height_col; const ga_size c_im = h_index / height_col;
const ga_size c_col = c_im * kernel_h * kernel_w; const ga_size c_col = c_im * kernel_h * kernel_w;
const ga_size h_offset = h_col * stride_h - pad_h; const ga_size h_offset = h_col * stride_h - pad_hl;
const ga_size w_offset = w_col * stride_w - pad_w; const ga_size w_offset = w_col * stride_w - pad_wl;
GLOBAL_MEM DTYPE_INPUT_0 * data_col_ptr = data_col; GLOBAL_MEM DTYPE_INPUT_0 * data_col_ptr = data_col;
data_col_ptr += (c_col * height_col + h_col) * width_col + w_col; data_col_ptr += (c_col * height_col + h_col) * width_col + w_col;
GLOBAL_MEM const DTYPE_INPUT_0 * data_im_ptr = data_im + data_im_offset; GLOBAL_MEM const DTYPE_INPUT_0 * data_im_ptr = data_im + data_im_offset;
...@@ -88,7 +88,7 @@ KERNEL void im2col_kernel(const ga_size n, ...@@ -88,7 +88,7 @@ KERNEL void im2col_kernel(const ga_size n,
// data_im_offset is an offset of elements in the array // data_im_offset is an offset of elements in the array
const ga_size height, const ga_size width, const ga_size height, const ga_size width,
const ga_size kernel_h, const ga_size kernel_w, const ga_size kernel_h, const ga_size kernel_w,
const ga_size pad_h, const ga_size pad_w, const ga_size pad_hl, const ga_size pad_wl,
const ga_size stride_h, const ga_size stride_w, const ga_size stride_h, const ga_size stride_w,
const ga_size height_col, const ga_size width_col, const ga_size height_col, const ga_size width_col,
GLOBAL_MEM DTYPE_INPUT_0 * data_col, GLOBAL_MEM DTYPE_INPUT_0 * data_col,
...@@ -103,8 +103,8 @@ KERNEL void im2col_kernel(const ga_size n, ...@@ -103,8 +103,8 @@ KERNEL void im2col_kernel(const ga_size n,
const ga_size w_col = index % width_col; const ga_size w_col = index % width_col;
const ga_size c_im = h_index / height_col; const ga_size c_im = h_index / height_col;
const ga_size c_col = c_im * kernel_h * kernel_w; const ga_size c_col = c_im * kernel_h * kernel_w;
const ga_size h_offset = h_col * stride_h - pad_h; const ga_size h_offset = h_col * stride_h - pad_hl;
const ga_size w_offset = w_col * stride_w - pad_w; const ga_size w_offset = w_col * stride_w - pad_wl;
GLOBAL_MEM DTYPE_INPUT_0 * data_col_ptr = data_col; GLOBAL_MEM DTYPE_INPUT_0 * data_col_ptr = data_col;
data_col_ptr += (c_col * height_col + h_col) * width_col + w_col; data_col_ptr += (c_col * height_col + h_col) * width_col + w_col;
GLOBAL_MEM const DTYPE_INPUT_0 * data_im_ptr = data_im + data_im_offset; GLOBAL_MEM const DTYPE_INPUT_0 * data_im_ptr = data_im + data_im_offset;
...@@ -131,7 +131,7 @@ KERNEL void dilated_col2im_kernel(const ga_size n, ...@@ -131,7 +131,7 @@ KERNEL void dilated_col2im_kernel(const ga_size n,
const ga_size height, const ga_size width, const ga_size channels, const ga_size height, const ga_size width, const ga_size channels,
const ga_size kernel_h, const ga_size kernel_w, const ga_size kernel_h, const ga_size kernel_w,
const ga_size dilation_h, const ga_size dilation_w, const ga_size dilation_h, const ga_size dilation_w,
const ga_size pad_h, const ga_size pad_w, const ga_size pad_hl, const ga_size pad_wl,
const ga_size stride_h, const ga_size stride_w, const ga_size stride_h, const ga_size stride_w,
const ga_size height_col, const ga_size width_col, const ga_size height_col, const ga_size width_col,
GLOBAL_MEM DTYPE_INPUT_0 * data_im, GLOBAL_MEM DTYPE_INPUT_0 * data_im,
...@@ -145,8 +145,8 @@ KERNEL void dilated_col2im_kernel(const ga_size n, ...@@ -145,8 +145,8 @@ KERNEL void dilated_col2im_kernel(const ga_size n,
for (ga_size index = GID_0 * LDIM_0 + LID_0; for (ga_size index = GID_0 * LDIM_0 + LID_0;
index < (n); index += LDIM_0 * GDIM_0) { index < (n); index += LDIM_0 * GDIM_0) {
DTYPE_INPUT_0 val = 0; DTYPE_INPUT_0 val = 0;
const ga_size w_im = index % width + pad_w; const ga_size w_im = index % width + pad_wl;
const ga_size h_im = (index / width) % height + pad_h; const ga_size h_im = (index / width) % height + pad_hl;
const ga_size c_im = index / (width * height); const ga_size c_im = index / (width * height);
ga_size kernel_extent_w = (kernel_w - 1) * dilation_w + 1; ga_size kernel_extent_w = (kernel_w - 1) * dilation_w + 1;
ga_size kernel_extent_h = (kernel_h - 1) * dilation_h + 1; ga_size kernel_extent_h = (kernel_h - 1) * dilation_h + 1;
...@@ -182,7 +182,7 @@ KERNEL void col2im_kernel(const ga_size n, ...@@ -182,7 +182,7 @@ KERNEL void col2im_kernel(const ga_size n,
GLOBAL_MEM const DTYPE_INPUT_0 * data_col, const ga_size offset_col, GLOBAL_MEM const DTYPE_INPUT_0 * data_col, const ga_size offset_col,
const ga_size height, const ga_size width, const ga_size channels, const ga_size height, const ga_size width, const ga_size channels,
const ga_size kernel_h, const ga_size kernel_w, const ga_size kernel_h, const ga_size kernel_w,
const ga_size pad_h, const ga_size pad_w, const ga_size pad_hl, const ga_size pad_wl,
const ga_size stride_h, const ga_size stride_w, const ga_size stride_h, const ga_size stride_w,
const ga_size height_col, const ga_size width_col, const ga_size height_col, const ga_size width_col,
GLOBAL_MEM DTYPE_INPUT_0 * data_im, GLOBAL_MEM DTYPE_INPUT_0 * data_im,
...@@ -196,8 +196,8 @@ KERNEL void col2im_kernel(const ga_size n, ...@@ -196,8 +196,8 @@ KERNEL void col2im_kernel(const ga_size n,
for (ga_size index = GID_0 * LDIM_0 + LID_0; for (ga_size index = GID_0 * LDIM_0 + LID_0;
index < (n); index += LDIM_0 * GDIM_0) { index < (n); index += LDIM_0 * GDIM_0) {
DTYPE_INPUT_0 val = 0; DTYPE_INPUT_0 val = 0;
const ga_size w_im = index % width + pad_w; const ga_size w_im = index % width + pad_wl;
const ga_size h_im = (index / width) % height + pad_h; const ga_size h_im = (index / width) % height + pad_hl;
const ga_size c_im = index / (width * height); const ga_size c_im = index / (width * height);
// compute the start and end of the output // compute the start and end of the output
const ga_size w_col_start = const ga_size w_col_start =
...@@ -259,15 +259,16 @@ int rgemm(cb_order o, cb_transpose tA, cb_transpose tB, ...@@ -259,15 +259,16 @@ int rgemm(cb_order o, cb_transpose tA, cb_transpose tB,
int im2col(GpuArray *data_im, const size_t data_im_offset, const size_t channels, int im2col(GpuArray *data_im, const size_t data_im_offset, const size_t channels,
const size_t height, const size_t width, const size_t kernel_h, const size_t kernel_w, const size_t height, const size_t width, const size_t kernel_h, const size_t kernel_w,
const size_t dilation_h, const size_t dilation_w, const size_t dilation_h, const size_t dilation_w,
const size_t pad_h, const size_t pad_w, const size_t pad_hl, const size_t pad_hr,
const size_t pad_wl, const size_t pad_wr,
const size_t stride_h, const size_t stride_w, const size_t stride_h, const size_t stride_w,
GpuArray *data_col) { GpuArray *data_col) {
// We are going to launch channels * height_col * width_col kernels, each // We are going to launch channels * height_col * width_col kernels, each
// kernel responsible for copying a single-channel grid. // kernel responsible for copying a single-channel grid.
size_t dil_kernel_h = (kernel_h - 1) * dilation_h + 1; size_t dil_kernel_h = (kernel_h - 1) * dilation_h + 1;
size_t dil_kernel_w = (kernel_w - 1) * dilation_w + 1; size_t dil_kernel_w = (kernel_w - 1) * dilation_w + 1;
size_t height_col = (height + 2 * pad_h - dil_kernel_h) / stride_h + 1; size_t height_col = (height + pad_hl + pad_hr - dil_kernel_h) / stride_h + 1;
size_t width_col = (width + 2 * pad_w - dil_kernel_w) / stride_w + 1; size_t width_col = (width + pad_wl + pad_wr - dil_kernel_w) / stride_w + 1;
size_t num_kernels = channels * height_col * width_col; size_t num_kernels = channels * height_col * width_col;
int err; int err;
if (dilation_h != 1 || dilation_w != 1) { if (dilation_h != 1 || dilation_w != 1) {
...@@ -275,7 +276,7 @@ int im2col(GpuArray *data_im, const size_t data_im_offset, const size_t channels ...@@ -275,7 +276,7 @@ int im2col(GpuArray *data_im, const size_t data_im_offset, const size_t channels
1, &num_kernels, 0, 1, &num_kernels, 0,
num_kernels, data_im->data, data_im->offset, data_im_offset, num_kernels, data_im->data, data_im->offset, data_im_offset,
height, width, kernel_h, kernel_w, height, width, kernel_h, kernel_w,
dilation_h, dilation_w, pad_h, pad_w, stride_h, stride_w, height_col, dilation_h, dilation_w, pad_hl, pad_wl, stride_h, stride_w, height_col,
width_col, data_col->data, data_col->offset); width_col, data_col->data, data_col->offset);
if (err != GA_NO_ERROR) { if (err != GA_NO_ERROR) {
PyErr_Format(PyExc_RuntimeError, PyErr_Format(PyExc_RuntimeError,
...@@ -287,7 +288,7 @@ int im2col(GpuArray *data_im, const size_t data_im_offset, const size_t channels ...@@ -287,7 +288,7 @@ int im2col(GpuArray *data_im, const size_t data_im_offset, const size_t channels
1, &num_kernels, 0, 1, &num_kernels, 0,
num_kernels, data_im->data, data_im->offset, data_im_offset, num_kernels, data_im->data, data_im->offset, data_im_offset,
height, width, kernel_h, kernel_w, height, width, kernel_h, kernel_w,
pad_h, pad_w, stride_h, stride_w, height_col, pad_hl, pad_wl, stride_h, stride_w, height_col,
width_col, data_col->data, data_col->offset); width_col, data_col->data, data_col->offset);
if (err != GA_NO_ERROR) { if (err != GA_NO_ERROR) {
PyErr_Format(PyExc_RuntimeError, PyErr_Format(PyExc_RuntimeError,
...@@ -301,12 +302,12 @@ int im2col(GpuArray *data_im, const size_t data_im_offset, const size_t channels ...@@ -301,12 +302,12 @@ int im2col(GpuArray *data_im, const size_t data_im_offset, const size_t channels
int col2im(GpuArray *data_col, const size_t channels, int col2im(GpuArray *data_col, const size_t channels,
const size_t height, const size_t width, const size_t patch_h, const size_t patch_w, const size_t height, const size_t width, const size_t patch_h, const size_t patch_w,
const size_t dilation_h, const size_t dilation_w, const size_t dilation_h, const size_t dilation_w,
const size_t pad_h, const size_t pad_w, const size_t stride_h, const size_t pad_hl, const size_t pad_hr, const size_t pad_wl, const size_t pad_wr,
const size_t stride_w, GpuArray *data_im, const size_t data_im_offset) { const size_t stride_h, const size_t stride_w, GpuArray *data_im, const size_t data_im_offset) {
size_t dil_patch_h = (patch_h - 1) * dilation_h + 1; size_t dil_patch_h = (patch_h - 1) * dilation_h + 1;
size_t dil_patch_w = (patch_w - 1) * dilation_w + 1; size_t dil_patch_w = (patch_w - 1) * dilation_w + 1;
size_t height_col = (height + 2 * pad_h - dil_patch_h) / stride_h + 1; size_t height_col = (height + pad_hl + pad_hr - dil_patch_h) / stride_h + 1;
size_t width_col = (width + 2 * pad_w - dil_patch_w) / stride_w + 1; size_t width_col = (width + pad_wl + pad_wr - dil_patch_w) / stride_w + 1;
size_t num_kernels = channels * height * width; size_t num_kernels = channels * height * width;
// To avoid involving atomic operations, we will launch one kernel per // To avoid involving atomic operations, we will launch one kernel per
// bottom dimension, and then in the kernel add up the top dimensions. // bottom dimension, and then in the kernel add up the top dimensions.
...@@ -316,7 +317,7 @@ int col2im(GpuArray *data_col, const size_t channels, ...@@ -316,7 +317,7 @@ int col2im(GpuArray *data_col, const size_t channels,
1, &num_kernels, 0, 1, &num_kernels, 0,
num_kernels, data_col->data, data_col->offset, num_kernels, data_col->data, data_col->offset,
height, width, channels, patch_h, patch_w, height, width, channels, patch_h, patch_w,
dilation_h, dilation_w, pad_h, pad_w, stride_h, stride_w, dilation_h, dilation_w, pad_hl, pad_wl, stride_h, stride_w,
height_col, width_col, data_im->data, data_im->offset, data_im_offset); height_col, width_col, data_im->data, data_im->offset, data_im_offset);
if (err != GA_NO_ERROR) { if (err != GA_NO_ERROR) {
PyErr_Format(PyExc_RuntimeError, PyErr_Format(PyExc_RuntimeError,
...@@ -328,7 +329,7 @@ int col2im(GpuArray *data_col, const size_t channels, ...@@ -328,7 +329,7 @@ int col2im(GpuArray *data_col, const size_t channels,
1, &num_kernels, 0, 1, &num_kernels, 0,
num_kernels, data_col->data, data_col->offset, num_kernels, data_col->data, data_col->offset,
height, width, channels, patch_h, patch_w, height, width, channels, patch_h, patch_w,
pad_h, pad_w, stride_h, stride_w, pad_hl, pad_wl, stride_h, stride_w,
height_col, width_col, data_im->data, data_im->offset, data_im_offset); height_col, width_col, data_im->data, data_im->offset, data_im_offset);
if (err != GA_NO_ERROR) { if (err != GA_NO_ERROR) {
PyErr_Format(PyExc_RuntimeError, PyErr_Format(PyExc_RuntimeError,
...@@ -352,8 +353,10 @@ PyGpuArrayObject* corrMM(PyGpuArrayObject *const bottom, ...@@ -352,8 +353,10 @@ PyGpuArrayObject* corrMM(PyGpuArrayObject *const bottom,
const size_t dW = 1, const size_t dW = 1,
const size_t dilH = 1, const size_t dilH = 1,
const size_t dilW = 1, const size_t dilW = 1,
const size_t padH = 0, const size_t padH_l = 0,
const size_t padW = 0, const size_t padH_r = 0,
const size_t padW_l = 0,
const size_t padW_r = 0,
const size_t numgroups = 1, const size_t numgroups = 1,
const size_t unshared = 0) const size_t unshared = 0)
{ {
...@@ -448,8 +451,8 @@ PyGpuArrayObject* corrMM(PyGpuArrayObject *const bottom, ...@@ -448,8 +451,8 @@ PyGpuArrayObject* corrMM(PyGpuArrayObject *const bottom,
const size_t dil_kH = (kH - 1) * dilH + 1; const size_t dil_kH = (kH - 1) * dilH + 1;
const size_t dil_kW = (kW - 1) * dilW + 1; const size_t dil_kW = (kW - 1) * dilW + 1;
// top: (batchSize, nFilters, topHeight, topWidth) // top: (batchSize, nFilters, topHeight, topWidth)
const size_t topHeightNoDH = (bottomHeight + 2*padH - dil_kH); const size_t topHeightNoDH = (bottomHeight + padH_l + padH_r - dil_kH);
const size_t topWidthNoDW = (bottomWidth + 2*padW - dil_kW); const size_t topWidthNoDW = (bottomWidth + padW_l + padW_r - dil_kW);
// the above values might be negative so we need to use Python-like // the above values might be negative so we need to use Python-like
// flooring integer division to be compatible with get_conv_output. // flooring integer division to be compatible with get_conv_output.
// note: this macro implements Python's // for negative x only // note: this macro implements Python's // for negative x only
...@@ -563,7 +566,7 @@ PyGpuArrayObject* corrMM(PyGpuArrayObject *const bottom, ...@@ -563,7 +566,7 @@ PyGpuArrayObject* corrMM(PyGpuArrayObject *const bottom,
err = im2col(&bottom->ga, n * batch_bottom_stride, err = im2col(&bottom->ga, n * batch_bottom_stride,
nChannels, bottomHeight, nChannels, bottomHeight,
bottomWidth, kH, kW, dilH, dilW, bottomWidth, kH, kW, dilH, dilW,
padH, padW, dH, dW, &col->ga); padH_l, padH_r, padW_l, padW_r, dH, dW, &col->ga);
if (err != GA_NO_ERROR) { if (err != GA_NO_ERROR) {
Py_DECREF(col); Py_DECREF(col);
return NULL; return NULL;
...@@ -623,7 +626,7 @@ PyGpuArrayObject* corrMM(PyGpuArrayObject *const bottom, ...@@ -623,7 +626,7 @@ PyGpuArrayObject* corrMM(PyGpuArrayObject *const bottom,
err = im2col(&bottom->ga, n * batch_bottom_stride, err = im2col(&bottom->ga, n * batch_bottom_stride,
nChannels, bottomHeight, nChannels, bottomHeight,
bottomWidth, kH, kW, dilH, dilW, bottomWidth, kH, kW, dilH, dilW,
padH, padW, dH, dW, &col->ga); padH_l, padH_r, padW_l, padW_r, dH, dW, &col->ga);
if (err != GA_NO_ERROR) { if (err != GA_NO_ERROR) {
Py_DECREF(col); Py_DECREF(col);
return NULL; return NULL;
...@@ -717,7 +720,7 @@ PyGpuArrayObject* corrMM(PyGpuArrayObject *const bottom, ...@@ -717,7 +720,7 @@ PyGpuArrayObject* corrMM(PyGpuArrayObject *const bottom,
} }
// col2im back to the data // col2im back to the data
err = col2im(&col->ga, nChannels, bottomHeight, bottomWidth, err = col2im(&col->ga, nChannels, bottomHeight, bottomWidth,
kH, kW, dilH, dilW, padH, padW, kH, kW, dilH, dilW, padH_l, padH_r, padW_l, padW_r,
dH, dW, &bottom->ga, n * batch_bottom_stride); dH, dW, &bottom->ga, n * batch_bottom_stride);
if (err != GA_NO_ERROR) { if (err != GA_NO_ERROR) {
Py_DECREF(col); Py_DECREF(col);
......
...@@ -3084,6 +3084,10 @@ def local_abstractconv_cudnn_graph(op, context_name, inputs, outputs): ...@@ -3084,6 +3084,10 @@ def local_abstractconv_cudnn_graph(op, context_name, inputs, outputs):
if op.unshared: if op.unshared:
return None return None
if isinstance(op.border_mode, tuple) and any(isinstance(p, tuple) for p in op.border_mode):
# Asymmetric padding not yet supported
return None
inp1 = inputs[0] inp1 = inputs[0]
inp2 = inputs[1] inp2 = inputs[1]
...@@ -3180,6 +3184,9 @@ def local_abstractconv_cudnn(node): ...@@ -3180,6 +3184,9 @@ def local_abstractconv_cudnn(node):
return return
if node.op.unshared: if node.op.unshared:
return None return None
if isinstance(node.op.border_mode, tuple) and any(isinstance(p, tuple) for p in node.op.border_mode):
# Asymmetric padding not yet supported
return None
if isinstance(node.op, AbstractConv2d): if isinstance(node.op, AbstractConv2d):
with inherit_stack_trace(node.outputs): with inherit_stack_trace(node.outputs):
return local_abstractconv_cudnn_graph(node.op, ctx, node.inputs, node.outputs) return local_abstractconv_cudnn_graph(node.op, ctx, node.inputs, node.outputs)
...@@ -3198,6 +3205,9 @@ def local_abstractconv_cudnn_alt(node): ...@@ -3198,6 +3205,9 @@ def local_abstractconv_cudnn_alt(node):
return None return None
if node.op.unshared: if node.op.unshared:
return None return None
if isinstance(node.op.border_mode, tuple) and any(isinstance(p, tuple) for p in node.op.border_mode):
# Asymmetric padding not yet supported
return None
inp1 = node.inputs[0] inp1 = node.inputs[0]
inp2 = node.inputs[1] inp2 = node.inputs[1]
...@@ -3407,6 +3417,9 @@ def local_abstractconv_gw_cudnn(node): ...@@ -3407,6 +3417,9 @@ def local_abstractconv_gw_cudnn(node):
return return
if node.op.unshared: if node.op.unshared:
return None return None
if isinstance(node.op.border_mode, tuple) and any(isinstance(p, tuple) for p in node.op.border_mode):
# Asymmetric padding not yet supported
return None
if isinstance(node.op, AbstractConv2d_gradWeights): if isinstance(node.op, AbstractConv2d_gradWeights):
with inherit_stack_trace(node.outputs): with inherit_stack_trace(node.outputs):
return local_abstractconv_cudnn_graph(node.op, ctx, node.inputs, node.outputs) return local_abstractconv_cudnn_graph(node.op, ctx, node.inputs, node.outputs)
...@@ -3422,6 +3435,9 @@ def local_abstractconv_gi_cudnn(node): ...@@ -3422,6 +3435,9 @@ def local_abstractconv_gi_cudnn(node):
return return
if node.op.unshared: if node.op.unshared:
return None return None
if isinstance(node.op.border_mode, tuple) and any(isinstance(p, tuple) for p in node.op.border_mode):
# Asymmetric padding not yet supported
return None
if isinstance(node.op, AbstractConv2d_gradInputs): if isinstance(node.op, AbstractConv2d_gradInputs):
with inherit_stack_trace(node.outputs): with inherit_stack_trace(node.outputs):
return local_abstractconv_cudnn_graph(node.op, ctx, node.inputs, node.outputs) return local_abstractconv_cudnn_graph(node.op, ctx, node.inputs, node.outputs)
......
...@@ -12,6 +12,7 @@ from ..type import gpuarray_shared_constructor ...@@ -12,6 +12,7 @@ from ..type import gpuarray_shared_constructor
from ..blas import GpuCorrMM, GpuCorrMM_gradWeights, GpuCorrMM_gradInputs from ..blas import GpuCorrMM, GpuCorrMM_gradWeights, GpuCorrMM_gradInputs
from .config import mode_with_gpu, mode_without_gpu, ref_cast from .config import mode_with_gpu, mode_without_gpu, ref_cast
from theano.tensor.nnet.tests.test_abstract_conv import Grouped_conv_noOptim, TestUnsharedConv from theano.tensor.nnet.tests.test_abstract_conv import Grouped_conv_noOptim, TestUnsharedConv
from theano.tensor.nnet.tests.test_abstract_conv import TestAsymmetricPadding, TestCausalConv
class TestCorrMM(unittest.TestCase): class TestCorrMM(unittest.TestCase):
...@@ -272,3 +273,14 @@ class TestUnsharedGpuCorr2d(TestUnsharedConv): ...@@ -272,3 +273,14 @@ class TestUnsharedGpuCorr2d(TestUnsharedConv):
conv2d_op = GpuCorrMM conv2d_op = GpuCorrMM
conv2d_gradw_op = GpuCorrMM_gradWeights conv2d_gradw_op = GpuCorrMM_gradWeights
conv2d_gradi_op = GpuCorrMM_gradInputs conv2d_gradi_op = GpuCorrMM_gradInputs
class TestAsymmetricGpu(TestAsymmetricPadding):
    """Run the shared asymmetric-padding test suite on the GPU.

    Inherits all test cases from TestAsymmetricPadding and only swaps in
    the GPU compilation mode and the CorrMM-based ops for the forward
    pass and both gradients.
    """
    mode = mode_with_gpu
    conv2d_op = GpuCorrMM
    conv2d_gradw_op = GpuCorrMM_gradWeights
    conv2d_gradi_op = GpuCorrMM_gradInputs
class TestCausalGpuCorr(TestCausalConv):
    """Run the shared causal-convolution test suite with the GPU mode."""
    mode = mode_with_gpu
...@@ -72,7 +72,7 @@ def conv2d(input, filters, input_shape=None, filter_shape=None, ...@@ -72,7 +72,7 @@ def conv2d(input, filters, input_shape=None, filter_shape=None,
You can give ``None`` for any element of the list to specify that this You can give ``None`` for any element of the list to specify that this
element is not known at compile time. element is not known at compile time.
border_mode: str, int or tuple of two int border_mode: str, int or a tuple of two ints or pairs of ints
Either of the following: Either of the following:
``'valid'``: apply filter wherever it completely overlaps with the ``'valid'``: apply filter wherever it completely overlaps with the
...@@ -85,8 +85,11 @@ def conv2d(input, filters, input_shape=None, filter_shape=None, ...@@ -85,8 +85,11 @@ def conv2d(input, filters, input_shape=None, filter_shape=None,
leads to the output shape being equal to the input shape. leads to the output shape being equal to the input shape.
``int``: pad input with a symmetric border of zeros of the given ``int``: pad input with a symmetric border of zeros of the given
width, then perform a valid convolution. width, then perform a valid convolution.
``(int1, int2)``: pad input with a symmetric border of ``int1`` rows ``(int1, int2)``: (for 2D) pad input with a symmetric border of ``int1``,
and ``int2`` columns, then perform a valid convolution. ``int2``, then perform a valid convolution.
``(int1, (int2, int3))`` or ``((int1, int2), int3)``: (for 2D)
pad input with one symmetric border of ``int1`` or ``int3``, and
one asymmetric border of ``(int2, int3)`` or ``(int1, int2)``.
subsample: tuple of len 2 subsample: tuple of len 2
Factor by which to subsample the output. Factor by which to subsample the output.
......
...@@ -53,9 +53,10 @@ def get_conv_output_shape(image_shape, kernel_shape, ...@@ -53,9 +53,10 @@ def get_conv_output_shape(image_shape, kernel_shape,
input channels, height and width of the kernel. input channels, height and width of the kernel.
None where undefined. None where undefined.
border_mode: string, int (symbolic or numeric) or tuple of int (symbolic border_mode: string, int (symbolic or numeric) or tuple of int (symbolic
or numeric). If it is a string, it must be 'valid', 'half' or 'full'. or numeric) or pairs of ints. If it is a string, it must be 'valid',
If it is a tuple, its two (or three) elements respectively correspond 'half' or 'full'. If it is a tuple, its two (or three) elements respectively
to the padding on height and width (and possibly depth) axis. correspond to the padding on height and width (and possibly depth)
axis. For asymmetric padding, provide a pair of ints for each dimension.
subsample: tuple of int (symbolic or numeric). Its two or three elements subsample: tuple of int (symbolic or numeric). Its two or three elements
espectively correspond to the subsampling on height and width (and espectively correspond to the subsampling on height and width (and
possibly depth) axis. possibly depth) axis.
...@@ -103,9 +104,11 @@ def get_conv_shape_1axis(image_shape, kernel_shape, border_mode, ...@@ -103,9 +104,11 @@ def get_conv_shape_1axis(image_shape, kernel_shape, border_mode,
given axis. None if undefined. given axis. None if undefined.
kernel_shape: int or None. Corresponds to the kernel shape on a given kernel_shape: int or None. Corresponds to the kernel shape on a given
axis. None if undefined. axis. None if undefined.
border_mode: string or int. If it is a string, it must be border_mode: string, int or tuple of 2 ints. If it is a string, it must be
'valid', 'half' or 'full'. If it is an integer, it must correspond to 'valid', 'half' or 'full'. If it is an integer, it must correspond to
the padding on the considered axis. the padding on the considered axis. If it is a tuple, its two elements
must correspond to the asymmetric padding (e.g., left and right) on
the considered axis.
subsample: int. It must correspond to the subsampling on the subsample: int. It must correspond to the subsampling on the
considered axis. considered axis.
dilation: int. It must correspond to the dilation on the dilation: int. It must correspond to the dilation on the
...@@ -123,22 +126,26 @@ def get_conv_shape_1axis(image_shape, kernel_shape, border_mode, ...@@ -123,22 +126,26 @@ def get_conv_shape_1axis(image_shape, kernel_shape, border_mode,
# Implicit dilated kernel shape # Implicit dilated kernel shape
dil_kernel_shape = (kernel_shape - 1) * dilation + 1 dil_kernel_shape = (kernel_shape - 1) * dilation + 1
if border_mode == "half": if border_mode == "half":
pad = dil_kernel_shape // 2 pad_l = pad_r = dil_kernel_shape // 2
elif border_mode == "full": elif border_mode == "full":
pad = dil_kernel_shape - 1 pad_l = pad_r = dil_kernel_shape - 1
elif border_mode == "valid": elif border_mode == "valid":
pad = 0 pad_l = pad_r = 0
else: else:
pad = border_mode if isinstance(border_mode, tuple):
if pad < 0: pad_l, pad_r = border_mode
else:
pad_l = pad_r = border_mode
if pad_l < 0 or pad_r < 0:
raise ValueError("border_mode must be >= 0") raise ValueError("border_mode must be >= 0")
# In case of symbolic shape, we want to build the smallest graph # In case of symbolic shape, we want to build the smallest graph
# (image_shape + 2 * pad - dil_kernel_shape) // subsample + 1 # (image_shape + 2 * pad - dil_kernel_shape) // subsample + 1
if pad == 0: out_shp = (image_shape - dil_kernel_shape)
out_shp = (image_shape - dil_kernel_shape) if pad_l != 0:
else: out_shp += pad_l
out_shp = (image_shape + 2 * pad - dil_kernel_shape) if pad_r != 0:
out_shp += pad_r
if subsample != 1: if subsample != 1:
out_shp = out_shp // subsample out_shp = out_shp // subsample
out_shp = out_shp + 1 out_shp = out_shp + 1
...@@ -168,9 +175,10 @@ def get_conv_gradweights_shape(image_shape, top_shape, ...@@ -168,9 +175,10 @@ def get_conv_gradweights_shape(image_shape, top_shape,
to: batch size, number of output channels, height and width (and to: batch size, number of output channels, height and width (and
possibly depth) of the image. None where undefined. possibly depth) of the image. None where undefined.
border_mode: string, int (symbolic or numeric) or tuple of int (symbolic border_mode: string, int (symbolic or numeric) or tuple of int (symbolic
or numeric). If it is a string, it must be 'valid', 'half' or 'full'. or numeric) or pairs of ints. If it is a string, it must be 'valid',
If it is a tuple, its two (or three) elements respectively correspond 'half' or 'full'. If it is a tuple, its two (or three) elements respectively
to the padding on height and width (and possibly depth) axis. correspond to the padding on height and width (and possibly depth)
axis. For asymmetric padding, provide a pair of ints for each dimension.
subsample: tuple of int (symbolic or numeric). Its two or three elements subsample: tuple of int (symbolic or numeric). Its two or three elements
respectively correspond to the subsampling on height and width (and respectively correspond to the subsampling on height and width (and
possibly depth) axis. possibly depth) axis.
...@@ -227,9 +235,11 @@ def get_conv_gradweights_shape_1axis(image_shape, top_shape, border_mode, ...@@ -227,9 +235,11 @@ def get_conv_gradweights_shape_1axis(image_shape, top_shape, border_mode,
given axis. None if undefined. given axis. None if undefined.
top_shape: int or None. Corresponds to the top shape on a given axis. top_shape: int or None. Corresponds to the top shape on a given axis.
None if undefined. None if undefined.
border_mode: string or int. If it is a string, it must be border_mode: string, int or tuple of 2 ints. If it is a string, it must be
'valid', 'half' or 'full'. If it is an integer, it must correspond to 'valid', 'half' or 'full'. If it is an integer, it must correspond to
the padding on the considered axis. the padding on the considered axis. If it is a tuple, its two elements
must correspond to the asymmetric padding (e.g., left and right) on
the considered axis.
subsample: int. It must correspond to the subsampling on the subsample: int. It must correspond to the subsampling on the
considered axis. considered axis.
dilation: int. It must correspond to the dilation on the dilation: int. It must correspond to the dilation on the
...@@ -252,9 +262,14 @@ def get_conv_gradweights_shape_1axis(image_shape, top_shape, border_mode, ...@@ -252,9 +262,14 @@ def get_conv_gradweights_shape_1axis(image_shape, top_shape, border_mode,
elif border_mode == "valid": elif border_mode == "valid":
kernel_shape = image_shape - top_shape kernel_shape = image_shape - top_shape
else: else:
if border_mode < 0: if isinstance(border_mode, tuple):
pad_l, pad_r = border_mode
else:
pad_l = pad_r = border_mode
if pad_l < 0 or pad_r < 0:
raise ValueError("border_mode must be >= 0") raise ValueError("border_mode must be >= 0")
kernel_shape = (image_shape + 2 * border_mode - top_shape)
kernel_shape = (image_shape + pad_l + pad_r - top_shape)
if dilation > 1: if dilation > 1:
kernel_shape = kernel_shape / dilation kernel_shape = kernel_shape / dilation
...@@ -284,9 +299,10 @@ def get_conv_gradinputs_shape(kernel_shape, top_shape, ...@@ -284,9 +299,10 @@ def get_conv_gradinputs_shape(kernel_shape, top_shape,
to: batch size, number of output channels, height and width (and to: batch size, number of output channels, height and width (and
possibly depth) of the image. None where undefined. possibly depth) of the image. None where undefined.
border_mode: string, int (symbolic or numeric) or tuple of int (symbolic border_mode: string, int (symbolic or numeric) or tuple of int (symbolic
or numeric). If it is a string, it must be 'valid', 'half' or 'full'. or numeric) or pairs of ints. If it is a string, it must be 'valid',
If it is a tuple, its two (or three) elements respectively correspond 'half' or 'full'. If it is a tuple, its two (or three) elements respectively
to the padding on height and width (and possibly depth) axis. correspond to the padding on height and width (and possibly depth)
axis. For asymmetric padding, provide a pair of ints for each dimension.
subsample: tuple of int (symbolic or numeric). Its two or three elements subsample: tuple of int (symbolic or numeric). Its two or three elements
respectively correspond to the subsampling on height and width (and respectively correspond to the subsampling on height and width (and
possibly depth) axis. possibly depth) axis.
...@@ -340,9 +356,11 @@ def get_conv_gradinputs_shape_1axis(kernel_shape, top_shape, border_mode, ...@@ -340,9 +356,11 @@ def get_conv_gradinputs_shape_1axis(kernel_shape, top_shape, border_mode,
axis. None if undefined. axis. None if undefined.
top_shape: int or None. Corresponds to the top shape on a given axis. top_shape: int or None. Corresponds to the top shape on a given axis.
None if undefined. None if undefined.
border_mode: string or int. If it is a string, it must be border_mode: string, int or tuple of 2 ints. If it is a string, it must be
'valid', 'half' or 'full'. If it is an integer, it must correspond to 'valid', 'half' or 'full'. If it is an integer, it must correspond to
the padding on the considered axis. the padding on the considered axis. If it is a tuple, its two elements
must correspond to the asymmetric padding (e.g., left and right) on
the considered axis.
subsample: int. It must correspond to the subsampling on the subsample: int. It must correspond to the subsampling on the
considered axis. considered axis.
dilation: int. It must correspond to the dilation on the dilation: int. It must correspond to the dilation on the
...@@ -363,23 +381,27 @@ def get_conv_gradinputs_shape_1axis(kernel_shape, top_shape, border_mode, ...@@ -363,23 +381,27 @@ def get_conv_gradinputs_shape_1axis(kernel_shape, top_shape, border_mode,
# Implicit dilated kernel shape # Implicit dilated kernel shape
dil_kernel_shape = (kernel_shape - 1) * dilation + 1 dil_kernel_shape = (kernel_shape - 1) * dilation + 1
if border_mode == "half": if border_mode == "half":
pad = dil_kernel_shape // 2 pad_l = pad_r = dil_kernel_shape // 2
elif border_mode == "full": elif border_mode == "full":
pad = dil_kernel_shape - 1 pad_l = pad_r = dil_kernel_shape - 1
elif border_mode == "valid": elif border_mode == "valid":
pad = 0 pad_l = pad_r = 0
else: else:
pad = border_mode if isinstance(border_mode, tuple):
if pad < 0: pad_l, pad_r = border_mode
else:
pad_l = pad_r = border_mode
if pad_l < 0 or pad_r < 0:
raise ValueError("border_mode must be >= 0") raise ValueError("border_mode must be >= 0")
# In case of symbolic shape, we want to build the smallest graph # In case of symbolic shape, we want to build the smallest graph
# image_shape = (top_shape - 1) * s - 2 * pad + dil_kernel_shape + a # image_shape = (top_shape - 1) * s - 2 * pad + dil_kernel_shape + a
# where 0 <= a < subsample, but we have checked that subsample == 1 # where 0 <= a < subsample, but we have checked that subsample == 1
if pad == 0: image_shape = (top_shape + dil_kernel_shape - 1)
image_shape = (top_shape + dil_kernel_shape - 1) if pad_l > 0:
else: image_shape -= pad_l
image_shape = (top_shape - 2 * pad + dil_kernel_shape - 1) if pad_r > 0:
image_shape -= pad_r
return image_shape return image_shape
...@@ -405,9 +427,10 @@ def check_conv_gradinputs_shape(image_shape, kernel_shape, output_shape, ...@@ -405,9 +427,10 @@ def check_conv_gradinputs_shape(image_shape, kernel_shape, output_shape,
to: batch size, number of output channels, height and width to: batch size, number of output channels, height and width
(and possibly depth) of the output. None where undefined. (and possibly depth) of the output. None where undefined.
border_mode: string, int (symbolic or numeric) or tuple of int (symbolic border_mode: string, int (symbolic or numeric) or tuple of int (symbolic
or numeric). If it is a string, it must be 'valid', 'half' or 'full'. or numeric) or pairs of ints. If it is a string, it must be 'valid',
If it is a tuple, its two (or three) elements respectively correspond 'half' or 'full'. If it is a tuple, its two (or three) elements respectively
to the padding on height and width (and possibly depth) axis. correspond to the padding on height and width (and possibly depth)
axis. For asymmetric padding, provide a pair of ints for each dimension.
subsample: tuple of int (symbolic or numeric). Its two or three elements subsample: tuple of int (symbolic or numeric). Its two or three elements
respectively correspond to the subsampling on height and width (and respectively correspond to the subsampling on height and width (and
possibly depth) axis. possibly depth) axis.
...@@ -533,6 +556,63 @@ def assert_shape(x, expected_shape, msg='Unexpected shape.'): ...@@ -533,6 +556,63 @@ def assert_shape(x, expected_shape, msg='Unexpected shape.'):
return x return x
def border_mode_to_pad(mode, convdim, kshp):
    """
    Computes a tuple for padding given the border_mode parameter

    Parameters
    ----------
    mode : str, int or tuple
        One of "valid", "full", "half", an integer, or a tuple where each
        member is either an integer or a tuple of 2 positive integers.
    convdim : int
        The dimensionality of the convolution.
    kshp : list/tuple of length 'convdim'
        The size of the kernel in the spatial dimensions (used to compute
        the padding for the "full" and "half" modes).

    Returns
    -------
    A tuple containing 'convdim' elements, each of which is a tuple of
    two positive integers corresponding to the padding on the left
    and the right sides respectively.

    Raises
    ------
    ValueError
        If `mode` is not one of the accepted strings, or a tuple of the
        wrong length, or contains invalid (negative / non-integer) entries.
    NotImplementedError
        If a per-axis tuple does not have exactly 2 elements.
    """
    if isinstance(mode, tuple):
        if len(mode) != convdim:
            raise ValueError(
                'invalid border_mode {} which must be a '
                'tuple of length {}'.format(mode, convdim))
        border = ()
        for m in mode:
            if isinstance(m, integer_types) and m >= 0:
                # A plain integer means symmetric padding on this axis.
                border += ((m, m),)
            elif (isinstance(m, tuple) and
                  all(isinstance(b, integer_types) and b >= 0 for b in m)):
                # Validate the length before reading the contents, so an
                # empty or over-long tuple gets a clear error instead of
                # an opaque crash from min()/indexing.
                if len(m) != 2:
                    raise NotImplementedError(
                        'Asymmetric padding not implemented '
                        'for {}d'.format(len(m)))
                border += ((m[0], m[1]),)
            else:
                raise ValueError(
                    'invalid border mode {}. The tuple can only contain '
                    'integers or tuples of length 2'.format(mode))
        pad = border
    elif mode == 'full':
        pad = tuple((kshp[i] - 1,) * 2 for i in range(convdim))
    elif mode == 'half':
        pad = tuple((kshp[i] // 2,) * 2 for i in range(convdim))
    elif mode == 'valid':
        pad = ((0, 0),) * convdim
    else:
        raise ValueError(
            'invalid border_mode {}, which must be either '
            '"valid", "full", "half", an integer or a tuple '
            'of length {}'.format(mode, convdim))
    return pad
def conv2d(input, def conv2d(input,
filters, filters,
input_shape=None, input_shape=None,
...@@ -632,6 +712,12 @@ def separable_conv2d(input, ...@@ -632,6 +712,12 @@ def separable_conv2d(input,
width, then perform a valid convolution. width, then perform a valid convolution.
``(int1, int2)``: pad input with a symmetric border of ``int1`` rows ``(int1, int2)``: pad input with a symmetric border of ``int1`` rows
and ``int2`` columns, then perform a valid convolution. and ``int2`` columns, then perform a valid convolution.
``(int1, (int2, int3))`` or ``((int1, int2), int3)``:
pad input with one symmetric border of ``int1`` or ``int3``, and
one asymmetric border of ``(int2, int3)`` or ``(int1, int2)``.
``((int1, int2), (int3, int4))``: pad input with an asymmetric
border of ``(int1, int2)`` along one dimension and ``(int3, int4)``
along the second dimension.
subsample: tuple of len 2 subsample: tuple of len 2
Factor by which to subsample the output. Factor by which to subsample the output.
...@@ -959,7 +1045,7 @@ def conv2d_grad_wrt_inputs(output_grad, ...@@ -959,7 +1045,7 @@ def conv2d_grad_wrt_inputs(output_grad,
Optional, possibly used to choose an optimal implementation. Optional, possibly used to choose an optimal implementation.
You can give ``None`` for any element of the list to specify that You can give ``None`` for any element of the list to specify that
this element is not known at compile time. this element is not known at compile time.
border_mode : str, int or tuple of two int border_mode: str, int or a tuple of two ints or pairs of ints
Either of the following: Either of the following:
``'valid'`` ``'valid'``
...@@ -986,6 +1072,14 @@ def conv2d_grad_wrt_inputs(output_grad, ...@@ -986,6 +1072,14 @@ def conv2d_grad_wrt_inputs(output_grad,
pad input with a symmetric border of ``int1`` rows and pad input with a symmetric border of ``int1`` rows and
``int2`` columns, then perform a valid convolution. ``int2`` columns, then perform a valid convolution.
``(int1, (int2, int3))`` or ``((int1, int2), int3)``
pad input with one symmetric border of ``int1`` or ``int3``, and
one asymmetric border of ``(int2, int3)`` or ``(int1, int2)``.
``((int1, int2), (int3, int4))``
pad input with an asymmetric border of ``(int1, int2)`` along one dimension and ``(int3, int4)``
along the second dimension.
subsample : tuple of len 2 subsample : tuple of len 2
The subsampling used in the forward pass. Also called strides The subsampling used in the forward pass. Also called strides
elsewhere. elsewhere.
...@@ -1245,7 +1339,7 @@ def conv2d_grad_wrt_weights(input, ...@@ -1245,7 +1339,7 @@ def conv2d_grad_wrt_weights(input,
Optional, possibly used to choose an optimal implementation. Optional, possibly used to choose an optimal implementation.
You can give ``None`` for any element of the list to specify You can give ``None`` for any element of the list to specify
that this element is not known at compile time. that this element is not known at compile time.
border_mode : str, int or tuple of two ints border_mode: str, int or a tuple of two ints or pairs of ints
Either of the following: Either of the following:
``'valid'`` ``'valid'``
...@@ -1271,6 +1365,14 @@ def conv2d_grad_wrt_weights(input, ...@@ -1271,6 +1365,14 @@ def conv2d_grad_wrt_weights(input,
``(int1, int2)`` ``(int1, int2)``
pad input with a symmetric border of ``int1`` rows and pad input with a symmetric border of ``int1`` rows and
``int2`` columns, then perform a valid convolution. ``int2`` columns, then perform a valid convolution.
``(int1, (int2, int3))`` or ``((int1, int2), int3)``
pad input with one symmetric border of ``int1`` or ``int3``, and
one asymmetric border of ``(int2, int3)`` or ``(int1, int2)``.
``((int1, int2), (int3, int4))``
pad input with an asymmetric border of ``(int1, int2)`` along
one dimension and ``(int3, int4)`` along the second dimension.
subsample : tuple of len 2 subsample : tuple of len 2
The subsampling used in the forward pass of the convolutional The subsampling used in the forward pass of the convolutional
operation. Also called strides elsewhere. operation. Also called strides elsewhere.
...@@ -1484,6 +1586,108 @@ def conv3d_grad_wrt_weights(input, ...@@ -1484,6 +1586,108 @@ def conv3d_grad_wrt_weights(input,
return gradWeight_op(input, output_grad, filter_shape[-3:]) return gradWeight_op(input, output_grad, filter_shape[-3:])
def causal_conv1d(input,
                  filters,
                  filter_shape,
                  input_shape=None,
                  subsample=1,
                  filter_flip=True,
                  filter_dilation=1,
                  num_groups=1,
                  unshared=False):
    """
    Computes (dilated) causal convolution

    The input is padded only on the left, so no output element can
    depend on future time steps. Used for modelling temporal data.
    See [WaveNet: A Generative Model for Raw Audio, section 2.1]
    (https://arxiv.org/abs/1609.03499).

    Parameters
    ----------
    input : symbolic 3D tensor
        mini-batch of feature vector stacks, of shape
        (batch_size, input_channels, input_length)
        See the optional parameter ``input_shape``
    filters : symbolic 3D tensor
        Set of filters used in the CNN, of shape
        (output_channels, input_channels, filter_length)
    filter_shape : [None/int/Constant] * 2 + [Tensor/int/Constant]
        The shape of the filters parameter.
        A tuple/list of len 3, with the first two dimensions
        being None or int or Constant and the last dimension being
        Tensor or int or Constant.
        Not optional, since the filter length is needed to calculate
        the left padding for causality.
    input_shape : None or [None/int/Constant] * 3
        The shape of the input parameter.
        None, or a tuple/list of len 3.
        Optional, possibly used to choose an optimal implementation.
    subsample : int
        The factor by which to subsample the output. Also called strides
        elsewhere.
    filter_dilation : int
        Factor by which to subsample (stride) the input. Also called
        dilation factor.
    num_groups : int
        Divides the image, kernel and output tensors into num_groups
        separate groups. Each which carry out convolutions separately
    unshared : bool
        If true, then unshared or 'locally connected' convolution will be
        performed. A different filter will be used for each region of the
        input.

    Returns
    -------
    Symbolic 3D tensor.
        Set of feature vectors generated by convolutional layer. Tensor is
        of shape (batch_size, output_channels, output_length)

    Raises
    ------
    ValueError
        If the inputs are not 3D, or the given shapes are not of length 3.

    Notes
    -----
    :note: Currently, this is implemented with the 2D convolution ops.
    """

    input = as_tensor_variable(input)
    filters = as_tensor_variable(filters)

    if input.ndim != 3:
        raise ValueError('Input should be 3D for causal convolution.')
    if filters.ndim != 3:
        raise ValueError('Filters should be 3D for causal convolution')

    # Reuse the 2D convolution machinery by adding a broadcastable
    # trailing spatial axis of size 1.
    input = input.dimshuffle(0, 1, 2, 'x')
    filters = filters.dimshuffle(0, 1, 2, 'x')

    if input_shape is not None:
        # Explicit raise instead of assert: asserts are stripped under -O.
        if len(input_shape) != 3:
            raise ValueError('input_shape should have length 3 for '
                             'causal convolution, got {}'
                             ''.format(len(input_shape)))
        input_shape = tuple(input_shape)
        input_shape += (1,)

    if len(filter_shape) != 3:
        raise ValueError('filter_shape should have length 3 for '
                         'causal convolution, got {}'
                         ''.format(len(filter_shape)))
    filter_shape = tuple(filter_shape)
    filter_shape += (1,)

    # Pad on the left by the dilated kernel width minus one, so that the
    # receptive field of every output element never extends to its right.
    left_pad = filter_dilation * (filter_shape[2] - 1)

    subsample = (subsample, 1)
    filter_dilation = (filter_dilation, 1)

    conv_op = AbstractConv2d(imshp=input_shape,
                             kshp=filter_shape,
                             border_mode=((left_pad, 0), 0),
                             subsample=subsample,
                             filter_flip=filter_flip,
                             filter_dilation=filter_dilation,
                             num_groups=num_groups,
                             unshared=unshared)
    output = conv_op(input, filters)

    # Drop the dummy trailing axis added above.
    return output[:, :, :, 0]
def bilinear_kernel_2D(ratio, normalize=True): def bilinear_kernel_2D(ratio, normalize=True):
"""Compute 2D kernel for bilinear upsampling """Compute 2D kernel for bilinear upsampling
...@@ -1689,7 +1893,7 @@ class BaseAbstractConv(Op): ...@@ -1689,7 +1893,7 @@ class BaseAbstractConv(Op):
element is not known at compile time. element is not known at compile time.
kshp is defined w.r.t the forward conv. kshp is defined w.r.t the forward conv.
border_mode: str, int or tuple of ``convdim`` ints border_mode: str, int or a tuple of two ints or pairs of ints
Either of the following: Either of the following:
``'valid'``: apply filter wherever it completely overlaps with the ``'valid'``: apply filter wherever it completely overlaps with the
...@@ -1704,6 +1908,12 @@ class BaseAbstractConv(Op): ...@@ -1704,6 +1908,12 @@ class BaseAbstractConv(Op):
width, then perform a valid convolution. width, then perform a valid convolution.
``(int1, int2)``: (for 2D) pad input with a symmetric border of ``int1``, ``(int1, int2)``: (for 2D) pad input with a symmetric border of ``int1``,
``int2``, then perform a valid convolution. ``int2``, then perform a valid convolution.
``(int1, (int2, int3))`` or ``((int1, int2), int3)``: (for 2D)
pad input with one symmetric border of ``int1`` or ``int3``, and
one asymmetric border of ``(int2, int3)`` or ``(int1, int2)``.
``((int1, int2), (int3, int4))``: (for 2D) pad input with an asymmetric
border of ``(int1, int2)`` along one dimension and ``(int3, int4)``
along the second dimension.
``(int1, int2, int3)``: (for 3D) pad input with a symmetric border of ``(int1, int2, int3)``: (for 3D) pad input with a symmetric border of
``int1``, ``int2`` and ``int3``, then perform a valid convolution. ``int1``, ``int2`` and ``int3``, then perform a valid convolution.
...@@ -1751,21 +1961,40 @@ class BaseAbstractConv(Op): ...@@ -1751,21 +1961,40 @@ class BaseAbstractConv(Op):
filter_dilation = (1,) * convdim filter_dilation = (1,) * convdim
if isinstance(border_mode, integer_types): if isinstance(border_mode, integer_types):
if border_mode < 0:
raise ValueError(
'invalid border_mode {}, which must be a '
'non-negative integer'.format(border_mode))
border_mode = (border_mode,) * convdim border_mode = (border_mode,) * convdim
if isinstance(border_mode, tuple): elif isinstance(border_mode, tuple):
if len(border_mode) != convdim: if len(border_mode) != convdim:
raise ValueError( raise ValueError(
'border mode must have exactly {} values, ' 'invalid border_mode {}, which must be a '
'but was {}'.format(convdim, border_mode)) 'tuple of length {}'.format(border_mode, convdim))
border_mode = tuple(map(int, border_mode)) new_border_mode = ()
if border_mode == (0,) * convdim: for mode in border_mode:
border_mode = 'valid' if not((isinstance(mode, integer_types) and mode >= 0) or
if not ((isinstance(border_mode, tuple) and min(border_mode) >= 0) or (isinstance(mode, tuple) and len(mode) == 2 and min(mode) >= 0 and
border_mode in ('valid', 'full', 'half')): all(isinstance(m, integer_types) for m in mode))):
raise ValueError(
'invalid border mode {}. The tuple can only contain integers '
' or pairs of integers'.format(border_mode))
if isinstance(mode, tuple):
if convdim != 2:
raise NotImplementedError(
'Asymmetric padding not implemented for {}D'.format(convdim))
if mode[0] == mode[1]:
mode = mode[0]
new_border_mode += (mode,)
border_mode = new_border_mode
elif border_mode not in ('valid', 'full', 'half'):
raise ValueError( raise ValueError(
'invalid border_mode {}, which must be either ' 'invalid border_mode {}, which must be either '
'"valid", "full", "half", an integer or a tuple of {}' '"valid", "full", "half", an integer or a tuple '
' integers'.format(border_mode, convdim)) 'of length {}'.format(border_mode, convdim))
if isinstance(border_mode, tuple) and \
all(mode == (0, 0) or mode == 0 for mode in border_mode):
border_mode = 'valid'
self.imshp = tuple(imshp) if imshp else (None,) * (2 + convdim) self.imshp = tuple(imshp) if imshp else (None,) * (2 + convdim)
for imshp_i in self.imshp: for imshp_i in self.imshp:
...@@ -2025,27 +2254,16 @@ class AbstractConv(BaseAbstractConv): ...@@ -2025,27 +2254,16 @@ class AbstractConv(BaseAbstractConv):
% self.convdim) % self.convdim)
o, = out_ o, = out_
mode = self.border_mode mode = self.border_mode
pad = border_mode_to_pad(mode, self.convdim, dil_kernshp)
if not ((isinstance(mode, tuple) and min(mode) >= 0) or if any(p != (0, 0) for p in pad):
mode in ('valid', 'full', 'half')):
raise ValueError(
'invalid border_mode {}, which must be either '
'"valid", "full", "half", an integer or a tuple of'
' integers'.format(mode))
if mode == "full":
mode = tuple(dil_kernshp[i] - 1 for i in range(self.convdim))
elif mode == "half":
mode = tuple(dil_kernshp[i] // 2 for i in range(self.convdim))
if isinstance(mode, tuple):
pad = tuple(int(mode[i]) for i in range(self.convdim))
mode = "valid" mode = "valid"
new_img = np.zeros((img.shape[0], img.shape[1]) + new_img = np.zeros((img.shape[0], img.shape[1]) +
tuple(img.shape[i + 2] + 2 * pad[i] tuple(img.shape[i + 2] + pad[i][0] + pad[i][1]
for i in range(self.convdim)), for i in range(self.convdim)),
dtype=img.dtype) dtype=img.dtype)
new_img[(slice(None), slice(None)) + new_img[(slice(None), slice(None)) +
tuple(slice(pad[i], img.shape[i + 2] + pad[i]) tuple(slice(pad[i][0], img.shape[i + 2] + pad[i][0])
for i in range(self.convdim))] = img for i in range(self.convdim))] = img
img = new_img img = new_img
if not self.filter_flip: if not self.filter_flip:
...@@ -2080,7 +2298,6 @@ class AbstractConv(BaseAbstractConv): ...@@ -2080,7 +2298,6 @@ class AbstractConv(BaseAbstractConv):
conv_out = conv_out[(slice(None), slice(None)) + conv_out = conv_out[(slice(None), slice(None)) +
tuple(slice(None, None, self.subsample[i]) tuple(slice(None, None, self.subsample[i])
for i in range(self.convdim))] for i in range(self.convdim))]
o[0] = node.outputs[0].type.filter(conv_out) o[0] = node.outputs[0].type.filter(conv_out)
def R_op(self, inputs, eval_points): def R_op(self, inputs, eval_points):
...@@ -2296,34 +2513,21 @@ class AbstractConv_gradWeights(BaseAbstractConv): ...@@ -2296,34 +2513,21 @@ class AbstractConv_gradWeights(BaseAbstractConv):
o, = out_ o, = out_
mode = self.border_mode
if not ((isinstance(mode, tuple) and min(mode) >= 0) or
mode in ('valid', 'full', 'half')):
raise ValueError(
'invalid border_mode {}, which must be either '
'"valid", "full", "half", an integer or a tuple of'
' integers'.format(mode))
if self.unshared and self.convdim != 2: if self.unshared and self.convdim != 2:
raise NotImplementedError('Unshared convolution not implemented for %dD' raise NotImplementedError('Unshared convolution not implemented for %dD'
% self.convdim) % self.convdim)
dil_shape = tuple((shape[i] - 1) * self.filter_dilation[i] + 1 dil_shape = tuple((shape[i] - 1) * self.filter_dilation[i] + 1
for i in range(self.convdim)) for i in range(self.convdim))
if mode == "full": pad = border_mode_to_pad(self.border_mode, self.convdim, dil_shape)
mode = tuple(dil_shape[i] - 1 for i in range(self.convdim))
elif mode == "half":
mode = tuple(dil_shape[i] // 2 for i in range(self.convdim))
if isinstance(mode, tuple):
pad = tuple(int(mode[i]) for i in range(self.convdim))
mode = "valid" if any(p != (0, 0) for p in pad):
new_img = np.zeros((img.shape[0], img.shape[1]) + new_img = np.zeros((img.shape[0], img.shape[1]) +
tuple(img.shape[i + 2] + 2 * pad[i] tuple(img.shape[i + 2] + pad[i][0] + pad[i][1]
for i in range(self.convdim)), for i in range(self.convdim)),
dtype=img.dtype) dtype=img.dtype)
new_img[(slice(None), slice(None)) + new_img[(slice(None), slice(None)) +
tuple(slice(pad[i], img.shape[i + 2] + pad[i]) tuple(slice(pad[i][0], img.shape[i + 2] + pad[i][0])
for i in range(self.convdim))] = img for i in range(self.convdim))] = img
img = new_img img = new_img
...@@ -2611,16 +2815,13 @@ class AbstractConv_gradInputs(BaseAbstractConv): ...@@ -2611,16 +2815,13 @@ class AbstractConv_gradInputs(BaseAbstractConv):
topgrad = np.asarray(topgrad) topgrad = np.asarray(topgrad)
o, = out_ o, = out_
mode = self.border_mode
if not ((isinstance(mode, tuple) and min(mode) >= 0) or
mode in ('valid', 'full', 'half')):
raise ValueError(
'invalid border_mode {}, which must be either '
'"valid", "full", "half", an integer or a tuple of'
' integers'.format(mode))
if self.unshared and self.convdim != 2: if self.unshared and self.convdim != 2:
raise NotImplementedError('Unshared convolution not implemented for %dD' raise NotImplementedError('Unshared convolution not implemented for %dD'
% self.convdim) % self.convdim)
dil_kernshp = tuple((kern.shape[-self.convdim + i] - 1) * self.filter_dilation[i] + 1
for i in range(self.convdim))
pad = border_mode_to_pad(self.border_mode, self.convdim, dil_kernshp)
imshp = self.imshp[:] if self.imshp is not None else [None] * (2 + self.convdim) imshp = self.imshp[:] if self.imshp is not None else [None] * (2 + self.convdim)
fallback_imshp = ([topgrad.shape[0], kern.shape[-self.convdim - 1]] + fallback_imshp = ([topgrad.shape[0], kern.shape[-self.convdim - 1]] +
...@@ -2636,20 +2837,9 @@ class AbstractConv_gradInputs(BaseAbstractConv): ...@@ -2636,20 +2837,9 @@ class AbstractConv_gradInputs(BaseAbstractConv):
'would produce an output of shape {}, but the given topgrad ' 'would produce an output of shape {}, but the given topgrad '
'has shape {}'.format(tuple(expected_topgrad_shape), 'has shape {}'.format(tuple(expected_topgrad_shape),
tuple(topgrad.shape))) tuple(topgrad.shape)))
dil_kernshp = tuple((kern.shape[-self.convdim + i] - 1) * self.filter_dilation[i] + 1
for i in range(self.convdim))
pad = (0,) * self.convdim
if mode == "full":
pad = tuple(dil_kernshp[i] - 1 for i in range(self.convdim))
elif mode == "half":
pad = tuple(dil_kernshp[i] // 2 for i in range(self.convdim))
elif isinstance(mode, tuple):
pad = tuple(mode[i] for i in range(self.convdim))
if any(self.subsample[i] > 1 for i in range(self.convdim)): if any(self.subsample[i] > 1 for i in range(self.convdim)):
new_shape = ((topgrad.shape[0], topgrad.shape[1]) + new_shape = ((topgrad.shape[0], topgrad.shape[1]) +
tuple(shape[i] + 2 * pad[i] - dil_kernshp[i] + 1 tuple(shape[i] + pad[i][0] + pad[i][1] - dil_kernshp[i] + 1
for i in range(self.convdim))) for i in range(self.convdim)))
new_topgrad = np.zeros((new_shape), dtype=topgrad.dtype) new_topgrad = np.zeros((new_shape), dtype=topgrad.dtype)
new_topgrad[(slice(None), slice(None)) + new_topgrad[(slice(None), slice(None)) +
...@@ -2705,9 +2895,9 @@ class AbstractConv_gradInputs(BaseAbstractConv): ...@@ -2705,9 +2895,9 @@ class AbstractConv_gradInputs(BaseAbstractConv):
if self.filter_flip: if self.filter_flip:
img = img[flip_filters] img = img[flip_filters]
if any(p > 0 for p in pad): if any(p != (0, 0) for p in pad):
img = img[(slice(None), slice(None)) + img = img[(slice(None), slice(None)) +
tuple(slice(pad[i], img.shape[i + 2] - pad[i]) tuple(slice(pad[i][0], img.shape[i + 2] - pad[i][1])
for i in range(self.convdim))] for i in range(self.convdim))]
o[0] = node.outputs[0].type.filter(img) o[0] = node.outputs[0].type.filter(img)
......
...@@ -31,23 +31,23 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ...@@ -31,23 +31,23 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
void im2col(const %(float_type)s* data_im, const int channels, void im2col(const %(float_type)s* data_im, const int channels,
const int height, const int width, const int kernel_h, const int kernel_w, const int height, const int width, const int kernel_h, const int kernel_w,
const int dilation_h, const int dilation_w, const int dilation_h, const int dilation_w,
const int pad_h, const int pad_w, const int pad_hl, const int pad_hr, const int pad_wl, const int pad_wr,
const int stride_h, const int stride_w, const int stride_h, const int stride_w,
%(float_type)s* data_col) { %(float_type)s* data_col) {
// Implicit dilated kernel size // Implicit dilated kernel size
int dil_kernel_h = (kernel_h - 1) * dilation_h + 1; int dil_kernel_h = (kernel_h - 1) * dilation_h + 1;
int dil_kernel_w = (kernel_w - 1) * dilation_w + 1; int dil_kernel_w = (kernel_w - 1) * dilation_w + 1;
int height_col = (height + 2 * pad_h - dil_kernel_h) / stride_h + 1; int height_col = (height + pad_hl + pad_hr - dil_kernel_h) / stride_h + 1;
int width_col = (width + 2 * pad_w - dil_kernel_w) / stride_w + 1; int width_col = (width + pad_wl + pad_wr - dil_kernel_w) / stride_w + 1;
int channels_col = channels * kernel_h * kernel_w; int channels_col = channels * kernel_h * kernel_w;
for (int c = 0; c < channels_col; ++c) { for (int c = 0; c < channels_col; ++c) {
int w_offset = c %% kernel_w; int w_offset = c %% kernel_w;
int h_offset = (c / kernel_w) %% kernel_h; int h_offset = (c / kernel_w) %% kernel_h;
int c_im = c / kernel_h / kernel_w; int c_im = c / kernel_h / kernel_w;
for (int h = 0; h < height_col; ++h) { for (int h = 0; h < height_col; ++h) {
int h_pad = h * stride_h - pad_h + h_offset * dilation_h; int h_pad = h * stride_h - pad_hl + h_offset * dilation_h;
for (int w = 0; w < width_col; ++w) { for (int w = 0; w < width_col; ++w) {
int w_pad = w * stride_w - pad_w + w_offset * dilation_w; int w_pad = w * stride_w - pad_wl + w_offset * dilation_w;
if (h_pad >= 0 && h_pad < height && w_pad >= 0 && w_pad < width) if (h_pad >= 0 && h_pad < height && w_pad >= 0 && w_pad < width)
data_col[(npy_intp)(c * height_col + h) * width_col + w] = data_col[(npy_intp)(c * height_col + h) * width_col + w] =
data_im[(npy_intp)(c_im * height + h_pad) * width + w_pad]; data_im[(npy_intp)(c_im * height + h_pad) * width + w_pad];
...@@ -64,13 +64,14 @@ void im2col(const %(float_type)s* data_im, const int channels, ...@@ -64,13 +64,14 @@ void im2col(const %(float_type)s* data_im, const int channels,
void col2im(const %(float_type)s* data_col, const int channels, void col2im(const %(float_type)s* data_col, const int channels,
const int height, const int width, const int patch_h, const int patch_w, const int height, const int width, const int patch_h, const int patch_w,
const int dilation_h, const int dilation_w, const int dilation_h, const int dilation_w,
const int pad_h, const int pad_w, const int stride_h, const int pad_hl, const int pad_hr, const int pad_wl, const int pad_wr,
const int stride_w, %(float_type)s* data_im) { const int stride_h, const int stride_w,
%(float_type)s* data_im) {
// Implicit dilated patch // Implicit dilated patch
int dil_patch_h = (patch_h - 1) * dilation_h + 1; int dil_patch_h = (patch_h - 1) * dilation_h + 1;
int dil_patch_w = (patch_w - 1) * dilation_w + 1; int dil_patch_w = (patch_w - 1) * dilation_w + 1;
int height_col = (height + 2 * pad_h - dil_patch_h) / stride_h + 1; int height_col = (height + pad_hl + pad_hr - dil_patch_h) / stride_h + 1;
int width_col = (width + 2 * pad_w - dil_patch_w) / stride_w + 1; int width_col = (width + pad_wl + pad_wr - dil_patch_w) / stride_w + 1;
int num_kernels = channels * height * width; int num_kernels = channels * height * width;
int channels_col = channels * patch_h * patch_w; int channels_col = channels * patch_h * patch_w;
for (int c = 0; c < channels_col; ++c) { for (int c = 0; c < channels_col; ++c) {
...@@ -78,9 +79,9 @@ void col2im(const %(float_type)s* data_col, const int channels, ...@@ -78,9 +79,9 @@ void col2im(const %(float_type)s* data_col, const int channels,
int h_offset = (c / patch_w) %% patch_h; int h_offset = (c / patch_w) %% patch_h;
int c_im = c / patch_h / patch_w; int c_im = c / patch_h / patch_w;
for (int h = 0; h < height_col; ++h) { for (int h = 0; h < height_col; ++h) {
int h_pad = h * stride_h - pad_h + h_offset * dilation_h; int h_pad = h * stride_h - pad_hl + h_offset * dilation_h;
for (int w = 0; w < width_col; ++w) { for (int w = 0; w < width_col; ++w) {
int w_pad = w * stride_w - pad_w + w_offset * dilation_w; int w_pad = w * stride_w - pad_wl + w_offset * dilation_w;
if (h_pad >= 0 && h_pad < height && w_pad >= 0 && w_pad < width) if (h_pad >= 0 && h_pad < height && w_pad >= 0 && w_pad < width)
data_im[(npy_intp)(c_im * height + h_pad) * width + w_pad] += data_im[(npy_intp)(c_im * height + h_pad) * width + w_pad] +=
data_col[(npy_intp)(c * height_col + h) * width_col + w]; data_col[(npy_intp)(c * height_col + h) * width_col + w];
...@@ -105,8 +106,10 @@ PyArrayObject* corrMM(PyArrayObject* bottom, ...@@ -105,8 +106,10 @@ PyArrayObject* corrMM(PyArrayObject* bottom,
const int dW = 1, const int dW = 1,
const int dilH = 1, const int dilH = 1,
const int dilW = 1, const int dilW = 1,
const int padH = 0, const int padH_l = 0,
const int padW = 0, const int padH_r = 0,
const int padW_l = 0,
const int padW_r = 0,
const int numgroups = 1, const int numgroups = 1,
const int unshared = 0) const int unshared = 0)
{ {
...@@ -172,8 +175,8 @@ PyArrayObject* corrMM(PyArrayObject* bottom, ...@@ -172,8 +175,8 @@ PyArrayObject* corrMM(PyArrayObject* bottom,
const int dil_kH = (kH - 1) * dilH + 1; const int dil_kH = (kH - 1) * dilH + 1;
const int dil_kW = (kW - 1) * dilW + 1; const int dil_kW = (kW - 1) * dilW + 1;
// top: (batchSize, nFilters, topHeight, topWidth) // top: (batchSize, nFilters, topHeight, topWidth)
const int topHeightNoDH = (bottomHeight + 2*padH - dil_kH); const int topHeightNoDH = (bottomHeight + padH_l + padH_r - dil_kH);
const int topWidthNoDW = (bottomWidth + 2*padW - dil_kW); const int topWidthNoDW = (bottomWidth + padW_l + padW_r - dil_kW);
// the above values might be negative so we need to use Python-like // the above values might be negative so we need to use Python-like
// flooring integer division to be compatible with get_conv_output. // flooring integer division to be compatible with get_conv_output.
// note: this macro implements Python's // for negative x only // note: this macro implements Python's // for negative x only
...@@ -303,7 +306,7 @@ PyArrayObject* corrMM(PyArrayObject* bottom, ...@@ -303,7 +306,7 @@ PyArrayObject* corrMM(PyArrayObject* bottom,
int tid = %(omp_get_thread_num)s; int tid = %(omp_get_thread_num)s;
// First, im2col // First, im2col
im2col((%(float_type)s*)PyArray_DATA(bottom) + n * batch_bottom_stride, nChannels, im2col((%(float_type)s*)PyArray_DATA(bottom) + n * batch_bottom_stride, nChannels,
bottomHeight,bottomWidth, kH, kW, dilH, dilW, padH, padW, dH, dW, bottomHeight,bottomWidth, kH, kW, dilH, dilW, padH_l, padH_r, padW_l, padW_r, dH, dW,
(%(float_type)s*)PyArray_DATA(col)+ tid * col_stride); (%(float_type)s*)PyArray_DATA(col)+ tid * col_stride);
// Second, gemm // Second, gemm
if (unshared) { if (unshared) {
...@@ -396,7 +399,7 @@ PyArrayObject* corrMM(PyArrayObject* bottom, ...@@ -396,7 +399,7 @@ PyArrayObject* corrMM(PyArrayObject* bottom,
int tid = %(omp_get_thread_num)s; int tid = %(omp_get_thread_num)s;
// First, im2col // First, im2col
im2col((%(float_type)s*)PyArray_DATA(bottom) + n * batch_bottom_stride, im2col((%(float_type)s*)PyArray_DATA(bottom) + n * batch_bottom_stride,
nChannels, bottomHeight,bottomWidth, kH, kW, dilH, dilW, padH, padW, dH, dW, nChannels, bottomHeight,bottomWidth, kH, kW, dilH, dilW, padH_l, padH_r, padW_l, padW_r, dH, dW,
(%(float_type)s*)PyArray_DATA(col)+ tid * col_stride); (%(float_type)s*)PyArray_DATA(col)+ tid * col_stride);
// Second, gemm // Second, gemm
// Note that we accumulate into weight. We do so by setting beta = 0 // Note that we accumulate into weight. We do so by setting beta = 0
...@@ -519,7 +522,7 @@ PyArrayObject* corrMM(PyArrayObject* bottom, ...@@ -519,7 +522,7 @@ PyArrayObject* corrMM(PyArrayObject* bottom,
} }
// col2im back to the data // col2im back to the data
col2im((%(float_type)s*)PyArray_DATA(col) + tid * col_stride, nChannels, bottomHeight, bottomWidth, col2im((%(float_type)s*)PyArray_DATA(col) + tid * col_stride, nChannels, bottomHeight, bottomWidth,
kH, kW, dilH, dilW, padH, padW, kH, kW, dilH, dilW, padH_l, padH_r, padW_l, padW_r,
dH, dW, (%(float_type)s*)PyArray_DATA(bottom) + n * batch_bottom_stride); dH, dW, (%(float_type)s*)PyArray_DATA(bottom) + n * batch_bottom_stride);
} }
// Restore to previous blas threads // Restore to previous blas threads
......
...@@ -34,8 +34,8 @@ class BaseCorrMM(gof.OpenMPOp): ...@@ -34,8 +34,8 @@ class BaseCorrMM(gof.OpenMPOp):
Parameters Parameters
---------- ----------
border_mode : {'valid', 'full', 'half'} border_mode : {'valid', 'full', 'half'}
Additionally, the padding size could be directly specified by an integer Additionally, the padding size could be directly specified by an integer,
or a pair of integers a pair of integers, or two pairs of integers.
subsample subsample
Perform subsampling of the output (default: (1, 1)). Perform subsampling of the output (default: (1, 1)).
filter_dilation filter_dilation
...@@ -55,7 +55,8 @@ class BaseCorrMM(gof.OpenMPOp): ...@@ -55,7 +55,8 @@ class BaseCorrMM(gof.OpenMPOp):
('DIRECTION_BACKPROP_INPUTS', 'backprop inputs')), # 2 ('DIRECTION_BACKPROP_INPUTS', 'backprop inputs')), # 2
dH=int64, dW=int64, dH=int64, dW=int64,
dilH=int64, dilW=int64, dilH=int64, dilW=int64,
padH=int64, padW=int64, padH_l=int64, padH_r=int64,
padW_l=int64, padW_r=int64,
num_groups=int64, unshared=int8) num_groups=int64, unshared=int8)
def __init__(self, border_mode="valid", subsample=(1, 1), def __init__(self, border_mode="valid", subsample=(1, 1),
...@@ -66,20 +67,29 @@ class BaseCorrMM(gof.OpenMPOp): ...@@ -66,20 +67,29 @@ class BaseCorrMM(gof.OpenMPOp):
raise ValueError( raise ValueError(
'invalid border_mode {}, which must be a ' 'invalid border_mode {}, which must be a '
'non-negative integer'.format(border_mode)) 'non-negative integer'.format(border_mode))
border_mode = (border_mode, border_mode) border_mode = ((border_mode, border_mode),) * 2
if isinstance(border_mode, tuple): elif isinstance(border_mode, tuple):
if len(border_mode) != 2 or border_mode[0] < 0 or border_mode[1] < 0: if len(border_mode) != 2:
raise ValueError( raise ValueError(
'invalid border_mode {}, which must be a ' 'invalid border_mode {} which must be a '
'pair of non-negative integers'.format(border_mode)) 'tuple of length 2'.format(border_mode))
pad_h, pad_w = map(int, border_mode) border = ()
border_mode = (pad_h, pad_w) for mode in border_mode:
if not ((isinstance(border_mode, tuple) and min(border_mode) >= 0) or if isinstance(mode, tuple) and len(mode) == 2 and \
border_mode in ('valid', 'full', 'half')): min(mode) >= 0:
border += ((int(mode[0]), int(mode[1])),)
elif mode >= 0:
border += ((int(mode), int(mode)),)
else:
raise ValueError(
'invalid border mode {}. The tuple can only contain '
'integers or tuples of length 2'.format(border_mode))
border_mode = border
elif border_mode not in ('valid', 'full', 'half'):
raise ValueError( raise ValueError(
'invalid border_mode {}, which must be either ' 'invalid border_mode {}, which must be either '
'"valid", "full", "half", an integer or a pair of' '"valid", "full", "half", an integer or a tuple '
' integers'.format(border_mode)) 'of two integers or a pair of integers'.format(border_mode))
self.border_mode = border_mode self.border_mode = border_mode
if len(subsample) != 2: if len(subsample) != 2:
raise ValueError("subsample must have two elements") raise ValueError("subsample must have two elements")
...@@ -110,14 +120,14 @@ class BaseCorrMM(gof.OpenMPOp): ...@@ -110,14 +120,14 @@ class BaseCorrMM(gof.OpenMPOp):
@property @property
def pad(self): def pad(self):
if self.border_mode == "half": if self.border_mode == "half":
return (-1, -1) return ((-1, -1),) * 2
elif self.border_mode == "full": elif self.border_mode == "full":
return (-2, -2) return ((-2, -2),) * 2
elif isinstance(self.border_mode, tuple): elif isinstance(self.border_mode, tuple):
return self.border_mode return self.border_mode
else: else:
assert self.border_mode == "valid" assert self.border_mode == "valid"
return (0, 0) return ((0, 0),) * 2
# Direction should be converted to real enum value, # Direction should be converted to real enum value,
# as it is compared to integer later in c_code_helper(). # as it is compared to integer later in c_code_helper().
...@@ -129,8 +139,10 @@ class BaseCorrMM(gof.OpenMPOp): ...@@ -129,8 +139,10 @@ class BaseCorrMM(gof.OpenMPOp):
dilH = property(lambda self: self.filter_dilation[0]) dilH = property(lambda self: self.filter_dilation[0])
dilW = property(lambda self: self.filter_dilation[1]) dilW = property(lambda self: self.filter_dilation[1])
padH = property(lambda self: self.pad[0]) padH_l = property(lambda self: self.pad[0][0])
padW = property(lambda self: self.pad[1]) padH_r = property(lambda self: self.pad[0][1])
padW_l = property(lambda self: self.pad[1][0])
padW_r = property(lambda self: self.pad[1][1])
def __str__(self): def __str__(self):
return '%s{%s, %s, %s, %s %s}' % ( return '%s{%s, %s, %s, %s %s}' % (
...@@ -183,7 +195,7 @@ class BaseCorrMM(gof.OpenMPOp): ...@@ -183,7 +195,7 @@ class BaseCorrMM(gof.OpenMPOp):
def c_code_cache_version(self): def c_code_cache_version(self):
# raise this whenever modifying any of the support_code_files # raise this whenever modifying any of the support_code_files
return (9, self.openmp, blas_header_version()) return (10, self.openmp, blas_header_version())
def c_support_code_apply(self, node, nodename): def c_support_code_apply(self, node, nodename):
# REMEMBER TO RAISE c_code_cache_version when changing any of # REMEMBER TO RAISE c_code_cache_version when changing any of
...@@ -271,13 +283,13 @@ class BaseCorrMM(gof.OpenMPOp): ...@@ -271,13 +283,13 @@ class BaseCorrMM(gof.OpenMPOp):
if height: if height:
height = '(*(npy_int64 *)(PyArray_DATA(%s)))' % height height = '(*(npy_int64 *)(PyArray_DATA(%s)))' % height
else: else:
if ((self.direction != 0) and (self.dH != 1)) or ((self.direction == 1) and (self.padH == -1)): if ((self.direction != 0) and (self.dH != 1)) or ((self.direction == 1) and (self.padH_l == -1 or self.padH_r == -1)):
raise ValueError("height must be given for backprop with vertical sampling or border_mode='half'") raise ValueError("height must be given for backprop with vertical sampling or border_mode='half'")
height = '-1' height = '-1'
if width: if width:
width = '(*(npy_int64 *)(PyArray_DATA(%s)))' % width width = '(*(npy_int64 *)(PyArray_DATA(%s)))' % width
else: else:
if ((self.direction != 0) and (self.dW != 1)) or ((self.direction == 1) and (self.padW == -1)): if ((self.direction != 0) and (self.dW != 1)) or ((self.direction == 1) and (self.padW_l == -1 or self.padW_r == -1)):
raise ValueError("width must be given for backprop with horizontal sampling or border_mode='half'") raise ValueError("width must be given for backprop with horizontal sampling or border_mode='half'")
width = '-1' width = '-1'
...@@ -290,8 +302,10 @@ class BaseCorrMM(gof.OpenMPOp): ...@@ -290,8 +302,10 @@ class BaseCorrMM(gof.OpenMPOp):
int dW = %(params)s->dW; int dW = %(params)s->dW;
int dilH = %(params)s->dilH; int dilH = %(params)s->dilH;
int dilW = %(params)s->dilW; int dilW = %(params)s->dilW;
int padH = %(params)s->padH; int padH_l = %(params)s->padH_l;
int padW = %(params)s->padW; int padH_r = %(params)s->padH_r;
int padW_l = %(params)s->padW_l;
int padW_r = %(params)s->padW_r;
int numgroups = %(params)s->num_groups; int numgroups = %(params)s->num_groups;
int unshared = %(params)s->unshared; int unshared = %(params)s->unshared;
...@@ -334,23 +348,23 @@ class BaseCorrMM(gof.OpenMPOp): ...@@ -334,23 +348,23 @@ class BaseCorrMM(gof.OpenMPOp):
// kernel height is specified (perhaps vertical subsampling or half padding) // kernel height is specified (perhaps vertical subsampling or half padding)
kH = %(height)s; kH = %(height)s;
} }
else if (padH == -2) { else if (padH_l == -2 || padH_r == -2) {
// vertical full padding, we can infer the kernel height // vertical full padding, we can infer the kernel height
kH = (2 - PyArray_DIMS(bottom)[2] + (PyArray_DIMS(top)[2] - 1) * dH - 1)/ dilH + 1; kH = (2 - PyArray_DIMS(bottom)[2] + (PyArray_DIMS(top)[2] - 1) * dH - 1)/ dilH + 1;
} }
else { else {
// explicit padding, we can infer the kernel height // explicit padding, we can infer the kernel height
kH = (PyArray_DIMS(bottom)[2] + 2*padH - (PyArray_DIMS(top)[2] - 1) * dH - 1) / dilH +1; kH = (PyArray_DIMS(bottom)[2] + padH_l + padH_r - (PyArray_DIMS(top)[2] - 1) * dH - 1) / dilH +1;
} }
if (%(width)s != -1) { if (%(width)s != -1) {
// kernel width is specified (perhaps horizontal subsampling or half padding) // kernel width is specified (perhaps horizontal subsampling or half padding)
kW = %(width)s; kW = %(width)s;
} }
else if (padW == -2) { else if (padW_l == -2 || padW_r == -2) {
kW = (2 - PyArray_DIMS(bottom)[3] + (PyArray_DIMS(top)[3] - 1) * dW - 1) / dilW + 1; kW = (2 - PyArray_DIMS(bottom)[3] + (PyArray_DIMS(top)[3] - 1) * dW - 1) / dilW + 1;
} }
else { else {
kW = (PyArray_DIMS(bottom)[3] + 2*padW - (PyArray_DIMS(top)[3] - 1) * dW - 1) / dilW + 1; kW = (PyArray_DIMS(bottom)[3] + padW_l + padW_r - (PyArray_DIMS(top)[3] - 1) * dW - 1) / dilW + 1;
} }
} }
...@@ -359,24 +373,24 @@ class BaseCorrMM(gof.OpenMPOp): ...@@ -359,24 +373,24 @@ class BaseCorrMM(gof.OpenMPOp):
dil_kW = (kW - 1) * dilW + 1; dil_kW = (kW - 1) * dilW + 1;
// Auto-padding if requested // Auto-padding if requested
if (padH == -1) { // vertical half padding if (padH_l == -1 || padH_r == -1) { // vertical half padding
padH = dil_kH / 2; padH_l = padH_r = dil_kH / 2;
} }
else if (padH == -2) { // vertical full padding else if (padH_l == -2 || padH_r == -2) { // vertical full padding
padH = dil_kH - 1; padH_l = padH_r = dil_kH - 1;
} }
else if (padH < 0) { else if (padH_l < -2 || padH_r < -2) {
PyErr_SetString(PyExc_ValueError, "BaseCorrMM: padH must be >= -2"); PyErr_SetString(PyExc_ValueError, "BaseCorrMM: padH_l and padH_r must be >= -2");
%(fail)s %(fail)s
} }
if (padW == -1) { // horizontal half padding if (padW_l == -1 || padW_r == -1) { // horizontal half padding
padW = dil_kW / 2; padW_l = padW_r = dil_kW / 2;
} }
else if (padW == -2) { // horizontal full padding else if (padW_l == -2 || padW_r == -2) { // horizontal full padding
padW = dil_kW - 1; padW_l = padW_r = dil_kW - 1;
} }
else if (padW < 0) { else if (padW_l < -2 || padW_r < -2) {
PyErr_SetString(PyExc_ValueError, "BaseCorrMM: padW must be >= -2"); PyErr_SetString(PyExc_ValueError, "BaseCorrMM: padW_l and padW_r must be >= -2");
%(fail)s %(fail)s
} }
...@@ -386,11 +400,11 @@ class BaseCorrMM(gof.OpenMPOp): ...@@ -386,11 +400,11 @@ class BaseCorrMM(gof.OpenMPOp):
switch(direction) { switch(direction) {
case 0: // forward pass case 0: // forward pass
// output is top: (batchsize, num_filters, height, width) // output is top: (batchsize, num_filters, height, width)
// height and width: top = (bottom + 2*pad - ((weight-1)*dil + 1)) / sample + 1 // height and width: top = (bottom + pad_l + pad_r - ((weight-1)*dil + 1)) / sample + 1
out_dim[0] = (npy_intp)PyArray_DIMS(bottom)[0]; out_dim[0] = (npy_intp)PyArray_DIMS(bottom)[0];
out_dim[1] = (npy_intp)PyArray_DIMS(weights)[0]; out_dim[1] = (npy_intp)PyArray_DIMS(weights)[0];
out_dim[2] = (npy_intp)((PyArray_DIMS(bottom)[2] + 2*padH - ((PyArray_DIMS(weights)[wdim-2]-1)*dilH + 1)) / dH + 1); out_dim[2] = (npy_intp)((PyArray_DIMS(bottom)[2] + padH_l + padH_r - ((PyArray_DIMS(weights)[wdim-2]-1)*dilH + 1)) / dH + 1);
out_dim[3] = (npy_intp)((PyArray_DIMS(bottom)[3] + 2*padW - ((PyArray_DIMS(weights)[wdim-1]-1)*dilW + 1)) / dW + 1); out_dim[3] = (npy_intp)((PyArray_DIMS(bottom)[3] + padW_l + padW_r - ((PyArray_DIMS(weights)[wdim-1]-1)*dilW + 1)) / dW + 1);
if (out_dim[0] < 0 || out_dim[1] < 0 || out_dim[2] <= 0 || out_dim[3] <= 0) if (out_dim[0] < 0 || out_dim[1] < 0 || out_dim[2] <= 0 || out_dim[3] <= 0)
{ {
if (unshared) { if (unshared) {
...@@ -425,7 +439,7 @@ class BaseCorrMM(gof.OpenMPOp): ...@@ -425,7 +439,7 @@ class BaseCorrMM(gof.OpenMPOp):
break; break;
case 1: // backprop wrt. weights case 1: // backprop wrt. weights
// output is weights: (num_filters, num_channels, height, width) // output is weights: (num_filters, num_channels, height, width)
// height and width: weights = (bottom + 2*pad - (top - 1) * sample - 1) / dil + 1 // height and width: weights = (bottom + pad_l + pad_r - (top - 1) * sample - 1) / dil + 1
out_dim[0] = (npy_intp)PyArray_DIMS(top)[1]; out_dim[0] = (npy_intp)PyArray_DIMS(top)[1];
if (unshared){ if (unshared){
odim = 6; odim = 6;
...@@ -475,8 +489,8 @@ class BaseCorrMM(gof.OpenMPOp): ...@@ -475,8 +489,8 @@ class BaseCorrMM(gof.OpenMPOp):
// height and width: bottom = (top - 1) * sample + (weights-1)*dil + 1 - 2*pad // height and width: bottom = (top - 1) * sample + (weights-1)*dil + 1 - 2*pad
out_dim[0] = (npy_intp)PyArray_DIMS(top)[0]; out_dim[0] = (npy_intp)PyArray_DIMS(top)[0];
out_dim[1] = (npy_intp)PyArray_DIMS(weights)[wdim-3] * numgroups; out_dim[1] = (npy_intp)PyArray_DIMS(weights)[wdim-3] * numgroups;
out_dim[2] = (npy_intp)((%(height)s != -1) ? %(height)s : (PyArray_DIMS(top)[2] - 1) * dH + (PyArray_DIMS(weights)[wdim-2]-1)*dilH + 1 - 2*padH); out_dim[2] = (npy_intp)((%(height)s != -1) ? %(height)s : (PyArray_DIMS(top)[2] - 1) * dH + (PyArray_DIMS(weights)[wdim-2]-1)*dilH + 1 - padH_l - padH_r);
out_dim[3] = (npy_intp)((%(width)s != -1) ? %(width)s : (PyArray_DIMS(top)[3] - 1) * dW + (PyArray_DIMS(weights)[wdim-1]-1)*dilW + 1 - 2*padW); out_dim[3] = (npy_intp)((%(width)s != -1) ? %(width)s : (PyArray_DIMS(top)[3] - 1) * dW + (PyArray_DIMS(weights)[wdim-1]-1)*dilW + 1 - padW_l - padW_r);
if (unshared) { if (unshared) {
if (out_dim[0] < 0 || out_dim[1] < 0 || out_dim[2] <= 0 || out_dim[3] <= 0) if (out_dim[0] < 0 || out_dim[1] < 0 || out_dim[2] <= 0 || out_dim[3] <= 0)
{ {
...@@ -564,7 +578,8 @@ class BaseCorrMM(gof.OpenMPOp): ...@@ -564,7 +578,8 @@ class BaseCorrMM(gof.OpenMPOp):
} }
// Call corrMM code // Call corrMM code
out2 = corrMM(%(bottom)s, %(weights)s, %(top)s, direction, dH, dW, dilH, dilW, padH, padW, numgroups, unshared); out2 = corrMM(%(bottom)s, %(weights)s, %(top)s, direction, dH, dW, dilH, dilW,
padH_l, padH_r, padW_l, padW_r, numgroups, unshared);
if (out2==NULL){ if (out2==NULL){
%(fail)s %(fail)s
} }
...@@ -588,8 +603,11 @@ class CorrMM(BaseCorrMM): ...@@ -588,8 +603,11 @@ class CorrMM(BaseCorrMM):
``'valid'`` for ``(0, 0)`` (valid convolution, no padding), ``'full'`` ``'valid'`` for ``(0, 0)`` (valid convolution, no padding), ``'full'``
for ``(kernel_rows - 1, kernel_columns - 1)`` (full convolution), for ``(kernel_rows - 1, kernel_columns - 1)`` (full convolution),
``'half'`` for ``(kernel_rows // 2, kernel_columns // 2)`` (same ``'half'`` for ``(kernel_rows // 2, kernel_columns // 2)`` (same
convolution for odd-sized kernels). Note that the two widths are each convolution for odd-sized kernels).
applied twice, once per side (left and right, top and bottom). If it is a tuple containing 2 pairs of integers, then these specify
the padding to be applied on each side ((left, right), (top, bottom)).
Otherwise, each width is applied twice, once per side (left and right,
top and bottom).
subsample subsample
The subsample operation applied to each output image. The subsample operation applied to each output image.
Should be a tuple with 2 elements. Should be a tuple with 2 elements.
...@@ -706,14 +724,20 @@ class CorrMM_gradWeights(BaseCorrMM): ...@@ -706,14 +724,20 @@ class CorrMM_gradWeights(BaseCorrMM):
def infer_shape(self, node, input_shape): def infer_shape(self, node, input_shape):
if self.border_mode == "half": if self.border_mode == "half":
padH = padW = -1 padH_l = padH_r = padW_l = padW_r = -1
elif self.border_mode == "full": elif self.border_mode == "full":
padH = padW = -2 padH_l = padH_r = padW_l = padW_r = -2
elif isinstance(self.border_mode, tuple): elif isinstance(self.border_mode, tuple):
padH, padW = self.border_mode border = ()
for mode in self.border_mode:
if isinstance(mode, tuple):
border += ((int(mode[0]), int(mode[1])),)
else:
border += ((int(mode), int(mode)),)
(padH_l, padH_r), (padW_l, padW_r) = border
else: else:
assert self.border_mode == "valid" assert self.border_mode == "valid"
padH = padW = 0 padH_l = padH_r = padW_l = padW_r = 0
dH, dW = self.subsample dH, dW = self.subsample
imshp = input_shape[0] imshp = input_shape[0]
topshp = input_shape[1] topshp = input_shape[1]
...@@ -721,21 +745,21 @@ class CorrMM_gradWeights(BaseCorrMM): ...@@ -721,21 +745,21 @@ class CorrMM_gradWeights(BaseCorrMM):
ssize = ssize // self.num_groups ssize = ssize // self.num_groups
nkern, topshp = topshp[1], list(topshp[2:]) nkern, topshp = topshp[1], list(topshp[2:])
height_width = node.inputs[-2:] height_width = node.inputs[-2:]
if ((dH != 1) or (padH == -1)): if ((dH != 1) or (padH_l == -1) or (padH_r == -1)):
# vertical subsampling or half padding, kernel height is specified # vertical subsampling or half padding, kernel height is specified
kH = height_width[0] kH = height_width[0]
elif padH == -2: elif (padH_l == -2) or (padH_r == -2):
# vertical full padding, we can infer the kernel height # vertical full padding, we can infer the kernel height
kH = 2 - imshp[0] + (topshp[0] - 1) * dH kH = 2 - imshp[0] + (topshp[0] - 1) * dH
else: else:
# explicit padding, we can infer the kernel height # explicit padding, we can infer the kernel height
kH = imshp[0] + 2 * padH - (topshp[0] - 1) * dH kH = imshp[0] + padH_l + padH_r - (topshp[0] - 1) * dH
if ((dW != 1) or (padW == -1)): if ((dW != 1) or (padW_l == -1) or (padW_r == -1)):
kW = height_width[1] kW = height_width[1]
elif (padW == -2): elif (padW_l == -2) or (padW_r == -2):
kW = 2 - imshp[1] + (topshp[1] - 1) * dW kW = 2 - imshp[1] + (topshp[1] - 1) * dW
else: else:
kW = imshp[1] + 2 * padW - (topshp[1] - 1) * dW kW = imshp[1] + padW_l + padW_r - (topshp[1] - 1) * dW
if self.unshared is True: if self.unshared is True:
return [(nkern, topshp[0], topshp[1], ssize, kH, kW)] return [(nkern, topshp[0], topshp[1], ssize, kH, kW)]
else: else:
...@@ -820,14 +844,20 @@ class CorrMM_gradInputs(BaseCorrMM): ...@@ -820,14 +844,20 @@ class CorrMM_gradInputs(BaseCorrMM):
def infer_shape(self, node, input_shape): def infer_shape(self, node, input_shape):
if self.border_mode == "half": if self.border_mode == "half":
padH = padW = -1 padH_l = padH_r = padW_l = padW_r = -1
elif self.border_mode == "full": elif self.border_mode == "full":
padH = padW = -2 padH_l = padH_r = padW_l = padW_r = -2
elif isinstance(self.border_mode, tuple): elif isinstance(self.border_mode, tuple):
padH, padW = self.border_mode border = ()
for mode in self.border_mode:
if isinstance(mode, tuple):
border += ((int(mode[0]), int(mode[1])),)
else:
border += ((int(mode), int(mode)),)
(padH_l, padH_r), (padW_l, padW_r) = border
else: else:
assert self.border_mode == "valid" assert self.border_mode == "valid"
padH = padW = 0 padH_l = padH_r = padW_l = padW_r = 0
dH, dW = self.subsample dH, dW = self.subsample
kshp = input_shape[0] kshp = input_shape[0]
topshp = input_shape[1] topshp = input_shape[1]
...@@ -835,27 +865,27 @@ class CorrMM_gradInputs(BaseCorrMM): ...@@ -835,27 +865,27 @@ class CorrMM_gradInputs(BaseCorrMM):
ssize = ssize * self.num_groups ssize = ssize * self.num_groups
bsize, topshp = topshp[0], list(topshp[2:]) bsize, topshp = topshp[0], list(topshp[2:])
height_width = node.inputs[-2:] height_width = node.inputs[-2:]
if padH == -1: if padH_l == -1 or padH_r == -1:
padH = kshp[0] // 2 padH_l = padH_r = kshp[0] // 2
elif padH == -2: elif padH_l == -2 or padH_r == -2:
padH = kshp[0] - 1 padH_l = padH_r = kshp[0] - 1
elif padH < -2: elif padH_l < -2 or padH_r < -2:
raise ValueError('CorrMM_gradInputs: border_mode must be >= 0.') raise ValueError('CorrMM_gradInputs: border_mode must be >= 0.')
if padW == -1: if padW_l == -1 or padW_r == -1:
padW = kshp[1] // 2 padW_l = padW_r = kshp[1] // 2
elif padW == -2: elif padW_l == -2 or padW_r == -2:
padW = kshp[1] - 1 padW_l = padW_r = kshp[1] - 1
elif padW < -2: elif padW_l < -2 or padW_r < -2:
raise ValueError('CorrMM_gradInputs: border_mode must be >= 0.') raise ValueError('CorrMM_gradInputs: border_mode must be >= 0.')
if dH != 1: if dH != 1:
out_shp0 = height_width[0] out_shp0 = height_width[0]
else: else:
out_shp0 = (topshp[0] - 1) * dH + kshp[0] - 2 * padH out_shp0 = (topshp[0] - 1) * dH + kshp[0] - padH_l - padH_r
if dW != 1: if dW != 1:
out_shp1 = height_width[1] out_shp1 = height_width[1]
else: else:
out_shp1 = (topshp[1] - 1) * dW + kshp[1] - 2 * padW out_shp1 = (topshp[1] - 1) * dW + kshp[1] - padW_l - padW_r
out_shp = (out_shp0, out_shp1) out_shp = (out_shp0, out_shp1)
return [(bsize, ssize) + out_shp] return [(bsize, ssize) + out_shp]
......
...@@ -24,6 +24,7 @@ from theano.tensor.nnet.abstract_conv import bilinear_kernel_1D ...@@ -24,6 +24,7 @@ from theano.tensor.nnet.abstract_conv import bilinear_kernel_1D
from theano.tensor.nnet.abstract_conv import bilinear_kernel_2D from theano.tensor.nnet.abstract_conv import bilinear_kernel_2D
from theano.tensor.nnet.abstract_conv import bilinear_upsampling from theano.tensor.nnet.abstract_conv import bilinear_upsampling
from theano.tensor.nnet.abstract_conv import separable_conv2d, separable_conv3d from theano.tensor.nnet.abstract_conv import separable_conv2d, separable_conv3d
from theano.tensor.nnet.abstract_conv import causal_conv1d
from theano.tensor.nnet.corr import (CorrMM, CorrMM_gradWeights, from theano.tensor.nnet.corr import (CorrMM, CorrMM_gradWeights,
CorrMM_gradInputs) CorrMM_gradInputs)
from theano.tensor.nnet.corr3d import (Corr3dMM, Corr3dMM_gradWeights, from theano.tensor.nnet.corr3d import (Corr3dMM, Corr3dMM_gradWeights,
...@@ -1894,3 +1895,156 @@ class TestUnsharedConv(unittest.TestCase): ...@@ -1894,3 +1895,156 @@ class TestUnsharedConv(unittest.TestCase):
if verify: if verify:
utt.verify_grad(conv_gradinputs, [kern, top], mode=self.mode, eps=1) utt.verify_grad(conv_gradinputs, [kern, top], mode=self.mode, eps=1)
class TestAsymmetricPadding(unittest.TestCase):
conv2d = theano.tensor.nnet.abstract_conv.AbstractConv2d
conv2d_gradw = theano.tensor.nnet.abstract_conv.AbstractConv2d_gradWeights
conv2d_gradi = theano.tensor.nnet.abstract_conv.AbstractConv2d_gradInputs
conv2d_op = theano.tensor.nnet.abstract_conv.AbstractConv2d
conv2d_gradw_op = theano.tensor.nnet.abstract_conv.AbstractConv2d_gradWeights
conv2d_gradi_op = theano.tensor.nnet.abstract_conv.AbstractConv2d_gradInputs
mode = theano.compile.mode.Mode(optimizer='None')
img_shape = [(2, 2, 4, 4), (3, 2, 4, 2), (3, 3, 5, 3)]
kern_shape = [(4, 2, 2, 2), (2, 2, 4, 2), (2, 3, 3, 3)]
topgrad_shape = [(2, 4, 6, 6), (3, 2, 3, 4), (3, 2, 6, 1)]
border_mode = [((1, 2), (2, 1)), ((1, 1), (0, 3)), ((2, 1), (0, 0))]
def test_fwd(self):
img_sym = theano.tensor.tensor4('img')
kern_sym = theano.tensor.tensor4('kern')
for imshp, kshp, pad in zip(self.img_shape, self.kern_shape, self.border_mode):
img = np.random.random(imshp).astype(theano.config.floatX)
kern = np.random.random(kshp).astype(theano.config.floatX)
asymmetric_conv_op = self.conv2d(border_mode=pad, subsample=(1, 1),
filter_dilation=(1, 1))
asymmetric_out_sym = asymmetric_conv_op(img_sym, kern_sym)
asymmetric_func = theano.function([img_sym, kern_sym], asymmetric_out_sym, mode=self.mode)
assert any([isinstance(node.op, self.conv2d_op)
for node in asymmetric_func.maker.fgraph.toposort()])
asymmetric_output = asymmetric_func(img, kern)
ref_conv_op = self.conv2d(border_mode="valid", subsample=(1, 1),
filter_dilation=(1, 1))
ref_out_sym = ref_conv_op(img_sym, kern_sym)
ref_func = theano.function([img_sym, kern_sym], ref_out_sym, mode=self.mode)
exp_imshp = (imshp[0], imshp[1],
imshp[2] + pad[0][0] + pad[0][1],
imshp[3] + pad[1][0] + pad[1][1])
exp_img = np.zeros(exp_imshp, dtype=theano.config.floatX)
exp_img[:, :, pad[0][0]:imshp[2] + pad[0][0],
pad[1][0]:imshp[3] + pad[1][0]] = img
ref_output = ref_func(exp_img, kern)
utt.assert_allclose(asymmetric_output, ref_output)
utt.verify_grad(asymmetric_conv_op, [img, kern], mode=self.mode, eps=1)
def test_gradweight(self):
img_sym = theano.tensor.tensor4('img')
top_sym = theano.tensor.tensor4('top')
for imshp, kshp, topshp, pad in zip(self.img_shape, self.kern_shape, self.topgrad_shape, self.border_mode):
img = np.random.random(imshp).astype(theano.config.floatX)
top = np.random.random(topshp).astype(theano.config.floatX)
asymmetric_conv_op = self.conv2d_gradw(border_mode=pad, subsample=(1, 1),
filter_dilation=(1, 1))
asymmetric_out_sym = asymmetric_conv_op(img_sym, top_sym, kshp[-2:])
asymmetric_func = theano.function([img_sym, top_sym], asymmetric_out_sym, mode=self.mode)
assert any([isinstance(node.op, self.conv2d_gradw_op)
for node in asymmetric_func.maker.fgraph.toposort()])
asymmetric_output = asymmetric_func(img, top)
ref_conv_op = self.conv2d_gradw(border_mode="valid", subsample=(1, 1),
filter_dilation=(1, 1))
ref_out_sym = ref_conv_op(img_sym, top_sym, kshp[-2:])
ref_func = theano.function([img_sym, top_sym], ref_out_sym, mode=self.mode)
exp_imshp = (imshp[0], imshp[1],
imshp[2] + pad[0][0] + pad[0][1],
imshp[3] + pad[1][0] + pad[1][1])
exp_img = np.zeros(exp_imshp, dtype=theano.config.floatX)
exp_img[:, :, pad[0][0]:imshp[2] + pad[0][0],
pad[1][0]:imshp[3] + pad[1][0]] = img
ref_output = ref_func(exp_img, top)
utt.assert_allclose(asymmetric_output, ref_output)
def conv_gradweight(inputs_val, output_val):
return asymmetric_conv_op(inputs_val, output_val, tensor.as_tensor_variable(kshp[-2:]))
utt.verify_grad(conv_gradweight, [img, top], mode=self.mode, eps=1)
def test_gradinput(self):
kern_sym = theano.tensor.tensor4('kern')
top_sym = theano.tensor.tensor4('top')
for imshp, kshp, topshp, pad in zip(self.img_shape, self.kern_shape, self.topgrad_shape, self.border_mode):
kern = np.random.random(kshp).astype(theano.config.floatX)
top = np.random.random(topshp).astype(theano.config.floatX)
asymmetric_conv_op = self.conv2d_gradi(border_mode=pad, subsample=(1, 1),
filter_dilation=(1, 1))
asymmetric_out_sym = asymmetric_conv_op(kern_sym, top_sym, imshp[-2:])
asymmetric_func = theano.function([kern_sym, top_sym], asymmetric_out_sym, mode=self.mode)
assert any([isinstance(node.op, self.conv2d_gradi_op)
for node in asymmetric_func.maker.fgraph.toposort()])
asymmetric_output = asymmetric_func(kern, top)
ref_conv_op = self.conv2d_gradi(border_mode="valid", subsample=(1, 1),
filter_dilation=(1, 1))
exp_imshp = [imshp[2] + pad[0][0] + pad[0][1],
imshp[3] + pad[1][0] + pad[1][1]]
ref_out_sym = ref_conv_op(kern_sym, top_sym, exp_imshp)
ref_func = theano.function([kern_sym, top_sym], ref_out_sym, mode=self.mode)
ref_output = ref_func(kern, top)
ref_output = ref_output[:, :, pad[0][0]:imshp[2] + pad[0][0],
pad[1][0]:imshp[3] + pad[1][0]]
utt.assert_allclose(asymmetric_output, ref_output)
def conv_gradinputs(filters_val, output_val):
return asymmetric_conv_op(filters_val, output_val, tensor.as_tensor_variable(imshp[-2:]))
utt.verify_grad(conv_gradinputs, [kern, top], mode=self.mode, eps=1)
class TestCausalConv(unittest.TestCase):
mode = theano.compile.mode.Mode(optimizer='None')
img = np.array([[[2, 4, 9, 5, 8], [0, 0, 4, 0, 5]],
[[2, 5, 8, 5, 5], [1, 3, 0, 7, 9]],
[[7, 0, 7, 1, 0], [0, 1, 4, 7, 2]]]).astype(theano.config.floatX)
kern = np.array([[[5, 3, 1], [3, 1, 0]],
[[6, 4, 9], [2, 2, 7]]]).astype(theano.config.floatX)
dilation = 2
precomp_top = np.array([[[10, 20, 63, 37, 88], [12, 24, 70, 46, 120]],
[[13, 34, 47, 64, 78], [14, 36, 58, 70, 105]],
[[35, 3, 68, 27, 38], [42, 2, 78, 22, 103]]]).astype(theano.config.floatX)
def test_interface(self):
img_sym = theano.tensor.tensor3('img')
kern_sym = theano.tensor.tensor3('kern')
sym_out = causal_conv1d(img_sym, kern_sym, self.kern.shape, filter_dilation=self.dilation)
causal_func = theano.function([img_sym, kern_sym], sym_out, mode=self.mode)
output = causal_func(self.img, self.kern)
utt.assert_allclose(output, self.precomp_top)
def causal_conv_fn(inputs_val, filters_val):
return causal_conv1d(inputs_val, filters_val, self.kern.shape, filter_dilation=1)
utt.verify_grad(causal_conv_fn, [self.img, self.kern], mode=self.mode, eps=1)
...@@ -11,6 +11,7 @@ import theano.tensor as T ...@@ -11,6 +11,7 @@ import theano.tensor as T
from theano.tests import unittest_tools as utt from theano.tests import unittest_tools as utt
from theano.tensor.nnet import corr, conv from theano.tensor.nnet import corr, conv
from theano.tensor.nnet.tests.test_abstract_conv import Grouped_conv_noOptim, TestUnsharedConv from theano.tensor.nnet.tests.test_abstract_conv import Grouped_conv_noOptim, TestUnsharedConv
from theano.tensor.nnet.tests.test_abstract_conv import TestAsymmetricPadding, TestCausalConv
class TestCorr2D(utt.InferShapeTester): class TestCorr2D(utt.InferShapeTester):
...@@ -454,6 +455,23 @@ class TestUnsharedCorr2d(TestUnsharedConv): ...@@ -454,6 +455,23 @@ class TestUnsharedCorr2d(TestUnsharedConv):
conv2d_gradi_op = corr.CorrMM_gradInputs conv2d_gradi_op = corr.CorrMM_gradInputs
class TestAsymmetricCorr(TestAsymmetricPadding):
if theano.config.mode == "FAST_COMPILE":
mode = theano.compile.get_mode("FAST_RUN").excluding('gpuarray')
else:
mode = None
conv2d_op = corr.CorrMM
conv2d_gradw_op = corr.CorrMM_gradWeights
conv2d_gradi_op = corr.CorrMM_gradInputs
class TestCausalCorr(TestCausalConv):
if theano.config.mode == "FAST_COMPILE":
mode = theano.compile.get_mode("FAST_RUN").excluding('gpuarray')
else:
mode = None
if __name__ == '__main__': if __name__ == '__main__':
t = TestCorr2D('setUp') t = TestCorr2D('setUp')
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论