Commit e514f4d3 authored by Vikram

GPU code and some more tests

Parent commit: cd10a53c
......@@ -468,16 +468,33 @@ class BaseGpuCorrMM(CGpuKernelBase):
def __init__(self, border_mode="valid", subsample=(1, 1),
filter_dilation=(1, 1), num_groups=1, unshared=False):
if isinstance(border_mode, integer_types):
border_mode = (border_mode, border_mode)
if isinstance(border_mode, tuple):
pad_h, pad_w = map(int, border_mode)
border_mode = (pad_h, pad_w)
if not ((isinstance(border_mode, tuple) and min(border_mode) >= 0) or
border_mode in ('valid', 'full', 'half')):
if border_mode < 0:
raise ValueError(
'invalid border_mode {}, which must be a '
'non-negative integer'.format(border_mode))
border_mode = ((border_mode, border_mode),) * 2
elif isinstance(border_mode, tuple):
if len(border_mode) != 2:
raise ValueError(
'invalid border_mode {} which must be a '
'tuple of length 2'.format(border_mode))
border = ()
for mode in border_mode:
if isinstance(mode, integer_types) and mode >= 0:
border += ((mode, mode),)
elif isinstance(mode, tuple) and len(mode) == 2 and \
min(mode) >= 0:
border += ((int(mode[0]), int(mode[1])),)
else:
raise ValueError(
'invalid border mode {}. The tuple can only contain '
'integers or tuples of length 2'.format(border_mode))
border_mode = border
elif border_mode not in ('valid', 'full', 'half'):
raise ValueError(
'invalid border_mode {}, which must be either '
'"valid", "full", "half", an integer or a pair of'
' integers'.format(border_mode))
'"valid", "full", "half", an integer or a tuple '
'of length 2'.format(border_mode))
self.border_mode = border_mode
if len(subsample) != 2:
raise ValueError("subsample must have two elements")
......@@ -495,7 +512,7 @@ class BaseGpuCorrMM(CGpuKernelBase):
def pad(self):
if self.border_mode != 'valid':
return self.border_mode
return (0, 0)
return ((0, 0),) * 2
def __str__(self):
return '%s{%s, %s, %s, %s, %s}' % (
......@@ -537,7 +554,7 @@ class BaseGpuCorrMM(CGpuKernelBase):
def c_code_cache_version(self):
    # Raise this whenever modifying the C code (including the file).
    # The stripped diff left both the old `return (11,)` and the new
    # `return (12,)` in place, making the bump unreachable; keep only the
    # bumped version (12 = asymmetric per-edge padding support).
    return (12,)
def c_code_helper(self, bottom, weights, top, direction, sub, height=None, width=None):
"""
......@@ -587,14 +604,14 @@ class BaseGpuCorrMM(CGpuKernelBase):
numgroups = self.num_groups
unshared = int(self.unshared)
if self.border_mode == "half":
padH = padW = -1
padH_l = padH_r = padW_l = padW_r = -1
elif self.border_mode == "full":
padH = padW = -2
padH_l = padH_r = padW_l = padW_r = -2
elif isinstance(self.border_mode, tuple):
padH, padW = self.border_mode
(padH_l, padH_r), (padW_l, padW_r) = self.border_mode
else:
assert self.border_mode == "valid"
padH = padW = 0
padH_l = padH_r = padW_l = padW_r = 0
if direction == "forward":
direction = 0
out = top
......@@ -613,13 +630,13 @@ class BaseGpuCorrMM(CGpuKernelBase):
if height:
height = '(*(npy_int*)(PyArray_DATA(%s)))' % height
else:
if ((direction != 0) and (dH != 1)) or ((direction == 1) and (padH == -1)):
if ((direction != 0) and (dH != 1)) or ((direction == 1) and (padH_l == -1)):
raise ValueError("height must be given for backprop with vertical sampling or pad='half'")
height = '-1'
if width:
width = '(*(npy_int*)(PyArray_DATA(%s)))' % width
else:
if ((direction != 0) and (dW != 1)) or ((direction == 1) and (padW == -1)):
if ((direction != 0) and (dW != 1)) or ((direction == 1) and (padW_l == -1)):
raise ValueError("width must be given for backprop with horizontal sampling or pad='half'")
width = '-1'
......@@ -635,8 +652,10 @@ class BaseGpuCorrMM(CGpuKernelBase):
size_t dW = %(dW)s;
size_t dilH = %(dilH)s;
size_t dilW = %(dilW)s;
int padH = %(padH)s;
int padW = %(padW)s;
int padH_l = %(padH_l)s;
int padH_r = %(padH_r)s;
int padW_l = %(padW_l)s;
int padW_r = %(padW_r)s;
int numgroups = %(numgroups)s;
int unshared = %(unshared)s;
......@@ -662,22 +681,22 @@ class BaseGpuCorrMM(CGpuKernelBase):
// kernel height is specified (perhaps vertical subsampling or half padding)
kH = %(height)s;
}
else if (padH == -2) {
else if (padH_l == -2 || padH_r == -2) {
// vertical full padding, we can infer the kernel height
kH = (2 - PyGpuArray_DIMS(bottom)[2] + (PyGpuArray_DIMS(top)[2] - 1) * dH - 1) / dilH + 1;
}
else {
// explicit padding, we can infer the kernel height
kH = (PyGpuArray_DIMS(bottom)[2] + 2*padH - (PyGpuArray_DIMS(top)[2] - 1) * dH - 1) / dilH + 1 ;
kH = (PyGpuArray_DIMS(bottom)[2] + padH_l + padH_r - (PyGpuArray_DIMS(top)[2] - 1) * dH - 1) / dilH + 1 ;
}
if (%(width)s != -1) {
kW = %(width)s;
}
else if (padW == -2) {
else if (padW_l == -2 || padW_r == -2) {
kW = (2 - PyGpuArray_DIMS(bottom)[3] + (PyGpuArray_DIMS(top)[3] - 1) * dW - 1) / dilW + 1;
}
else {
kW = (PyGpuArray_DIMS(bottom)[3] + 2*padW - (PyGpuArray_DIMS(top)[3] - 1) * dW - 1) / dilW + 1;
kW = (PyGpuArray_DIMS(bottom)[3] + padW_l + padW_r - (PyGpuArray_DIMS(top)[3] - 1) * dW - 1) / dilW + 1;
}
}
......@@ -686,23 +705,23 @@ class BaseGpuCorrMM(CGpuKernelBase):
dil_kW = (kW - 1) * dilW + 1;
// Auto-padding if requested
if (padH == -1) { // vertical half padding
padH = dil_kH / 2;
if (padH_l == -1 || padH_r == -1) { // vertical half padding
padH_l = padH_r = dil_kH / 2;
}
else if (padH == -2) { // vertical full padding
padH = dil_kH - 1;
else if (padH_l == -2 || padH_r == -2) { // vertical full padding
padH_l = padH_r = dil_kH - 1;
}
else if (padH < 0) {
else if (padH_l < 0 || padH_r < 0) {
PyErr_SetString(PyExc_ValueError, "BaseGpuCorrMM: padH must be >= -2");
%(fail)s
}
if (padW == -1) { // horizontal half padding
padW = dil_kW / 2;
if (padW_l == -1 || padW_r == -1) { // horizontal half padding
padW_l = padW_r = dil_kW / 2;
}
else if (padW == -2) { // horizontal full padding
padW = dil_kW - 1;
else if (padW_l == -2 || padW_r == -2) { // horizontal full padding
padW_l = padW_r = dil_kW - 1;
}
else if (padW < 0) {
else if (padW_l < 0 || padW_r < 0) {
PyErr_SetString(PyExc_ValueError, "BaseGpuCorrMM: padW must be >= -2");
%(fail)s
}
......@@ -718,11 +737,11 @@ class BaseGpuCorrMM(CGpuKernelBase):
switch(direction) {
case 0: // forward pass
// output is top: (batchsize, num_filters, height, width)
// height and width: top = (bottom + 2*pad - ((weight-1)*dil + 1)) / sample + 1
// height and width: top = (bottom + pad_l + pad_r - ((weight-1)*dil + 1)) / sample + 1
out_dim[0] = PyGpuArray_DIMS(bottom)[0];
out_dim[1] = PyGpuArray_DIMS(weights)[0];
out_dim[2] = (PyGpuArray_DIMS(bottom)[2] + 2*padH - ((PyGpuArray_DIMS(weights)[wdim-2]-1)*dilH + 1)) / dH + 1;
out_dim[3] = (PyGpuArray_DIMS(bottom)[3] + 2*padW - ((PyGpuArray_DIMS(weights)[wdim-1]-1)*dilW + 1)) / dW + 1;
out_dim[2] = (PyGpuArray_DIMS(bottom)[2] + padH_l + padH_r - ((PyGpuArray_DIMS(weights)[wdim-2]-1)*dilH + 1)) / dH + 1;
out_dim[3] = (PyGpuArray_DIMS(bottom)[3] + padW_l + padW_r - ((PyGpuArray_DIMS(weights)[wdim-1]-1)*dilW + 1)) / dW + 1;
out_typecode = bottom->ga.typecode;
out_context = bottom->context;
if (out_dim[0] < 0 || out_dim[1] < 0 || out_dim[2] <= 0 || out_dim[3] <= 0)
......@@ -810,8 +829,8 @@ class BaseGpuCorrMM(CGpuKernelBase):
// height and width: bottom = (top - 1) * sample + (weights-1)*dil + 1 - 2*pad
out_dim[0] = PyGpuArray_DIMS(top)[0];
out_dim[1] = PyGpuArray_DIMS(weights)[wdim-3] * numgroups;
out_dim[2] = (%(height)s != -1) ? %(height)s : (PyGpuArray_DIMS(top)[2] - 1) * dH + (PyGpuArray_DIMS(weights)[wdim-2]-1)*dilH + 1 - 2*padH;
out_dim[3] = (%(width)s != -1) ? %(width)s : (PyGpuArray_DIMS(top)[3] - 1) * dW + (PyGpuArray_DIMS(weights)[wdim-1]-1)*dilW + 1 - 2*padW;
out_dim[2] = (%(height)s != -1) ? %(height)s : (PyGpuArray_DIMS(top)[2] - 1) * dH + (PyGpuArray_DIMS(weights)[wdim-2]-1)*dilH + 1 - padH_l - padH_r;
out_dim[3] = (%(width)s != -1) ? %(width)s : (PyGpuArray_DIMS(top)[3] - 1) * dW + (PyGpuArray_DIMS(weights)[wdim-1]-1)*dilW + 1 - padW_l - padW_r;
out_typecode = top->ga.typecode;
out_context = top->context;
if (unshared) {
......@@ -884,7 +903,8 @@ class BaseGpuCorrMM(CGpuKernelBase):
}
// Call GPU code
out2 = corrMM(%(bottom)s, %(weights)s, %(top)s, direction, dH, dW, dilH, dilW, padH, padW, numgroups, unshared);
out2 = corrMM(%(bottom)s, %(weights)s, %(top)s, direction, dH, dW, dilH, dilW,
padH_l, padH_r, padW_l, padW_r, numgroups, unshared);
if (out2==NULL){
%(fail)s
}
......
......@@ -42,7 +42,7 @@ KERNEL void dilated_im2col_kernel(const ga_size n,
const ga_size height, const ga_size width,
const ga_size kernel_h, const ga_size kernel_w,
const ga_size dilation_h, const ga_size dilation_w,
const ga_size pad_h, const ga_size pad_w,
const ga_size pad_hl, const ga_size pad_wl,
const ga_size stride_h, const ga_size stride_w,
const ga_size height_col, const ga_size width_col,
GLOBAL_MEM DTYPE_INPUT_0 * data_col,
......@@ -57,8 +57,8 @@ KERNEL void dilated_im2col_kernel(const ga_size n,
const ga_size w_col = index % width_col;
const ga_size c_im = h_index / height_col;
const ga_size c_col = c_im * kernel_h * kernel_w;
const ga_size h_offset = h_col * stride_h - pad_h;
const ga_size w_offset = w_col * stride_w - pad_w;
const ga_size h_offset = h_col * stride_h - pad_hl;
const ga_size w_offset = w_col * stride_w - pad_wl;
GLOBAL_MEM DTYPE_INPUT_0 * data_col_ptr = data_col;
data_col_ptr += (c_col * height_col + h_col) * width_col + w_col;
GLOBAL_MEM const DTYPE_INPUT_0 * data_im_ptr = data_im + data_im_offset;
......@@ -86,7 +86,7 @@ KERNEL void im2col_kernel(const ga_size n,
// data_im_offset is an offset of elements in the array
const ga_size height, const ga_size width,
const ga_size kernel_h, const ga_size kernel_w,
const ga_size pad_h, const ga_size pad_w,
const ga_size pad_hl, const ga_size pad_wl,
const ga_size stride_h, const ga_size stride_w,
const ga_size height_col, const ga_size width_col,
GLOBAL_MEM DTYPE_INPUT_0 * data_col,
......@@ -101,8 +101,8 @@ KERNEL void im2col_kernel(const ga_size n,
const ga_size w_col = index % width_col;
const ga_size c_im = h_index / height_col;
const ga_size c_col = c_im * kernel_h * kernel_w;
const ga_size h_offset = h_col * stride_h - pad_h;
const ga_size w_offset = w_col * stride_w - pad_w;
const ga_size h_offset = h_col * stride_h - pad_hl;
const ga_size w_offset = w_col * stride_w - pad_wl;
GLOBAL_MEM DTYPE_INPUT_0 * data_col_ptr = data_col;
data_col_ptr += (c_col * height_col + h_col) * width_col + w_col;
GLOBAL_MEM const DTYPE_INPUT_0 * data_im_ptr = data_im + data_im_offset;
......@@ -127,7 +127,7 @@ KERNEL void dilated_col2im_kernel(const ga_size n,
const ga_size height, const ga_size width, const ga_size channels,
const ga_size kernel_h, const ga_size kernel_w,
const ga_size dilation_h, const ga_size dilation_w,
const ga_size pad_h, const ga_size pad_w,
const ga_size pad_hl, const ga_size pad_wl,
const ga_size stride_h, const ga_size stride_w,
const ga_size height_col, const ga_size width_col,
GLOBAL_MEM DTYPE_INPUT_0 * data_im,
......@@ -141,8 +141,8 @@ KERNEL void dilated_col2im_kernel(const ga_size n,
for (ga_size index = GID_0 * LDIM_0 + LID_0;
index < (n); index += LDIM_0 * GDIM_0) {
DTYPE_INPUT_0 val = 0;
const ga_size w_im = index % width + pad_w;
const ga_size h_im = (index / width) % height + pad_h;
const ga_size w_im = index % width + pad_wl;
const ga_size h_im = (index / width) % height + pad_hl;
const ga_size c_im = index / (width * height);
ga_size kernel_extent_w = (kernel_w - 1) * dilation_w + 1;
ga_size kernel_extent_h = (kernel_h - 1) * dilation_h + 1;
......@@ -177,7 +177,7 @@ KERNEL void col2im_kernel(const ga_size n,
GLOBAL_MEM const DTYPE_INPUT_0 * data_col, const ga_size offset_col,
const ga_size height, const ga_size width, const ga_size channels,
const ga_size kernel_h, const ga_size kernel_w,
const ga_size pad_h, const ga_size pad_w,
const ga_size pad_hl, const ga_size pad_wl,
const ga_size stride_h, const ga_size stride_w,
const ga_size height_col, const ga_size width_col,
GLOBAL_MEM DTYPE_INPUT_0 * data_im,
......@@ -191,8 +191,8 @@ KERNEL void col2im_kernel(const ga_size n,
for (ga_size index = GID_0 * LDIM_0 + LID_0;
index < (n); index += LDIM_0 * GDIM_0) {
DTYPE_INPUT_0 val = 0;
const ga_size w_im = index % width + pad_w;
const ga_size h_im = (index / width) % height + pad_h;
const ga_size w_im = index % width + pad_wl;
const ga_size h_im = (index / width) % height + pad_hl;
const ga_size c_im = index / (width * height);
// compute the start and end of the output
const ga_size w_col_start =
......@@ -254,15 +254,16 @@ int rgemm(cb_order o, cb_transpose tA, cb_transpose tB,
int im2col(GpuArray *data_im, const size_t data_im_offset, const size_t channels,
const size_t height, const size_t width, const size_t kernel_h, const size_t kernel_w,
const size_t dilation_h, const size_t dilation_w,
const size_t pad_h, const size_t pad_w,
const size_t pad_hl, const size_t pad_hr,
const size_t pad_wl, const size_t pad_wr,
const size_t stride_h, const size_t stride_w,
GpuArray *data_col) {
// We are going to launch channels * height_col * width_col kernels, each
// kernel responsible for copying a single-channel grid.
size_t dil_kernel_h = (kernel_h - 1) * dilation_h + 1;
size_t dil_kernel_w = (kernel_w - 1) * dilation_w + 1;
size_t height_col = (height + 2 * pad_h - dil_kernel_h) / stride_h + 1;
size_t width_col = (width + 2 * pad_w - dil_kernel_w) / stride_w + 1;
size_t height_col = (height + pad_hl + pad_hr - dil_kernel_h) / stride_h + 1;
size_t width_col = (width + pad_wl + pad_wr - dil_kernel_w) / stride_w + 1;
size_t num_kernels = channels * height_col * width_col;
int err;
if (dilation_h != 1 || dilation_w != 1) {
......@@ -270,7 +271,7 @@ int im2col(GpuArray *data_im, const size_t data_im_offset, const size_t channels
1, &num_kernels, 0,
num_kernels, data_im->data, data_im->offset, data_im_offset,
height, width, kernel_h, kernel_w,
dilation_h, dilation_w, pad_h, pad_w, stride_h, stride_w, height_col,
dilation_h, dilation_w, pad_hl, pad_wl, stride_h, stride_w, height_col,
width_col, data_col->data, data_col->offset);
if (err != GA_NO_ERROR) {
PyErr_Format(PyExc_RuntimeError,
......@@ -282,7 +283,7 @@ int im2col(GpuArray *data_im, const size_t data_im_offset, const size_t channels
1, &num_kernels, 0,
num_kernels, data_im->data, data_im->offset, data_im_offset,
height, width, kernel_h, kernel_w,
pad_h, pad_w, stride_h, stride_w, height_col,
pad_hl, pad_wl, stride_h, stride_w, height_col,
width_col, data_col->data, data_col->offset);
if (err != GA_NO_ERROR) {
PyErr_Format(PyExc_RuntimeError,
......@@ -296,12 +297,12 @@ int im2col(GpuArray *data_im, const size_t data_im_offset, const size_t channels
int col2im(GpuArray *data_col, const size_t channels,
const size_t height, const size_t width, const size_t patch_h, const size_t patch_w,
const size_t dilation_h, const size_t dilation_w,
const size_t pad_h, const size_t pad_w, const size_t stride_h,
const size_t stride_w, GpuArray *data_im, const size_t data_im_offset) {
const size_t pad_hl, const size_t pad_hr, const size_t pad_wl, const size_t pad_wr,
const size_t stride_h, const size_t stride_w, GpuArray *data_im, const size_t data_im_offset) {
size_t dil_patch_h = (patch_h - 1) * dilation_h + 1;
size_t dil_patch_w = (patch_w - 1) * dilation_w + 1;
size_t height_col = (height + 2 * pad_h - dil_patch_h) / stride_h + 1;
size_t width_col = (width + 2 * pad_w - dil_patch_w) / stride_w + 1;
size_t height_col = (height + pad_hl + pad_hr - dil_patch_h) / stride_h + 1;
size_t width_col = (width + pad_wl + pad_wr - dil_patch_w) / stride_w + 1;
size_t num_kernels = channels * height * width;
// To avoid involving atomic operations, we will launch one kernel per
// bottom dimension, and then in the kernel add up the top dimensions.
......@@ -311,7 +312,7 @@ int col2im(GpuArray *data_col, const size_t channels,
1, &num_kernels, 0,
num_kernels, data_col->data, data_col->offset,
height, width, channels, patch_h, patch_w,
dilation_h, dilation_w, pad_h, pad_w, stride_h, stride_w,
dilation_h, dilation_w, pad_hl, pad_wl, stride_h, stride_w,
height_col, width_col, data_im->data, data_im->offset, data_im_offset);
if (err != GA_NO_ERROR) {
PyErr_Format(PyExc_RuntimeError,
......@@ -323,7 +324,7 @@ int col2im(GpuArray *data_col, const size_t channels,
1, &num_kernels, 0,
num_kernels, data_col->data, data_col->offset,
height, width, channels, patch_h, patch_w,
pad_h, pad_w, stride_h, stride_w,
pad_hl, pad_wl, stride_h, stride_w,
height_col, width_col, data_im->data, data_im->offset, data_im_offset);
if (err != GA_NO_ERROR) {
PyErr_Format(PyExc_RuntimeError,
......@@ -347,8 +348,10 @@ PyGpuArrayObject* corrMM(PyGpuArrayObject *const bottom,
const size_t dW = 1,
const size_t dilH = 1,
const size_t dilW = 1,
const size_t padH = 0,
const size_t padW = 0,
const size_t padH_l = 0,
const size_t padH_r = 0,
const size_t padW_l = 0,
const size_t padW_r = 0,
const size_t numgroups = 1,
const size_t unshared = 0)
{
......@@ -443,8 +446,8 @@ PyGpuArrayObject* corrMM(PyGpuArrayObject *const bottom,
const size_t dil_kH = (kH - 1) * dilH + 1;
const size_t dil_kW = (kW - 1) * dilW + 1;
// top: (batchSize, nFilters, topHeight, topWidth)
const size_t topHeightNoDH = (bottomHeight + 2*padH - dil_kH);
const size_t topWidthNoDW = (bottomWidth + 2*padW - dil_kW);
const size_t topHeightNoDH = (bottomHeight + padH_l + padH_r - dil_kH);
const size_t topWidthNoDW = (bottomWidth + padW_l + padW_r - dil_kW);
// the above values might be negative so we need to use Python-like
// flooring integer division to be compatible with get_conv_output.
// note: this macro implements Python's // for negative x only
......@@ -558,7 +561,7 @@ PyGpuArrayObject* corrMM(PyGpuArrayObject *const bottom,
err = im2col(&bottom->ga, n * batch_bottom_stride,
nChannels, bottomHeight,
bottomWidth, kH, kW, dilH, dilW,
padH, padW, dH, dW, &col->ga);
padH_l, padH_r, padW_l, padW_r, dH, dW, &col->ga);
if (err != GA_NO_ERROR) {
Py_DECREF(col);
return NULL;
......@@ -618,7 +621,7 @@ PyGpuArrayObject* corrMM(PyGpuArrayObject *const bottom,
err = im2col(&bottom->ga, n * batch_bottom_stride,
nChannels, bottomHeight,
bottomWidth, kH, kW, dilH, dilW,
padH, padW, dH, dW, &col->ga);
padH_l, padH_r, padW_l, padW_r, dH, dW, &col->ga);
if (err != GA_NO_ERROR) {
Py_DECREF(col);
return NULL;
......@@ -712,7 +715,7 @@ PyGpuArrayObject* corrMM(PyGpuArrayObject *const bottom,
}
// col2im back to the data
err = col2im(&col->ga, nChannels, bottomHeight, bottomWidth,
kH, kW, dilH, dilW, padH, padW,
kH, kW, dilH, dilW, padH_l, padH_r, padW_l, padW_r,
dH, dW, &bottom->ga, n * batch_bottom_stride);
if (err != GA_NO_ERROR) {
Py_DECREF(col);
......
......@@ -12,6 +12,7 @@ from ..type import gpuarray_shared_constructor
from ..blas import GpuCorrMM, GpuCorrMM_gradWeights, GpuCorrMM_gradInputs
from .config import mode_with_gpu, mode_without_gpu, ref_cast
from theano.tensor.nnet.tests.test_abstract_conv import Grouped_conv_noOptim, TestUnsharedConv
from theano.tensor.nnet.tests.test_abstract_conv import TestAsymmetricPadding
class TestCorrMM(unittest.TestCase):
......@@ -272,3 +273,10 @@ class TestUnsharedGpuCorr2d(TestUnsharedConv):
conv2d_op = GpuCorrMM
conv2d_gradw_op = GpuCorrMM_gradWeights
conv2d_gradi_op = GpuCorrMM_gradInputs
class TestAsymmetricGpu(TestAsymmetricPadding):
    """Run the shared asymmetric-padding convolution tests against the
    GPU CorrMM ops (cuDNN excluded so the CorrMM path is exercised)."""

    mode = mode_with_gpu.excluding('cudnn')
    conv2d_op = GpuCorrMM
    conv2d_gradw_op = GpuCorrMM_gradWeights
    conv2d_gradi_op = GpuCorrMM_gradInputs
......@@ -195,7 +195,7 @@ class BaseCorrMM(gof.OpenMPOp):
def c_code_cache_version(self):
    # raise this whenever modifying any of the support_code_files
    # The stripped diff left both the old version-9 return and the new
    # version-10 return in place, making the bump dead code; keep only the
    # bumped version (10 = asymmetric per-edge padding support).
    return (10, self.openmp, blas_header_version())
def c_support_code_apply(self, node, nodename):
# REMEMBER TO RAISE c_code_cache_version when changing any of
......@@ -439,7 +439,7 @@ class BaseCorrMM(gof.OpenMPOp):
break;
case 1: // backprop wrt. weights
// output is weights: (num_filters, num_channels, height, width)
// height and width: weights = (bottom + 2*pad - (top - 1) * sample - 1) / dil + 1
// height and width: weights = (bottom + pad_l + pad_r - (top - 1) * sample - 1) / dil + 1
out_dim[0] = (npy_intp)PyArray_DIMS(top)[1];
if (unshared){
odim = 6;
......
......@@ -11,6 +11,7 @@ import theano.tensor as T
from theano.tests import unittest_tools as utt
from theano.tensor.nnet import corr, conv
from theano.tensor.nnet.tests.test_abstract_conv import Grouped_conv_noOptim, TestUnsharedConv
from theano.tensor.nnet.tests.test_abstract_conv import TestAsymmetricPadding
class TestCorr2D(utt.InferShapeTester):
......@@ -462,6 +463,16 @@ class TestUnsharedCorr2d(TestUnsharedConv):
conv2d_gradi_op = corr.CorrMM_gradInputs
class TestAsymmetricCorr(TestAsymmetricPadding):
    """Run the shared asymmetric-padding convolution tests against the
    CPU CorrMM ops."""

    # Under FAST_COMPILE the ops under test would be skipped, so force a
    # FAST_RUN mode (GPU transfers excluded); otherwise use the default mode.
    if theano.config.mode == "FAST_COMPILE":
        mode = theano.compile.get_mode("FAST_RUN").excluding('gpuarray')
    else:
        mode = None
    conv2d_op = corr.CorrMM
    conv2d_gradw_op = corr.CorrMM_gradWeights
    conv2d_gradi_op = corr.CorrMM_gradInputs
if __name__ == '__main__':
t = TestCorr2D('setUp')
......
Markdown is supported
0%
You are adding 0 people to this discussion. Please proceed with caution.
Please finish editing this comment first!
Register or sign in to comment