提交 30dd8bdc 作者: Gijs van Tulder

CorrMM should use height/width if they are given.

上级 676cf29d
......@@ -528,7 +528,7 @@ class BaseGpuCorrMM(CGpuKernelBase):
def c_code_cache_version(self):
    """Return the version tuple for the C code emitted by this op.

    The value must be raised whenever the C code below is modified.
    """
    # Raise this whenever modifying the code below.
    # Only the bumped value is returned; an earlier `return (3,)` would make
    # this statement unreachable and leave the version unchanged.
    return (4,)
def c_code_helper(self, bottom, weights, top, direction, sub, height=None, width=None):
"""
......@@ -650,8 +650,8 @@ class BaseGpuCorrMM(CGpuKernelBase):
kW = PyGpuArray_DIMS(weights)[3];
}
else {
if ((dH != 1) || (padH == -1)) {
// vertical subsampling or half padding, kernel height is specified
if (%(height)s != -1) {
// kernel height is specified (perhaps vertical subsampling or half padding)
kH = %(height)s;
}
else if (padH == -2) {
......@@ -662,7 +662,7 @@ class BaseGpuCorrMM(CGpuKernelBase):
// explicit padding, we can infer the kernel height
kH = (PyGpuArray_DIMS(bottom)[2] + 2*padH - (PyGpuArray_DIMS(top)[2] - 1) * dH - 1) / dilH + 1 ;
}
if ((dW != 1) || (padW == -1)) {
if (%(width)s != -1) {
kW = %(width)s;
}
else if (padW == -2) {
......@@ -671,15 +671,6 @@ class BaseGpuCorrMM(CGpuKernelBase):
else {
kW = (PyGpuArray_DIMS(bottom)[3] + 2*padW - (PyGpuArray_DIMS(top)[3] - 1) * dW - 1) / dilW + 1;
}
if ((%(height)s != -1 && %(height)s != kH) ||
(%(width)s != -1 && %(width)s != kW))
{
PyErr_Format(PyExc_ValueError,
"BaseGpuCorrMM: computed kernel shape %%lldx%%lld "
"does not match given shape %%lldx%%lld",
(long long)kH, (long long)kW, (long long)%(height)s, (long long)%(width)s);
%(fail)s
}
}
// Implicit dilated kernel size
......@@ -738,20 +729,10 @@ class BaseGpuCorrMM(CGpuKernelBase):
// height and width: bottom = (top - 1) * sample + (weights-1)*dil + 1 - 2*pad
out_dim[0] = PyGpuArray_DIMS(top)[0];
out_dim[1] = PyGpuArray_DIMS(weights)[1];
out_dim[2] = (dH != 1) ? %(height)s : (PyGpuArray_DIMS(top)[2] - 1) * dH + (PyGpuArray_DIMS(weights)[2]-1)*dilH + 1 - 2*padH;
out_dim[3] = (dW != 1) ? %(width)s : (PyGpuArray_DIMS(top)[3] - 1) * dW + (PyGpuArray_DIMS(weights)[3]-1)*dilW + 1 - 2*padW;
out_dim[2] = (%(height)s != -1) ? %(height)s : (PyGpuArray_DIMS(top)[2] - 1) * dH + (PyGpuArray_DIMS(weights)[2]-1)*dilH + 1 - 2*padH;
out_dim[3] = (%(width)s != -1) ? %(width)s : (PyGpuArray_DIMS(top)[3] - 1) * dW + (PyGpuArray_DIMS(weights)[3]-1)*dilW + 1 - 2*padW;
out_typecode = top->ga.typecode;
out_context = top->context;
if ((%(height)s != -1 && %(height)s != out_dim[2]) ||
(%(width)s != -1 && %(width)s != out_dim[3]))
{
PyErr_Format(PyExc_ValueError,
"BaseGpuCorrMM: computed output shape %%lldx%%lld "
"does not match given shape %%lldx%%lld",
(long long)out_dim[2], (long long)out_dim[3],
(long long)%(height)s, (long long)%(width)s);
%(fail)s
}
break;
default:
PyErr_SetString(PyExc_ValueError, "BaseGpuCorrMM: direction must be 0, 1, or 2\\n");
......@@ -1105,7 +1086,7 @@ class BaseGpuCorr3dMM(CGpuKernelBase):
def c_code_cache_version(self):
    """Return the version tuple for the C code emitted by this op.

    The value must be raised whenever the C code below is modified.
    """
    # Raise this whenever modifying the code below.
    # Only the bumped value is returned; an earlier `return (3,)` would make
    # this statement unreachable and leave the version unchanged.
    return (4,)
def c_code_helper(self, bottom, weights, top, direction, sub,
height=None, width=None, depth=None):
......@@ -1245,8 +1226,8 @@ class BaseGpuCorr3dMM(CGpuKernelBase):
kD = PyGpuArray_DIMS(weights)[4];
}
else {
if ((dH != 1) || (padH == -1)) {
// vertical subsampling or half padding, kernel height is specified
if (%(height)s != -1) {
// kernel height is specified (perhaps vertical subsampling or half padding)
kH = %(height)s;
}
else if (padH == -2) {
......@@ -1257,7 +1238,7 @@ class BaseGpuCorr3dMM(CGpuKernelBase):
// explicit padding, we can infer the kernel height
kH = (PyGpuArray_DIMS(bottom)[2] + 2*padH - (PyGpuArray_DIMS(top)[2] - 1) * dH - 1) / dilH + 1 ;
}
if ((dW != 1) || (padW == -1)) {
if (%(width)s != -1) {
kW = %(width)s;
}
else if (padW == -2) {
......@@ -1266,7 +1247,7 @@ class BaseGpuCorr3dMM(CGpuKernelBase):
else {
kW = (PyGpuArray_DIMS(bottom)[3] + 2*padW - (PyGpuArray_DIMS(top)[3] - 1) * dW - 1) / dilW + 1;
}
if ((dD != 1) || (padD == -1)) {
if (%(depth)s != -1) {
kD = %(depth)s;
}
else if (padD == -2) {
......@@ -1275,17 +1256,6 @@ class BaseGpuCorr3dMM(CGpuKernelBase):
else {
kD = (PyGpuArray_DIMS(bottom)[4] + 2*padD - (PyGpuArray_DIMS(top)[4] - 1) * dD - 1) / dilD + 1;
}
if ((%(height)s != -1 && %(height)s != kH) ||
(%(width)s != -1 && %(width)s != kW) ||
(%(depth)s != -1 && %(depth)s != kD))
{
PyErr_Format(PyExc_ValueError,
"BaseGpuCorr3dMM: computed kernel shape %%lldx%%lldx%%lld "
"does not match given shape %%lldx%%lldx%%lld",
(long long)kH, (long long)kW, (long long)kD,
(long long)%(height)s, (long long)%(width)s, (long long)%(depth)s);
%(fail)s
}
}
// Implicit dilated kernel size
......@@ -1357,22 +1327,11 @@ class BaseGpuCorr3dMM(CGpuKernelBase):
// height, width and depth: bottom = (top - 1) * sample + (weights-1)*dil + 1 - 2*pad
out_dim[0] = PyGpuArray_DIMS(top)[0];
out_dim[1] = PyGpuArray_DIMS(weights)[1];
out_dim[2] = (dH != 1) ? %(height)s : (PyGpuArray_DIMS(top)[2] - 1) * dH + (PyGpuArray_DIMS(weights)[2]-1)*dilH + 1 - 2*padH;
out_dim[3] = (dW != 1) ? %(width)s : (PyGpuArray_DIMS(top)[3] - 1) * dW + (PyGpuArray_DIMS(weights)[3]-1)*dilW + 1 - 2*padW;
out_dim[4] = (dD != 1) ? %(depth)s : (PyGpuArray_DIMS(top)[4] - 1) * dD + (PyGpuArray_DIMS(weights)[4]-1)*dilD + 1 - 2*padD;
out_dim[2] = (%(height)s != -1) ? %(height)s : (PyGpuArray_DIMS(top)[2] - 1) * dH + (PyGpuArray_DIMS(weights)[2]-1)*dilH + 1 - 2*padH;
out_dim[3] = (%(width)s != -1) ? %(width)s : (PyGpuArray_DIMS(top)[3] - 1) * dW + (PyGpuArray_DIMS(weights)[3]-1)*dilW + 1 - 2*padW;
out_dim[4] = (%(depth)s != -1) ? %(depth)s : (PyGpuArray_DIMS(top)[4] - 1) * dD + (PyGpuArray_DIMS(weights)[4]-1)*dilD + 1 - 2*padD;
out_typecode = top->ga.typecode;
out_context = top->context;
if ((%(height)s != -1 && %(height)s != out_dim[2]) ||
(%(width)s != -1 && %(width)s != out_dim[3]) ||
(%(depth)s != -1 && %(depth)s != out_dim[4]))
{
PyErr_Format(PyExc_ValueError,
"BaseGpuCorr3dMM: computed output shape %%lldx%%lldx%%lld "
"does not match given shape %%lldx%%lldx%%lld",
(long long)out_dim[2], (long long)out_dim[3], (long long)out_dim[4],
(long long)%(height)s, (long long)%(width)s, (long long)%(depth)s);
%(fail)s
}
break;
default:
PyErr_SetString(PyExc_ValueError, "BaseGpuCorr3dMM: direction must be 0, 1, or 2\\n");
......
......@@ -425,9 +425,17 @@ PyGpuArrayObject* corr3dMM(PyGpuArrayObject *const bottom,
const size_t dil_kW = (kW - 1) * dilW + 1;
const size_t dil_kD = (kD - 1) * dilD + 1;
// top: (batchSize, nFilters, topHeight, topWidth, topDepth)
const size_t topHeight = (bottomHeight + 2*padH - dil_kH) / dH + 1;
const size_t topWidth = (bottomWidth + 2*padW - dil_kW) / dW + 1;
const size_t topDepth = (bottomDepth + 2*padD - dil_kD) / dD + 1;
const size_t topHeightNoDH = (bottomHeight + 2*padH - dil_kH);
const size_t topWidthNoDW = (bottomWidth + 2*padW - dil_kW);
const size_t topDepthNoDD = (bottomDepth + 2*padD - dil_kD);
// Divide the extents above by the stride using Python-like flooring
// integer division, to be compatible with get_conv_output.
// The macro only special-cases a negative x; y is not sign-checked.
// Arguments are parenthesized so that expression arguments expand safely.
// NOTE(review): topHeightNoDH/topWidthNoDW/topDepthNoDD are declared size_t
// just above, so ((x) < 0) can never be true for them and a would-be negative
// extent has already wrapped around -- confirm whether signed types were intended.
#define _CONV_FLOORDIV_X(x,y) (((x) < 0) ? (- ((-(x)) / (y)) - (((-(x)) % (y)) == 0 ? 0 : 1)) : ((x) / (y)))
const size_t topHeight = _CONV_FLOORDIV_X(topHeightNoDH, dH) + 1;
const size_t topWidth = _CONV_FLOORDIV_X(topWidthNoDW, dW) + 1;
const size_t topDepth = _CONV_FLOORDIV_X(topDepthNoDD, dD) + 1;
// Undefine the helper under the same name it was defined with, so it does
// not leak into the rest of the translation unit.
#undef _CONV_FLOORDIV_X
if (batchSize != PyGpuArray_DIMS(top)[0] ||
nFilters != PyGpuArray_DIMS(top)[1] ||
topHeight != PyGpuArray_DIMS(top)[2] ||
......
......@@ -360,8 +360,15 @@ PyGpuArrayObject* corrMM(PyGpuArrayObject *const bottom,
const size_t dil_kH = (kH - 1) * dilH + 1;
const size_t dil_kW = (kW - 1) * dilW + 1;
// top: (batchSize, nFilters, topHeight, topWidth)
const size_t topHeight = (bottomHeight + 2*padH - dil_kH) / dH + 1;
const size_t topWidth = (bottomWidth + 2*padW - dil_kW) / dW + 1;
const size_t topHeightNoDH = (bottomHeight + 2*padH - dil_kH);
const size_t topWidthNoDW = (bottomWidth + 2*padW - dil_kW);
// Divide the extents above by the stride using Python-like flooring
// integer division, to be compatible with get_conv_output.
// The macro only special-cases a negative x; y is not sign-checked.
// Arguments are parenthesized so that expression arguments expand safely.
// NOTE(review): topHeightNoDH/topWidthNoDW are declared size_t just above,
// so ((x) < 0) can never be true for them and a would-be negative extent has
// already wrapped around -- confirm whether signed types were intended.
#define _CONV_FLOORDIV_X(x,y) (((x) < 0) ? (- ((-(x)) / (y)) - (((-(x)) % (y)) == 0 ? 0 : 1)) : ((x) / (y)))
const size_t topHeight = _CONV_FLOORDIV_X(topHeightNoDH, dH) + 1;
const size_t topWidth = _CONV_FLOORDIV_X(topWidthNoDW, dW) + 1;
// Undefine the helper under the same name it was defined with, so it does
// not leak into the rest of the translation unit.
#undef _CONV_FLOORDIV_X
if (batchSize != PyGpuArray_DIMS(top)[0] ||
nFilters != PyGpuArray_DIMS(top)[1] ||
topHeight != PyGpuArray_DIMS(top)[2] ||
......
......@@ -922,7 +922,7 @@ class BaseGpuCorrMM(GpuOp):
def c_code_cache_version(self):
    """Return the version tuple for this op's C support code.

    The value must be raised whenever any of the support_code_files changes.
    """
    # raise this whenever modifying any of the support_code_files
    # Only the bumped value is returned; an earlier `return (0, 27)` would
    # make this statement unreachable and leave the version unchanged.
    return (0, 28)
def c_support_code_apply(self, node, nodename):
# REMEMBER TO RAISE c_code_cache_version when changing any of
......@@ -1042,8 +1042,8 @@ class BaseGpuCorrMM(GpuOp):
kW = CudaNdarray_HOST_DIMS(weights)[3];
}
else {
if ((dH != 1) || (padH == -1)) {
// vertical subsampling or half padding, kernel height is specified
if (%(height)s != -1) {
// kernel height is specified (perhaps vertical subsampling or half padding)
kH = %(height)s;
}
else if (padH == -2) {
......@@ -1054,7 +1054,7 @@ class BaseGpuCorrMM(GpuOp):
// explicit padding, we can infer the kernel height
kH = (CudaNdarray_HOST_DIMS(bottom)[2] + 2*padH - (CudaNdarray_HOST_DIMS(top)[2] - 1)*dH - 1) / dilH + 1 ;
}
if ((dW != 1) || (padW == -1)) {
if (%(width)s != -1) {
kW = %(width)s;
}
else if (padW == -2) {
......@@ -1063,15 +1063,6 @@ class BaseGpuCorrMM(GpuOp):
else {
kW = (CudaNdarray_HOST_DIMS(bottom)[3] + 2*padW - (CudaNdarray_HOST_DIMS(top)[3] - 1) * dW - 1) / dilW + 1;
}
if ((%(height)s != -1 && %(height)s != kH) ||
(%(width)s != -1 && %(width)s != kW))
{
PyErr_Format(PyExc_ValueError,
"BaseGpuCorrMM: computed kernel shape %%dx%%d "
"does not match given shape %%dx%%d",
kH, kW, %(height)s, %(width)s);
%(fail)s
}
}
// Implicit dilated kernel size
......@@ -1124,18 +1115,8 @@ class BaseGpuCorrMM(GpuOp):
// height and width: bottom = (top - 1) * sample + (weights-1)*dil + 1 - 2*pad
out_dim[0] = CudaNdarray_HOST_DIMS(top)[0];
out_dim[1] = CudaNdarray_HOST_DIMS(weights)[1];
out_dim[2] = (dH != 1) ? %(height)s : (CudaNdarray_HOST_DIMS(top)[2] - 1) * dH + (CudaNdarray_HOST_DIMS(weights)[2]-1)*dilH + 1 - 2*padH;
out_dim[3] = (dW != 1) ? %(width)s : (CudaNdarray_HOST_DIMS(top)[3] - 1) * dW + (CudaNdarray_HOST_DIMS(weights)[3]-1)*dilW + 1 - 2*padW;
if ((%(height)s != -1 && %(height)s != out_dim[2]) ||
(%(width)s != -1 && %(width)s != out_dim[3]))
{
PyErr_Format(PyExc_ValueError,
"BaseGpuCorrMM: computed output shape %%dx%%d "
"does not match given shape %%dx%%d",
out_dim[2], out_dim[3],
%(height)s, %(width)s);
%(fail)s
}
out_dim[2] = (%(height)s != -1) ? %(height)s : (CudaNdarray_HOST_DIMS(top)[2] - 1) * dH + (CudaNdarray_HOST_DIMS(weights)[2]-1)*dilH + 1 - 2*padH;
out_dim[3] = (%(width)s != -1) ? %(width)s : (CudaNdarray_HOST_DIMS(top)[3] - 1) * dW + (CudaNdarray_HOST_DIMS(weights)[3]-1)*dilW + 1 - 2*padW;
break;
default:
PyErr_SetString(PyExc_ValueError, "BaseGpuCorrMM: direction must be 0, 1, or 2\\n");
......@@ -1500,7 +1481,7 @@ class BaseGpuCorr3dMM(GpuOp):
def c_code_cache_version(self):
    """Return the version tuple for this op's C support code.

    The value must be raised whenever any of the support_code_files changes.
    """
    # raise this whenever modifying any of the support_code_files
    # Only the bumped value is returned; an earlier `return (0, 26)` would
    # make this statement unreachable and leave the version unchanged.
    return (0, 27)
def c_support_code_apply(self, node, nodename):
# REMEMBER TO RAISE c_code_cache_version when changing any of
......@@ -1642,9 +1623,9 @@ class BaseGpuCorr3dMM(GpuOp):
}
else
{
if ((dH != 1) || (padH == -1))
if (%(height)s != -1)
{
// vertical subsampling or half padding, kernel height is specified
// kernel height is specified (perhaps vertical subsampling or half padding)
kH = %(height)s;
}
else if (padH == -2)
......@@ -1657,7 +1638,7 @@ class BaseGpuCorr3dMM(GpuOp):
// explicit padding, we can infer the kernel height
kH = (CudaNdarray_HOST_DIMS(bottom)[2] + 2*padH - (CudaNdarray_HOST_DIMS(top)[2] - 1)*dH - 1) / dilH + 1 ;
}
if ((dW != 1) || (padW == -1))
if (%(width)s != -1)
{
kW = %(width)s;
}
......@@ -1669,7 +1650,7 @@ class BaseGpuCorr3dMM(GpuOp):
{
kW = (CudaNdarray_HOST_DIMS(bottom)[3] + 2*padW - (CudaNdarray_HOST_DIMS(top)[3] - 1) * dW - 1) / dilW + 1;
}
if ((dD != 1) || (padD == -1))
if (%(depth)s != -1)
{
kD = %(depth)s;
}
......@@ -1681,16 +1662,6 @@ class BaseGpuCorr3dMM(GpuOp):
{
kD = (CudaNdarray_HOST_DIMS(bottom)[4] + 2*padD - (CudaNdarray_HOST_DIMS(top)[4] - 1) * dD - 1) / dilD+ 1;
}
if ((%(height)s != -1 && %(height)s != kH) ||
(%(width)s != -1 && %(width)s != kW) ||
(%(depth)s != -1 && %(depth)s != kD))
{
PyErr_Format(PyExc_ValueError,
"BaseGpuCorr3dMM: computed kernel shape %%dx%%dx%%d "
"does not match given shape %%dx%%dx%%d",
kH, kW, kD, %(height)s, %(width)s, %(depth)s);
%(fail)s
}
}
// Implicit dilated kernel size
......@@ -1763,20 +1734,9 @@ class BaseGpuCorr3dMM(GpuOp):
// height, width and depth: bottom = (top - 1) * sample + (weights-1)*dil + 1 - 2*pad
out_dim[0] = CudaNdarray_HOST_DIMS(top)[0];
out_dim[1] = CudaNdarray_HOST_DIMS(weights)[1];
out_dim[2] = (dH != 1) ? %(height)s : (CudaNdarray_HOST_DIMS(top)[2] - 1) * dH + (CudaNdarray_HOST_DIMS(weights)[2]-1)*dilH + 1 - 2*padH;
out_dim[3] = (dW != 1) ? %(width)s : (CudaNdarray_HOST_DIMS(top)[3] - 1) * dW + (CudaNdarray_HOST_DIMS(weights)[3]-1)*dilW + 1 - 2*padW;
out_dim[4] = (dD != 1) ? %(depth)s : (CudaNdarray_HOST_DIMS(top)[4] - 1) * dD + (CudaNdarray_HOST_DIMS(weights)[4]-1)*dilD + 1 - 2*padD;
if ((%(height)s != -1 && %(height)s != out_dim[2]) ||
(%(width)s != -1 && %(width)s != out_dim[3]) ||
(%(depth)s != -1 && %(depth)s != out_dim[4]))
{
PyErr_Format(PyExc_ValueError,
"BaseGpuCorr3dMM: computed output shape %%dx%%dx%%d "
"does not match given shape %%dx%%dx%%d",
out_dim[2], out_dim[3], out_dim[4],
%(height)s, %(width)s, %(depth)s);
%(fail)s
}
out_dim[2] = (%(height)s != -1) ? %(height)s : (CudaNdarray_HOST_DIMS(top)[2] - 1) * dH + (CudaNdarray_HOST_DIMS(weights)[2]-1)*dilH + 1 - 2*padH;
out_dim[3] = (%(width)s != -1) ? %(width)s : (CudaNdarray_HOST_DIMS(top)[3] - 1) * dW + (CudaNdarray_HOST_DIMS(weights)[3]-1)*dilW + 1 - 2*padW;
out_dim[4] = (%(depth)s != -1) ? %(depth)s : (CudaNdarray_HOST_DIMS(top)[4] - 1) * dD + (CudaNdarray_HOST_DIMS(weights)[4]-1)*dilD + 1 - 2*padD;
break;
default:
PyErr_SetString(PyExc_ValueError, "BaseGpuCorr3dMM: direction must be 0, 1, or 2\\n");
......
......@@ -429,9 +429,17 @@ CudaNdarray* corr3dMM(CudaNdarray *const bottom,
const int dil_kW = (kW - 1) * dilW + 1;
const int dil_kD = (kD - 1) * dilD + 1;
// top: (batchSize, nFilters, topHeight, topWidth, topDepth)
const int topHeight = int((bottomHeight + 2*padH - dil_kH) / dH) + 1;
const int topWidth = int((bottomWidth + 2*padW - dil_kW) / dW) + 1;
const int topDepth = int((bottomDepth + 2*padD - dil_kD) / dD) + 1;
const int topHeightNoDH = (bottomHeight + 2*padH - dil_kH);
const int topWidthNoDW = (bottomWidth + 2*padW - dil_kW);
const int topDepthNoDD = (bottomDepth + 2*padD - dil_kD);
// The int extents above might be negative, so divide them by the stride
// using Python-like flooring integer division, to be compatible with
// get_conv_output.
// The macro only special-cases a negative x; y is not sign-checked.
// Arguments are parenthesized so that expression arguments expand safely.
#define _CONV_FLOORDIV_X(x,y) (((x) < 0) ? (- ((-(x)) / (y)) - (((-(x)) % (y)) == 0 ? 0 : 1)) : ((x) / (y)))
const int topHeight = _CONV_FLOORDIV_X(topHeightNoDH, dH) + 1;
const int topWidth = _CONV_FLOORDIV_X(topWidthNoDW, dW) + 1;
const int topDepth = _CONV_FLOORDIV_X(topDepthNoDD, dD) + 1;
// Undefine the helper under the same name it was defined with, so it does
// not leak into the rest of the translation unit.
#undef _CONV_FLOORDIV_X
if (batchSize != CudaNdarray_HOST_DIMS(top)[0] ||
nFilters != CudaNdarray_HOST_DIMS(top)[1] ||
topHeight != CudaNdarray_HOST_DIMS(top)[2] ||
......
......@@ -333,8 +333,15 @@ CudaNdarray* corrMM(CudaNdarray *const bottom,
const int dil_kH = (kH - 1) * dilH + 1;
const int dil_kW = (kW - 1) * dilW + 1;
// top: (batchSize, nFilters, topHeight, topWidth)
const int topHeight = (bottomHeight + 2*padH - dil_kH) / dH + 1;
const int topWidth = (bottomWidth + 2*padW - dil_kW) / dW + 1;
const int topHeightNoDH = (bottomHeight + 2*padH - dil_kH);
const int topWidthNoDW = (bottomWidth + 2*padW - dil_kW);
// The int extents above might be negative, so divide them by the stride
// using Python-like flooring integer division, to be compatible with
// get_conv_output.
// The macro only special-cases a negative x; y is not sign-checked.
// Arguments are parenthesized so that expression arguments expand safely.
#define _CONV_FLOORDIV_X(x,y) (((x) < 0) ? (- ((-(x)) / (y)) - (((-(x)) % (y)) == 0 ? 0 : 1)) : ((x) / (y)))
const int topHeight = _CONV_FLOORDIV_X(topHeightNoDH, dH) + 1;
const int topWidth = _CONV_FLOORDIV_X(topWidthNoDW, dW) + 1;
// Undefine the helper under the same name it was defined with, so it does
// not leak into the rest of the translation unit.
#undef _CONV_FLOORDIV_X
if (batchSize != CudaNdarray_HOST_DIMS(top)[0] ||
nFilters != CudaNdarray_HOST_DIMS(top)[1] ||
topHeight != CudaNdarray_HOST_DIMS(top)[2] ||
......
......@@ -123,7 +123,7 @@ class BaseCorrMM(gof.OpenMPOp):
def c_code_cache_version(self):
    """Return the version tuple for this op's C support code.

    The tuple combines a manually bumped number with ``self.openmp`` and the
    value of ``blas_header_version()``. The leading number must be raised
    whenever any of the support_code_files changes.
    """
    # raise this whenever modifying any of the support_code_files
    # Only the bumped value is returned; an earlier `return (2, ...)` would
    # make this statement unreachable and leave the version unchanged.
    return (3, self.openmp, blas_header_version())
def c_support_code_apply(self, node, nodename):
# REMEMBER TO RAISE c_code_cache_version when changing any of
......@@ -275,8 +275,8 @@ class BaseCorrMM(gof.OpenMPOp):
kW = PyArray_DIMS(weights)[3];
}
else {
if ((dH != 1) || (padH == -1)) {
// vertical subsampling or half padding, kernel height is specified
if (%(height)s != -1) {
// kernel height is specified (perhaps vertical subsampling or half padding)
kH = %(height)s;
}
else if (padH == -2) {
......@@ -287,7 +287,8 @@ class BaseCorrMM(gof.OpenMPOp):
// explicit padding, we can infer the kernel height
kH = (PyArray_DIMS(bottom)[2] + 2*padH - (PyArray_DIMS(top)[2] - 1) * dH - 1) / dilH +1;
}
if ((dW != 1) || (padW == -1)) {
if (%(width)s != -1) {
// kernel width is specified (perhaps horizontal subsampling or half padding)
kW = %(width)s;
}
else if (padW == -2) {
......@@ -296,15 +297,6 @@ class BaseCorrMM(gof.OpenMPOp):
else {
kW = (PyArray_DIMS(bottom)[3] + 2*padW - (PyArray_DIMS(top)[3] - 1) * dW - 1) / dilW + 1;
}
if ((%(height)s != -1 && %(height)s != kH) ||
(%(width)s != -1 && %(width)s != kW))
{
PyErr_Format(PyExc_ValueError,
"BaseCorrMM: computed kernel shape %%lldx%%lld "
"does not match given shape %%lldx%%lld",
(long long)kH, (long long)kW, (long long)%(height)s, (long long)%(width)s);
%(fail)s
}
}
// Implicit dilated kernel size
......@@ -357,18 +349,8 @@ class BaseCorrMM(gof.OpenMPOp):
// height and width: bottom = (top - 1) * sample + (weights-1)*dil + 1 - 2*pad
out_dim[0] = (npy_intp)PyArray_DIMS(top)[0];
out_dim[1] = (npy_intp)PyArray_DIMS(weights)[1];
out_dim[2] = (npy_intp)((dH != 1) ? %(height)s : (PyArray_DIMS(top)[2] - 1) * dH + (PyArray_DIMS(weights)[2]-1)*dilH + 1 - 2*padH);
out_dim[3] = (npy_intp)((dW != 1) ? %(width)s : (PyArray_DIMS(top)[3] - 1) * dW + (PyArray_DIMS(weights)[3]-1)*dilW + 1 - 2*padW);
if ((%(height)s != -1 && %(height)s != out_dim[2]) ||
(%(width)s != -1 && %(width)s != out_dim[3]))
{
PyErr_Format(PyExc_ValueError,
"BaseCorrMM: computed output shape %%lldx%%lld "
"does not match given shape %%lldx%%lld",
(long long)out_dim[2], (long long)out_dim[3],
(long long)%(height)s, (long long)%(width)s);
%(fail)s
}
out_dim[2] = (npy_intp)((%(height)s != -1) ? %(height)s : (PyArray_DIMS(top)[2] - 1) * dH + (PyArray_DIMS(weights)[2]-1)*dilH + 1 - 2*padH);
out_dim[3] = (npy_intp)((%(width)s != -1) ? %(width)s : (PyArray_DIMS(top)[3] - 1) * dW + (PyArray_DIMS(weights)[3]-1)*dilW + 1 - 2*padW);
break;
default:
PyErr_SetString(PyExc_ValueError, "BaseCorrMM: direction must be 0, 1, or 2\\n");
......
......@@ -123,7 +123,7 @@ class BaseCorr3dMM(gof.OpenMPOp):
def c_code_cache_version(self):
    """Return the version tuple for this op's C support code.

    The tuple combines a manually bumped number with ``self.openmp`` and the
    value of ``blas_header_version()``. The leading number must be raised
    whenever any of the support_code_files changes.
    """
    # raise this whenever modifying any of the support_code_files
    # Only the bumped value is returned; an earlier `return (2, ...)` would
    # make this statement unreachable and leave the version unchanged.
    return (3, self.openmp, blas_header_version())
def c_support_code_apply(self, node, nodename):
# REMEMBER TO RAISE c_code_cache_version when changing any of
......@@ -292,8 +292,8 @@ class BaseCorr3dMM(gof.OpenMPOp):
kD = PyArray_DIMS(weights)[4];
}
else {
if ((dH != 1) || (padH == -1)) {
// vertical subsampling or half padding, kernel height is specified
if (%(height)s != -1) {
// kernel height is specified (perhaps vertical subsampling or half padding)
kH = %(height)s;
}
else if (padH == -2) {
......@@ -304,7 +304,7 @@ class BaseCorr3dMM(gof.OpenMPOp):
// explicit padding, we can infer the kernel height
kH = (PyArray_DIMS(bottom)[2] + 2*padH - (PyArray_DIMS(top)[2] - 1) * dH - 1) / dilH +1;
}
if ((dW != 1) || (padW == -1)) {
if (%(width)s != -1) {
kW = %(width)s;
}
else if (padW == -2) {
......@@ -313,7 +313,7 @@ class BaseCorr3dMM(gof.OpenMPOp):
else {
kW = (PyArray_DIMS(bottom)[3] + 2*padW - (PyArray_DIMS(top)[3] - 1) * dW - 1) / dilW + 1;
}
if ((dD != 1) || (padD == -1)) {
if (%(depth)s != -1) {
kD = %(depth)s;
}
else if (padD == -2) {
......@@ -322,17 +322,6 @@ class BaseCorr3dMM(gof.OpenMPOp):
else {
kD = (PyArray_DIMS(bottom)[4] + 2*padD - (PyArray_DIMS(top)[4] - 1) * dD - 1) / dilD + 1;
}
if ((%(height)s != -1 && %(height)s != kH) ||
(%(width)s != -1 && %(width)s != kW) ||
(%(depth)s != -1 && %(depth)s != kD))
{
PyErr_Format(PyExc_ValueError,
"BaseCorr3dMM: computed kernel shape %%lldx%%lldx%%lld "
"does not match given shape %%lldx%%lldx%%lld",
(long long)kH, (long long)kW, (long long)kD,
(long long)%(height)s, (long long)%(width)s, (long long)%(depth)s);
%(fail)s
}
}
// Implicit dilated kernel size
......@@ -398,20 +387,9 @@ class BaseCorr3dMM(gof.OpenMPOp):
// height and width: bottom = (top - 1) * sample + (weights-1)*dil + 1 - 2*pad
out_dim[0] = (npy_intp)PyArray_DIMS(top)[0];
out_dim[1] = (npy_intp)PyArray_DIMS(weights)[1];
out_dim[2] = (npy_intp)((dH != 1) ? %(height)s : (PyArray_DIMS(top)[2] - 1) * dH + (PyArray_DIMS(weights)[2]-1)*dilH + 1 - 2*padH);
out_dim[3] = (npy_intp)((dW != 1) ? %(width)s : (PyArray_DIMS(top)[3] - 1) * dW + (PyArray_DIMS(weights)[3]-1)*dilW + 1 - 2*padW);
out_dim[4] = (npy_intp)((dD != 1) ? %(depth)s : (PyArray_DIMS(top)[4] - 1) * dD + (PyArray_DIMS(weights)[4]-1)*dilD + 1 - 2*padD);
if ((%(height)s != -1 && %(height)s != out_dim[2]) ||
(%(width)s != -1 && %(width)s != out_dim[3]) ||
(%(depth)s != -1 && %(depth)s != out_dim[4]))
{
PyErr_Format(PyExc_ValueError,
"BaseCorr3dMM: computed output shape %%lldx%%lldx%%lld "
"does not match given shape %%lldx%%lldx%%lld",
(long long)out_dim[2], (long long)out_dim[3], (long long)out_dim[4],
(long long)%(height)s, (long long)%(width)s, (long long)%(depth)s);
%(fail)s
}
out_dim[2] = (npy_intp)((%(height)s != -1) ? %(height)s : (PyArray_DIMS(top)[2] - 1) * dH + (PyArray_DIMS(weights)[2]-1)*dilH + 1 - 2*padH);
out_dim[3] = (npy_intp)((%(width)s != -1) ? %(width)s : (PyArray_DIMS(top)[3] - 1) * dW + (PyArray_DIMS(weights)[3]-1)*dilW + 1 - 2*padW);
out_dim[4] = (npy_intp)((%(depth)s != -1) ? %(depth)s : (PyArray_DIMS(top)[4] - 1) * dD + (PyArray_DIMS(weights)[4]-1)*dilD + 1 - 2*padD);
break;
default:
PyErr_SetString(PyExc_ValueError, "BaseCorr3dMM: direction must be 0, 1, or 2\\n");
......
......@@ -188,9 +188,17 @@ PyArrayObject* corr3dMM(PyArrayObject* bottom,
const int dil_kW = (kW - 1) * dilW + 1;
const int dil_kD = (kD - 1) * dilD + 1;
// top: (batchSize, nFilters, topHeight, topWidth, topDepth)
const int topHeight = (bottomHeight + 2*padH - dil_kH) / dH + 1;
const int topWidth = (bottomWidth + 2*padW - dil_kW) / dW + 1;
const int topDepth = (bottomDepth + 2*padD - dil_kD) / dD + 1;
const int topHeightNoDH = (bottomHeight + 2*padH - dil_kH);
const int topWidthNoDW = (bottomWidth + 2*padW - dil_kW);
const int topDepthNoDD = (bottomDepth + 2*padD - dil_kD);
// The int extents above might be negative, so divide them by the stride
// using Python-like flooring integer division, to be compatible with
// get_conv_output.
// The macro only special-cases a negative x; y is not sign-checked.
// Arguments are parenthesized so that expression arguments expand safely.
// The modulo operator stays written as a doubled percent sign, exactly as in
// the original (presumably this file is passed through Python %-formatting
// before compilation -- TODO confirm).
#define _CONV_FLOORDIV_X(x,y) (((x) < 0) ? (- ((-(x)) / (y)) - (((-(x)) %% (y)) == 0 ? 0 : 1)) : ((x) / (y)))
const int topHeight = _CONV_FLOORDIV_X(topHeightNoDH, dH) + 1;
const int topWidth = _CONV_FLOORDIV_X(topWidthNoDW, dW) + 1;
const int topDepth = _CONV_FLOORDIV_X(topDepthNoDD, dD) + 1;
// Undefine the helper under the same name it was defined with, so it does
// not leak into the rest of the translation unit.
#undef _CONV_FLOORDIV_X
if (batchSize != PyArray_DIMS(top)[0] ||
nFilters != PyArray_DIMS(top)[1] ||
topHeight != PyArray_DIMS(top)[2] ||
......
......@@ -164,8 +164,15 @@ PyArrayObject* corrMM(PyArrayObject* bottom,
const int dil_kH = (kH - 1) * dilH + 1;
const int dil_kW = (kW - 1) * dilW + 1;
// top: (batchSize, nFilters, topHeight, topWidth)
const int topHeight = (bottomHeight + 2*padH - dil_kH) / dH + 1;
const int topWidth = (bottomWidth + 2*padW - dil_kW) / dW + 1;
const int topHeightNoDH = (bottomHeight + 2*padH - dil_kH);
const int topWidthNoDW = (bottomWidth + 2*padW - dil_kW);
// The int extents above might be negative, so divide them by the stride
// using Python-like flooring integer division, to be compatible with
// get_conv_output.
// The macro only special-cases a negative x; y is not sign-checked.
// Arguments are parenthesized so that expression arguments expand safely.
// The modulo operator stays written as a doubled percent sign, exactly as in
// the original (presumably this file is passed through Python %-formatting
// before compilation -- TODO confirm).
#define _CONV_FLOORDIV_X(x,y) (((x) < 0) ? (- ((-(x)) / (y)) - (((-(x)) %% (y)) == 0 ? 0 : 1)) : ((x) / (y)))
const int topHeight = _CONV_FLOORDIV_X(topHeightNoDH, dH) + 1;
const int topWidth = _CONV_FLOORDIV_X(topWidthNoDW, dW) + 1;
// Undefine the helper under the same name it was defined with, so it does
// not leak into the rest of the translation unit.
#undef _CONV_FLOORDIV_X
if (batchSize != PyArray_DIMS(top)[0] ||
nFilters != PyArray_DIMS(top)[1] ||
topHeight != PyArray_DIMS(top)[2] ||
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论