提交 30dd8bdc 作者: Gijs van Tulder

CorrMM should use the height/width if they are given.

上级 676cf29d
...@@ -528,7 +528,7 @@ class BaseGpuCorrMM(CGpuKernelBase): ...@@ -528,7 +528,7 @@ class BaseGpuCorrMM(CGpuKernelBase):
def c_code_cache_version(self): def c_code_cache_version(self):
# Raise this whenever modifying the code below. # Raise this whenever modifying the code below.
return (3,) return (4,)
def c_code_helper(self, bottom, weights, top, direction, sub, height=None, width=None): def c_code_helper(self, bottom, weights, top, direction, sub, height=None, width=None):
""" """
...@@ -650,8 +650,8 @@ class BaseGpuCorrMM(CGpuKernelBase): ...@@ -650,8 +650,8 @@ class BaseGpuCorrMM(CGpuKernelBase):
kW = PyGpuArray_DIMS(weights)[3]; kW = PyGpuArray_DIMS(weights)[3];
} }
else { else {
if ((dH != 1) || (padH == -1)) { if (%(height)s != -1) {
// vertical subsampling or half padding, kernel height is specified // kernel height is specified (perhaps vertical subsampling or half padding)
kH = %(height)s; kH = %(height)s;
} }
else if (padH == -2) { else if (padH == -2) {
...@@ -662,7 +662,7 @@ class BaseGpuCorrMM(CGpuKernelBase): ...@@ -662,7 +662,7 @@ class BaseGpuCorrMM(CGpuKernelBase):
// explicit padding, we can infer the kernel height // explicit padding, we can infer the kernel height
kH = (PyGpuArray_DIMS(bottom)[2] + 2*padH - (PyGpuArray_DIMS(top)[2] - 1) * dH - 1) / dilH + 1 ; kH = (PyGpuArray_DIMS(bottom)[2] + 2*padH - (PyGpuArray_DIMS(top)[2] - 1) * dH - 1) / dilH + 1 ;
} }
if ((dW != 1) || (padW == -1)) { if (%(width)s != -1) {
kW = %(width)s; kW = %(width)s;
} }
else if (padW == -2) { else if (padW == -2) {
...@@ -671,15 +671,6 @@ class BaseGpuCorrMM(CGpuKernelBase): ...@@ -671,15 +671,6 @@ class BaseGpuCorrMM(CGpuKernelBase):
else { else {
kW = (PyGpuArray_DIMS(bottom)[3] + 2*padW - (PyGpuArray_DIMS(top)[3] - 1) * dW - 1) / dilW + 1; kW = (PyGpuArray_DIMS(bottom)[3] + 2*padW - (PyGpuArray_DIMS(top)[3] - 1) * dW - 1) / dilW + 1;
} }
if ((%(height)s != -1 && %(height)s != kH) ||
(%(width)s != -1 && %(width)s != kW))
{
PyErr_Format(PyExc_ValueError,
"BaseGpuCorrMM: computed kernel shape %%lldx%%lld "
"does not match given shape %%lldx%%lld",
(long long)kH, (long long)kW, (long long)%(height)s, (long long)%(width)s);
%(fail)s
}
} }
// Implicit dilated kernel size // Implicit dilated kernel size
...@@ -738,20 +729,10 @@ class BaseGpuCorrMM(CGpuKernelBase): ...@@ -738,20 +729,10 @@ class BaseGpuCorrMM(CGpuKernelBase):
// height and width: bottom = (top - 1) * sample + (weights-1)*dil + 1 - 2*pad // height and width: bottom = (top - 1) * sample + (weights-1)*dil + 1 - 2*pad
out_dim[0] = PyGpuArray_DIMS(top)[0]; out_dim[0] = PyGpuArray_DIMS(top)[0];
out_dim[1] = PyGpuArray_DIMS(weights)[1]; out_dim[1] = PyGpuArray_DIMS(weights)[1];
out_dim[2] = (dH != 1) ? %(height)s : (PyGpuArray_DIMS(top)[2] - 1) * dH + (PyGpuArray_DIMS(weights)[2]-1)*dilH + 1 - 2*padH; out_dim[2] = (%(height)s != -1) ? %(height)s : (PyGpuArray_DIMS(top)[2] - 1) * dH + (PyGpuArray_DIMS(weights)[2]-1)*dilH + 1 - 2*padH;
out_dim[3] = (dW != 1) ? %(width)s : (PyGpuArray_DIMS(top)[3] - 1) * dW + (PyGpuArray_DIMS(weights)[3]-1)*dilW + 1 - 2*padW; out_dim[3] = (%(width)s != -1) ? %(width)s : (PyGpuArray_DIMS(top)[3] - 1) * dW + (PyGpuArray_DIMS(weights)[3]-1)*dilW + 1 - 2*padW;
out_typecode = top->ga.typecode; out_typecode = top->ga.typecode;
out_context = top->context; out_context = top->context;
if ((%(height)s != -1 && %(height)s != out_dim[2]) ||
(%(width)s != -1 && %(width)s != out_dim[3]))
{
PyErr_Format(PyExc_ValueError,
"BaseGpuCorrMM: computed output shape %%lldx%%lld "
"does not match given shape %%lldx%%lld",
(long long)out_dim[2], (long long)out_dim[3],
(long long)%(height)s, (long long)%(width)s);
%(fail)s
}
break; break;
default: default:
PyErr_SetString(PyExc_ValueError, "BaseGpuCorrMM: direction must be 0, 1, or 2\\n"); PyErr_SetString(PyExc_ValueError, "BaseGpuCorrMM: direction must be 0, 1, or 2\\n");
...@@ -1105,7 +1086,7 @@ class BaseGpuCorr3dMM(CGpuKernelBase): ...@@ -1105,7 +1086,7 @@ class BaseGpuCorr3dMM(CGpuKernelBase):
def c_code_cache_version(self): def c_code_cache_version(self):
# raise this whenever modifying the code below. # raise this whenever modifying the code below.
return (3,) return (4,)
def c_code_helper(self, bottom, weights, top, direction, sub, def c_code_helper(self, bottom, weights, top, direction, sub,
height=None, width=None, depth=None): height=None, width=None, depth=None):
...@@ -1245,8 +1226,8 @@ class BaseGpuCorr3dMM(CGpuKernelBase): ...@@ -1245,8 +1226,8 @@ class BaseGpuCorr3dMM(CGpuKernelBase):
kD = PyGpuArray_DIMS(weights)[4]; kD = PyGpuArray_DIMS(weights)[4];
} }
else { else {
if ((dH != 1) || (padH == -1)) { if (%(height)s != -1) {
// vertical subsampling or half padding, kernel height is specified // kernel height is specified (perhaps vertical subsampling or half padding)
kH = %(height)s; kH = %(height)s;
} }
else if (padH == -2) { else if (padH == -2) {
...@@ -1257,7 +1238,7 @@ class BaseGpuCorr3dMM(CGpuKernelBase): ...@@ -1257,7 +1238,7 @@ class BaseGpuCorr3dMM(CGpuKernelBase):
// explicit padding, we can infer the kernel height // explicit padding, we can infer the kernel height
kH = (PyGpuArray_DIMS(bottom)[2] + 2*padH - (PyGpuArray_DIMS(top)[2] - 1) * dH - 1) / dilH + 1 ; kH = (PyGpuArray_DIMS(bottom)[2] + 2*padH - (PyGpuArray_DIMS(top)[2] - 1) * dH - 1) / dilH + 1 ;
} }
if ((dW != 1) || (padW == -1)) { if (%(width)s != -1) {
kW = %(width)s; kW = %(width)s;
} }
else if (padW == -2) { else if (padW == -2) {
...@@ -1266,7 +1247,7 @@ class BaseGpuCorr3dMM(CGpuKernelBase): ...@@ -1266,7 +1247,7 @@ class BaseGpuCorr3dMM(CGpuKernelBase):
else { else {
kW = (PyGpuArray_DIMS(bottom)[3] + 2*padW - (PyGpuArray_DIMS(top)[3] - 1) * dW - 1) / dilW + 1; kW = (PyGpuArray_DIMS(bottom)[3] + 2*padW - (PyGpuArray_DIMS(top)[3] - 1) * dW - 1) / dilW + 1;
} }
if ((dD != 1) || (padD == -1)) { if (%(depth)s != -1) {
kD = %(depth)s; kD = %(depth)s;
} }
else if (padD == -2) { else if (padD == -2) {
...@@ -1275,17 +1256,6 @@ class BaseGpuCorr3dMM(CGpuKernelBase): ...@@ -1275,17 +1256,6 @@ class BaseGpuCorr3dMM(CGpuKernelBase):
else { else {
kD = (PyGpuArray_DIMS(bottom)[4] + 2*padD - (PyGpuArray_DIMS(top)[4] - 1) * dD - 1) / dilD + 1; kD = (PyGpuArray_DIMS(bottom)[4] + 2*padD - (PyGpuArray_DIMS(top)[4] - 1) * dD - 1) / dilD + 1;
} }
if ((%(height)s != -1 && %(height)s != kH) ||
(%(width)s != -1 && %(width)s != kW) ||
(%(depth)s != -1 && %(depth)s != kD))
{
PyErr_Format(PyExc_ValueError,
"BaseGpuCorr3dMM: computed kernel shape %%lldx%%lldx%%lld "
"does not match given shape %%lldx%%lldx%%lld",
(long long)kH, (long long)kW, (long long)kD,
(long long)%(height)s, (long long)%(width)s, (long long)%(depth)s);
%(fail)s
}
} }
// Implicit dilated kernel size // Implicit dilated kernel size
...@@ -1357,22 +1327,11 @@ class BaseGpuCorr3dMM(CGpuKernelBase): ...@@ -1357,22 +1327,11 @@ class BaseGpuCorr3dMM(CGpuKernelBase):
// height, width and depth: bottom = (top - 1) * sample + (weights-1)*dil + 1 - 2*pad // height, width and depth: bottom = (top - 1) * sample + (weights-1)*dil + 1 - 2*pad
out_dim[0] = PyGpuArray_DIMS(top)[0]; out_dim[0] = PyGpuArray_DIMS(top)[0];
out_dim[1] = PyGpuArray_DIMS(weights)[1]; out_dim[1] = PyGpuArray_DIMS(weights)[1];
out_dim[2] = (dH != 1) ? %(height)s : (PyGpuArray_DIMS(top)[2] - 1) * dH + (PyGpuArray_DIMS(weights)[2]-1)*dilH + 1 - 2*padH; out_dim[2] = (%(height)s != -1) ? %(height)s : (PyGpuArray_DIMS(top)[2] - 1) * dH + (PyGpuArray_DIMS(weights)[2]-1)*dilH + 1 - 2*padH;
out_dim[3] = (dW != 1) ? %(width)s : (PyGpuArray_DIMS(top)[3] - 1) * dW + (PyGpuArray_DIMS(weights)[3]-1)*dilW + 1 - 2*padW; out_dim[3] = (%(width)s != -1) ? %(width)s : (PyGpuArray_DIMS(top)[3] - 1) * dW + (PyGpuArray_DIMS(weights)[3]-1)*dilW + 1 - 2*padW;
out_dim[4] = (dD != 1) ? %(depth)s : (PyGpuArray_DIMS(top)[4] - 1) * dD + (PyGpuArray_DIMS(weights)[4]-1)*dilD + 1 - 2*padD; out_dim[4] = (%(depth)s != -1) ? %(depth)s : (PyGpuArray_DIMS(top)[4] - 1) * dD + (PyGpuArray_DIMS(weights)[4]-1)*dilD + 1 - 2*padD;
out_typecode = top->ga.typecode; out_typecode = top->ga.typecode;
out_context = top->context; out_context = top->context;
if ((%(height)s != -1 && %(height)s != out_dim[2]) ||
(%(width)s != -1 && %(width)s != out_dim[3]) ||
(%(depth)s != -1 && %(depth)s != out_dim[4]))
{
PyErr_Format(PyExc_ValueError,
"BaseGpuCorr3dMM: computed output shape %%lldx%%lldx%%lld "
"does not match given shape %%lldx%%lldx%%lld",
(long long)out_dim[2], (long long)out_dim[3], (long long)out_dim[4],
(long long)%(height)s, (long long)%(width)s, (long long)%(depth)s);
%(fail)s
}
break; break;
default: default:
PyErr_SetString(PyExc_ValueError, "BaseGpuCorr3dMM: direction must be 0, 1, or 2\\n"); PyErr_SetString(PyExc_ValueError, "BaseGpuCorr3dMM: direction must be 0, 1, or 2\\n");
......
...@@ -425,9 +425,17 @@ PyGpuArrayObject* corr3dMM(PyGpuArrayObject *const bottom, ...@@ -425,9 +425,17 @@ PyGpuArrayObject* corr3dMM(PyGpuArrayObject *const bottom,
const size_t dil_kW = (kW - 1) * dilW + 1; const size_t dil_kW = (kW - 1) * dilW + 1;
const size_t dil_kD = (kD - 1) * dilD + 1; const size_t dil_kD = (kD - 1) * dilD + 1;
// top: (batchSize, nFilters, topHeight, topWidth, topDepth) // top: (batchSize, nFilters, topHeight, topWidth, topDepth)
const size_t topHeight = (bottomHeight + 2*padH - dil_kH) / dH + 1; const size_t topHeightNoDH = (bottomHeight + 2*padH - dil_kH);
const size_t topWidth = (bottomWidth + 2*padW - dil_kW) / dW + 1; const size_t topWidthNoDW = (bottomWidth + 2*padW - dil_kW);
const size_t topDepth = (bottomDepth + 2*padD - dil_kD) / dD + 1; const size_t topDepthNoDD = (bottomDepth + 2*padD - dil_kD);
// The un-strided extents above are logically negative when the dilated
// kernel is larger than the padded input, so the division by the stride has
// to floor like Python's // operator to be compatible with get_conv_output.
// They are stored as size_t, where a negative value wraps around and a plain
// `x < 0` test can never be true; convert them to a signed type first.
// NOTE(review): the cast assumes long long is at least as wide as size_t
// (otherwise this degrades to the previous truncating behavior) -- confirm.
// note: this macro implements Python's // for negative x only; its arguments
// are parenthesized so that expression arguments expand safely.
#define _CONV_FLOORDIV_X(x,y) (((x) < 0) ? (- ((-(x)) / (y)) - (((-(x)) % (y)) == 0 ? 0 : 1)) : ((x) / (y)))
const size_t topHeight = _CONV_FLOORDIV_X((long long)topHeightNoDH, (long long)dH) + 1;
const size_t topWidth = _CONV_FLOORDIV_X((long long)topWidthNoDW, (long long)dW) + 1;
const size_t topDepth = _CONV_FLOORDIV_X((long long)topDepthNoDD, (long long)dD) + 1;
// undefine under the same name it was defined with, so the helper does not leak
#undef _CONV_FLOORDIV_X
if (batchSize != PyGpuArray_DIMS(top)[0] || if (batchSize != PyGpuArray_DIMS(top)[0] ||
nFilters != PyGpuArray_DIMS(top)[1] || nFilters != PyGpuArray_DIMS(top)[1] ||
topHeight != PyGpuArray_DIMS(top)[2] || topHeight != PyGpuArray_DIMS(top)[2] ||
......
...@@ -360,8 +360,15 @@ PyGpuArrayObject* corrMM(PyGpuArrayObject *const bottom, ...@@ -360,8 +360,15 @@ PyGpuArrayObject* corrMM(PyGpuArrayObject *const bottom,
const size_t dil_kH = (kH - 1) * dilH + 1; const size_t dil_kH = (kH - 1) * dilH + 1;
const size_t dil_kW = (kW - 1) * dilW + 1; const size_t dil_kW = (kW - 1) * dilW + 1;
// top: (batchSize, nFilters, topHeight, topWidth) // top: (batchSize, nFilters, topHeight, topWidth)
const size_t topHeight = (bottomHeight + 2*padH - dil_kH) / dH + 1; const size_t topHeightNoDH = (bottomHeight + 2*padH - dil_kH);
const size_t topWidth = (bottomWidth + 2*padW - dil_kW) / dW + 1; const size_t topWidthNoDW = (bottomWidth + 2*padW - dil_kW);
// The un-strided extents above are logically negative when the dilated
// kernel is larger than the padded input, so the division by the stride has
// to floor like Python's // operator to be compatible with get_conv_output.
// They are stored as size_t, where a negative value wraps around and a plain
// `x < 0` test can never be true; convert them to a signed type first.
// NOTE(review): the cast assumes long long is at least as wide as size_t
// (otherwise this degrades to the previous truncating behavior) -- confirm.
// note: this macro implements Python's // for negative x only; its arguments
// are parenthesized so that expression arguments expand safely.
#define _CONV_FLOORDIV_X(x,y) (((x) < 0) ? (- ((-(x)) / (y)) - (((-(x)) % (y)) == 0 ? 0 : 1)) : ((x) / (y)))
const size_t topHeight = _CONV_FLOORDIV_X((long long)topHeightNoDH, (long long)dH) + 1;
const size_t topWidth = _CONV_FLOORDIV_X((long long)topWidthNoDW, (long long)dW) + 1;
// undefine under the same name it was defined with, so the helper does not leak
#undef _CONV_FLOORDIV_X
if (batchSize != PyGpuArray_DIMS(top)[0] || if (batchSize != PyGpuArray_DIMS(top)[0] ||
nFilters != PyGpuArray_DIMS(top)[1] || nFilters != PyGpuArray_DIMS(top)[1] ||
topHeight != PyGpuArray_DIMS(top)[2] || topHeight != PyGpuArray_DIMS(top)[2] ||
......
...@@ -922,7 +922,7 @@ class BaseGpuCorrMM(GpuOp): ...@@ -922,7 +922,7 @@ class BaseGpuCorrMM(GpuOp):
def c_code_cache_version(self): def c_code_cache_version(self):
# raise this whenever modifying any of the support_code_files # raise this whenever modifying any of the support_code_files
return (0, 27) return (0, 28)
def c_support_code_apply(self, node, nodename): def c_support_code_apply(self, node, nodename):
# REMEMBER TO RAISE c_code_cache_version when changing any of # REMEMBER TO RAISE c_code_cache_version when changing any of
...@@ -1042,8 +1042,8 @@ class BaseGpuCorrMM(GpuOp): ...@@ -1042,8 +1042,8 @@ class BaseGpuCorrMM(GpuOp):
kW = CudaNdarray_HOST_DIMS(weights)[3]; kW = CudaNdarray_HOST_DIMS(weights)[3];
} }
else { else {
if ((dH != 1) || (padH == -1)) { if (%(height)s != -1) {
// vertical subsampling or half padding, kernel height is specified // kernel height is specified (perhaps vertical subsampling or half padding)
kH = %(height)s; kH = %(height)s;
} }
else if (padH == -2) { else if (padH == -2) {
...@@ -1054,7 +1054,7 @@ class BaseGpuCorrMM(GpuOp): ...@@ -1054,7 +1054,7 @@ class BaseGpuCorrMM(GpuOp):
// explicit padding, we can infer the kernel height // explicit padding, we can infer the kernel height
kH = (CudaNdarray_HOST_DIMS(bottom)[2] + 2*padH - (CudaNdarray_HOST_DIMS(top)[2] - 1)*dH - 1) / dilH + 1 ; kH = (CudaNdarray_HOST_DIMS(bottom)[2] + 2*padH - (CudaNdarray_HOST_DIMS(top)[2] - 1)*dH - 1) / dilH + 1 ;
} }
if ((dW != 1) || (padW == -1)) { if (%(width)s != -1) {
kW = %(width)s; kW = %(width)s;
} }
else if (padW == -2) { else if (padW == -2) {
...@@ -1063,15 +1063,6 @@ class BaseGpuCorrMM(GpuOp): ...@@ -1063,15 +1063,6 @@ class BaseGpuCorrMM(GpuOp):
else { else {
kW = (CudaNdarray_HOST_DIMS(bottom)[3] + 2*padW - (CudaNdarray_HOST_DIMS(top)[3] - 1) * dW - 1) / dilW + 1; kW = (CudaNdarray_HOST_DIMS(bottom)[3] + 2*padW - (CudaNdarray_HOST_DIMS(top)[3] - 1) * dW - 1) / dilW + 1;
} }
if ((%(height)s != -1 && %(height)s != kH) ||
(%(width)s != -1 && %(width)s != kW))
{
PyErr_Format(PyExc_ValueError,
"BaseGpuCorrMM: computed kernel shape %%dx%%d "
"does not match given shape %%dx%%d",
kH, kW, %(height)s, %(width)s);
%(fail)s
}
} }
// Implicit dilated kernel size // Implicit dilated kernel size
...@@ -1124,18 +1115,8 @@ class BaseGpuCorrMM(GpuOp): ...@@ -1124,18 +1115,8 @@ class BaseGpuCorrMM(GpuOp):
// height and width: bottom = (top - 1) * sample + (weights-1)*dil + 1 - 2*pad // height and width: bottom = (top - 1) * sample + (weights-1)*dil + 1 - 2*pad
out_dim[0] = CudaNdarray_HOST_DIMS(top)[0]; out_dim[0] = CudaNdarray_HOST_DIMS(top)[0];
out_dim[1] = CudaNdarray_HOST_DIMS(weights)[1]; out_dim[1] = CudaNdarray_HOST_DIMS(weights)[1];
out_dim[2] = (dH != 1) ? %(height)s : (CudaNdarray_HOST_DIMS(top)[2] - 1) * dH + (CudaNdarray_HOST_DIMS(weights)[2]-1)*dilH + 1 - 2*padH; out_dim[2] = (%(height)s != -1) ? %(height)s : (CudaNdarray_HOST_DIMS(top)[2] - 1) * dH + (CudaNdarray_HOST_DIMS(weights)[2]-1)*dilH + 1 - 2*padH;
out_dim[3] = (dW != 1) ? %(width)s : (CudaNdarray_HOST_DIMS(top)[3] - 1) * dW + (CudaNdarray_HOST_DIMS(weights)[3]-1)*dilW + 1 - 2*padW; out_dim[3] = (%(width)s != -1) ? %(width)s : (CudaNdarray_HOST_DIMS(top)[3] - 1) * dW + (CudaNdarray_HOST_DIMS(weights)[3]-1)*dilW + 1 - 2*padW;
if ((%(height)s != -1 && %(height)s != out_dim[2]) ||
(%(width)s != -1 && %(width)s != out_dim[3]))
{
PyErr_Format(PyExc_ValueError,
"BaseGpuCorrMM: computed output shape %%dx%%d "
"does not match given shape %%dx%%d",
out_dim[2], out_dim[3],
%(height)s, %(width)s);
%(fail)s
}
break; break;
default: default:
PyErr_SetString(PyExc_ValueError, "BaseGpuCorrMM: direction must be 0, 1, or 2\\n"); PyErr_SetString(PyExc_ValueError, "BaseGpuCorrMM: direction must be 0, 1, or 2\\n");
...@@ -1500,7 +1481,7 @@ class BaseGpuCorr3dMM(GpuOp): ...@@ -1500,7 +1481,7 @@ class BaseGpuCorr3dMM(GpuOp):
def c_code_cache_version(self): def c_code_cache_version(self):
# raise this whenever modifying any of the support_code_files # raise this whenever modifying any of the support_code_files
return (0, 26) return (0, 27)
def c_support_code_apply(self, node, nodename): def c_support_code_apply(self, node, nodename):
# REMEMBER TO RAISE c_code_cache_version when changing any of # REMEMBER TO RAISE c_code_cache_version when changing any of
...@@ -1642,9 +1623,9 @@ class BaseGpuCorr3dMM(GpuOp): ...@@ -1642,9 +1623,9 @@ class BaseGpuCorr3dMM(GpuOp):
} }
else else
{ {
if ((dH != 1) || (padH == -1)) if (%(height)s != -1)
{ {
// vertical subsampling or half padding, kernel height is specified // kernel height is specified (perhaps vertical subsampling or half padding)
kH = %(height)s; kH = %(height)s;
} }
else if (padH == -2) else if (padH == -2)
...@@ -1657,7 +1638,7 @@ class BaseGpuCorr3dMM(GpuOp): ...@@ -1657,7 +1638,7 @@ class BaseGpuCorr3dMM(GpuOp):
// explicit padding, we can infer the kernel height // explicit padding, we can infer the kernel height
kH = (CudaNdarray_HOST_DIMS(bottom)[2] + 2*padH - (CudaNdarray_HOST_DIMS(top)[2] - 1)*dH - 1) / dilH + 1 ; kH = (CudaNdarray_HOST_DIMS(bottom)[2] + 2*padH - (CudaNdarray_HOST_DIMS(top)[2] - 1)*dH - 1) / dilH + 1 ;
} }
if ((dW != 1) || (padW == -1)) if (%(width)s != -1)
{ {
kW = %(width)s; kW = %(width)s;
} }
...@@ -1669,7 +1650,7 @@ class BaseGpuCorr3dMM(GpuOp): ...@@ -1669,7 +1650,7 @@ class BaseGpuCorr3dMM(GpuOp):
{ {
kW = (CudaNdarray_HOST_DIMS(bottom)[3] + 2*padW - (CudaNdarray_HOST_DIMS(top)[3] - 1) * dW - 1) / dilW + 1; kW = (CudaNdarray_HOST_DIMS(bottom)[3] + 2*padW - (CudaNdarray_HOST_DIMS(top)[3] - 1) * dW - 1) / dilW + 1;
} }
if ((dD != 1) || (padD == -1)) if (%(depth)s != -1)
{ {
kD = %(depth)s; kD = %(depth)s;
} }
...@@ -1681,16 +1662,6 @@ class BaseGpuCorr3dMM(GpuOp): ...@@ -1681,16 +1662,6 @@ class BaseGpuCorr3dMM(GpuOp):
{ {
kD = (CudaNdarray_HOST_DIMS(bottom)[4] + 2*padD - (CudaNdarray_HOST_DIMS(top)[4] - 1) * dD - 1) / dilD+ 1; kD = (CudaNdarray_HOST_DIMS(bottom)[4] + 2*padD - (CudaNdarray_HOST_DIMS(top)[4] - 1) * dD - 1) / dilD+ 1;
} }
if ((%(height)s != -1 && %(height)s != kH) ||
(%(width)s != -1 && %(width)s != kW) ||
(%(depth)s != -1 && %(depth)s != kD))
{
PyErr_Format(PyExc_ValueError,
"BaseGpuCorr3dMM: computed kernel shape %%dx%%dx%%d "
"does not match given shape %%dx%%dx%%d",
kH, kW, kD, %(height)s, %(width)s, %(depth)s);
%(fail)s
}
} }
// Implicit dilated kernel size // Implicit dilated kernel size
...@@ -1763,20 +1734,9 @@ class BaseGpuCorr3dMM(GpuOp): ...@@ -1763,20 +1734,9 @@ class BaseGpuCorr3dMM(GpuOp):
// height, width and depth: bottom = (top - 1) * sample + (weights-1)*dil + 1 - 2*pad // height, width and depth: bottom = (top - 1) * sample + (weights-1)*dil + 1 - 2*pad
out_dim[0] = CudaNdarray_HOST_DIMS(top)[0]; out_dim[0] = CudaNdarray_HOST_DIMS(top)[0];
out_dim[1] = CudaNdarray_HOST_DIMS(weights)[1]; out_dim[1] = CudaNdarray_HOST_DIMS(weights)[1];
out_dim[2] = (dH != 1) ? %(height)s : (CudaNdarray_HOST_DIMS(top)[2] - 1) * dH + (CudaNdarray_HOST_DIMS(weights)[2]-1)*dilH + 1 - 2*padH; out_dim[2] = (%(height)s != -1) ? %(height)s : (CudaNdarray_HOST_DIMS(top)[2] - 1) * dH + (CudaNdarray_HOST_DIMS(weights)[2]-1)*dilH + 1 - 2*padH;
out_dim[3] = (dW != 1) ? %(width)s : (CudaNdarray_HOST_DIMS(top)[3] - 1) * dW + (CudaNdarray_HOST_DIMS(weights)[3]-1)*dilW + 1 - 2*padW; out_dim[3] = (%(width)s != -1) ? %(width)s : (CudaNdarray_HOST_DIMS(top)[3] - 1) * dW + (CudaNdarray_HOST_DIMS(weights)[3]-1)*dilW + 1 - 2*padW;
out_dim[4] = (dD != 1) ? %(depth)s : (CudaNdarray_HOST_DIMS(top)[4] - 1) * dD + (CudaNdarray_HOST_DIMS(weights)[4]-1)*dilD + 1 - 2*padD; out_dim[4] = (%(depth)s != -1) ? %(depth)s : (CudaNdarray_HOST_DIMS(top)[4] - 1) * dD + (CudaNdarray_HOST_DIMS(weights)[4]-1)*dilD + 1 - 2*padD;
if ((%(height)s != -1 && %(height)s != out_dim[2]) ||
(%(width)s != -1 && %(width)s != out_dim[3]) ||
(%(depth)s != -1 && %(depth)s != out_dim[4]))
{
PyErr_Format(PyExc_ValueError,
"BaseGpuCorr3dMM: computed output shape %%dx%%dx%%d "
"does not match given shape %%dx%%dx%%d",
out_dim[2], out_dim[3], out_dim[4],
%(height)s, %(width)s, %(depth)s);
%(fail)s
}
break; break;
default: default:
PyErr_SetString(PyExc_ValueError, "BaseGpuCorr3dMM: direction must be 0, 1, or 2\\n"); PyErr_SetString(PyExc_ValueError, "BaseGpuCorr3dMM: direction must be 0, 1, or 2\\n");
......
...@@ -429,9 +429,17 @@ CudaNdarray* corr3dMM(CudaNdarray *const bottom, ...@@ -429,9 +429,17 @@ CudaNdarray* corr3dMM(CudaNdarray *const bottom,
const int dil_kW = (kW - 1) * dilW + 1; const int dil_kW = (kW - 1) * dilW + 1;
const int dil_kD = (kD - 1) * dilD + 1; const int dil_kD = (kD - 1) * dilD + 1;
// top: (batchSize, nFilters, topHeight, topWidth, topDepth) // top: (batchSize, nFilters, topHeight, topWidth, topDepth)
const int topHeight = int((bottomHeight + 2*padH - dil_kH) / dH) + 1; const int topHeightNoDH = (bottomHeight + 2*padH - dil_kH);
const int topWidth = int((bottomWidth + 2*padW - dil_kW) / dW) + 1; const int topWidthNoDW = (bottomWidth + 2*padW - dil_kW);
const int topDepth = int((bottomDepth + 2*padD - dil_kD) / dD) + 1; const int topDepthNoDD = (bottomDepth + 2*padD - dil_kD);
// the above values might be negative so we need to use Python-like
// flooring integer division to be compatible with get_conv_output.
// note: this macro implements Python's // for negative x only; its arguments
// are parenthesized so that expression arguments expand safely.
#define _CONV_FLOORDIV_X(x,y) (((x) < 0) ? (- ((-(x)) / (y)) - (((-(x)) % (y)) == 0 ? 0 : 1)) : ((x) / (y)))
const int topHeight = _CONV_FLOORDIV_X(topHeightNoDH, dH) + 1;
const int topWidth = _CONV_FLOORDIV_X(topWidthNoDW, dW) + 1;
const int topDepth = _CONV_FLOORDIV_X(topDepthNoDD, dD) + 1;
// undefine under the same name it was defined with, so the helper does not leak
#undef _CONV_FLOORDIV_X
if (batchSize != CudaNdarray_HOST_DIMS(top)[0] || if (batchSize != CudaNdarray_HOST_DIMS(top)[0] ||
nFilters != CudaNdarray_HOST_DIMS(top)[1] || nFilters != CudaNdarray_HOST_DIMS(top)[1] ||
topHeight != CudaNdarray_HOST_DIMS(top)[2] || topHeight != CudaNdarray_HOST_DIMS(top)[2] ||
......
...@@ -333,8 +333,15 @@ CudaNdarray* corrMM(CudaNdarray *const bottom, ...@@ -333,8 +333,15 @@ CudaNdarray* corrMM(CudaNdarray *const bottom,
const int dil_kH = (kH - 1) * dilH + 1; const int dil_kH = (kH - 1) * dilH + 1;
const int dil_kW = (kW - 1) * dilW + 1; const int dil_kW = (kW - 1) * dilW + 1;
// top: (batchSize, nFilters, topHeight, topWidth) // top: (batchSize, nFilters, topHeight, topWidth)
const int topHeight = (bottomHeight + 2*padH - dil_kH) / dH + 1; const int topHeightNoDH = (bottomHeight + 2*padH - dil_kH);
const int topWidth = (bottomWidth + 2*padW - dil_kW) / dW + 1; const int topWidthNoDW = (bottomWidth + 2*padW - dil_kW);
// the above values might be negative so we need to use Python-like
// flooring integer division to be compatible with get_conv_output.
// note: this macro implements Python's // for negative x only; its arguments
// are parenthesized so that expression arguments expand safely.
#define _CONV_FLOORDIV_X(x,y) (((x) < 0) ? (- ((-(x)) / (y)) - (((-(x)) % (y)) == 0 ? 0 : 1)) : ((x) / (y)))
const int topHeight = _CONV_FLOORDIV_X(topHeightNoDH, dH) + 1;
const int topWidth = _CONV_FLOORDIV_X(topWidthNoDW, dW) + 1;
// undefine under the same name it was defined with, so the helper does not leak
#undef _CONV_FLOORDIV_X
if (batchSize != CudaNdarray_HOST_DIMS(top)[0] || if (batchSize != CudaNdarray_HOST_DIMS(top)[0] ||
nFilters != CudaNdarray_HOST_DIMS(top)[1] || nFilters != CudaNdarray_HOST_DIMS(top)[1] ||
topHeight != CudaNdarray_HOST_DIMS(top)[2] || topHeight != CudaNdarray_HOST_DIMS(top)[2] ||
......
...@@ -123,7 +123,7 @@ class BaseCorrMM(gof.OpenMPOp): ...@@ -123,7 +123,7 @@ class BaseCorrMM(gof.OpenMPOp):
def c_code_cache_version(self): def c_code_cache_version(self):
# raise this whenever modifying any of the support_code_files # raise this whenever modifying any of the support_code_files
return (2, self.openmp, blas_header_version()) return (3, self.openmp, blas_header_version())
def c_support_code_apply(self, node, nodename): def c_support_code_apply(self, node, nodename):
# REMEMBER TO RAISE c_code_cache_version when changing any of # REMEMBER TO RAISE c_code_cache_version when changing any of
...@@ -275,8 +275,8 @@ class BaseCorrMM(gof.OpenMPOp): ...@@ -275,8 +275,8 @@ class BaseCorrMM(gof.OpenMPOp):
kW = PyArray_DIMS(weights)[3]; kW = PyArray_DIMS(weights)[3];
} }
else { else {
if ((dH != 1) || (padH == -1)) { if (%(height)s != -1) {
// vertical subsampling or half padding, kernel height is specified // kernel height is specified (perhaps vertical subsampling or half padding)
kH = %(height)s; kH = %(height)s;
} }
else if (padH == -2) { else if (padH == -2) {
...@@ -287,7 +287,8 @@ class BaseCorrMM(gof.OpenMPOp): ...@@ -287,7 +287,8 @@ class BaseCorrMM(gof.OpenMPOp):
// explicit padding, we can infer the kernel height // explicit padding, we can infer the kernel height
kH = (PyArray_DIMS(bottom)[2] + 2*padH - (PyArray_DIMS(top)[2] - 1) * dH - 1) / dilH +1; kH = (PyArray_DIMS(bottom)[2] + 2*padH - (PyArray_DIMS(top)[2] - 1) * dH - 1) / dilH +1;
} }
if ((dW != 1) || (padW == -1)) { if (%(width)s != -1) {
// kernel width is specified (perhaps horizontal subsampling or half padding)
kW = %(width)s; kW = %(width)s;
} }
else if (padW == -2) { else if (padW == -2) {
...@@ -296,15 +297,6 @@ class BaseCorrMM(gof.OpenMPOp): ...@@ -296,15 +297,6 @@ class BaseCorrMM(gof.OpenMPOp):
else { else {
kW = (PyArray_DIMS(bottom)[3] + 2*padW - (PyArray_DIMS(top)[3] - 1) * dW - 1) / dilW + 1; kW = (PyArray_DIMS(bottom)[3] + 2*padW - (PyArray_DIMS(top)[3] - 1) * dW - 1) / dilW + 1;
} }
if ((%(height)s != -1 && %(height)s != kH) ||
(%(width)s != -1 && %(width)s != kW))
{
PyErr_Format(PyExc_ValueError,
"BaseCorrMM: computed kernel shape %%lldx%%lld "
"does not match given shape %%lldx%%lld",
(long long)kH, (long long)kW, (long long)%(height)s, (long long)%(width)s);
%(fail)s
}
} }
// Implicit dilated kernel size // Implicit dilated kernel size
...@@ -357,18 +349,8 @@ class BaseCorrMM(gof.OpenMPOp): ...@@ -357,18 +349,8 @@ class BaseCorrMM(gof.OpenMPOp):
// height and width: bottom = (top - 1) * sample + (weights-1)*dil + 1 - 2*pad // height and width: bottom = (top - 1) * sample + (weights-1)*dil + 1 - 2*pad
out_dim[0] = (npy_intp)PyArray_DIMS(top)[0]; out_dim[0] = (npy_intp)PyArray_DIMS(top)[0];
out_dim[1] = (npy_intp)PyArray_DIMS(weights)[1]; out_dim[1] = (npy_intp)PyArray_DIMS(weights)[1];
out_dim[2] = (npy_intp)((dH != 1) ? %(height)s : (PyArray_DIMS(top)[2] - 1) * dH + (PyArray_DIMS(weights)[2]-1)*dilH + 1 - 2*padH); out_dim[2] = (npy_intp)((%(height)s != -1) ? %(height)s : (PyArray_DIMS(top)[2] - 1) * dH + (PyArray_DIMS(weights)[2]-1)*dilH + 1 - 2*padH);
out_dim[3] = (npy_intp)((dW != 1) ? %(width)s : (PyArray_DIMS(top)[3] - 1) * dW + (PyArray_DIMS(weights)[3]-1)*dilW + 1 - 2*padW); out_dim[3] = (npy_intp)((%(width)s != -1) ? %(width)s : (PyArray_DIMS(top)[3] - 1) * dW + (PyArray_DIMS(weights)[3]-1)*dilW + 1 - 2*padW);
if ((%(height)s != -1 && %(height)s != out_dim[2]) ||
(%(width)s != -1 && %(width)s != out_dim[3]))
{
PyErr_Format(PyExc_ValueError,
"BaseCorrMM: computed output shape %%lldx%%lld "
"does not match given shape %%lldx%%lld",
(long long)out_dim[2], (long long)out_dim[3],
(long long)%(height)s, (long long)%(width)s);
%(fail)s
}
break; break;
default: default:
PyErr_SetString(PyExc_ValueError, "BaseCorrMM: direction must be 0, 1, or 2\\n"); PyErr_SetString(PyExc_ValueError, "BaseCorrMM: direction must be 0, 1, or 2\\n");
......
...@@ -123,7 +123,7 @@ class BaseCorr3dMM(gof.OpenMPOp): ...@@ -123,7 +123,7 @@ class BaseCorr3dMM(gof.OpenMPOp):
def c_code_cache_version(self): def c_code_cache_version(self):
# raise this whenever modifying any of the support_code_files # raise this whenever modifying any of the support_code_files
return (2, self.openmp, blas_header_version()) return (3, self.openmp, blas_header_version())
def c_support_code_apply(self, node, nodename): def c_support_code_apply(self, node, nodename):
# REMEMBER TO RAISE c_code_cache_version when changing any of # REMEMBER TO RAISE c_code_cache_version when changing any of
...@@ -292,8 +292,8 @@ class BaseCorr3dMM(gof.OpenMPOp): ...@@ -292,8 +292,8 @@ class BaseCorr3dMM(gof.OpenMPOp):
kD = PyArray_DIMS(weights)[4]; kD = PyArray_DIMS(weights)[4];
} }
else { else {
if ((dH != 1) || (padH == -1)) { if (%(height)s != -1) {
// vertical subsampling or half padding, kernel height is specified // kernel height is specified (perhaps vertical subsampling or half padding)
kH = %(height)s; kH = %(height)s;
} }
else if (padH == -2) { else if (padH == -2) {
...@@ -304,7 +304,7 @@ class BaseCorr3dMM(gof.OpenMPOp): ...@@ -304,7 +304,7 @@ class BaseCorr3dMM(gof.OpenMPOp):
// explicit padding, we can infer the kernel height // explicit padding, we can infer the kernel height
kH = (PyArray_DIMS(bottom)[2] + 2*padH - (PyArray_DIMS(top)[2] - 1) * dH - 1) / dilH +1; kH = (PyArray_DIMS(bottom)[2] + 2*padH - (PyArray_DIMS(top)[2] - 1) * dH - 1) / dilH +1;
} }
if ((dW != 1) || (padW == -1)) { if (%(width)s != -1) {
kW = %(width)s; kW = %(width)s;
} }
else if (padW == -2) { else if (padW == -2) {
...@@ -313,7 +313,7 @@ class BaseCorr3dMM(gof.OpenMPOp): ...@@ -313,7 +313,7 @@ class BaseCorr3dMM(gof.OpenMPOp):
else { else {
kW = (PyArray_DIMS(bottom)[3] + 2*padW - (PyArray_DIMS(top)[3] - 1) * dW - 1) / dilW + 1; kW = (PyArray_DIMS(bottom)[3] + 2*padW - (PyArray_DIMS(top)[3] - 1) * dW - 1) / dilW + 1;
} }
if ((dD != 1) || (padD == -1)) { if (%(depth)s != -1) {
kD = %(depth)s; kD = %(depth)s;
} }
else if (padD == -2) { else if (padD == -2) {
...@@ -322,17 +322,6 @@ class BaseCorr3dMM(gof.OpenMPOp): ...@@ -322,17 +322,6 @@ class BaseCorr3dMM(gof.OpenMPOp):
else { else {
kD = (PyArray_DIMS(bottom)[4] + 2*padD - (PyArray_DIMS(top)[4] - 1) * dD - 1) / dilD + 1; kD = (PyArray_DIMS(bottom)[4] + 2*padD - (PyArray_DIMS(top)[4] - 1) * dD - 1) / dilD + 1;
} }
if ((%(height)s != -1 && %(height)s != kH) ||
(%(width)s != -1 && %(width)s != kW) ||
(%(depth)s != -1 && %(depth)s != kD))
{
PyErr_Format(PyExc_ValueError,
"BaseCorr3dMM: computed kernel shape %%lldx%%lldx%%lld "
"does not match given shape %%lldx%%lldx%%lld",
(long long)kH, (long long)kW, (long long)kD,
(long long)%(height)s, (long long)%(width)s, (long long)%(depth)s);
%(fail)s
}
} }
// Implicit dilated kernel size // Implicit dilated kernel size
...@@ -398,20 +387,9 @@ class BaseCorr3dMM(gof.OpenMPOp): ...@@ -398,20 +387,9 @@ class BaseCorr3dMM(gof.OpenMPOp):
// height, width and depth: bottom = (top - 1) * sample + (weights-1)*dil + 1 - 2*pad // height, width and depth: bottom = (top - 1) * sample + (weights-1)*dil + 1 - 2*pad
out_dim[0] = (npy_intp)PyArray_DIMS(top)[0]; out_dim[0] = (npy_intp)PyArray_DIMS(top)[0];
out_dim[1] = (npy_intp)PyArray_DIMS(weights)[1]; out_dim[1] = (npy_intp)PyArray_DIMS(weights)[1];
out_dim[2] = (npy_intp)((dH != 1) ? %(height)s : (PyArray_DIMS(top)[2] - 1) * dH + (PyArray_DIMS(weights)[2]-1)*dilH + 1 - 2*padH); out_dim[2] = (npy_intp)((%(height)s != -1) ? %(height)s : (PyArray_DIMS(top)[2] - 1) * dH + (PyArray_DIMS(weights)[2]-1)*dilH + 1 - 2*padH);
out_dim[3] = (npy_intp)((dW != 1) ? %(width)s : (PyArray_DIMS(top)[3] - 1) * dW + (PyArray_DIMS(weights)[3]-1)*dilW + 1 - 2*padW); out_dim[3] = (npy_intp)((%(width)s != -1) ? %(width)s : (PyArray_DIMS(top)[3] - 1) * dW + (PyArray_DIMS(weights)[3]-1)*dilW + 1 - 2*padW);
out_dim[4] = (npy_intp)((dD != 1) ? %(depth)s : (PyArray_DIMS(top)[4] - 1) * dD + (PyArray_DIMS(weights)[4]-1)*dilD + 1 - 2*padD); out_dim[4] = (npy_intp)((%(depth)s != -1) ? %(depth)s : (PyArray_DIMS(top)[4] - 1) * dD + (PyArray_DIMS(weights)[4]-1)*dilD + 1 - 2*padD);
if ((%(height)s != -1 && %(height)s != out_dim[2]) ||
(%(width)s != -1 && %(width)s != out_dim[3]) ||
(%(depth)s != -1 && %(depth)s != out_dim[4]))
{
PyErr_Format(PyExc_ValueError,
"BaseCorr3dMM: computed output shape %%lldx%%lldx%%lld "
"does not match given shape %%lldx%%lldx%%lld",
(long long)out_dim[2], (long long)out_dim[3], (long long)out_dim[4],
(long long)%(height)s, (long long)%(width)s, (long long)%(depth)s);
%(fail)s
}
break; break;
default: default:
PyErr_SetString(PyExc_ValueError, "BaseCorr3dMM: direction must be 0, 1, or 2\\n"); PyErr_SetString(PyExc_ValueError, "BaseCorr3dMM: direction must be 0, 1, or 2\\n");
......
...@@ -188,9 +188,17 @@ PyArrayObject* corr3dMM(PyArrayObject* bottom, ...@@ -188,9 +188,17 @@ PyArrayObject* corr3dMM(PyArrayObject* bottom,
const int dil_kW = (kW - 1) * dilW + 1; const int dil_kW = (kW - 1) * dilW + 1;
const int dil_kD = (kD - 1) * dilD + 1; const int dil_kD = (kD - 1) * dilD + 1;
// top: (batchSize, nFilters, topHeight, topWidth, topDepth) // top: (batchSize, nFilters, topHeight, topWidth, topDepth)
const int topHeight = (bottomHeight + 2*padH - dil_kH) / dH + 1; const int topHeightNoDH = (bottomHeight + 2*padH - dil_kH);
const int topWidth = (bottomWidth + 2*padW - dil_kW) / dW + 1; const int topWidthNoDW = (bottomWidth + 2*padW - dil_kW);
const int topDepth = (bottomDepth + 2*padD - dil_kD) / dD + 1; const int topDepthNoDD = (bottomDepth + 2*padD - dil_kD);
// the above values might be negative so we need to use Python-like
// flooring integer division to be compatible with get_conv_output.
// note: this macro implements Python's // for negative x only; its arguments
// are parenthesized so that expression arguments expand safely.
// NOTE(review): the doubled percent sign is kept exactly as in the original;
// presumably this source is passed through %-style formatting before it is
// compiled -- confirm before changing it.
#define _CONV_FLOORDIV_X(x,y) (((x) < 0) ? (- ((-(x)) / (y)) - (((-(x)) %% (y)) == 0 ? 0 : 1)) : ((x) / (y)))
const int topHeight = _CONV_FLOORDIV_X(topHeightNoDH, dH) + 1;
const int topWidth = _CONV_FLOORDIV_X(topWidthNoDW, dW) + 1;
const int topDepth = _CONV_FLOORDIV_X(topDepthNoDD, dD) + 1;
// undefine under the same name it was defined with, so the helper does not leak
#undef _CONV_FLOORDIV_X
if (batchSize != PyArray_DIMS(top)[0] || if (batchSize != PyArray_DIMS(top)[0] ||
nFilters != PyArray_DIMS(top)[1] || nFilters != PyArray_DIMS(top)[1] ||
topHeight != PyArray_DIMS(top)[2] || topHeight != PyArray_DIMS(top)[2] ||
......
...@@ -164,8 +164,15 @@ PyArrayObject* corrMM(PyArrayObject* bottom, ...@@ -164,8 +164,15 @@ PyArrayObject* corrMM(PyArrayObject* bottom,
const int dil_kH = (kH - 1) * dilH + 1; const int dil_kH = (kH - 1) * dilH + 1;
const int dil_kW = (kW - 1) * dilW + 1; const int dil_kW = (kW - 1) * dilW + 1;
// top: (batchSize, nFilters, topHeight, topWidth) // top: (batchSize, nFilters, topHeight, topWidth)
const int topHeight = (bottomHeight + 2*padH - dil_kH) / dH + 1; const int topHeightNoDH = (bottomHeight + 2*padH - dil_kH);
const int topWidth = (bottomWidth + 2*padW - dil_kW) / dW + 1; const int topWidthNoDW = (bottomWidth + 2*padW - dil_kW);
// the above values might be negative so we need to use Python-like
// flooring integer division to be compatible with get_conv_output.
// note: this macro implements Python's // for negative x only; its arguments
// are parenthesized so that expression arguments expand safely.
// NOTE(review): the doubled percent sign is kept exactly as in the original;
// presumably this source is passed through %-style formatting before it is
// compiled -- confirm before changing it.
#define _CONV_FLOORDIV_X(x,y) (((x) < 0) ? (- ((-(x)) / (y)) - (((-(x)) %% (y)) == 0 ? 0 : 1)) : ((x) / (y)))
const int topHeight = _CONV_FLOORDIV_X(topHeightNoDH, dH) + 1;
const int topWidth = _CONV_FLOORDIV_X(topWidthNoDW, dW) + 1;
// undefine under the same name it was defined with, so the helper does not leak
#undef _CONV_FLOORDIV_X
if (batchSize != PyArray_DIMS(top)[0] || if (batchSize != PyArray_DIMS(top)[0] ||
nFilters != PyArray_DIMS(top)[1] || nFilters != PyArray_DIMS(top)[1] ||
topHeight != PyArray_DIMS(top)[2] || topHeight != PyArray_DIMS(top)[2] ||
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论