提交 c072d669 authored 作者: Frédéric Bastien's avatar Frédéric Bastien 提交者: GitHub

Merge pull request #5267 from gvtulder/f-abstractconv-differences

Minor inconsistency in AbstractConv_gradInput implementations
...@@ -528,7 +528,7 @@ class BaseGpuCorrMM(CGpuKernelBase): ...@@ -528,7 +528,7 @@ class BaseGpuCorrMM(CGpuKernelBase):
def c_code_cache_version(self): def c_code_cache_version(self):
# Raise this whenever modifying the code below. # Raise this whenever modifying the code below.
return (2,) return (7,)
def c_code_helper(self, bottom, weights, top, direction, sub, height=None, width=None): def c_code_helper(self, bottom, weights, top, direction, sub, height=None, width=None):
""" """
...@@ -558,19 +558,19 @@ class BaseGpuCorrMM(CGpuKernelBase): ...@@ -558,19 +558,19 @@ class BaseGpuCorrMM(CGpuKernelBase):
sub sub
Dictionary of substitutions useable to help generating the C code. Dictionary of substitutions useable to help generating the C code.
height height
If self.subsample[0] != 1, a variable giving the height of the Required if self.subsample[0] != 1, a variable giving the height of
filters for direction="backprop weights" or the height of the input the filters for direction="backprop weights" or the height of the
images for direction="backprop inputs". input images for direction="backprop inputs".
If self.border_mode == 'half', a variable giving the height of the Required if self.border_mode == 'half', a variable giving the height
filters for direction="backprop weights". of the filters for direction="backprop weights".
Ignored otherwise. Not required otherwise, but if a value is given this will be checked.
width width
If self.subsample[1] != 1, a variable giving the width of the Required if self.subsample[1] != 1, a variable giving the width of
filters for direction="backprop weights" or the width of the the filters for direction="backprop weights" or the width of the
input images for direction="backprop inputs". input images for direction="backprop inputs".
If self.border_mode == 'half', a variable giving the width of the Required if self.border_mode == 'half', a variable giving the width
filters for direction="backprop weights". of the filters for direction="backprop weights".
Ignored otherwise. Not required otherwise, but if a value is given this will be checked.
""" """
dH, dW = self.subsample dH, dW = self.subsample
...@@ -599,18 +599,18 @@ class BaseGpuCorrMM(CGpuKernelBase): ...@@ -599,18 +599,18 @@ class BaseGpuCorrMM(CGpuKernelBase):
# When subsampling, we cannot unambiguously infer the height and width # When subsampling, we cannot unambiguously infer the height and width
# of bottom and weights from top, so we require them to be given. # of bottom and weights from top, so we require them to be given.
# Similarly, when pad="half", we cannot infer the weight size. # Similarly, when pad="half", we cannot infer the weight size.
if height:
height = '(*(npy_int*)(PyArray_DATA(%s)))' % height
else:
if ((direction != 0) and (dH != 1)) or ((direction == 1) and (padH == -1)): if ((direction != 0) and (dH != 1)) or ((direction == 1) and (padH == -1)):
if not height:
raise ValueError("height must be given for backprop with vertical sampling or pad='half'") raise ValueError("height must be given for backprop with vertical sampling or pad='half'")
height = '(*(npy_int*)(PyArray_DATA(%s)))' % height height = '-1'
if width:
width = '(*(npy_int*)(PyArray_DATA(%s)))' % width
else: else:
height = '0'
if ((direction != 0) and (dW != 1)) or ((direction == 1) and (padW == -1)): if ((direction != 0) and (dW != 1)) or ((direction == 1) and (padW == -1)):
if not width:
raise ValueError("width must be given for backprop with horizontal sampling or pad='half'") raise ValueError("width must be given for backprop with horizontal sampling or pad='half'")
width = '(*(npy_int*)(PyArray_DATA(%s)))' % width width = '-1'
else:
width = '0'
sync = "" sync = ""
if config.gpuarray.sync: if config.gpuarray.sync:
sync = """ sync = """
...@@ -643,15 +643,15 @@ class BaseGpuCorrMM(CGpuKernelBase): ...@@ -643,15 +643,15 @@ class BaseGpuCorrMM(CGpuKernelBase):
// Obtain or infer kernel width and height // Obtain or infer kernel width and height
// (we need to know it early to be able to handle auto-padding) // (we need to know it early to be able to handle auto-padding)
size_t kH, kW; size_t kH, kW, dil_kH, dil_kW;
if (direction != 1) { if (direction != 1) {
// weight is an input variable, we can just read its shape // weight is an input variable, we can just read its shape
kH = PyGpuArray_DIMS(weights)[2]; kH = PyGpuArray_DIMS(weights)[2];
kW = PyGpuArray_DIMS(weights)[3]; kW = PyGpuArray_DIMS(weights)[3];
} }
else { else {
if ((dH != 1) || (padH == -1)) { if (%(height)s != -1) {
// vertical subsampling or half padding, kernel height is specified // kernel height is specified (perhaps vertical subsampling or half padding)
kH = %(height)s; kH = %(height)s;
} }
else if (padH == -2) { else if (padH == -2) {
...@@ -662,7 +662,7 @@ class BaseGpuCorrMM(CGpuKernelBase): ...@@ -662,7 +662,7 @@ class BaseGpuCorrMM(CGpuKernelBase):
// explicit padding, we can infer the kernel height // explicit padding, we can infer the kernel height
kH = (PyGpuArray_DIMS(bottom)[2] + 2*padH - (PyGpuArray_DIMS(top)[2] - 1) * dH - 1) / dilH + 1 ; kH = (PyGpuArray_DIMS(bottom)[2] + 2*padH - (PyGpuArray_DIMS(top)[2] - 1) * dH - 1) / dilH + 1 ;
} }
if ((dW != 1) || (padW == -1)) { if (%(width)s != -1) {
kW = %(width)s; kW = %(width)s;
} }
else if (padW == -2) { else if (padW == -2) {
...@@ -674,8 +674,8 @@ class BaseGpuCorrMM(CGpuKernelBase): ...@@ -674,8 +674,8 @@ class BaseGpuCorrMM(CGpuKernelBase):
} }
// Implicit dilated kernel size // Implicit dilated kernel size
size_t dil_kH = (kH - 1) * dilH + 1; dil_kH = (kH - 1) * dilH + 1;
size_t dil_kW = (kW - 1) * dilW + 1; dil_kW = (kW - 1) * dilW + 1;
// Auto-padding if requested // Auto-padding if requested
if (padH == -1) { // vertical half padding if (padH == -1) { // vertical half padding
...@@ -700,7 +700,9 @@ class BaseGpuCorrMM(CGpuKernelBase): ...@@ -700,7 +700,9 @@ class BaseGpuCorrMM(CGpuKernelBase):
} }
// Infer output shape and type // Infer output shape and type
size_t out_dim[4]; // The inferred shape can be negative.
long long out_dim[4];
size_t out_dim_size[4];
int out_typecode; int out_typecode;
PyGpuContextObject *out_context; PyGpuContextObject *out_context;
switch(direction) { switch(direction) {
...@@ -713,6 +715,20 @@ class BaseGpuCorrMM(CGpuKernelBase): ...@@ -713,6 +715,20 @@ class BaseGpuCorrMM(CGpuKernelBase):
out_dim[3] = (PyGpuArray_DIMS(bottom)[3] + 2*padW - ((PyGpuArray_DIMS(weights)[3]-1)*dilW + 1)) / dW + 1; out_dim[3] = (PyGpuArray_DIMS(bottom)[3] + 2*padW - ((PyGpuArray_DIMS(weights)[3]-1)*dilW + 1)) / dW + 1;
out_typecode = bottom->ga.typecode; out_typecode = bottom->ga.typecode;
out_context = bottom->context; out_context = bottom->context;
if (out_dim[0] < 0 || out_dim[1] < 0 || out_dim[2] <= 0 || out_dim[3] <= 0)
{
PyErr_Format(PyExc_ValueError,
"GpuCorrMM: impossible output shape\\n"
" bottom shape: %%ld x %%ld x %%ld x %%ld\\n"
" weights shape: %%ld x %%ld x %%ld x %%ld\\n"
" top shape: %%ld x %%ld x %%ld x %%ld\\n",
PyGpuArray_DIMS(bottom)[0], PyGpuArray_DIMS(bottom)[1],
PyGpuArray_DIMS(bottom)[2], PyGpuArray_DIMS(bottom)[3],
PyGpuArray_DIMS(weights)[0], PyGpuArray_DIMS(weights)[1],
PyGpuArray_DIMS(weights)[2], PyGpuArray_DIMS(weights)[3],
out_dim[0], out_dim[1], out_dim[2], out_dim[3]);
%(fail)s
}
break; break;
case 1: // backprop wrt. weights case 1: // backprop wrt. weights
// output is weights: (num_filters, num_channels, height, width) // output is weights: (num_filters, num_channels, height, width)
...@@ -723,27 +739,60 @@ class BaseGpuCorrMM(CGpuKernelBase): ...@@ -723,27 +739,60 @@ class BaseGpuCorrMM(CGpuKernelBase):
out_dim[3] = kW; // how convenient out_dim[3] = kW; // how convenient
out_typecode = top->ga.typecode; out_typecode = top->ga.typecode;
out_context = top->context; out_context = top->context;
if (out_dim[0] < 0 || out_dim[1] < 0 || out_dim[2] <= 0 || out_dim[3] <= 0)
{
PyErr_Format(PyExc_ValueError,
"GpuCorrMM backprop wrt. weights: impossible output shape\\n"
" bottom shape: %%ld x %%ld x %%ld x %%ld\\n"
" weights shape: %%ld x %%ld x %%ld x %%ld\\n"
" top shape: %%ld x %%ld x %%ld x %%ld\\n",
PyGpuArray_DIMS(bottom)[0], PyGpuArray_DIMS(bottom)[1],
PyGpuArray_DIMS(bottom)[2], PyGpuArray_DIMS(bottom)[3],
out_dim[0], out_dim[1], out_dim[2], out_dim[3],
PyGpuArray_DIMS(top)[0], PyGpuArray_DIMS(top)[1],
PyGpuArray_DIMS(top)[2], PyGpuArray_DIMS(top)[3]);
%(fail)s
}
break; break;
case 2: // backprop wrt. inputs case 2: // backprop wrt. inputs
// output is bottom: (batchsize, num_channels, height, width) // output is bottom: (batchsize, num_channels, height, width)
// height and width: bottom = (top - 1) * sample + (weights-1)*dil + 1 - 2*pad // height and width: bottom = (top - 1) * sample + (weights-1)*dil + 1 - 2*pad
out_dim[0] = PyGpuArray_DIMS(top)[0]; out_dim[0] = PyGpuArray_DIMS(top)[0];
out_dim[1] = PyGpuArray_DIMS(weights)[1]; out_dim[1] = PyGpuArray_DIMS(weights)[1];
out_dim[2] = (dH != 1) ? %(height)s : (PyGpuArray_DIMS(top)[2] - 1) * dH + (PyGpuArray_DIMS(weights)[2]-1)*dilH + 1 - 2*padH; out_dim[2] = (%(height)s != -1) ? %(height)s : (PyGpuArray_DIMS(top)[2] - 1) * dH + (PyGpuArray_DIMS(weights)[2]-1)*dilH + 1 - 2*padH;
out_dim[3] = (dW != 1) ? %(width)s : (PyGpuArray_DIMS(top)[3] - 1) * dW + (PyGpuArray_DIMS(weights)[3]-1)*dilW + 1 - 2*padW; out_dim[3] = (%(width)s != -1) ? %(width)s : (PyGpuArray_DIMS(top)[3] - 1) * dW + (PyGpuArray_DIMS(weights)[3]-1)*dilW + 1 - 2*padW;
out_typecode = top->ga.typecode; out_typecode = top->ga.typecode;
out_context = top->context; out_context = top->context;
if (out_dim[0] < 0 || out_dim[1] < 0 || out_dim[2] <= 0 || out_dim[3] <= 0)
{
PyErr_Format(PyExc_ValueError,
"GpuCorrMM backprop wrt. inputs: impossible output shape\\n"
" bottom shape: %%ld x %%ld x %%ld x %%ld\\n"
" weight shape: %%ld x %%ld x %%ld x %%ld\\n"
" top shape: %%ld x %%ld x %%ld x %%ld\\n",
out_dim[0], out_dim[1], out_dim[2], out_dim[3],
PyGpuArray_DIMS(weights)[0], PyGpuArray_DIMS(weights)[1],
PyGpuArray_DIMS(weights)[2], PyGpuArray_DIMS(weights)[3],
PyGpuArray_DIMS(top)[0], PyGpuArray_DIMS(top)[1],
PyGpuArray_DIMS(top)[2], PyGpuArray_DIMS(top)[3]);
%(fail)s
}
break; break;
default: default:
PyErr_SetString(PyExc_ValueError, "BaseGpuCorrMM: direction must be 0, 1, or 2\\n"); PyErr_SetString(PyExc_ValueError, "BaseGpuCorrMM: direction must be 0, 1, or 2\\n");
%(fail)s %(fail)s
} }
out_dim_size[0] = (size_t)out_dim[0];
out_dim_size[1] = (size_t)out_dim[1];
out_dim_size[2] = (size_t)out_dim[2];
out_dim_size[3] = (size_t)out_dim[3];
// Prepare output array // Prepare output array
if (theano_prep_output(&%(out)s, 4, out_dim, out_typecode, GA_C_ORDER, out_context) != 0) if (theano_prep_output(&%(out)s, 4, out_dim_size, out_typecode, GA_C_ORDER, out_context) != 0)
{ {
PyErr_Format(PyExc_RuntimeError, PyErr_Format(PyExc_RuntimeError,
"BaseGpuCorrMM: Failed to allocate output of %%ld x %%ld x %%ld x %%ld", "BaseGpuCorrMM: Failed to allocate output of %%lld x %%lld x %%lld x %%lld",
out_dim[0], out_dim[1], out_dim[2], out_dim[3]); out_dim[0], out_dim[1], out_dim[2], out_dim[3]);
%(fail)s %(fail)s
} }
...@@ -875,15 +924,15 @@ class GpuCorrMM_gradWeights(BaseGpuCorrMM): ...@@ -875,15 +924,15 @@ class GpuCorrMM_gradWeights(BaseGpuCorrMM):
raise TypeError('img must be 4D tensor') raise TypeError('img must be 4D tensor')
if topgrad.type.ndim != 4: if topgrad.type.ndim != 4:
raise TypeError('topgrad must be 4D tensor') raise TypeError('topgrad must be 4D tensor')
if self.subsample != (1, 1) or self.border_mode == "half":
if shape is None: if shape is None:
if self.subsample != (1, 1) or self.border_mode == "half":
raise ValueError('shape must be given if subsample != (1, 1)' raise ValueError('shape must be given if subsample != (1, 1)'
' or border_mode == "half"') ' or border_mode == "half"')
height_width = []
else:
height_width = [shape[0], shape[1]] height_width = [shape[0], shape[1]]
assert shape[0].ndim == 0 assert shape[0].ndim == 0
assert shape[1].ndim == 0 assert shape[1].ndim == 0
else:
height_width = []
broadcastable = [topgrad.type.broadcastable[1], img.type.broadcastable[1], broadcastable = [topgrad.type.broadcastable[1], img.type.broadcastable[1],
False, False] False, False]
...@@ -946,10 +995,12 @@ class GpuCorrMM_gradInputs(BaseGpuCorrMM): ...@@ -946,10 +995,12 @@ class GpuCorrMM_gradInputs(BaseGpuCorrMM):
raise TypeError('kern must be 4D tensor') raise TypeError('kern must be 4D tensor')
if topgrad.type.ndim != 4: if topgrad.type.ndim != 4:
raise TypeError('topgrad must be 4D tensor') raise TypeError('topgrad must be 4D tensor')
if self.subsample != (1, 1) and shape is None: if shape is None:
if self.subsample != (1, 1):
raise ValueError('shape must be given if subsample != (1, 1)') raise ValueError('shape must be given if subsample != (1, 1)')
height_width = [shape[0], shape[1]] if self.subsample != (1, 1) else [] height_width = []
if height_width: else:
height_width = [shape[0], shape[1]]
assert shape[0].ndim == 0 assert shape[0].ndim == 0
assert shape[1].ndim == 0 assert shape[1].ndim == 0
...@@ -1074,7 +1125,7 @@ class BaseGpuCorr3dMM(CGpuKernelBase): ...@@ -1074,7 +1125,7 @@ class BaseGpuCorr3dMM(CGpuKernelBase):
def c_code_cache_version(self): def c_code_cache_version(self):
# raise this whenever modifying the code below. # raise this whenever modifying the code below.
return (2,) return (7,)
def c_code_helper(self, bottom, weights, top, direction, sub, def c_code_helper(self, bottom, weights, top, direction, sub,
height=None, width=None, depth=None): height=None, width=None, depth=None):
...@@ -1105,26 +1156,26 @@ class BaseGpuCorr3dMM(CGpuKernelBase): ...@@ -1105,26 +1156,26 @@ class BaseGpuCorr3dMM(CGpuKernelBase):
sub sub
Dictionary of substitutions useable to help generating the C code. Dictionary of substitutions useable to help generating the C code.
height height
If self.subsample[0] != 1, a variable giving the height of the Required if self.subsample[0] != 1, a variable giving the height of
filters for direction="backprop weights" or the height of the input the filters for direction="backprop weights" or the height of the
images for direction="backprop inputs". input images for direction="backprop inputs".
If self.border_mode == 'half', a variable giving the height of the Required if self.border_mode == 'half', a variable giving the height
filters for direction="backprop weights". of the filters for direction="backprop weights".
Ignored otherwise. Not required otherwise, but if a value is given this will be checked.
width width
If self.subsample[1] != 1, a variable giving the width of the Required if self.subsample[1] != 1, a variable giving the width of
filters for direction="backprop weights" or the width of the the filters for direction="backprop weights" or the width of the
input images for direction="backprop inputs". input images for direction="backprop inputs".
If self.border_mode == 'half', a variable giving the width of the Required if self.border_mode == 'half', a variable giving the width
filters for direction="backprop weights". of the filters for direction="backprop weights".
Ignored otherwise. Not required otherwise, but if a value is given this will be checked.
depth depth
If self.subsample[2] != 1, a variable giving the depth of the Required if self.subsample[2] != 1, a variable giving the depth of
filters for direction="backprop weights" or the depth of the the filters for direction="backprop weights" or the depth of the
input images for direction="backprop inputs". input images for direction="backprop inputs".
If self.border_mode == 'half', a variable giving the depth of the Required if self.border_mode == 'half', a variable giving the depth
filters for direction="backprop weights". of the filters for direction="backprop weights".
Ignored otherwise. Not required otherwise, but if a value is given this will be checked.
""" """
dH, dW, dD = self.subsample dH, dW, dD = self.subsample
...@@ -1153,24 +1204,24 @@ class BaseGpuCorr3dMM(CGpuKernelBase): ...@@ -1153,24 +1204,24 @@ class BaseGpuCorr3dMM(CGpuKernelBase):
# When subsampling, we cannot unambiguously infer the height and width # When subsampling, we cannot unambiguously infer the height and width
# of bottom and weights from top, so we require them to be given. # of bottom and weights from top, so we require them to be given.
# Similarly, when pad="half", we cannot infer the weight size. # Similarly, when pad="half", we cannot infer the weight size.
if height:
height = '(*(npy_int*)(PyArray_DATA(%s)))' % height
else:
if ((direction != 0) and (dH != 1)) or ((direction == 1) and (padH == -1)): if ((direction != 0) and (dH != 1)) or ((direction == 1) and (padH == -1)):
if not height:
raise ValueError("height must be given for backprop with vertical sampling or pad='half'") raise ValueError("height must be given for backprop with vertical sampling or pad='half'")
height = '(*(npy_int*)(PyArray_DATA(%s)))' % height height = '-1'
if width:
width = '(*(npy_int*)(PyArray_DATA(%s)))' % width
else: else:
height = '0'
if ((direction != 0) and (dW != 1)) or ((direction == 1) and (padW == -1)): if ((direction != 0) and (dW != 1)) or ((direction == 1) and (padW == -1)):
if not width:
raise ValueError("width must be given for backprop with horizontal sampling or pad='half'") raise ValueError("width must be given for backprop with horizontal sampling or pad='half'")
width = '(*(npy_int*)(PyArray_DATA(%s)))' % width width = '-1'
if depth:
depth = '(*(npy_int*)(PyArray_DATA(%s)))' % depth
else: else:
width = '0'
if ((direction != 0) and (dD != 1)) or ((direction == 1) and (padD == -1)): if ((direction != 0) and (dD != 1)) or ((direction == 1) and (padD == -1)):
if not depth:
raise ValueError("depth must be given for backprop with horizontal sampling or pad='half'") raise ValueError("depth must be given for backprop with horizontal sampling or pad='half'")
depth = '(*(npy_int*)(PyArray_DATA(%s)))' % depth depth = '-1'
else:
depth = '0'
sync = "" sync = ""
if config.gpuarray.sync: if config.gpuarray.sync:
sync = """ sync = """
...@@ -1206,7 +1257,7 @@ class BaseGpuCorr3dMM(CGpuKernelBase): ...@@ -1206,7 +1257,7 @@ class BaseGpuCorr3dMM(CGpuKernelBase):
// Obtain or infer kernel height, width and depth // Obtain or infer kernel height, width and depth
// (we need to know it early to be able to handle auto-padding) // (we need to know it early to be able to handle auto-padding)
size_t kH, kW, kD; size_t kH, kW, kD, dil_kH, dil_kW, dil_kD;
if (direction != 1) { if (direction != 1) {
// weight is an input variable, we can just read its shape // weight is an input variable, we can just read its shape
kH = PyGpuArray_DIMS(weights)[2]; kH = PyGpuArray_DIMS(weights)[2];
...@@ -1214,8 +1265,8 @@ class BaseGpuCorr3dMM(CGpuKernelBase): ...@@ -1214,8 +1265,8 @@ class BaseGpuCorr3dMM(CGpuKernelBase):
kD = PyGpuArray_DIMS(weights)[4]; kD = PyGpuArray_DIMS(weights)[4];
} }
else { else {
if ((dH != 1) || (padH == -1)) { if (%(height)s != -1) {
// vertical subsampling or half padding, kernel height is specified // kernel height is specified (perhaps vertical subsampling or half padding)
kH = %(height)s; kH = %(height)s;
} }
else if (padH == -2) { else if (padH == -2) {
...@@ -1226,7 +1277,7 @@ class BaseGpuCorr3dMM(CGpuKernelBase): ...@@ -1226,7 +1277,7 @@ class BaseGpuCorr3dMM(CGpuKernelBase):
// explicit padding, we can infer the kernel height // explicit padding, we can infer the kernel height
kH = (PyGpuArray_DIMS(bottom)[2] + 2*padH - (PyGpuArray_DIMS(top)[2] - 1) * dH - 1) / dilH + 1 ; kH = (PyGpuArray_DIMS(bottom)[2] + 2*padH - (PyGpuArray_DIMS(top)[2] - 1) * dH - 1) / dilH + 1 ;
} }
if ((dW != 1) || (padW == -1)) { if (%(width)s != -1) {
kW = %(width)s; kW = %(width)s;
} }
else if (padW == -2) { else if (padW == -2) {
...@@ -1235,7 +1286,7 @@ class BaseGpuCorr3dMM(CGpuKernelBase): ...@@ -1235,7 +1286,7 @@ class BaseGpuCorr3dMM(CGpuKernelBase):
else { else {
kW = (PyGpuArray_DIMS(bottom)[3] + 2*padW - (PyGpuArray_DIMS(top)[3] - 1) * dW - 1) / dilW + 1; kW = (PyGpuArray_DIMS(bottom)[3] + 2*padW - (PyGpuArray_DIMS(top)[3] - 1) * dW - 1) / dilW + 1;
} }
if ((dD != 1) || (padD == -1)) { if (%(depth)s != -1) {
kD = %(depth)s; kD = %(depth)s;
} }
else if (padD == -2) { else if (padD == -2) {
...@@ -1247,9 +1298,9 @@ class BaseGpuCorr3dMM(CGpuKernelBase): ...@@ -1247,9 +1298,9 @@ class BaseGpuCorr3dMM(CGpuKernelBase):
} }
// Implicit dilated kernel size // Implicit dilated kernel size
size_t dil_kH = (kH - 1) * dilH + 1; dil_kH = (kH - 1) * dilH + 1;
size_t dil_kW = (kW - 1) * dilW + 1; dil_kW = (kW - 1) * dilW + 1;
size_t dil_kD = (kD - 1) * dilD + 1; dil_kD = (kD - 1) * dilD + 1;
// Auto-padding if requested // Auto-padding if requested
if (padH == -1) { // vertical half padding if (padH == -1) { // vertical half padding
...@@ -1284,7 +1335,9 @@ class BaseGpuCorr3dMM(CGpuKernelBase): ...@@ -1284,7 +1335,9 @@ class BaseGpuCorr3dMM(CGpuKernelBase):
} }
// Infer output shape and type // Infer output shape and type
size_t out_dim[5]; // The inferred shape can be negative.
long long out_dim[5];
size_t out_dim_size[5];
int out_typecode; int out_typecode;
PyGpuContextObject *out_context; PyGpuContextObject *out_context;
switch(direction) { switch(direction) {
...@@ -1298,6 +1351,22 @@ class BaseGpuCorr3dMM(CGpuKernelBase): ...@@ -1298,6 +1351,22 @@ class BaseGpuCorr3dMM(CGpuKernelBase):
out_dim[4] = (PyGpuArray_DIMS(bottom)[4] + 2*padD - ((PyGpuArray_DIMS(weights)[4]-1)*dilD + 1)) / dD + 1; out_dim[4] = (PyGpuArray_DIMS(bottom)[4] + 2*padD - ((PyGpuArray_DIMS(weights)[4]-1)*dilD + 1)) / dD + 1;
out_typecode = bottom->ga.typecode; out_typecode = bottom->ga.typecode;
out_context = bottom->context; out_context = bottom->context;
if (out_dim[0] < 0 || out_dim[1] < 0 || out_dim[2] <= 0 || out_dim[3] <= 0 || out_dim[4] <= 0)
{
PyErr_Format(PyExc_ValueError,
"GpuCorr3dMM: impossible output shape\\n"
" bottom shape: %%ld x %%ld x %%ld x %%ld x %%ld\\n"
" weights shape: %%ld x %%ld x %%ld x %%ld x %%ld\\n"
" top shape: %%ld x %%ld x %%ld x %%ld x %%ld\\n",
PyGpuArray_DIMS(bottom)[0], PyGpuArray_DIMS(bottom)[1],
PyGpuArray_DIMS(bottom)[2], PyGpuArray_DIMS(bottom)[3],
PyGpuArray_DIMS(bottom)[4],
PyGpuArray_DIMS(weights)[0], PyGpuArray_DIMS(weights)[1],
PyGpuArray_DIMS(weights)[2], PyGpuArray_DIMS(weights)[3],
PyGpuArray_DIMS(weights)[4],
out_dim[0], out_dim[1], out_dim[2], out_dim[3], out_dim[4]);
%(fail)s
}
break; break;
case 1: // backprop wrt. weights case 1: // backprop wrt. weights
// output is weights: (num_filters, num_channels, height, width, depth) // output is weights: (num_filters, num_channels, height, width, depth)
...@@ -1309,28 +1378,66 @@ class BaseGpuCorr3dMM(CGpuKernelBase): ...@@ -1309,28 +1378,66 @@ class BaseGpuCorr3dMM(CGpuKernelBase):
out_dim[4] = kD; out_dim[4] = kD;
out_typecode = top->ga.typecode; out_typecode = top->ga.typecode;
out_context = top->context; out_context = top->context;
if (out_dim[0] < 0 || out_dim[1] < 0 || out_dim[2] <= 0 || out_dim[3] <= 0 || out_dim[4] <= 0)
{
PyErr_Format(PyExc_ValueError,
"GpuCorr3dMM backprop wrt. weights: impossible output shape\\n"
" bottom shape: %%ld x %%ld x %%ld x %%ld x %%ld\\n"
" weights shape: %%ld x %%ld x %%ld x %%ld x %%ld\\n"
" top shape: %%ld x %%ld x %%ld x %%ld x %%ld\\n",
PyGpuArray_DIMS(bottom)[0], PyGpuArray_DIMS(bottom)[1],
PyGpuArray_DIMS(bottom)[2], PyGpuArray_DIMS(bottom)[3],
PyGpuArray_DIMS(bottom)[4],
out_dim[0], out_dim[1], out_dim[2], out_dim[3], out_dim[4],
PyGpuArray_DIMS(top)[0], PyGpuArray_DIMS(top)[1],
PyGpuArray_DIMS(top)[2], PyGpuArray_DIMS(top)[3],
PyGpuArray_DIMS(top)[4]);
%(fail)s
}
break; break;
case 2: // backprop wrt. inputs case 2: // backprop wrt. inputs
// output is bottom: (batchsize, num_channels, height, width, depth) // output is bottom: (batchsize, num_channels, height, width, depth)
// height, width and depth: bottom = (top - 1) * sample + (weights-1)*dil + 1 - 2*pad // height, width and depth: bottom = (top - 1) * sample + (weights-1)*dil + 1 - 2*pad
out_dim[0] = PyGpuArray_DIMS(top)[0]; out_dim[0] = PyGpuArray_DIMS(top)[0];
out_dim[1] = PyGpuArray_DIMS(weights)[1]; out_dim[1] = PyGpuArray_DIMS(weights)[1];
out_dim[2] = (dH != 1) ? %(height)s : (PyGpuArray_DIMS(top)[2] - 1) * dH + (PyGpuArray_DIMS(weights)[2]-1)*dilH + 1 - 2*padH; out_dim[2] = (%(height)s != -1) ? %(height)s : (PyGpuArray_DIMS(top)[2] - 1) * dH + (PyGpuArray_DIMS(weights)[2]-1)*dilH + 1 - 2*padH;
out_dim[3] = (dW != 1) ? %(width)s : (PyGpuArray_DIMS(top)[3] - 1) * dW + (PyGpuArray_DIMS(weights)[3]-1)*dilW + 1 - 2*padW; out_dim[3] = (%(width)s != -1) ? %(width)s : (PyGpuArray_DIMS(top)[3] - 1) * dW + (PyGpuArray_DIMS(weights)[3]-1)*dilW + 1 - 2*padW;
out_dim[4] = (dD != 1) ? %(depth)s : (PyGpuArray_DIMS(top)[4] - 1) * dD + (PyGpuArray_DIMS(weights)[4]-1)*dilD + 1 - 2*padD; out_dim[4] = (%(depth)s != -1) ? %(depth)s : (PyGpuArray_DIMS(top)[4] - 1) * dD + (PyGpuArray_DIMS(weights)[4]-1)*dilD + 1 - 2*padD;
out_typecode = top->ga.typecode; out_typecode = top->ga.typecode;
out_context = top->context; out_context = top->context;
if (out_dim[0] < 0 || out_dim[1] < 0 || out_dim[2] <= 0 || out_dim[3] <= 0 || out_dim[4] <= 0)
{
PyErr_Format(PyExc_ValueError,
"GpuCorr3dMM backprop wrt. inputs: impossible output shape\\n"
" bottom shape: %%ld x %%ld x %%ld x %%ld x %%ld\\n"
" weights shape: %%ld x %%ld x %%ld x %%ld x %%ld\\n"
" top shape: %%ld x %%ld x %%ld x %%ld x %%ld\\n",
out_dim[0], out_dim[1], out_dim[2], out_dim[3], out_dim[4],
PyGpuArray_DIMS(weights)[0], PyGpuArray_DIMS(weights)[1],
PyGpuArray_DIMS(weights)[2], PyGpuArray_DIMS(weights)[3],
PyGpuArray_DIMS(weights)[4],
PyGpuArray_DIMS(top)[0], PyGpuArray_DIMS(top)[1],
PyGpuArray_DIMS(top)[2], PyGpuArray_DIMS(top)[3],
PyGpuArray_DIMS(top)[4]);
%(fail)s
}
break; break;
default: default:
PyErr_SetString(PyExc_ValueError, "BaseGpuCorr3dMM: direction must be 0, 1, or 2\\n"); PyErr_SetString(PyExc_ValueError, "BaseGpuCorr3dMM: direction must be 0, 1, or 2\\n");
%(fail)s %(fail)s
} }
out_dim_size[0] = (size_t)out_dim[0];
out_dim_size[1] = (size_t)out_dim[1];
out_dim_size[2] = (size_t)out_dim[2];
out_dim_size[3] = (size_t)out_dim[3];
out_dim_size[4] = (size_t)out_dim[4];
// Prepare output array // Prepare output array
if (theano_prep_output(&%(out)s, 5, out_dim, out_typecode, GA_C_ORDER, out_context) != 0) if (theano_prep_output(&%(out)s, 5, out_dim_size, out_typecode, GA_C_ORDER, out_context) != 0)
{ {
PyErr_Format(PyExc_RuntimeError, PyErr_Format(PyExc_RuntimeError,
"BaseGpuCorrMM: Failed to allocate output of %%ld x %%ld x %%ld x %%ld x %%ld", "BaseGpuCorrMM: Failed to allocate output of %%lld x %%lld x %%lld x %%lld x %%lld",
out_dim[0], out_dim[1], out_dim[2], out_dim[3], out_dim[4]); out_dim[0], out_dim[1], out_dim[2], out_dim[3], out_dim[4]);
%(fail)s %(fail)s
} }
...@@ -1464,16 +1571,16 @@ class GpuCorr3dMM_gradWeights(BaseGpuCorr3dMM): ...@@ -1464,16 +1571,16 @@ class GpuCorr3dMM_gradWeights(BaseGpuCorr3dMM):
raise TypeError('img must be 5D tensor') raise TypeError('img must be 5D tensor')
if topgrad.type.ndim != 5: if topgrad.type.ndim != 5:
raise TypeError('topgrad must be 5D tensor') raise TypeError('topgrad must be 5D tensor')
if self.subsample != (1, 1, 1) or self.border_mode == "half":
if shape is None: if shape is None:
if self.subsample != (1, 1, 1) or self.border_mode == "half":
raise ValueError('shape must be given if subsample != (1, 1, 1)' raise ValueError('shape must be given if subsample != (1, 1, 1)'
' or border_mode == "half"') ' or border_mode == "half"')
height_width_depth = []
else:
height_width_depth = [shape[0], shape[1], shape[2]] height_width_depth = [shape[0], shape[1], shape[2]]
assert shape[0].ndim == 0 assert shape[0].ndim == 0
assert shape[1].ndim == 0 assert shape[1].ndim == 0
assert shape[2].ndim == 0 assert shape[2].ndim == 0
else:
height_width_depth = []
broadcastable = [topgrad.type.broadcastable[1], img.type.broadcastable[1], broadcastable = [topgrad.type.broadcastable[1], img.type.broadcastable[1],
False, False, False] False, False, False]
...@@ -1536,10 +1643,12 @@ class GpuCorr3dMM_gradInputs(BaseGpuCorr3dMM): ...@@ -1536,10 +1643,12 @@ class GpuCorr3dMM_gradInputs(BaseGpuCorr3dMM):
raise TypeError('kern must be 5D tensor') raise TypeError('kern must be 5D tensor')
if topgrad.type.ndim != 5: if topgrad.type.ndim != 5:
raise TypeError('topgrad must be 5D tensor') raise TypeError('topgrad must be 5D tensor')
if self.subsample != (1, 1, 1) and shape is None: if shape is None:
if self.subsample != (1, 1, 1):
raise ValueError('shape must be given if subsample != (1, 1, 1)') raise ValueError('shape must be given if subsample != (1, 1, 1)')
height_width_depth = [shape[0], shape[1], shape[2]] if self.subsample != (1, 1, 1) else [] height_width_depth = []
if height_width_depth: else:
height_width_depth = [shape[0], shape[1], shape[2]]
assert shape[0].ndim == 0 assert shape[0].ndim == 0
assert shape[1].ndim == 0 assert shape[1].ndim == 0
assert shape[2].ndim == 0 assert shape[2].ndim == 0
......
...@@ -425,9 +425,17 @@ PyGpuArrayObject* corr3dMM(PyGpuArrayObject *const bottom, ...@@ -425,9 +425,17 @@ PyGpuArrayObject* corr3dMM(PyGpuArrayObject *const bottom,
const size_t dil_kW = (kW - 1) * dilW + 1; const size_t dil_kW = (kW - 1) * dilW + 1;
const size_t dil_kD = (kD - 1) * dilD + 1; const size_t dil_kD = (kD - 1) * dilD + 1;
// top: (batchSize, nFilters, topHeight, topWidth, topDepth) // top: (batchSize, nFilters, topHeight, topWidth, topDepth)
const size_t topHeight = (bottomHeight + 2*padH - dil_kH) / dH + 1; const size_t topHeightNoDH = (bottomHeight + 2*padH - dil_kH);
const size_t topWidth = (bottomWidth + 2*padW - dil_kW) / dW + 1; const size_t topWidthNoDW = (bottomWidth + 2*padW - dil_kW);
const size_t topDepth = (bottomDepth + 2*padD - dil_kD) / dD + 1; const size_t topDepthNoDD = (bottomDepth + 2*padD - dil_kD);
// the above values might be negative so we need to use Python-like
// flooring integer division to be compatible with get_conv_output.
// note: this macro implements Python's // for negative x only
#define _CONV_FLOORDIV_X(x,y) ((x < 0) ? (- ((-x) / y) - (((-x) % y) == 0 ? 0 : 1)) : (x / y))
const size_t topHeight = _CONV_FLOORDIV_X(topHeightNoDH, dH) + 1;
const size_t topWidth = _CONV_FLOORDIV_X(topWidthNoDW, dW) + 1;
const size_t topDepth = _CONV_FLOORDIV_X(topDepthNoDD, dD) + 1;
#undef _CONV_FLOORDIV
if (batchSize != PyGpuArray_DIMS(top)[0] || if (batchSize != PyGpuArray_DIMS(top)[0] ||
nFilters != PyGpuArray_DIMS(top)[1] || nFilters != PyGpuArray_DIMS(top)[1] ||
topHeight != PyGpuArray_DIMS(top)[2] || topHeight != PyGpuArray_DIMS(top)[2] ||
...@@ -479,6 +487,17 @@ PyGpuArrayObject* corr3dMM(PyGpuArrayObject *const bottom, ...@@ -479,6 +487,17 @@ PyGpuArrayObject* corr3dMM(PyGpuArrayObject *const bottom,
PyGpuArrayObject *output; PyGpuArrayObject *output;
if (direction == 0) { // forward pass if (direction == 0) { // forward pass
output = top; output = top;
if (batchSize == 0 || nChannels == 0 || nFilters == 0) {
err = GpuArray_memset(&output->ga, 0);
if (err != GA_NO_ERROR) {
PyErr_Format(PyExc_RuntimeError,
"GpuCorr3dMM could not fill the output with zeros: %d", err);
Py_DECREF(col);
return NULL;
}
Py_DECREF(col);
return output;
}
// valid correlation: im3d2col, then gemm // valid correlation: im3d2col, then gemm
// Iterate over batch // Iterate over batch
for (size_t n = 0; n < batchSize; n++) { for (size_t n = 0; n < batchSize; n++) {
...@@ -530,6 +549,17 @@ PyGpuArrayObject* corr3dMM(PyGpuArrayObject *const bottom, ...@@ -530,6 +549,17 @@ PyGpuArrayObject* corr3dMM(PyGpuArrayObject *const bottom,
} }
else if (direction == 1) { // backprop wrt. weights else if (direction == 1) { // backprop wrt. weights
output = weight; output = weight;
if (batchSize == 0 || nChannels == 0 || nFilters == 0) {
err = GpuArray_memset(&output->ga, 0);
if (err != GA_NO_ERROR) {
PyErr_Format(PyExc_RuntimeError,
"GpuCorr3dMM grad wrt. weights could not fill the output with zeros: %d", err);
Py_DECREF(col);
return NULL;
}
Py_DECREF(col);
return output;
}
// valid convolution: im3col, then gemm // valid convolution: im3col, then gemm
// Iterate over batch // Iterate over batch
for (size_t n = 0; n < batchSize; n++) { for (size_t n = 0; n < batchSize; n++) {
...@@ -581,9 +611,29 @@ PyGpuArrayObject* corr3dMM(PyGpuArrayObject *const bottom, ...@@ -581,9 +611,29 @@ PyGpuArrayObject* corr3dMM(PyGpuArrayObject *const bottom,
return NULL; return NULL;
} }
} }
if (batchSize == 0) {
err = GpuArray_memset(&weight->ga, 0);
if (err != GA_NO_ERROR) {
PyErr_Format(PyExc_RuntimeError,
"GpuCorr3dMM grad weights could not fill the output with zeros: %d", err);
Py_DECREF(col);
return NULL;
}
}
} }
else if (direction == 2) { // backprop wrt. inputs else if (direction == 2) { // backprop wrt. inputs
output = bottom; output = bottom;
if (batchSize == 0 || nChannels == 0 || nFilters == 0) {
err = GpuArray_memset(&output->ga, 0);
if (err != GA_NO_ERROR) {
PyErr_Format(PyExc_RuntimeError,
"GpuCorr3dMM grad wrt. inputs could not fill the output with zeros: %d", err);
Py_DECREF(col);
return NULL;
}
Py_DECREF(col);
return output;
}
// full convolution: gemm, then col2im3d // full convolution: gemm, then col2im3d
// Iterate over batch // Iterate over batch
for (size_t n = 0; n < batchSize; n++) { for (size_t n = 0; n < batchSize; n++) {
......
...@@ -360,8 +360,15 @@ PyGpuArrayObject* corrMM(PyGpuArrayObject *const bottom, ...@@ -360,8 +360,15 @@ PyGpuArrayObject* corrMM(PyGpuArrayObject *const bottom,
const size_t dil_kH = (kH - 1) * dilH + 1; const size_t dil_kH = (kH - 1) * dilH + 1;
const size_t dil_kW = (kW - 1) * dilW + 1; const size_t dil_kW = (kW - 1) * dilW + 1;
// top: (batchSize, nFilters, topHeight, topWidth) // top: (batchSize, nFilters, topHeight, topWidth)
const size_t topHeight = (bottomHeight + 2*padH - dil_kH) / dH + 1; const size_t topHeightNoDH = (bottomHeight + 2*padH - dil_kH);
const size_t topWidth = (bottomWidth + 2*padW - dil_kW) / dW + 1; const size_t topWidthNoDW = (bottomWidth + 2*padW - dil_kW);
// the above values might be negative so we need to use Python-like
// flooring integer division to be compatible with get_conv_output.
// note: this macro implements Python's // for negative x only
#define _CONV_FLOORDIV_X(x,y) ((x < 0) ? (- ((-x) / y) - (((-x) % y) == 0 ? 0 : 1)) : (x / y))
const size_t topHeight = _CONV_FLOORDIV_X(topHeightNoDH, dH) + 1;
const size_t topWidth = _CONV_FLOORDIV_X(topWidthNoDW, dW) + 1;
#undef _CONV_FLOORDIV
if (batchSize != PyGpuArray_DIMS(top)[0] || if (batchSize != PyGpuArray_DIMS(top)[0] ||
nFilters != PyGpuArray_DIMS(top)[1] || nFilters != PyGpuArray_DIMS(top)[1] ||
topHeight != PyGpuArray_DIMS(top)[2] || topHeight != PyGpuArray_DIMS(top)[2] ||
...@@ -411,6 +418,17 @@ PyGpuArrayObject* corrMM(PyGpuArrayObject *const bottom, ...@@ -411,6 +418,17 @@ PyGpuArrayObject* corrMM(PyGpuArrayObject *const bottom,
PyGpuArrayObject *output; PyGpuArrayObject *output;
if (direction == 0) { // forward pass if (direction == 0) { // forward pass
output = top; output = top;
if (batchSize == 0 || nChannels == 0 || nFilters == 0) {
err = GpuArray_memset(&output->ga, 0);
if (err != GA_NO_ERROR) {
PyErr_Format(PyExc_RuntimeError,
"GpuCorrMM could not fill the output with zeros: %d", err);
Py_DECREF(col);
return NULL;
}
Py_DECREF(col);
return output;
}
// valid correlation: im2col, then gemm // valid correlation: im2col, then gemm
// Iterate over batch // Iterate over batch
for (size_t n = 0; n < batchSize; n++) { for (size_t n = 0; n < batchSize; n++) {
...@@ -462,6 +480,17 @@ PyGpuArrayObject* corrMM(PyGpuArrayObject *const bottom, ...@@ -462,6 +480,17 @@ PyGpuArrayObject* corrMM(PyGpuArrayObject *const bottom,
} }
else if (direction == 1) { // backprop wrt. weights else if (direction == 1) { // backprop wrt. weights
output = weight; output = weight;
if (batchSize == 0 || nChannels == 0 || nFilters == 0) {
err = GpuArray_memset(&output->ga, 0);
if (err != GA_NO_ERROR) {
PyErr_Format(PyExc_RuntimeError,
"GpuCorrMM grad wrt. weights could not fill the output with zeros: %d", err);
Py_DECREF(col);
return NULL;
}
Py_DECREF(col);
return output;
}
// valid convolution: im2col, then gemm // valid convolution: im2col, then gemm
// Iterate over batch // Iterate over batch
for (size_t n = 0; n < batchSize; n++) { for (size_t n = 0; n < batchSize; n++) {
...@@ -516,6 +545,17 @@ PyGpuArrayObject* corrMM(PyGpuArrayObject *const bottom, ...@@ -516,6 +545,17 @@ PyGpuArrayObject* corrMM(PyGpuArrayObject *const bottom,
} }
else if (direction == 2) { // backprop wrt. inputs else if (direction == 2) { // backprop wrt. inputs
output = bottom; output = bottom;
if (batchSize == 0 || nChannels == 0 || nFilters == 0) {
err = GpuArray_memset(&output->ga, 0);
if (err != GA_NO_ERROR) {
PyErr_Format(PyExc_RuntimeError,
"GpuCorrMM grad wrt. inputs could not fill the output with zeros: %d", err);
Py_DECREF(col);
return NULL;
}
Py_DECREF(col);
return output;
}
// full convolution: gemm, then col2im // full convolution: gemm, then col2im
// Iterate over batch // Iterate over batch
for (size_t n = 0; n < batchSize; n++) { for (size_t n = 0; n < batchSize; n++) {
......
...@@ -24,7 +24,8 @@ from theano.tensor.nnet.abstract_conv import (AbstractConv2d, ...@@ -24,7 +24,8 @@ from theano.tensor.nnet.abstract_conv import (AbstractConv2d,
AbstractConv3d, AbstractConv3d,
AbstractConv3d_gradWeights, AbstractConv3d_gradWeights,
AbstractConv3d_gradInputs, AbstractConv3d_gradInputs,
get_conv_output_shape) get_conv_output_shape,
assert_conv_shape)
from theano.tensor.signal.pool import ( from theano.tensor.signal.pool import (
Pool, MaxPoolGrad, AveragePoolGrad) Pool, MaxPoolGrad, AveragePoolGrad)
from . import pygpu from . import pygpu
...@@ -979,11 +980,12 @@ def dnn_conv(img, kerns, border_mode='valid', subsample=(1, 1), ...@@ -979,11 +980,12 @@ def dnn_conv(img, kerns, border_mode='valid', subsample=(1, 1),
# that would be flipped by conv_mode='conv' in GpuDnnConvGradW. # that would be flipped by conv_mode='conv' in GpuDnnConvGradW.
kerns = kerns[:, :, ::-1, ::-1] kerns = kerns[:, :, ::-1, ::-1]
kerns = gpu_contiguous(kerns.dimshuffle(1, 0, 2, 3)) kerns = gpu_contiguous(kerns.dimshuffle(1, 0, 2, 3))
shape2 = shape_i(img, 2, fgraph) - shape_i(kerns, 2, fgraph) + 1 out_shp = (shape_i(kerns, 1, fgraph),
shape3 = shape_i(img, 3, fgraph) - shape_i(kerns, 3, fgraph) + 1 shape_i(img, 1, fgraph),
out = gpu_alloc_empty(ctx_name, dtype=img.dtype)( shape_i(img, 2, fgraph) - shape_i(kerns, 2, fgraph) + 1,
shape_i(kerns, 1, fgraph), shape_i(img, 3, fgraph) - shape_i(kerns, 3, fgraph) + 1)
shape_i(img, 1, fgraph), shape2, shape3) out_shp = assert_conv_shape(out_shp)
out = gpu_alloc_empty(ctx_name, dtype=img.dtype)(*out_shp)
desc = GpuDnnConvDesc(border_mode='valid', subsample=(1, 1), desc = GpuDnnConvDesc(border_mode='valid', subsample=(1, 1),
conv_mode='cross', precision=precision)(out.shape) conv_mode='cross', precision=precision)(out.shape)
conv = gpu_dnn_conv_gradW()(img, kerns, out, desc) conv = gpu_dnn_conv_gradW()(img, kerns, out, desc)
...@@ -997,11 +999,12 @@ def dnn_conv(img, kerns, border_mode='valid', subsample=(1, 1), ...@@ -997,11 +999,12 @@ def dnn_conv(img, kerns, border_mode='valid', subsample=(1, 1),
img = gpu_contiguous(img) # cudnn v2 rc3 need contiguous data img = gpu_contiguous(img) # cudnn v2 rc3 need contiguous data
kerns = gpu_contiguous(kerns.dimshuffle(1, 0, 2, 3)) kerns = gpu_contiguous(kerns.dimshuffle(1, 0, 2, 3))
conv_mode = 'cross' if conv_mode == 'conv' else 'conv' conv_mode = 'cross' if conv_mode == 'conv' else 'conv'
shape2 = shape_i(img, 2, fgraph) + shape_i(kerns, 2, fgraph) - 1 out_shp = (shape_i(img, 0, fgraph),
shape3 = shape_i(img, 3, fgraph) + shape_i(kerns, 3, fgraph) - 1
out = gpu_alloc_empty(ctx_name, dtype=img.dtype)(shape_i(img, 0, fgraph),
shape_i(kerns, 1, fgraph), shape_i(kerns, 1, fgraph),
shape2, shape3) shape_i(img, 2, fgraph) + shape_i(kerns, 2, fgraph) - 1,
shape_i(img, 3, fgraph) + shape_i(kerns, 3, fgraph) - 1)
out_shp = assert_conv_shape(out_shp)
out = gpu_alloc_empty(ctx_name, dtype=img.dtype)(*out_shp)
desc = GpuDnnConvDesc(border_mode='valid', subsample=(1, 1), desc = GpuDnnConvDesc(border_mode='valid', subsample=(1, 1),
conv_mode=conv_mode, precision=precision)(kerns.shape) conv_mode=conv_mode, precision=precision)(kerns.shape)
return gpu_dnn_conv_gradI()(kerns, img, out, desc) return gpu_dnn_conv_gradI()(kerns, img, out, desc)
...@@ -1021,6 +1024,7 @@ def dnn_conv(img, kerns, border_mode='valid', subsample=(1, 1), ...@@ -1021,6 +1024,7 @@ def dnn_conv(img, kerns, border_mode='valid', subsample=(1, 1),
out_shp = get_conv_output_shape(ishape, kshape, out_shp = get_conv_output_shape(ishape, kshape,
desc_op.border_mode, desc_op.border_mode,
desc_op.subsample) desc_op.subsample)
out_shp = assert_conv_shape(out_shp)
out = gpu_alloc_empty(ctx_name, dtype=img.dtype)(*out_shp) out = gpu_alloc_empty(ctx_name, dtype=img.dtype)(*out_shp)
return gpu_dnn_conv(algo=algo)(img, kerns, out, desc) return gpu_dnn_conv(algo=algo)(img, kerns, out, desc)
...@@ -1094,12 +1098,13 @@ def dnn_conv3d(img, kerns, border_mode='valid', subsample=(1, 1, 1), ...@@ -1094,12 +1098,13 @@ def dnn_conv3d(img, kerns, border_mode='valid', subsample=(1, 1, 1),
# that would be flipped by conv_mode='conv' in GpuDnnConvGradW. # that would be flipped by conv_mode='conv' in GpuDnnConvGradW.
kerns = kerns[:, :, ::-1, ::-1] kerns = kerns[:, :, ::-1, ::-1]
kerns = gpu_contiguous(kerns.dimshuffle(1, 0, 2, 3, 4)) kerns = gpu_contiguous(kerns.dimshuffle(1, 0, 2, 3, 4))
shape2 = shape_i(img, 2, fgraph) - shape_i(kerns, 2, fgraph) + 1 out_shp = (shape_i(kerns, 1, fgraph),
shape3 = shape_i(img, 3, fgraph) - shape_i(kerns, 3, fgraph) + 1 shape_i(img, 1, fgraph),
shape4 = shape_i(img, 4, fgraph) - shape_i(kerns, 4, fgraph) + 1 shape_i(img, 2, fgraph) - shape_i(kerns, 2, fgraph) + 1,
out = gpu_alloc_empty(ctx_name, dtype=img.dtype)( shape_i(img, 3, fgraph) - shape_i(kerns, 3, fgraph) + 1,
shape_i(kerns, 1, fgraph), shape_i(img, 4, fgraph) - shape_i(kerns, 4, fgraph) + 1)
shape_i(img, 1, fgraph), shape2, shape3, shape4) out_shp = assert_conv_shape(out_shp)
out = gpu_alloc_empty(ctx_name, dtype=img.dtype)(*out_shp)
desc = GpuDnnConvDesc(border_mode='valid', subsample=(1, 1, 1), desc = GpuDnnConvDesc(border_mode='valid', subsample=(1, 1, 1),
conv_mode='cross', precision=precision)(out.shape) conv_mode='cross', precision=precision)(out.shape)
conv = gpu_dnn_conv_gradW()(img, kerns, out, desc) conv = gpu_dnn_conv_gradW()(img, kerns, out, desc)
...@@ -1113,12 +1118,13 @@ def dnn_conv3d(img, kerns, border_mode='valid', subsample=(1, 1, 1), ...@@ -1113,12 +1118,13 @@ def dnn_conv3d(img, kerns, border_mode='valid', subsample=(1, 1, 1),
img = gpu_contiguous(img) # cudnn v2 rc3 need contiguous data img = gpu_contiguous(img) # cudnn v2 rc3 need contiguous data
kerns = gpu_contiguous(kerns.dimshuffle(1, 0, 2, 3, 4)) kerns = gpu_contiguous(kerns.dimshuffle(1, 0, 2, 3, 4))
conv_mode = 'cross' if conv_mode == 'conv' else 'conv' conv_mode = 'cross' if conv_mode == 'conv' else 'conv'
shape2 = shape_i(img, 2, fgraph) + shape_i(kerns, 2, fgraph) - 1 out_shp = (shape_i(img, 0, fgraph),
shape3 = shape_i(img, 3, fgraph) + shape_i(kerns, 3, fgraph) - 1
shape4 = shape_i(img, 4, fgraph) + shape_i(kerns, 4, fgraph) - 1
out = gpu_alloc_empty(ctx_name, dtype=img.dtype)(shape_i(img, 0, fgraph),
shape_i(kerns, 1, fgraph), shape_i(kerns, 1, fgraph),
shape2, shape3, shape4) shape_i(img, 2, fgraph) + shape_i(kerns, 2, fgraph) - 1,
shape_i(img, 3, fgraph) + shape_i(kerns, 3, fgraph) - 1,
shape_i(img, 4, fgraph) + shape_i(kerns, 4, fgraph) - 1)
out_shp = assert_conv_shape(out_shp)
out = gpu_alloc_empty(ctx_name, dtype=img.dtype)(*out_shp)
desc = GpuDnnConvDesc(border_mode='valid', subsample=(1, 1, 1), desc = GpuDnnConvDesc(border_mode='valid', subsample=(1, 1, 1),
conv_mode=conv_mode, precision=precision)(kerns.shape) conv_mode=conv_mode, precision=precision)(kerns.shape)
return gpu_dnn_conv_gradI()(kerns, img, out, desc) return gpu_dnn_conv_gradI()(kerns, img, out, desc)
...@@ -1138,6 +1144,7 @@ def dnn_conv3d(img, kerns, border_mode='valid', subsample=(1, 1, 1), ...@@ -1138,6 +1144,7 @@ def dnn_conv3d(img, kerns, border_mode='valid', subsample=(1, 1, 1),
out_shp = get_conv_output_shape(ishape, kshape, out_shp = get_conv_output_shape(ishape, kshape,
desc_op.border_mode, desc_op.border_mode,
desc_op.subsample) desc_op.subsample)
out_shp = assert_conv_shape(out_shp)
out = gpu_alloc_empty(ctx_name, dtype=img.dtype)(*out_shp) out = gpu_alloc_empty(ctx_name, dtype=img.dtype)(*out_shp)
return gpu_dnn_conv(algo=algo)(img, kerns, out, desc) return gpu_dnn_conv(algo=algo)(img, kerns, out, desc)
......
...@@ -39,11 +39,6 @@ APPLY_SPECIFIC(conv_fwd)(PyGpuArrayObject *input, PyGpuArrayObject *kerns, ...@@ -39,11 +39,6 @@ APPLY_SPECIFIC(conv_fwd)(PyGpuArrayObject *input, PyGpuArrayObject *kerns,
return 1; return 1;
} }
if (c_set_tensorNd(input, APPLY_SPECIFIC(input)) == -1)
return 1;
if (c_set_filter(kerns, APPLY_SPECIFIC(kerns)) == -1)
return 1;
switch (input->ga.typecode) { switch (input->ga.typecode) {
case GA_DOUBLE: case GA_DOUBLE:
alpha_p = (void *)&alpha; alpha_p = (void *)&alpha;
...@@ -71,6 +66,20 @@ APPLY_SPECIFIC(conv_fwd)(PyGpuArrayObject *input, PyGpuArrayObject *kerns, ...@@ -71,6 +66,20 @@ APPLY_SPECIFIC(conv_fwd)(PyGpuArrayObject *input, PyGpuArrayObject *kerns,
return 1; return 1;
#endif #endif
if (PyGpuArray_DIMS(input)[0] == 0 || PyGpuArray_DIMS(kerns)[0] == 0 || PyGpuArray_DIMS(kerns)[1] == 0) {
int err2 = GpuArray_memset(&(*output)->ga, 0);
if (err2 != GA_NO_ERROR) {
PyErr_Format(PyExc_RuntimeError,
"GpuDnnConv could not fill the output with zeros: %d", err2);
return 1;
}
return 0;
}
if (c_set_tensorNd(input, APPLY_SPECIFIC(input)) == -1)
return 1;
if (c_set_filter(kerns, APPLY_SPECIFIC(kerns)) == -1)
return 1;
if (c_set_tensorNd(*output, APPLY_SPECIFIC(output)) == -1) if (c_set_tensorNd(*output, APPLY_SPECIFIC(output)) == -1)
return 1; return 1;
......
...@@ -38,11 +38,6 @@ APPLY_SPECIFIC(conv_gi)(PyGpuArrayObject *kerns, PyGpuArrayObject *output, ...@@ -38,11 +38,6 @@ APPLY_SPECIFIC(conv_gi)(PyGpuArrayObject *kerns, PyGpuArrayObject *output,
return 1; return 1;
} }
if (c_set_tensorNd(output, APPLY_SPECIFIC(output)) == -1)
return 1;
if (c_set_filter(kerns, APPLY_SPECIFIC(kerns)) == -1)
return 1;
switch (im->ga.typecode) { switch (im->ga.typecode) {
case GA_DOUBLE: case GA_DOUBLE:
alpha_p = (void *)&alpha; alpha_p = (void *)&alpha;
...@@ -70,6 +65,20 @@ APPLY_SPECIFIC(conv_gi)(PyGpuArrayObject *kerns, PyGpuArrayObject *output, ...@@ -70,6 +65,20 @@ APPLY_SPECIFIC(conv_gi)(PyGpuArrayObject *kerns, PyGpuArrayObject *output,
return 1; return 1;
#endif #endif
if (PyGpuArray_DIMS(im)[0] == 0 || PyGpuArray_DIMS(kerns)[0] == 0 || PyGpuArray_DIMS(kerns)[1] == 0) {
int err2 = GpuArray_memset(&(*input)->ga, 0);
if (err2 != GA_NO_ERROR) {
PyErr_Format(PyExc_RuntimeError,
"GpuDnnConv grad wrt. inputs could not fill the output with zeros: %d", err2);
return 1;
}
return 0;
}
if (c_set_tensorNd(output, APPLY_SPECIFIC(output)) == -1)
return 1;
if (c_set_filter(kerns, APPLY_SPECIFIC(kerns)) == -1)
return 1;
if (c_set_tensorNd(*input, APPLY_SPECIFIC(input)) == -1) if (c_set_tensorNd(*input, APPLY_SPECIFIC(input)) == -1)
return 1; return 1;
...@@ -77,6 +86,48 @@ APPLY_SPECIFIC(conv_gi)(PyGpuArrayObject *kerns, PyGpuArrayObject *output, ...@@ -77,6 +86,48 @@ APPLY_SPECIFIC(conv_gi)(PyGpuArrayObject *kerns, PyGpuArrayObject *output,
cuda_enter(c->ctx); cuda_enter(c->ctx);
int expected_output_dims[5] = {0};
err = cudnnGetConvolutionNdForwardOutputDim(desc, APPLY_SPECIFIC(input), APPLY_SPECIFIC(kerns),
PyGpuArray_NDIM(im), expected_output_dims);
if (err != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_RuntimeError, "error computing convolution output dim: %s",
cudnnGetErrorString(err));
cuda_exit(c->ctx);
return 1;
}
if (PyGpuArray_NDIM(im) == 4) {
if ((PyGpuArray_DIMS(output)[0] != expected_output_dims[0]) ||
(PyGpuArray_DIMS(output)[1] != expected_output_dims[1]) ||
(PyGpuArray_DIMS(output)[2] != expected_output_dims[2]) ||
(PyGpuArray_DIMS(output)[3] != expected_output_dims[3])) {
PyErr_Format(PyExc_ValueError, "impossible convolution output dim: expected %ldx%ldx%ldx%ld"
" but received gradient with shape %ldx%ldx%ldx%ld",
expected_output_dims[0], expected_output_dims[1],
expected_output_dims[2], expected_output_dims[3],
PyGpuArray_DIMS(output)[0], PyGpuArray_DIMS(output)[1],
PyGpuArray_DIMS(output)[2], PyGpuArray_DIMS(output)[3]);
cuda_exit(c->ctx);
return 1;
}
} else if (PyGpuArray_NDIM(im) == 5) {
if ((PyGpuArray_DIMS(output)[0] != expected_output_dims[0]) ||
(PyGpuArray_DIMS(output)[1] != expected_output_dims[1]) ||
(PyGpuArray_DIMS(output)[2] != expected_output_dims[2]) ||
(PyGpuArray_DIMS(output)[3] != expected_output_dims[3]) ||
(PyGpuArray_DIMS(output)[4] != expected_output_dims[4])) {
PyErr_Format(PyExc_ValueError, "impossible convolution output dim: expected %ldx%ldx%ldx%ldx%ld"
" but received gradient with shape %ldx%ldx%ldx%ldx%ld",
expected_output_dims[0], expected_output_dims[1],
expected_output_dims[2], expected_output_dims[3],
expected_output_dims[4],
PyGpuArray_DIMS(output)[0], PyGpuArray_DIMS(output)[1],
PyGpuArray_DIMS(output)[2], PyGpuArray_DIMS(output)[3],
PyGpuArray_DIMS(output)[4]);
cuda_exit(c->ctx);
return 1;
}
}
#ifdef CHOOSE_ALGO #ifdef CHOOSE_ALGO
#ifndef CHOOSE_ONCE #ifndef CHOOSE_ONCE
reuse_algo = 1; reuse_algo = 1;
......
...@@ -38,11 +38,6 @@ APPLY_SPECIFIC(conv_gw)(PyGpuArrayObject *input, PyGpuArrayObject *output, ...@@ -38,11 +38,6 @@ APPLY_SPECIFIC(conv_gw)(PyGpuArrayObject *input, PyGpuArrayObject *output,
return 1; return 1;
} }
if (c_set_tensorNd(input, APPLY_SPECIFIC(input)) == -1)
return 1;
if (c_set_tensorNd(output, APPLY_SPECIFIC(output)) == -1)
return 1;
switch (input->ga.typecode) { switch (input->ga.typecode) {
case GA_DOUBLE: case GA_DOUBLE:
alpha_p = (void *)&alpha; alpha_p = (void *)&alpha;
...@@ -70,6 +65,20 @@ APPLY_SPECIFIC(conv_gw)(PyGpuArrayObject *input, PyGpuArrayObject *output, ...@@ -70,6 +65,20 @@ APPLY_SPECIFIC(conv_gw)(PyGpuArrayObject *input, PyGpuArrayObject *output,
return 1; return 1;
#endif #endif
if (PyGpuArray_DIMS(input)[0] == 0 || PyGpuArray_DIMS(km)[0] == 0 || PyGpuArray_DIMS(km)[1] == 0) {
int err2 = GpuArray_memset(&(*kerns)->ga, 0);
if (err2 != GA_NO_ERROR) {
PyErr_Format(PyExc_RuntimeError,
"GpuDnnConv grad wrt. weights could not fill the output with zeros: %d", err2);
return 1;
}
return 0;
}
if (c_set_tensorNd(input, APPLY_SPECIFIC(input)) == -1)
return 1;
if (c_set_tensorNd(output, APPLY_SPECIFIC(output)) == -1)
return 1;
if (c_set_filter(*kerns, APPLY_SPECIFIC(kerns)) == -1) if (c_set_filter(*kerns, APPLY_SPECIFIC(kerns)) == -1)
return 1; return 1;
...@@ -77,6 +86,48 @@ APPLY_SPECIFIC(conv_gw)(PyGpuArrayObject *input, PyGpuArrayObject *output, ...@@ -77,6 +86,48 @@ APPLY_SPECIFIC(conv_gw)(PyGpuArrayObject *input, PyGpuArrayObject *output,
cuda_enter(c->ctx); cuda_enter(c->ctx);
int expected_output_dims[5] = {0};
err = cudnnGetConvolutionNdForwardOutputDim(desc, APPLY_SPECIFIC(input), APPLY_SPECIFIC(kerns),
PyGpuArray_NDIM(input), expected_output_dims);
if (err != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_RuntimeError, "error computing convolution output dim: %s",
cudnnGetErrorString(err));
cuda_exit(c->ctx);
return 1;
}
if (PyGpuArray_NDIM(input) == 4) {
if ((PyGpuArray_DIMS(output)[0] != expected_output_dims[0]) ||
(PyGpuArray_DIMS(output)[1] != expected_output_dims[1]) ||
(PyGpuArray_DIMS(output)[2] != expected_output_dims[2]) ||
(PyGpuArray_DIMS(output)[3] != expected_output_dims[3])) {
PyErr_Format(PyExc_ValueError, "impossible convolution output dim: expected %ldx%ldx%dx%ld"
" but received gradient with shape %ldx%ldx%dx%ld",
expected_output_dims[0], expected_output_dims[1],
expected_output_dims[2], expected_output_dims[3],
PyGpuArray_DIMS(output)[0], PyGpuArray_DIMS(output)[1],
PyGpuArray_DIMS(output)[2], PyGpuArray_DIMS(output)[3]);
cuda_exit(c->ctx);
return 1;
}
} else if (PyGpuArray_NDIM(input) == 5) {
if ((PyGpuArray_DIMS(output)[0] != expected_output_dims[0]) ||
(PyGpuArray_DIMS(output)[1] != expected_output_dims[1]) ||
(PyGpuArray_DIMS(output)[2] != expected_output_dims[2]) ||
(PyGpuArray_DIMS(output)[3] != expected_output_dims[3]) ||
(PyGpuArray_DIMS(output)[4] != expected_output_dims[4])) {
PyErr_Format(PyExc_ValueError, "impossible convolution output dim: expected %ldx%ldx%ldx%ldx%ld"
" but received gradient with shape %ldx%ldx%ldx%ldx%ld",
expected_output_dims[0], expected_output_dims[1],
expected_output_dims[2], expected_output_dims[3],
expected_output_dims[4],
PyGpuArray_DIMS(output)[0], PyGpuArray_DIMS(output)[1],
PyGpuArray_DIMS(output)[2], PyGpuArray_DIMS(output)[3],
PyGpuArray_DIMS(output)[4]);
cuda_exit(c->ctx);
return 1;
}
}
#ifdef CHOOSE_ALGO #ifdef CHOOSE_ALGO
#ifndef CHOOSE_ONCE #ifndef CHOOSE_ONCE
reuse_algo = 1; reuse_algo = 1;
......
from __future__ import absolute_import, print_function, division from __future__ import absolute_import, print_function, division
from nose.plugins.skip import SkipTest from nose.plugins.skip import SkipTest
from nose.tools import assert_raises
import numpy import numpy
...@@ -49,6 +50,31 @@ class TestDnnConv2d(test_abstract_conv.BaseTestConv2d): ...@@ -49,6 +50,31 @@ class TestDnnConv2d(test_abstract_conv.BaseTestConv2d):
provide_shape=provide_shape, border_mode=b, provide_shape=provide_shape, border_mode=b,
filter_flip=flip, target_op=GpuDnnConvGradI) filter_flip=flip, target_op=GpuDnnConvGradI)
def tcase_gi(self, i, f, o, s, b, flip, provide_shape, fd=(1, 1), expect_error=False):
if not dnn_available(test_ctx_name):
raise SkipTest(dnn_available.msg)
if fd != (1, 1):
raise SkipTest("Doesn't have CUDNN implementation")
mode = mode_with_gpu
if not expect_error:
self.run_gradinput(inputs_shape=i, filters_shape=f,
output_shape=o, subsample=s,
verify_grad=True, mode=mode,
provide_shape=provide_shape, border_mode=b,
filter_flip=flip, target_op=GpuDnnConvGradI,
filter_dilation=fd)
else:
assert_raises((RuntimeError, ValueError),
self.run_gradinput,
inputs_shape=i, filters_shape=f,
output_shape=o, subsample=s,
verify_grad=False, mode=mode,
provide_shape=provide_shape, border_mode=b,
filter_flip=flip, target_op=GpuDnnConvGradI,
ref=None,
filter_dilation=fd)
class TestDnnConv3d(test_abstract_conv.BaseTestConv3d): class TestDnnConv3d(test_abstract_conv.BaseTestConv3d):
@classmethod @classmethod
...@@ -82,6 +108,31 @@ class TestDnnConv3d(test_abstract_conv.BaseTestConv3d): ...@@ -82,6 +108,31 @@ class TestDnnConv3d(test_abstract_conv.BaseTestConv3d):
provide_shape=provide_shape, border_mode=b, provide_shape=provide_shape, border_mode=b,
filter_flip=flip, target_op=GpuDnnConvGradI) filter_flip=flip, target_op=GpuDnnConvGradI)
def tcase_gi(self, i, f, o, s, b, flip, provide_shape, fd=(1, 1, 1), expect_error=False):
if not dnn_available(test_ctx_name):
raise SkipTest(dnn_available.msg)
if fd != (1, 1, 1):
raise SkipTest("Doesn't have CUDNN implementation")
mode = mode_with_gpu
if not expect_error:
self.run_gradinput(inputs_shape=i, filters_shape=f,
output_shape=o, subsample=s,
verify_grad=True, mode=mode,
provide_shape=provide_shape, border_mode=b,
filter_flip=flip, target_op=GpuDnnConvGradI,
filter_dilation=fd)
else:
assert_raises((RuntimeError, ValueError),
self.run_gradinput,
inputs_shape=i, filters_shape=f,
output_shape=o, subsample=s,
verify_grad=False, mode=mode,
provide_shape=provide_shape, border_mode=b,
filter_flip=flip, target_op=GpuDnnConvGradI,
ref=None,
filter_dilation=fd)
class TestCorrMMConv2d(test_abstract_conv.BaseTestConv2d): class TestCorrMMConv2d(test_abstract_conv.BaseTestConv2d):
@classmethod @classmethod
...@@ -115,6 +166,28 @@ class TestCorrMMConv2d(test_abstract_conv.BaseTestConv2d): ...@@ -115,6 +166,28 @@ class TestCorrMMConv2d(test_abstract_conv.BaseTestConv2d):
target_op=GpuCorrMM_gradInputs, target_op=GpuCorrMM_gradInputs,
filter_dilation=fd) filter_dilation=fd)
def tcase_gi(self, i, f, o, s, b, flip, provide_shape, fd=(1, 1), expect_error=False):
mode = self.mode
if not expect_error:
self.run_gradinput(inputs_shape=i, filters_shape=f,
output_shape=o, subsample=s,
verify_grad=True, mode=mode,
provide_shape=provide_shape, border_mode=b,
filter_flip=flip,
target_op=GpuCorrMM_gradInputs,
filter_dilation=fd)
else:
assert_raises(ValueError,
self.run_gradinput,
inputs_shape=i, filters_shape=f,
output_shape=o, subsample=s,
verify_grad=False, mode=mode,
provide_shape=provide_shape, border_mode=b,
filter_flip=flip,
target_op=GpuCorrMM_gradInputs,
ref=None,
filter_dilation=fd)
class TestCorrMMConv3d(test_abstract_conv.BaseTestConv3d): class TestCorrMMConv3d(test_abstract_conv.BaseTestConv3d):
@classmethod @classmethod
...@@ -148,6 +221,28 @@ class TestCorrMMConv3d(test_abstract_conv.BaseTestConv3d): ...@@ -148,6 +221,28 @@ class TestCorrMMConv3d(test_abstract_conv.BaseTestConv3d):
target_op=GpuCorr3dMM_gradInputs, target_op=GpuCorr3dMM_gradInputs,
filter_dilation=fd) filter_dilation=fd)
def tcase_gi(self, i, f, o, s, b, flip, provide_shape, fd=(1, 1, 1), expect_error=False):
mode = self.mode
if not expect_error:
self.run_gradinput(inputs_shape=i, filters_shape=f,
output_shape=o, subsample=s,
verify_grad=True, mode=mode,
provide_shape=provide_shape, border_mode=b,
filter_flip=flip,
target_op=GpuCorr3dMM_gradInputs,
filter_dilation=fd)
else:
assert_raises(ValueError,
self.run_gradinput,
inputs_shape=i, filters_shape=f,
output_shape=o, subsample=s,
verify_grad=False, mode=mode,
provide_shape=provide_shape, border_mode=b,
filter_flip=flip,
target_op=GpuCorr3dMM_gradInputs,
ref=None,
filter_dilation=fd)
class TestDnnConvTypes(test_abstract_conv.TestConvTypes): class TestDnnConvTypes(test_abstract_conv.TestConvTypes):
def setUp(self): def setUp(self):
......
...@@ -12,6 +12,7 @@ import theano.tensor as T ...@@ -12,6 +12,7 @@ import theano.tensor as T
import theano.tests.unittest_tools as utt import theano.tests.unittest_tools as utt
from theano.tensor.signal.pool import pool_2d, pool_3d from theano.tensor.signal.pool import pool_2d, pool_3d
from theano.tensor.signal.pool import Pool, MaxPoolGrad, AveragePoolGrad from theano.tensor.signal.pool import Pool, MaxPoolGrad, AveragePoolGrad
from theano.tensor.nnet.abstract_conv import get_conv_output_shape
from .. import dnn from .. import dnn
from ..basic_ops import GpuAllocEmpty from ..basic_ops import GpuAllocEmpty
...@@ -628,56 +629,50 @@ class TestDnnInferShapes(utt.InferShapeTester): ...@@ -628,56 +629,50 @@ class TestDnnInferShapes(utt.InferShapeTester):
[(1, 1, 1), (2, 2, 2)], [(1, 1, 1), (2, 2, 2)],
'none') 'none')
def _test_conv_gradw(self, img, kerns, out, img_val, kern_vals, border_mode, conv_mode, subsample): def _test_conv_gradw(self, img, topgrad, kerns, img_shape, kerns_shape, border_mode, conv_mode, subsample):
if not dnn.dnn_available(test_ctx_name): if not dnn.dnn_available(test_ctx_name):
raise SkipTest(dnn.dnn_available.msg) raise SkipTest(dnn.dnn_available.msg)
topgrad_shape = get_conv_output_shape(img_shape, kerns_shape,
border_mode, subsample)
img_val = numpy.asarray( img_val = numpy.asarray(
img_val, numpy.random.rand(*img_shape),
dtype=theano.config.floatX dtype=theano.config.floatX
) )
kern_vals = numpy.asarray( topgrad_vals = numpy.asarray(
kern_vals, numpy.random.rand(*topgrad_shape),
dtype=theano.config.floatX dtype=theano.config.floatX
) )
temp_img = img.dimshuffle(1, 0, 2, 3) kerns_vals = numpy.zeros(kerns_shape, dtype=theano.config.floatX)
temp_kerns = kerns kerns_shape = theano.shared(numpy.asarray(kerns_shape))
if conv_mode == 'conv':
temp_kerns = temp_kerns[:, :, ::-1, ::-1]
temp_kerns = temp_kerns.dimshuffle(1, 0, 2, 3)
shape = (
kern_vals.shape[1], img_val.shape[1],
img_val.shape[2] - kern_vals.shape[2] + 1,
img_val.shape[3] - kern_vals.shape[3] + 1
)
out_vals = numpy.zeros(shape, dtype=theano.config.floatX)
desc = dnn.GpuDnnConvDesc( desc = dnn.GpuDnnConvDesc(
border_mode=border_mode, border_mode=border_mode,
subsample=subsample, subsample=subsample,
conv_mode=conv_mode, conv_mode=conv_mode,
precision=set_precision(theano.config.floatX) precision=set_precision(theano.config.floatX)
)(out.shape) )(kerns_shape)
conv_grad_w = dnn.GpuDnnConvGradW()( conv_grad_w = dnn.GpuDnnConvGradW()(
temp_img, img,
temp_kerns, topgrad,
out, kerns,
desc, desc,
) )
self._compile_and_check( self._compile_and_check(
[temp_img, temp_kerns, out], [img, topgrad, kerns],
[conv_grad_w], [conv_grad_w],
[img_val, kern_vals, out_vals], [img_val, topgrad_vals, kerns_vals],
dnn.GpuDnnConvGradW dnn.GpuDnnConvGradW
) )
@parameterized.expand(product(border_modes, conv_modes), utt.custom_name_func) @parameterized.expand(product(border_modes, conv_modes), utt.custom_name_func)
def test_conv_gradw(self, border_mode, conv_mode): def test_conv_gradw(self, border_mode, conv_mode):
self._test_conv_gradw(T.tensor4('img'), self._test_conv_gradw(T.tensor4('img'),
T.tensor4('topgrad'),
T.tensor4('kerns'), T.tensor4('kerns'),
T.tensor4('out'), (5, 2, 6, 13),
numpy.random.rand(2, 5, 6, 8), (1, 2, 3, 7),
numpy.random.rand(2, 1, 5, 6),
border_mode, border_mode,
conv_mode, conv_mode,
(1, 1)) (1, 1))
......
...@@ -922,7 +922,7 @@ class BaseGpuCorrMM(GpuOp): ...@@ -922,7 +922,7 @@ class BaseGpuCorrMM(GpuOp):
def c_code_cache_version(self): def c_code_cache_version(self):
# raise this whenever modifying any of the support_code_files # raise this whenever modifying any of the support_code_files
return (0, 26) return (0, 30)
def c_support_code_apply(self, node, nodename): def c_support_code_apply(self, node, nodename):
# REMEMBER TO RAISE c_code_cache_version when changing any of # REMEMBER TO RAISE c_code_cache_version when changing any of
...@@ -960,19 +960,19 @@ class BaseGpuCorrMM(GpuOp): ...@@ -960,19 +960,19 @@ class BaseGpuCorrMM(GpuOp):
sub sub
Dictionary of substitutions useable to help generating the C code. Dictionary of substitutions useable to help generating the C code.
height height
If self.subsample[0] != 1, a variable giving the height of the Required if self.subsample[0] != 1, a variable giving the height of
filters for direction="backprop weights" or the height of the input the filters for direction="backprop weights" or the height of the
images for direction="backprop inputs". input images for direction="backprop inputs".
If self.border_mode == 'half', a variable giving the height of the Required if self.border_mode == 'half', a variable giving the height
filters for direction="backprop weights". of the filters for direction="backprop weights".
Ignored otherwise. Not required otherwise, but if a value is given this will be checked.
width width
If self.subsample[1] != 1, a variable giving the width of the Required if self.subsample[1] != 1, a variable giving the width of
filters for direction="backprop weights" or the width of the the filters for direction="backprop weights" or the width of the
input images for direction="backprop inputs". input images for direction="backprop inputs".
If self.border_mode == 'half', a variable giving the width of the Required if self.border_mode == 'half', a variable giving the width
filters for direction="backprop weights". of the filters for direction="backprop weights".
Ignored otherwise. Not required otherwise, but if a value is given this will be checked.
""" """
dH, dW = self.subsample dH, dW = self.subsample
...@@ -1001,18 +1001,18 @@ class BaseGpuCorrMM(GpuOp): ...@@ -1001,18 +1001,18 @@ class BaseGpuCorrMM(GpuOp):
# When subsampling, we cannot unambiguously infer the height and width # When subsampling, we cannot unambiguously infer the height and width
# of bottom and weights from top, so we require them to be given. # of bottom and weights from top, so we require them to be given.
# Similarly, when pad="half", we cannot infer the weight size. # Similarly, when pad="half", we cannot infer the weight size.
if height:
height = '(*(npy_int*)(PyArray_DATA(%s)))' % height
else:
if ((direction != 0) and (dH != 1)) or ((direction == 1) and (padH == -1)): if ((direction != 0) and (dH != 1)) or ((direction == 1) and (padH == -1)):
if not height:
raise ValueError("height must be given for backprop with vertical sampling or pad='half'") raise ValueError("height must be given for backprop with vertical sampling or pad='half'")
height = '(*(npy_int*)(PyArray_DATA(%s)))' % height height = '-1'
if width:
width = '(*(npy_int*)(PyArray_DATA(%s)))' % width
else: else:
height = 'NULL'
if ((direction != 0) and (dW != 1)) or ((direction == 1) and (padW == -1)): if ((direction != 0) and (dW != 1)) or ((direction == 1) and (padW == -1)):
if not width:
raise ValueError("width must be given for backprop with horizontal sampling or pad='half'") raise ValueError("width must be given for backprop with horizontal sampling or pad='half'")
width = '(*(npy_int*)(PyArray_DATA(%s)))' % width width = '-1'
else:
width = 'NULL'
sub = sub.copy() sub = sub.copy()
sub.update(locals()) sub.update(locals())
...@@ -1035,15 +1035,15 @@ class BaseGpuCorrMM(GpuOp): ...@@ -1035,15 +1035,15 @@ class BaseGpuCorrMM(GpuOp):
// Obtain or infer kernel width and height // Obtain or infer kernel width and height
// (we need to know it early to be able to handle auto-padding) // (we need to know it early to be able to handle auto-padding)
int kH, kW; int kH, kW, dil_kH, dil_kW;
if (direction != 1) { if (direction != 1) {
// weight is an input variable, we can just read its shape // weight is an input variable, we can just read its shape
kH = CudaNdarray_HOST_DIMS(weights)[2]; kH = CudaNdarray_HOST_DIMS(weights)[2];
kW = CudaNdarray_HOST_DIMS(weights)[3]; kW = CudaNdarray_HOST_DIMS(weights)[3];
} }
else { else {
if ((dH != 1) || (padH == -1)) { if (%(height)s != -1) {
// vertical subsampling or half padding, kernel height is specified // kernel height is specified (perhaps vertical subsampling or half padding)
kH = %(height)s; kH = %(height)s;
} }
else if (padH == -2) { else if (padH == -2) {
...@@ -1054,7 +1054,7 @@ class BaseGpuCorrMM(GpuOp): ...@@ -1054,7 +1054,7 @@ class BaseGpuCorrMM(GpuOp):
// explicit padding, we can infer the kernel height // explicit padding, we can infer the kernel height
kH = (CudaNdarray_HOST_DIMS(bottom)[2] + 2*padH - (CudaNdarray_HOST_DIMS(top)[2] - 1)*dH - 1) / dilH + 1 ; kH = (CudaNdarray_HOST_DIMS(bottom)[2] + 2*padH - (CudaNdarray_HOST_DIMS(top)[2] - 1)*dH - 1) / dilH + 1 ;
} }
if ((dW != 1) || (padW == -1)) { if (%(width)s != -1) {
kW = %(width)s; kW = %(width)s;
} }
else if (padW == -2) { else if (padW == -2) {
...@@ -1066,8 +1066,8 @@ class BaseGpuCorrMM(GpuOp): ...@@ -1066,8 +1066,8 @@ class BaseGpuCorrMM(GpuOp):
} }
// Implicit dilated kernel size // Implicit dilated kernel size
int dil_kH = (kH - 1) * dilH + 1; dil_kH = (kH - 1) * dilH + 1;
int dil_kW = (kW - 1) * dilW + 1; dil_kW = (kW - 1) * dilW + 1;
// Auto-padding if requested // Auto-padding if requested
if (padH == -1) { // vertical half padding if (padH == -1) { // vertical half padding
...@@ -1101,6 +1101,20 @@ class BaseGpuCorrMM(GpuOp): ...@@ -1101,6 +1101,20 @@ class BaseGpuCorrMM(GpuOp):
out_dim[1] = CudaNdarray_HOST_DIMS(weights)[0]; out_dim[1] = CudaNdarray_HOST_DIMS(weights)[0];
out_dim[2] = (CudaNdarray_HOST_DIMS(bottom)[2] + 2*padH - ((CudaNdarray_HOST_DIMS(weights)[2]-1)*dilH + 1)) / dH + 1; out_dim[2] = (CudaNdarray_HOST_DIMS(bottom)[2] + 2*padH - ((CudaNdarray_HOST_DIMS(weights)[2]-1)*dilH + 1)) / dH + 1;
out_dim[3] = (CudaNdarray_HOST_DIMS(bottom)[3] + 2*padW - ((CudaNdarray_HOST_DIMS(weights)[3]-1)*dilW + 1)) / dW + 1; out_dim[3] = (CudaNdarray_HOST_DIMS(bottom)[3] + 2*padW - ((CudaNdarray_HOST_DIMS(weights)[3]-1)*dilW + 1)) / dW + 1;
if (out_dim[0] < 0 || out_dim[1] < 0 || out_dim[2] <= 0 || out_dim[3] <= 0)
{
PyErr_Format(PyExc_ValueError,
"GpuCorrMM: impossible output shape\\n"
" bottom shape: %%ld x %%ld x %%ld x %%ld\\n"
" weights shape: %%ld x %%ld x %%ld x %%ld\\n"
" top shape: %%ld x %%ld x %%ld x %%ld\\n",
CudaNdarray_HOST_DIMS(bottom)[0], CudaNdarray_HOST_DIMS(bottom)[1],
CudaNdarray_HOST_DIMS(bottom)[2], CudaNdarray_HOST_DIMS(bottom)[3],
CudaNdarray_HOST_DIMS(weights)[0], CudaNdarray_HOST_DIMS(weights)[1],
CudaNdarray_HOST_DIMS(weights)[2], CudaNdarray_HOST_DIMS(weights)[3],
out_dim[0], out_dim[1], out_dim[2], out_dim[3]);
%(fail)s
}
break; break;
case 1: // backprop wrt. weights case 1: // backprop wrt. weights
// output is weights: (num_filters, num_channels, height, width) // output is weights: (num_filters, num_channels, height, width)
...@@ -1109,14 +1123,42 @@ class BaseGpuCorrMM(GpuOp): ...@@ -1109,14 +1123,42 @@ class BaseGpuCorrMM(GpuOp):
out_dim[1] = CudaNdarray_HOST_DIMS(bottom)[1]; out_dim[1] = CudaNdarray_HOST_DIMS(bottom)[1];
out_dim[2] = kH; // already inferred further above out_dim[2] = kH; // already inferred further above
out_dim[3] = kW; // how convenient out_dim[3] = kW; // how convenient
if (out_dim[0] < 0 || out_dim[1] < 0 || out_dim[2] <= 0 || out_dim[3] <= 0)
{
PyErr_Format(PyExc_ValueError,
"GpuCorrMM backprop wrt. weights: impossible output shape\\n"
" bottom shape: %%ld x %%ld x %%ld x %%ld\\n"
" weights shape: %%ld x %%ld x %%ld x %%ld\\n"
" top shape: %%ld x %%ld x %%ld x %%ld\\n",
CudaNdarray_HOST_DIMS(bottom)[0], CudaNdarray_HOST_DIMS(bottom)[1],
CudaNdarray_HOST_DIMS(bottom)[2], CudaNdarray_HOST_DIMS(bottom)[3],
out_dim[0], out_dim[1], out_dim[2], out_dim[3],
CudaNdarray_HOST_DIMS(top)[0], CudaNdarray_HOST_DIMS(top)[1],
CudaNdarray_HOST_DIMS(top)[2], CudaNdarray_HOST_DIMS(top)[3]);
%(fail)s
}
break; break;
case 2: // backprop wrt. inputs case 2: // backprop wrt. inputs
// output is bottom: (batchsize, num_channels, height, width) // output is bottom: (batchsize, num_channels, height, width)
// height and width: bottom = (top - 1) * sample + (weights-1)*dil + 1 - 2*pad // height and width: bottom = (top - 1) * sample + (weights-1)*dil + 1 - 2*pad
out_dim[0] = CudaNdarray_HOST_DIMS(top)[0]; out_dim[0] = CudaNdarray_HOST_DIMS(top)[0];
out_dim[1] = CudaNdarray_HOST_DIMS(weights)[1]; out_dim[1] = CudaNdarray_HOST_DIMS(weights)[1];
out_dim[2] = (dH != 1) ? %(height)s : (CudaNdarray_HOST_DIMS(top)[2] - 1) * dH + (CudaNdarray_HOST_DIMS(weights)[2]-1)*dilH + 1 - 2*padH; out_dim[2] = (%(height)s != -1) ? %(height)s : (CudaNdarray_HOST_DIMS(top)[2] - 1) * dH + (CudaNdarray_HOST_DIMS(weights)[2]-1)*dilH + 1 - 2*padH;
out_dim[3] = (dW != 1) ? %(width)s : (CudaNdarray_HOST_DIMS(top)[3] - 1) * dW + (CudaNdarray_HOST_DIMS(weights)[3]-1)*dilW + 1 - 2*padW; out_dim[3] = (%(width)s != -1) ? %(width)s : (CudaNdarray_HOST_DIMS(top)[3] - 1) * dW + (CudaNdarray_HOST_DIMS(weights)[3]-1)*dilW + 1 - 2*padW;
if (out_dim[0] < 0 || out_dim[1] < 0 || out_dim[2] <= 0 || out_dim[3] <= 0)
{
PyErr_Format(PyExc_ValueError,
"GpuCorrMM backprop wrt. inputs: impossible output shape\\n"
" bottom shape: %%ld x %%ld x %%ld x %%ld\\n"
" weight shape: %%ld x %%ld x %%ld x %%ld\\n"
" top shape: %%ld x %%ld x %%ld x %%ld\\n",
out_dim[0], out_dim[1], out_dim[2], out_dim[3],
CudaNdarray_HOST_DIMS(weights)[0], CudaNdarray_HOST_DIMS(weights)[1],
CudaNdarray_HOST_DIMS(weights)[2], CudaNdarray_HOST_DIMS(weights)[3],
CudaNdarray_HOST_DIMS(top)[0], CudaNdarray_HOST_DIMS(top)[1],
CudaNdarray_HOST_DIMS(top)[2], CudaNdarray_HOST_DIMS(top)[3]);
%(fail)s
}
break; break;
default: default:
PyErr_SetString(PyExc_ValueError, "BaseGpuCorrMM: direction must be 0, 1, or 2\\n"); PyErr_SetString(PyExc_ValueError, "BaseGpuCorrMM: direction must be 0, 1, or 2\\n");
...@@ -1274,15 +1316,15 @@ class GpuCorrMM_gradWeights(BaseGpuCorrMM): ...@@ -1274,15 +1316,15 @@ class GpuCorrMM_gradWeights(BaseGpuCorrMM):
raise TypeError('img must be 4D tensor') raise TypeError('img must be 4D tensor')
if topgrad.type.ndim != 4: if topgrad.type.ndim != 4:
raise TypeError('topgrad must be 4D tensor') raise TypeError('topgrad must be 4D tensor')
if self.subsample != (1, 1) or self.border_mode == "half":
if shape is None: if shape is None:
if self.subsample != (1, 1) or self.border_mode == "half":
raise ValueError('shape must be given if subsample != (1, 1)' raise ValueError('shape must be given if subsample != (1, 1)'
' or border_mode == "half"') ' or border_mode == "half"')
height_width = []
else:
height_width = [shape[0], shape[1]] height_width = [shape[0], shape[1]]
assert shape[0].ndim == 0 assert shape[0].ndim == 0
assert shape[1].ndim == 0 assert shape[1].ndim == 0
else:
height_width = []
broadcastable = [topgrad.type.broadcastable[1], img.type.broadcastable[1], broadcastable = [topgrad.type.broadcastable[1], img.type.broadcastable[1],
False, False] False, False]
...@@ -1343,10 +1385,12 @@ class GpuCorrMM_gradInputs(BaseGpuCorrMM): ...@@ -1343,10 +1385,12 @@ class GpuCorrMM_gradInputs(BaseGpuCorrMM):
raise TypeError('kern must be 4D tensor') raise TypeError('kern must be 4D tensor')
if topgrad.type.ndim != 4: if topgrad.type.ndim != 4:
raise TypeError('topgrad must be 4D tensor') raise TypeError('topgrad must be 4D tensor')
if self.subsample != (1, 1) and shape is None: if shape is None:
if self.subsample != (1, 1):
raise ValueError('shape must be given if subsample != (1, 1)') raise ValueError('shape must be given if subsample != (1, 1)')
height_width = [shape[0], shape[1]] if self.subsample != (1, 1) else [] height_width = []
if height_width: else:
height_width = [shape[0], shape[1]]
assert shape[0].ndim == 0 assert shape[0].ndim == 0
assert shape[1].ndim == 0 assert shape[1].ndim == 0
...@@ -1469,7 +1513,7 @@ class BaseGpuCorr3dMM(GpuOp): ...@@ -1469,7 +1513,7 @@ class BaseGpuCorr3dMM(GpuOp):
def c_code_cache_version(self): def c_code_cache_version(self):
# raise this whenever modifying any of the support_code_files # raise this whenever modifying any of the support_code_files
return (0, 25) return (0, 29)
def c_support_code_apply(self, node, nodename): def c_support_code_apply(self, node, nodename):
# REMEMBER TO RAISE c_code_cache_version when changing any of # REMEMBER TO RAISE c_code_cache_version when changing any of
...@@ -1510,26 +1554,26 @@ class BaseGpuCorr3dMM(GpuOp): ...@@ -1510,26 +1554,26 @@ class BaseGpuCorr3dMM(GpuOp):
sub sub
Dictionary of substitutions useable to help generating the C code. Dictionary of substitutions useable to help generating the C code.
height height
If self.subsample[0] != 1, a variable giving the height Required if self.subsample[0] != 1, a variable giving the height of
of the filters for direction="backprop weights" or the height of the the filters for direction="backprop weights" or the height of the
input images for direction="backprop inputs". input images for direction="backprop inputs".
If self.pad == 'half', a variable giving the height of the filters Required if self.border_mode == 'half', a variable giving the height
for direction="backprop weights". of the filters for direction="backprop weights".
Ignored otherwise. Not required otherwise, but if a value is given this will be checked.
width width
If self.subsample[1] != 1, a variable giving the width Required if self.subsample[1] != 1, a variable giving the width of
of the filters for direction="backprop weights" or the width of the the filters for direction="backprop weights" or the width of the
input images for direction="backprop inputs". input images for direction="backprop inputs".
If self.pad == 'half', a variable giving the width of the filters Required if self.border_mode == 'half', a variable giving the width
for direction="backprop weights". of the filters for direction="backprop weights".
Ignored otherwise. Not required otherwise, but if a value is given this will be checked.
depth depth
If self.subsample[2] != 1, a variable giving the depth Required if self.subsample[2] != 1, a variable giving the depth of
of the filters for direction="backprop weights" or the depth of the the filters for direction="backprop weights" or the depth of the
input images for direction="backprop inputs". input images for direction="backprop inputs".
If self.pad == 'half', a variable giving the depth of the filters Required if self.border_mode == 'half', a variable giving the depth
for direction="backprop weights". of the filters for direction="backprop weights".
Ignored otherwise. Not required otherwise, but if a value is given this will be checked.
""" """
dH, dW, dD = self.subsample dH, dW, dD = self.subsample
...@@ -1558,24 +1602,24 @@ class BaseGpuCorr3dMM(GpuOp): ...@@ -1558,24 +1602,24 @@ class BaseGpuCorr3dMM(GpuOp):
# When subsampling, we cannot unambiguously infer the height and width # When subsampling, we cannot unambiguously infer the height and width
# of bottom and weights from top, so we require them to be given. # of bottom and weights from top, so we require them to be given.
# Similarly, when pad="half", we cannot infer the weight size. # Similarly, when pad="half", we cannot infer the weight size.
if height:
height = '(*(npy_int*)(PyArray_DATA(%s)))' % height
else:
if ((direction != 0) and (dH != 1)) or ((direction == 1) and (padH == -1)): if ((direction != 0) and (dH != 1)) or ((direction == 1) and (padH == -1)):
if not height:
raise ValueError("height must be given for backprop with vertical sampling or pad='half'") raise ValueError("height must be given for backprop with vertical sampling or pad='half'")
height = '(*(npy_int*)(PyArray_DATA(%s)))' % height height = '-1'
if width:
width = '(*(npy_int*)(PyArray_DATA(%s)))' % width
else: else:
height = 'NULL'
if ((direction != 0) and (dW != 1)) or ((direction == 1) and (padW == -1)): if ((direction != 0) and (dW != 1)) or ((direction == 1) and (padW == -1)):
if not width:
raise ValueError("width must be given for backprop with horizontal sampling or pad='half'") raise ValueError("width must be given for backprop with horizontal sampling or pad='half'")
width = '(*(npy_int*)(PyArray_DATA(%s)))' % width width = '-1'
if depth:
depth = '(*(npy_int*)(PyArray_DATA(%s)))' % depth
else: else:
width = 'NULL'
if ((direction != 0) and (dD != 1)) or ((direction == 1) and (padD == -1)): if ((direction != 0) and (dD != 1)) or ((direction == 1) and (padD == -1)):
if not depth:
raise ValueError("depth must be given for backprop with horizontal sampling or pad='half'") raise ValueError("depth must be given for backprop with horizontal sampling or pad='half'")
depth = '(*(npy_int*)(PyArray_DATA(%s)))' % depth depth = '-1'
else:
depth = 'NULL'
sub = sub.copy() sub = sub.copy()
sub.update(locals()) sub.update(locals())
...@@ -1601,7 +1645,7 @@ class BaseGpuCorr3dMM(GpuOp): ...@@ -1601,7 +1645,7 @@ class BaseGpuCorr3dMM(GpuOp):
// Obtain or infer kernel width and height // Obtain or infer kernel width and height
// (we need to know it early to be able to handle auto-padding) // (we need to know it early to be able to handle auto-padding)
int kH, kW, kD; int kH, kW, kD, dil_kH, dil_kW, dil_kD;
if (direction != 1) if (direction != 1)
{ {
// weight is an input variable, we can just read its shape // weight is an input variable, we can just read its shape
...@@ -1611,9 +1655,9 @@ class BaseGpuCorr3dMM(GpuOp): ...@@ -1611,9 +1655,9 @@ class BaseGpuCorr3dMM(GpuOp):
} }
else else
{ {
if ((dH != 1) || (padH == -1)) if (%(height)s != -1)
{ {
// vertical subsampling or half padding, kernel height is specified // kernel height is specified (perhaps vertical subsampling or half padding)
kH = %(height)s; kH = %(height)s;
} }
else if (padH == -2) else if (padH == -2)
...@@ -1626,7 +1670,7 @@ class BaseGpuCorr3dMM(GpuOp): ...@@ -1626,7 +1670,7 @@ class BaseGpuCorr3dMM(GpuOp):
// explicit padding, we can infer the kernel height // explicit padding, we can infer the kernel height
kH = (CudaNdarray_HOST_DIMS(bottom)[2] + 2*padH - (CudaNdarray_HOST_DIMS(top)[2] - 1)*dH - 1) / dilH + 1 ; kH = (CudaNdarray_HOST_DIMS(bottom)[2] + 2*padH - (CudaNdarray_HOST_DIMS(top)[2] - 1)*dH - 1) / dilH + 1 ;
} }
if ((dW != 1) || (padW == -1)) if (%(width)s != -1)
{ {
kW = %(width)s; kW = %(width)s;
} }
...@@ -1638,7 +1682,7 @@ class BaseGpuCorr3dMM(GpuOp): ...@@ -1638,7 +1682,7 @@ class BaseGpuCorr3dMM(GpuOp):
{ {
kW = (CudaNdarray_HOST_DIMS(bottom)[3] + 2*padW - (CudaNdarray_HOST_DIMS(top)[3] - 1) * dW - 1) / dilW + 1; kW = (CudaNdarray_HOST_DIMS(bottom)[3] + 2*padW - (CudaNdarray_HOST_DIMS(top)[3] - 1) * dW - 1) / dilW + 1;
} }
if ((dD != 1) || (padD == -1)) if (%(depth)s != -1)
{ {
kD = %(depth)s; kD = %(depth)s;
} }
...@@ -1653,9 +1697,9 @@ class BaseGpuCorr3dMM(GpuOp): ...@@ -1653,9 +1697,9 @@ class BaseGpuCorr3dMM(GpuOp):
} }
// Implicit dilated kernel size // Implicit dilated kernel size
int dil_kH = (kH - 1) * dilH + 1; dil_kH = (kH - 1) * dilH + 1;
int dil_kW = (kW - 1) * dilW + 1; dil_kW = (kW - 1) * dilW + 1;
int dil_kD = (kD - 1) * dilD + 1; dil_kD = (kD - 1) * dilD + 1;
// Auto-padding if requested // Auto-padding if requested
if (padH == -1) if (padH == -1)
...@@ -1707,6 +1751,22 @@ class BaseGpuCorr3dMM(GpuOp): ...@@ -1707,6 +1751,22 @@ class BaseGpuCorr3dMM(GpuOp):
out_dim[2] = (CudaNdarray_HOST_DIMS(bottom)[2] + 2*padH - ((CudaNdarray_HOST_DIMS(weights)[2]-1)*dilH + 1)) / dH + 1; out_dim[2] = (CudaNdarray_HOST_DIMS(bottom)[2] + 2*padH - ((CudaNdarray_HOST_DIMS(weights)[2]-1)*dilH + 1)) / dH + 1;
out_dim[3] = (CudaNdarray_HOST_DIMS(bottom)[3] + 2*padW - ((CudaNdarray_HOST_DIMS(weights)[3]-1)*dilW + 1)) / dW + 1; out_dim[3] = (CudaNdarray_HOST_DIMS(bottom)[3] + 2*padW - ((CudaNdarray_HOST_DIMS(weights)[3]-1)*dilW + 1)) / dW + 1;
out_dim[4] = (CudaNdarray_HOST_DIMS(bottom)[4] + 2*padD - ((CudaNdarray_HOST_DIMS(weights)[4]-1)*dilD + 1)) / dD + 1; out_dim[4] = (CudaNdarray_HOST_DIMS(bottom)[4] + 2*padD - ((CudaNdarray_HOST_DIMS(weights)[4]-1)*dilD + 1)) / dD + 1;
if (out_dim[0] < 0 || out_dim[1] < 0 || out_dim[2] <= 0 || out_dim[3] <= 0 || out_dim[4] <= 0)
{
PyErr_Format(PyExc_ValueError,
"GpuCorr3dMM: impossible output shape\\n"
" bottom shape: %%ld x %%ld x %%ld x %%ld x %%ld\\n"
" weights shape: %%ld x %%ld x %%ld x %%ld x %%ld\\n"
" top shape: %%ld x %%ld x %%ld x %%ld x %%ld\\n",
CudaNdarray_HOST_DIMS(bottom)[0], CudaNdarray_HOST_DIMS(bottom)[1],
CudaNdarray_HOST_DIMS(bottom)[2], CudaNdarray_HOST_DIMS(bottom)[3],
CudaNdarray_HOST_DIMS(bottom)[4],
CudaNdarray_HOST_DIMS(weights)[0], CudaNdarray_HOST_DIMS(weights)[1],
CudaNdarray_HOST_DIMS(weights)[2], CudaNdarray_HOST_DIMS(weights)[3],
CudaNdarray_HOST_DIMS(weights)[4],
out_dim[0], out_dim[1], out_dim[2], out_dim[3], out_dim[4]);
%(fail)s
}
break; break;
case 1: // backprop wrt. weights case 1: // backprop wrt. weights
// output is weights: (num_filters, num_channels, height, width, depth) // output is weights: (num_filters, num_channels, height, width, depth)
...@@ -1716,23 +1776,53 @@ class BaseGpuCorr3dMM(GpuOp): ...@@ -1716,23 +1776,53 @@ class BaseGpuCorr3dMM(GpuOp):
out_dim[2] = kH; // already inferred further above out_dim[2] = kH; // already inferred further above
out_dim[3] = kW; // how convenient out_dim[3] = kW; // how convenient
out_dim[4] = kD; out_dim[4] = kD;
if (out_dim[0] < 0 || out_dim[1] < 0 || out_dim[2] <= 0 || out_dim[3] <= 0 || out_dim[4] <= 0)
{
PyErr_Format(PyExc_ValueError,
"GpuCorr3dMM backprop wrt. weights: impossible output shape\\n"
" bottom shape: %%ld x %%ld x %%ld x %%ld x %%ld\\n"
" weights shape: %%ld x %%ld x %%ld x %%ld x %%ld\\n"
" top shape: %%ld x %%ld x %%ld x %%ld x %%ld\\n",
CudaNdarray_HOST_DIMS(bottom)[0], CudaNdarray_HOST_DIMS(bottom)[1],
CudaNdarray_HOST_DIMS(bottom)[2], CudaNdarray_HOST_DIMS(bottom)[3],
CudaNdarray_HOST_DIMS(bottom)[4],
out_dim[0], out_dim[1], out_dim[2], out_dim[3], out_dim[4],
CudaNdarray_HOST_DIMS(top)[0], CudaNdarray_HOST_DIMS(top)[1],
CudaNdarray_HOST_DIMS(top)[2], CudaNdarray_HOST_DIMS(top)[3],
CudaNdarray_HOST_DIMS(top)[4]);
%(fail)s
}
break; break;
case 2: // backprop wrt. inputs case 2: // backprop wrt. inputs
// output is bottom: (batchsize, num_channels, height, width, depth) // output is bottom: (batchsize, num_channels, height, width, depth)
// height, width and depth: bottom = (top - 1) * sample + (weights-1)*dil + 1 - 2*pad // height, width and depth: bottom = (top - 1) * sample + (weights-1)*dil + 1 - 2*pad
out_dim[0] = CudaNdarray_HOST_DIMS(top)[0]; out_dim[0] = CudaNdarray_HOST_DIMS(top)[0];
out_dim[1] = CudaNdarray_HOST_DIMS(weights)[1]; out_dim[1] = CudaNdarray_HOST_DIMS(weights)[1];
out_dim[2] = (dH != 1) ? %(height)s : (CudaNdarray_HOST_DIMS(top)[2] - 1) * dH + (CudaNdarray_HOST_DIMS(weights)[2]-1)*dilH + 1 - 2*padH; out_dim[2] = (%(height)s != -1) ? %(height)s : (CudaNdarray_HOST_DIMS(top)[2] - 1) * dH + (CudaNdarray_HOST_DIMS(weights)[2]-1)*dilH + 1 - 2*padH;
out_dim[3] = (dW != 1) ? %(width)s : (CudaNdarray_HOST_DIMS(top)[3] - 1) * dW + (CudaNdarray_HOST_DIMS(weights)[3]-1)*dilW + 1 - 2*padW; out_dim[3] = (%(width)s != -1) ? %(width)s : (CudaNdarray_HOST_DIMS(top)[3] - 1) * dW + (CudaNdarray_HOST_DIMS(weights)[3]-1)*dilW + 1 - 2*padW;
out_dim[4] = (dD != 1) ? %(depth)s : (CudaNdarray_HOST_DIMS(top)[4] - 1) * dD + (CudaNdarray_HOST_DIMS(weights)[4]-1)*dilD + 1 - 2*padD; out_dim[4] = (%(depth)s != -1) ? %(depth)s : (CudaNdarray_HOST_DIMS(top)[4] - 1) * dD + (CudaNdarray_HOST_DIMS(weights)[4]-1)*dilD + 1 - 2*padD;
if (out_dim[0] < 0 || out_dim[1] < 0 || out_dim[2] <= 0 || out_dim[3] <= 0 || out_dim[4] <= 0)
{
PyErr_Format(PyExc_ValueError,
"GpuCorr3dMM backprop wrt. inputs: impossible output shape\\n"
" bottom shape: %%ld x %%ld x %%ld x %%ld x %%ld\\n"
" weights shape: %%ld x %%ld x %%ld x %%ld x %%ld\\n"
" top shape: %%ld x %%ld x %%ld x %%ld x %%ld\\n",
out_dim[0], out_dim[1], out_dim[2], out_dim[3], out_dim[4],
CudaNdarray_HOST_DIMS(weights)[0], CudaNdarray_HOST_DIMS(weights)[1],
CudaNdarray_HOST_DIMS(weights)[2], CudaNdarray_HOST_DIMS(weights)[3],
CudaNdarray_HOST_DIMS(weights)[4],
CudaNdarray_HOST_DIMS(top)[0], CudaNdarray_HOST_DIMS(top)[1],
CudaNdarray_HOST_DIMS(top)[2], CudaNdarray_HOST_DIMS(top)[3],
CudaNdarray_HOST_DIMS(top)[4]);
%(fail)s
}
break; break;
default: default:
PyErr_SetString(PyExc_ValueError, "BaseGpuCorr3dMM: direction must be 0, 1, or 2\\n"); PyErr_SetString(PyExc_ValueError, "BaseGpuCorr3dMM: direction must be 0, 1, or 2\\n");
%(fail)s %(fail)s
} }
// Prepare output array // Prepare output array
if (!(%(out)s if (!(%(out)s
&& %(out)s->nd == 5 && %(out)s->nd == 5
...@@ -1876,16 +1966,16 @@ class GpuCorr3dMM_gradWeights(BaseGpuCorr3dMM): ...@@ -1876,16 +1966,16 @@ class GpuCorr3dMM_gradWeights(BaseGpuCorr3dMM):
raise TypeError('img must be 5D tensor') raise TypeError('img must be 5D tensor')
if topgrad.type.ndim != 5: if topgrad.type.ndim != 5:
raise TypeError('topgrad must be 5D tensor') raise TypeError('topgrad must be 5D tensor')
if self.subsample != (1, 1, 1) or self.border_mode == "half":
if shape is None: if shape is None:
if self.subsample != (1, 1, 1) or self.border_mode == "half":
raise ValueError('shape must be given if subsample != (1, 1, 1)' raise ValueError('shape must be given if subsample != (1, 1, 1)'
' or border_mode == "half"') ' or border_mode == "half"')
height_width_depth = []
else:
height_width_depth = [shape[0], shape[1], shape[2]] height_width_depth = [shape[0], shape[1], shape[2]]
assert shape[0].ndim == 0 assert shape[0].ndim == 0
assert shape[1].ndim == 0 assert shape[1].ndim == 0
assert shape[2].ndim == 0 assert shape[2].ndim == 0
else:
height_width_depth = []
broadcastable = [topgrad.type.broadcastable[1], img.type.broadcastable[1], broadcastable = [topgrad.type.broadcastable[1], img.type.broadcastable[1],
False, False, False] False, False, False]
...@@ -1943,10 +2033,12 @@ class GpuCorr3dMM_gradInputs(BaseGpuCorr3dMM): ...@@ -1943,10 +2033,12 @@ class GpuCorr3dMM_gradInputs(BaseGpuCorr3dMM):
raise TypeError('kern must be 5D tensor') raise TypeError('kern must be 5D tensor')
if topgrad.type.ndim != 5: if topgrad.type.ndim != 5:
raise TypeError('topgrad must be 5D tensor') raise TypeError('topgrad must be 5D tensor')
if self.subsample != (1, 1, 1) and shape is None: if shape is None:
if self.subsample != (1, 1, 1):
raise ValueError('shape must be given if subsample != (1, 1, 1)') raise ValueError('shape must be given if subsample != (1, 1, 1)')
height_width_depth = [shape[0], shape[1], shape[2]] if self.subsample != (1, 1, 1) else [] height_width_depth = []
if height_width_depth: else:
height_width_depth = [shape[0], shape[1], shape[2]]
assert shape[0].ndim == 0 assert shape[0].ndim == 0
assert shape[1].ndim == 0 assert shape[1].ndim == 0
assert shape[2].ndim == 0 assert shape[2].ndim == 0
......
...@@ -429,9 +429,17 @@ CudaNdarray* corr3dMM(CudaNdarray *const bottom, ...@@ -429,9 +429,17 @@ CudaNdarray* corr3dMM(CudaNdarray *const bottom,
const int dil_kW = (kW - 1) * dilW + 1; const int dil_kW = (kW - 1) * dilW + 1;
const int dil_kD = (kD - 1) * dilD + 1; const int dil_kD = (kD - 1) * dilD + 1;
// top: (batchSize, nFilters, topHeight, topWidth, topDepth) // top: (batchSize, nFilters, topHeight, topWidth, topDepth)
const int topHeight = int((bottomHeight + 2*padH - dil_kH) / dH) + 1; const int topHeightNoDH = (bottomHeight + 2*padH - dil_kH);
const int topWidth = int((bottomWidth + 2*padW - dil_kW) / dW) + 1; const int topWidthNoDW = (bottomWidth + 2*padW - dil_kW);
const int topDepth = int((bottomDepth + 2*padD - dil_kD) / dD) + 1; const int topDepthNoDD = (bottomDepth + 2*padD - dil_kD);
// the above values might be negative so we need to use Python-like
// flooring integer division to be compatible with get_conv_output.
// note: this macro implements Python's // for negative x only
#define _CONV_FLOORDIV_X(x,y) ((x < 0) ? (- ((-x) / y) - (((-x) % y) == 0 ? 0 : 1)) : (x / y))
const int topHeight = _CONV_FLOORDIV_X(topHeightNoDH, dH) + 1;
const int topWidth = _CONV_FLOORDIV_X(topWidthNoDW, dW) + 1;
const int topDepth = _CONV_FLOORDIV_X(topDepthNoDD, dD) + 1;
#undef _CONV_FLOORDIV
if (batchSize != CudaNdarray_HOST_DIMS(top)[0] || if (batchSize != CudaNdarray_HOST_DIMS(top)[0] ||
nFilters != CudaNdarray_HOST_DIMS(top)[1] || nFilters != CudaNdarray_HOST_DIMS(top)[1] ||
topHeight != CudaNdarray_HOST_DIMS(top)[2] || topHeight != CudaNdarray_HOST_DIMS(top)[2] ||
...@@ -478,6 +486,19 @@ CudaNdarray* corr3dMM(CudaNdarray *const bottom, ...@@ -478,6 +486,19 @@ CudaNdarray* corr3dMM(CudaNdarray *const bottom,
if (direction == 0) if (direction == 0)
{ // forward pass { // forward pass
output = top; output = top;
if (batchSize == 0 || nChannels == 0 || nFilters == 0) {
cudaError_t err = cudaMemset(output->devdata, 0,
CudaNdarray_SIZE(output) * sizeof(real));
if (err != cudaSuccess) {
PyErr_Format(PyExc_RuntimeError,
"GpuCorr3dMM could not fill the output with zeros: %s",
cudaGetErrorString(err));
Py_DECREF(col);
return NULL;
}
Py_DECREF(col);
return output;
}
// valid correlation: im2col, then gemm // valid correlation: im2col, then gemm
// Iterate over batch // Iterate over batch
for (int n = 0; n < batchSize; n++) for (int n = 0; n < batchSize; n++)
...@@ -527,6 +548,19 @@ CudaNdarray* corr3dMM(CudaNdarray *const bottom, ...@@ -527,6 +548,19 @@ CudaNdarray* corr3dMM(CudaNdarray *const bottom,
{ {
// backprop wrt. weights // backprop wrt. weights
output = weight; output = weight;
if (batchSize == 0 || nChannels == 0 || nFilters == 0) {
cudaError_t err = cudaMemset(output->devdata, 0,
CudaNdarray_SIZE(output) * sizeof(real));
if (err != cudaSuccess) {
PyErr_Format(PyExc_RuntimeError,
"GpuCorr3dMM grad wrt. weights could not fill the output with zeros: %s",
cudaGetErrorString(err));
Py_DECREF(col);
return NULL;
}
Py_DECREF(col);
return output;
}
// valid convolution: im2col, then gemm // valid convolution: im2col, then gemm
// Iterate over batch // Iterate over batch
for (int n = 0; n < batchSize; n++) for (int n = 0; n < batchSize; n++)
...@@ -578,6 +612,19 @@ CudaNdarray* corr3dMM(CudaNdarray *const bottom, ...@@ -578,6 +612,19 @@ CudaNdarray* corr3dMM(CudaNdarray *const bottom,
{ {
// backprop wrt. inputs // backprop wrt. inputs
output = bottom; output = bottom;
if (batchSize == 0 || nChannels == 0 || nFilters == 0) {
cudaError_t err = cudaMemset(output->devdata, 0,
CudaNdarray_SIZE(output) * sizeof(real));
if (err != cudaSuccess) {
PyErr_Format(PyExc_RuntimeError,
"GpuCorr3dMM grad wrt. inputs could not fill the output with zeros: %s",
cudaGetErrorString(err));
Py_DECREF(col);
return NULL;
}
Py_DECREF(col);
return output;
}
// full convolution: gemm, then col2im3d // full convolution: gemm, then col2im3d
// Iterate over batch // Iterate over batch
for (int n = 0; n < batchSize; n++) for (int n = 0; n < batchSize; n++)
......
...@@ -333,8 +333,15 @@ CudaNdarray* corrMM(CudaNdarray *const bottom, ...@@ -333,8 +333,15 @@ CudaNdarray* corrMM(CudaNdarray *const bottom,
const int dil_kH = (kH - 1) * dilH + 1; const int dil_kH = (kH - 1) * dilH + 1;
const int dil_kW = (kW - 1) * dilW + 1; const int dil_kW = (kW - 1) * dilW + 1;
// top: (batchSize, nFilters, topHeight, topWidth) // top: (batchSize, nFilters, topHeight, topWidth)
const int topHeight = (bottomHeight + 2*padH - dil_kH) / dH + 1; const int topHeightNoDH = (bottomHeight + 2*padH - dil_kH);
const int topWidth = (bottomWidth + 2*padW - dil_kW) / dW + 1; const int topWidthNoDW = (bottomWidth + 2*padW - dil_kW);
// the above values might be negative so we need to use Python-like
// flooring integer division to be compatible with get_conv_output.
// note: this macro implements Python's // for negative x only
#define _CONV_FLOORDIV_X(x,y) ((x < 0) ? (- ((-x) / y) - (((-x) % y) == 0 ? 0 : 1)) : (x / y))
const int topHeight = _CONV_FLOORDIV_X(topHeightNoDH, dH) + 1;
const int topWidth = _CONV_FLOORDIV_X(topWidthNoDW, dW) + 1;
#undef _CONV_FLOORDIV
if (batchSize != CudaNdarray_HOST_DIMS(top)[0] || if (batchSize != CudaNdarray_HOST_DIMS(top)[0] ||
nFilters != CudaNdarray_HOST_DIMS(top)[1] || nFilters != CudaNdarray_HOST_DIMS(top)[1] ||
topHeight != CudaNdarray_HOST_DIMS(top)[2] || topHeight != CudaNdarray_HOST_DIMS(top)[2] ||
...@@ -377,6 +384,19 @@ CudaNdarray* corrMM(CudaNdarray *const bottom, ...@@ -377,6 +384,19 @@ CudaNdarray* corrMM(CudaNdarray *const bottom,
CudaNdarray *output; CudaNdarray *output;
if (direction == 0) { // forward pass if (direction == 0) { // forward pass
output = top; output = top;
if (batchSize == 0 || nChannels == 0 || nFilters == 0) {
cudaError_t err = cudaMemset(output->devdata, 0,
CudaNdarray_SIZE(output) * sizeof(real));
if (err != cudaSuccess) {
PyErr_Format(PyExc_RuntimeError,
"GpuCorrMM could not fill the output with zeros: %s",
cudaGetErrorString(err));
Py_DECREF(col);
return NULL;
}
Py_DECREF(col);
return output;
}
// valid correlation: im2col, then gemm // valid correlation: im2col, then gemm
// Iterate over batch // Iterate over batch
for (int n = 0; n < batchSize; n++) { for (int n = 0; n < batchSize; n++) {
...@@ -445,6 +465,19 @@ CudaNdarray* corrMM(CudaNdarray *const bottom, ...@@ -445,6 +465,19 @@ CudaNdarray* corrMM(CudaNdarray *const bottom,
} }
else if (direction == 1) { // backprop wrt. weights else if (direction == 1) { // backprop wrt. weights
output = weight; output = weight;
if (batchSize == 0 || nChannels == 0 || nFilters == 0) {
cudaError_t err = cudaMemset(output->devdata, 0,
CudaNdarray_SIZE(output) * sizeof(real));
if (err != cudaSuccess) {
PyErr_Format(PyExc_RuntimeError,
"GpuCorrMM grad wrt. weights could not fill the output with zeros: %s",
cudaGetErrorString(err));
Py_DECREF(col);
return NULL;
}
Py_DECREF(col);
return output;
}
// valid convolution: im2col, then gemm // valid convolution: im2col, then gemm
// Iterate over batch // Iterate over batch
for (int n = 0; n < batchSize; n++) { for (int n = 0; n < batchSize; n++) {
...@@ -513,6 +546,19 @@ CudaNdarray* corrMM(CudaNdarray *const bottom, ...@@ -513,6 +546,19 @@ CudaNdarray* corrMM(CudaNdarray *const bottom,
} }
else if (direction == 2) { // backprop wrt. inputs else if (direction == 2) { // backprop wrt. inputs
output = bottom; output = bottom;
if (batchSize == 0 || nChannels == 0 || nFilters == 0) {
cudaError_t err = cudaMemset(output->devdata, 0,
CudaNdarray_SIZE(output) * sizeof(real));
if (err != cudaSuccess) {
PyErr_Format(PyExc_RuntimeError,
"GpuCorrMM grad wrt. inputs could not fill the output with zeros: %s",
cudaGetErrorString(err));
Py_DECREF(col);
return NULL;
}
Py_DECREF(col);
return output;
}
// full convolution: gemm, then col2im // full convolution: gemm, then col2im
// Iterate over batch // Iterate over batch
for (int n = 0; n < batchSize; n++) { for (int n = 0; n < batchSize; n++) {
......
...@@ -14,7 +14,8 @@ from theano.gof.type import CDataType ...@@ -14,7 +14,8 @@ from theano.gof.type import CDataType
from theano.compile import optdb from theano.compile import optdb
from theano.compile.ops import shape_i from theano.compile.ops import shape_i
from theano.tensor.nnet import LogSoftmax, SoftmaxGrad from theano.tensor.nnet import LogSoftmax, SoftmaxGrad
from theano.tensor.nnet.abstract_conv import get_conv_output_shape from theano.tensor.nnet.abstract_conv import (get_conv_output_shape,
assert_conv_shape)
from theano.tensor.signal.pool import ( from theano.tensor.signal.pool import (
Pool, MaxPoolGrad, AveragePoolGrad) Pool, MaxPoolGrad, AveragePoolGrad)
from theano.sandbox.cuda.type import CudaNdarrayType from theano.sandbox.cuda.type import CudaNdarrayType
...@@ -1132,10 +1133,12 @@ def dnn_conv(img, kerns, border_mode='valid', subsample=(1, 1), ...@@ -1132,10 +1133,12 @@ def dnn_conv(img, kerns, border_mode='valid', subsample=(1, 1),
# that would be flipped by conv_mode='conv' in GpuDnnConvGradW. # that would be flipped by conv_mode='conv' in GpuDnnConvGradW.
kerns = kerns[:, :, ::-1, ::-1] kerns = kerns[:, :, ::-1, ::-1]
kerns = gpu_contiguous(kerns.dimshuffle(1, 0, 2, 3)) kerns = gpu_contiguous(kerns.dimshuffle(1, 0, 2, 3))
shape2 = shape_i(img, 2, fgraph) - shape_i(kerns, 2, fgraph) + 1 out_shp = (shape_i(kerns, 1, fgraph),
shape3 = shape_i(img, 3, fgraph) - shape_i(kerns, 3, fgraph) + 1 shape_i(img, 1, fgraph),
out = gpu_alloc_empty(shape_i(kerns, 1, fgraph), shape_i(img, 2, fgraph) - shape_i(kerns, 2, fgraph) + 1,
shape_i(img, 1, fgraph), shape2, shape3) shape_i(img, 3, fgraph) - shape_i(kerns, 3, fgraph) + 1)
out_shp = assert_conv_shape(out_shp)
out = gpu_alloc_empty(*out_shp)
desc = GpuDnnConvDesc(border_mode='valid', subsample=(1, 1), desc = GpuDnnConvDesc(border_mode='valid', subsample=(1, 1),
conv_mode='cross', precision=precision)(img.shape, conv_mode='cross', precision=precision)(img.shape,
out.shape) out.shape)
...@@ -1149,10 +1152,12 @@ def dnn_conv(img, kerns, border_mode='valid', subsample=(1, 1), ...@@ -1149,10 +1152,12 @@ def dnn_conv(img, kerns, border_mode='valid', subsample=(1, 1),
img = gpu_contiguous(img) img = gpu_contiguous(img)
kerns = gpu_contiguous(kerns.dimshuffle(1, 0, 2, 3)) kerns = gpu_contiguous(kerns.dimshuffle(1, 0, 2, 3))
conv_mode = 'cross' if conv_mode == 'conv' else 'conv' conv_mode = 'cross' if conv_mode == 'conv' else 'conv'
shape2 = shape_i(img, 2, fgraph) + shape_i(kerns, 2, fgraph) - 1 out_shp = (shape_i(img, 0, fgraph),
shape3 = shape_i(img, 3, fgraph) + shape_i(kerns, 3, fgraph) - 1 shape_i(kerns, 1, fgraph),
out = gpu_alloc_empty(shape_i(img, 0, fgraph), shape_i(img, 2, fgraph) + shape_i(kerns, 2, fgraph) - 1,
shape_i(kerns, 1, fgraph), shape2, shape3) shape_i(img, 3, fgraph) + shape_i(kerns, 3, fgraph) - 1)
out_shp = assert_conv_shape(out_shp)
out = gpu_alloc_empty(*out_shp)
desc = GpuDnnConvDesc(border_mode='valid', subsample=(1, 1), desc = GpuDnnConvDesc(border_mode='valid', subsample=(1, 1),
conv_mode=conv_mode, precision=precision)(out.shape, conv_mode=conv_mode, precision=precision)(out.shape,
kerns.shape) kerns.shape)
...@@ -1170,6 +1175,7 @@ def dnn_conv(img, kerns, border_mode='valid', subsample=(1, 1), ...@@ -1170,6 +1175,7 @@ def dnn_conv(img, kerns, border_mode='valid', subsample=(1, 1),
out_shp = GpuDnnConv.get_out_shape(img.shape, kerns.shape, out_shp = GpuDnnConv.get_out_shape(img.shape, kerns.shape,
desc_op.border_mode, desc_op.border_mode,
desc_op.subsample) desc_op.subsample)
out_shp = assert_conv_shape(out_shp)
out = gpu_alloc_empty(*out_shp) out = gpu_alloc_empty(*out_shp)
return GpuDnnConv(algo=algo)(img, kerns, out, desc) return GpuDnnConv(algo=algo)(img, kerns, out, desc)
...@@ -1248,11 +1254,13 @@ def dnn_conv3d(img, kerns, border_mode='valid', subsample=(1, 1, 1), ...@@ -1248,11 +1254,13 @@ def dnn_conv3d(img, kerns, border_mode='valid', subsample=(1, 1, 1),
# that would be flipped by conv_mode='conv' in GpuDnnConvGradW. # that would be flipped by conv_mode='conv' in GpuDnnConvGradW.
kerns = kerns[:, :, ::-1, ::-1, ::-1] kerns = kerns[:, :, ::-1, ::-1, ::-1]
kerns = gpu_contiguous(kerns.dimshuffle(1, 0, 2, 3, 4)) kerns = gpu_contiguous(kerns.dimshuffle(1, 0, 2, 3, 4))
shape2 = shape_i(img, 2, fgraph) - shape_i(kerns, 2, fgraph) + 1 out_shp = (shape_i(kerns, 1, fgraph),
shape3 = shape_i(img, 3, fgraph) - shape_i(kerns, 3, fgraph) + 1 shape_i(img, 1, fgraph),
shape4 = shape_i(img, 4, fgraph) - shape_i(kerns, 4, fgraph) + 1 shape_i(img, 2, fgraph) - shape_i(kerns, 2, fgraph) + 1,
out = gpu_alloc_empty(shape_i(kerns, 1, fgraph), shape_i(img, 3, fgraph) - shape_i(kerns, 3, fgraph) + 1,
shape_i(img, 1, fgraph), shape2, shape3, shape4) shape_i(img, 4, fgraph) - shape_i(kerns, 4, fgraph) + 1)
out_shp = assert_conv_shape(out_shp)
out = gpu_alloc_empty(*out_shp)
desc = GpuDnnConvDesc(border_mode='valid', subsample=(1, 1, 1), desc = GpuDnnConvDesc(border_mode='valid', subsample=(1, 1, 1),
conv_mode='cross', precision=precision)(img.shape, conv_mode='cross', precision=precision)(img.shape,
out.shape) out.shape)
...@@ -1271,6 +1279,7 @@ def dnn_conv3d(img, kerns, border_mode='valid', subsample=(1, 1, 1), ...@@ -1271,6 +1279,7 @@ def dnn_conv3d(img, kerns, border_mode='valid', subsample=(1, 1, 1),
out_shp = GpuDnnConv3d.get_out_shape(img.shape, kerns.shape, out_shp = GpuDnnConv3d.get_out_shape(img.shape, kerns.shape,
desc_op.border_mode, desc_op.border_mode,
desc_op.subsample) desc_op.subsample)
out_shp = assert_conv_shape(out_shp)
out = gpu_alloc_empty(*out_shp) out = gpu_alloc_empty(*out_shp)
return GpuDnnConv3d(algo=algo)(img, kerns, out, desc) return GpuDnnConv3d(algo=algo)(img, kerns, out, desc)
......
...@@ -12,11 +12,6 @@ APPLY_SPECIFIC(conv_fwd)(CudaNdarray *input, CudaNdarray *kerns, ...@@ -12,11 +12,6 @@ APPLY_SPECIFIC(conv_fwd)(CudaNdarray *input, CudaNdarray *kerns,
return 1; return 1;
} }
if (c_set_tensorNd(input, APPLY_SPECIFIC(input)) == -1)
return 1;
if (c_set_filterNd(kerns, APPLY_SPECIFIC(kerns)) == -1)
return 1;
int nb_dim = CudaNdarray_NDIM(input); int nb_dim = CudaNdarray_NDIM(input);
#ifdef CONV_INPLACE #ifdef CONV_INPLACE
...@@ -30,6 +25,22 @@ APPLY_SPECIFIC(conv_fwd)(CudaNdarray *input, CudaNdarray *kerns, ...@@ -30,6 +25,22 @@ APPLY_SPECIFIC(conv_fwd)(CudaNdarray *input, CudaNdarray *kerns,
return 1; return 1;
#endif #endif
if (CudaNdarray_DIMS(input)[0] == 0 || CudaNdarray_DIMS(kerns)[0] == 0 || CudaNdarray_DIMS(kerns)[1] == 0) {
cudaError_t err2 = cudaMemset((*output)->devdata, 0,
CudaNdarray_SIZE(*output) * sizeof(real));
if (err2 != cudaSuccess) {
PyErr_Format(PyExc_RuntimeError,
"GpuDnnConv could not fill the output with zeros: %s",
cudaGetErrorString(err2));
return 1;
}
return 0;
}
if (c_set_tensorNd(input, APPLY_SPECIFIC(input)) == -1)
return 1;
if (c_set_filterNd(kerns, APPLY_SPECIFIC(kerns)) == -1)
return 1;
if (c_set_tensorNd(*output, APPLY_SPECIFIC(output)) == -1) if (c_set_tensorNd(*output, APPLY_SPECIFIC(output)) == -1)
return 1; return 1;
......
...@@ -12,11 +12,6 @@ APPLY_SPECIFIC(conv_gi)(CudaNdarray *kerns, CudaNdarray *output, ...@@ -12,11 +12,6 @@ APPLY_SPECIFIC(conv_gi)(CudaNdarray *kerns, CudaNdarray *output,
return 1; return 1;
} }
if (c_set_tensorNd(output, APPLY_SPECIFIC(output)) == -1)
return 1;
if (c_set_filterNd(kerns, APPLY_SPECIFIC(kerns)) == -1)
return 1;
int nb_dim = CudaNdarray_NDIM(output); int nb_dim = CudaNdarray_NDIM(output);
#ifdef CONV_INPLACE #ifdef CONV_INPLACE
...@@ -30,9 +25,64 @@ APPLY_SPECIFIC(conv_gi)(CudaNdarray *kerns, CudaNdarray *output, ...@@ -30,9 +25,64 @@ APPLY_SPECIFIC(conv_gi)(CudaNdarray *kerns, CudaNdarray *output,
return 1; return 1;
#endif #endif
if (CudaNdarray_DIMS(im)[0] == 0 || CudaNdarray_DIMS(kerns)[0] == 0 || CudaNdarray_DIMS(kerns)[1] == 0) {
cudaError_t err2 = cudaMemset((*input)->devdata, 0,
CudaNdarray_SIZE(*input) * sizeof(real));
if (err2 != cudaSuccess) {
PyErr_Format(PyExc_RuntimeError,
"GpuDnnConv grad wrt. inputs could not fill the output with zeros: %s",
cudaGetErrorString(err2));
return 1;
}
return 0;
}
if (c_set_tensorNd(output, APPLY_SPECIFIC(output)) == -1)
return 1;
if (c_set_filterNd(kerns, APPLY_SPECIFIC(kerns)) == -1)
return 1;
if (c_set_tensorNd(*input, APPLY_SPECIFIC(input)) == -1) if (c_set_tensorNd(*input, APPLY_SPECIFIC(input)) == -1)
return 1; return 1;
int expected_output_dims[5] = {0};
err = cudnnGetConvolutionNdForwardOutputDim(desc, APPLY_SPECIFIC(input), APPLY_SPECIFIC(kerns),
nb_dim, expected_output_dims);
if (err != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_RuntimeError, "error computing convolution output dim: %s",
cudnnGetErrorString(err));
return 1;
}
if (nb_dim == 4) {
if ((CudaNdarray_HOST_DIMS(output)[0] != expected_output_dims[0]) ||
(CudaNdarray_HOST_DIMS(output)[1] != expected_output_dims[1]) ||
(CudaNdarray_HOST_DIMS(output)[2] != expected_output_dims[2]) ||
(CudaNdarray_HOST_DIMS(output)[3] != expected_output_dims[3])) {
PyErr_Format(PyExc_ValueError, "impossible convolution output dim: expected %ldx%ldx%ldx%ld"
" but received gradient with shape %ldx%ldx%ldx%ld",
(long int)expected_output_dims[0], (long int)expected_output_dims[1],
(long int)expected_output_dims[2], (long int)expected_output_dims[3],
(long int)CudaNdarray_HOST_DIMS(output)[0], (long int)CudaNdarray_HOST_DIMS(output)[1],
(long int)CudaNdarray_HOST_DIMS(output)[2], (long int)CudaNdarray_HOST_DIMS(output)[3]);
return 1;
}
} else if (nb_dim == 5) {
if ((CudaNdarray_HOST_DIMS(output)[0] != expected_output_dims[0]) ||
(CudaNdarray_HOST_DIMS(output)[1] != expected_output_dims[1]) ||
(CudaNdarray_HOST_DIMS(output)[2] != expected_output_dims[2]) ||
(CudaNdarray_HOST_DIMS(output)[3] != expected_output_dims[3]) ||
(CudaNdarray_HOST_DIMS(output)[4] != expected_output_dims[4])) {
PyErr_Format(PyExc_ValueError, "impossible convolution output dim: expected %ldx%ldx%ldx%ldx%ld"
" but received gradient with shape %ldx%ldx%ldx%ldx%ld",
(long int)expected_output_dims[0], (long int)expected_output_dims[1],
(long int)expected_output_dims[2], (long int)expected_output_dims[3],
(long int)expected_output_dims[4],
(long int)CudaNdarray_HOST_DIMS(output)[0], (long int)CudaNdarray_HOST_DIMS(output)[1],
(long int)CudaNdarray_HOST_DIMS(output)[2], (long int)CudaNdarray_HOST_DIMS(output)[3],
(long int)CudaNdarray_HOST_DIMS(output)[4]);
return 1;
}
}
{ {
size_t worksize; size_t worksize;
void *workspace; void *workspace;
......
...@@ -12,11 +12,6 @@ APPLY_SPECIFIC(conv_gw)(CudaNdarray *input, CudaNdarray *output, ...@@ -12,11 +12,6 @@ APPLY_SPECIFIC(conv_gw)(CudaNdarray *input, CudaNdarray *output,
return 1; return 1;
} }
if (c_set_tensorNd(input, APPLY_SPECIFIC(input)) == -1)
return 1;
if (c_set_tensorNd(output, APPLY_SPECIFIC(output)) == -1)
return 1;
int nb_dim = CudaNdarray_NDIM(output); int nb_dim = CudaNdarray_NDIM(output);
#ifdef CONV_INPLACE #ifdef CONV_INPLACE
...@@ -30,9 +25,64 @@ APPLY_SPECIFIC(conv_gw)(CudaNdarray *input, CudaNdarray *output, ...@@ -30,9 +25,64 @@ APPLY_SPECIFIC(conv_gw)(CudaNdarray *input, CudaNdarray *output,
return 1; return 1;
#endif #endif
if (CudaNdarray_DIMS(input)[0] == 0 || CudaNdarray_DIMS(km)[0] == 0 || CudaNdarray_DIMS(km)[1] == 0) {
cudaError_t err2 = cudaMemset((*kerns)->devdata, 0,
CudaNdarray_SIZE(*kerns) * sizeof(real));
if (err2 != cudaSuccess) {
PyErr_Format(PyExc_RuntimeError,
"GpuDnnConv grad wrt. weights could not fill the output with zeros: %s",
cudaGetErrorString(err2));
return 1;
}
return 0;
}
if (c_set_tensorNd(input, APPLY_SPECIFIC(input)) == -1)
return 1;
if (c_set_tensorNd(output, APPLY_SPECIFIC(output)) == -1)
return 1;
if (c_set_filterNd(*kerns, APPLY_SPECIFIC(kerns)) == -1) if (c_set_filterNd(*kerns, APPLY_SPECIFIC(kerns)) == -1)
return 1; return 1;
int expected_output_dims[5] = {0};
err = cudnnGetConvolutionNdForwardOutputDim(desc, APPLY_SPECIFIC(input), APPLY_SPECIFIC(kerns),
nb_dim, expected_output_dims);
if (err != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_RuntimeError, "error computing convolution output dim: %s",
cudnnGetErrorString(err));
return 1;
}
if (nb_dim == 4) {
if ((CudaNdarray_HOST_DIMS(output)[0] != expected_output_dims[0]) ||
(CudaNdarray_HOST_DIMS(output)[1] != expected_output_dims[1]) ||
(CudaNdarray_HOST_DIMS(output)[2] != expected_output_dims[2]) ||
(CudaNdarray_HOST_DIMS(output)[3] != expected_output_dims[3])) {
PyErr_Format(PyExc_ValueError, "impossible convolution output dim: expected %ldx%ldx%dx%ld"
" but received gradient with shape %ldx%ldx%dx%ld",
(long int)expected_output_dims[0], (long int)expected_output_dims[1],
(long int)expected_output_dims[2], (long int)expected_output_dims[3],
(long int)CudaNdarray_HOST_DIMS(output)[0], (long int)CudaNdarray_HOST_DIMS(output)[1],
(long int)CudaNdarray_HOST_DIMS(output)[2], (long int)CudaNdarray_HOST_DIMS(output)[3]);
return 1;
}
} else if (nb_dim == 5) {
if ((CudaNdarray_HOST_DIMS(output)[0] != expected_output_dims[0]) ||
(CudaNdarray_HOST_DIMS(output)[1] != expected_output_dims[1]) ||
(CudaNdarray_HOST_DIMS(output)[2] != expected_output_dims[2]) ||
(CudaNdarray_HOST_DIMS(output)[3] != expected_output_dims[3]) ||
(CudaNdarray_HOST_DIMS(output)[4] != expected_output_dims[4])) {
PyErr_Format(PyExc_ValueError, "impossible convolution output dim: expected %ldx%ldx%ldx%ldx%ld"
" but received gradient with shape %ldx%ldx%ldx%ldx%ld",
(long int)expected_output_dims[0], (long int)expected_output_dims[1],
(long int)expected_output_dims[2], (long int)expected_output_dims[3],
(long int)expected_output_dims[4],
(long int)CudaNdarray_HOST_DIMS(output)[0], (long int)CudaNdarray_HOST_DIMS(output)[1],
(long int)CudaNdarray_HOST_DIMS(output)[2], (long int)CudaNdarray_HOST_DIMS(output)[3],
(long int)CudaNdarray_HOST_DIMS(output)[4]);
return 1;
}
}
{ {
size_t worksize; size_t worksize;
void *workspace; void *workspace;
......
...@@ -13,6 +13,7 @@ from theano.sandbox.cuda.blas import ( ...@@ -13,6 +13,7 @@ from theano.sandbox.cuda.blas import (
GpuCorrMM, GpuCorrMM_gradWeights, GpuCorrMM_gradInputs, GpuCorrMM, GpuCorrMM_gradWeights, GpuCorrMM_gradInputs,
GpuCorr3dMM, GpuCorr3dMM_gradWeights, GpuCorr3dMM_gradInputs) GpuCorr3dMM, GpuCorr3dMM_gradWeights, GpuCorr3dMM_gradInputs)
from nose.plugins.skip import SkipTest from nose.plugins.skip import SkipTest
from nose.tools import assert_raises
import theano.sandbox.cuda as cuda import theano.sandbox.cuda as cuda
if not cuda.cuda_available: if not cuda.cuda_available:
...@@ -57,6 +58,31 @@ class TestDnnConv2d(test_abstract_conv.BaseTestConv2d): ...@@ -57,6 +58,31 @@ class TestDnnConv2d(test_abstract_conv.BaseTestConv2d):
filter_flip=flip, target_op=GpuDnnConvGradI, filter_flip=flip, target_op=GpuDnnConvGradI,
filter_dilation=fd) filter_dilation=fd)
def tcase_gi(self, i, f, o, s, b, flip, provide_shape, fd=(1, 1), expect_error=False):
if fd != (1, 1):
raise SkipTest("No dilation implementation for cuDNN ConvOp.")
if not dnn_available():
raise SkipTest(cuda.dnn.dnn_available.msg)
mode = mode_with_gpu
if not expect_error:
self.run_gradinput(inputs_shape=i, filters_shape=f,
output_shape=o, subsample=s,
verify_grad=True, mode=mode,
provide_shape=provide_shape, border_mode=b,
filter_flip=flip, target_op=GpuDnnConvGradI,
filter_dilation=fd)
else:
assert_raises((RuntimeError, ValueError),
self.run_gradinput,
inputs_shape=i, filters_shape=f,
output_shape=o, subsample=s,
verify_grad=False, mode=mode,
provide_shape=provide_shape, border_mode=b,
filter_flip=flip, target_op=GpuDnnConvGradI,
ref=None,
filter_dilation=fd)
class TestDnnConv3d(test_abstract_conv.BaseTestConv3d): class TestDnnConv3d(test_abstract_conv.BaseTestConv3d):
@classmethod @classmethod
...@@ -91,6 +117,31 @@ class TestDnnConv3d(test_abstract_conv.BaseTestConv3d): ...@@ -91,6 +117,31 @@ class TestDnnConv3d(test_abstract_conv.BaseTestConv3d):
filter_flip=flip, target_op=GpuDnnConv3dGradI, filter_flip=flip, target_op=GpuDnnConv3dGradI,
filter_dilation=fd) filter_dilation=fd)
def tcase_gi(self, i, f, o, s, b, flip, provide_shape, fd=(1, 1, 1), expect_error=False):
if fd != (1, 1, 1):
raise SkipTest("No dilation implementation for cuDNN ConvOp.")
if not dnn_available():
raise SkipTest(cuda.dnn.dnn_available.msg)
mode = mode_with_gpu
if not expect_error:
self.run_gradinput(inputs_shape=i, filters_shape=f,
output_shape=o, subsample=s,
verify_grad=True, mode=mode,
provide_shape=provide_shape, border_mode=b,
filter_flip=flip, target_op=GpuDnnConvGradI,
filter_dilation=fd)
else:
assert_raises((RuntimeError, ValueError),
self.run_gradinput,
inputs_shape=i, filters_shape=f,
output_shape=o, subsample=s,
verify_grad=False, mode=mode,
provide_shape=provide_shape, border_mode=b,
filter_flip=flip, target_op=GpuDnnConvGradI,
ref=None,
filter_dilation=fd)
class TestCorrMMConv2d(test_abstract_conv.BaseTestConv2d): class TestCorrMMConv2d(test_abstract_conv.BaseTestConv2d):
@classmethod @classmethod
...@@ -124,6 +175,28 @@ class TestCorrMMConv2d(test_abstract_conv.BaseTestConv2d): ...@@ -124,6 +175,28 @@ class TestCorrMMConv2d(test_abstract_conv.BaseTestConv2d):
target_op=GpuCorrMM_gradInputs, target_op=GpuCorrMM_gradInputs,
filter_dilation=fd) filter_dilation=fd)
def tcase_gi(self, i, f, o, s, b, flip, provide_shape, fd=(1, 1), expect_error=False):
mode = self.mode
if not expect_error:
self.run_gradinput(inputs_shape=i, filters_shape=f,
output_shape=o, subsample=s,
verify_grad=True, mode=mode,
provide_shape=provide_shape, border_mode=b,
filter_flip=flip,
target_op=GpuCorrMM_gradInputs,
filter_dilation=fd)
else:
assert_raises(ValueError,
self.run_gradinput,
inputs_shape=i, filters_shape=f,
output_shape=o, subsample=s,
verify_grad=False, mode=mode,
provide_shape=provide_shape, border_mode=b,
filter_flip=flip,
target_op=GpuCorrMM_gradInputs,
ref=None,
filter_dilation=fd)
class TestCorrMMConv3d(test_abstract_conv.BaseTestConv3d): class TestCorrMMConv3d(test_abstract_conv.BaseTestConv3d):
@classmethod @classmethod
...@@ -157,6 +230,28 @@ class TestCorrMMConv3d(test_abstract_conv.BaseTestConv3d): ...@@ -157,6 +230,28 @@ class TestCorrMMConv3d(test_abstract_conv.BaseTestConv3d):
target_op=GpuCorr3dMM_gradInputs, target_op=GpuCorr3dMM_gradInputs,
filter_dilation=fd) filter_dilation=fd)
def tcase_gi(self, i, f, o, s, b, flip, provide_shape, fd=(1, 1, 1), expect_error=False):
mode = self.mode
if not expect_error:
self.run_gradinput(inputs_shape=i, filters_shape=f,
output_shape=o, subsample=s,
verify_grad=True, mode=mode,
provide_shape=provide_shape, border_mode=b,
filter_flip=flip,
target_op=GpuCorr3dMM_gradInputs,
filter_dilation=fd)
else:
assert_raises(ValueError,
self.run_gradinput,
inputs_shape=i, filters_shape=f,
output_shape=o, subsample=s,
verify_grad=False, mode=mode,
provide_shape=provide_shape, border_mode=b,
filter_flip=flip,
target_op=GpuCorr3dMM_gradInputs,
ref=None,
filter_dilation=fd)
class TestDnnConvTypes(test_abstract_conv.TestConvTypes): class TestDnnConvTypes(test_abstract_conv.TestConvTypes):
def setUp(self): def setUp(self):
......
...@@ -4,6 +4,7 @@ import os ...@@ -4,6 +4,7 @@ import os
import sys import sys
from nose.plugins.skip import SkipTest from nose.plugins.skip import SkipTest
from nose_parameterized import parameterized
from itertools import chain, product from itertools import chain, product
import six.moves.cPickle as pickle import six.moves.cPickle as pickle
from six import StringIO from six import StringIO
...@@ -16,6 +17,7 @@ import theano.tensor as T ...@@ -16,6 +17,7 @@ import theano.tensor as T
import theano.tests.unittest_tools as utt import theano.tests.unittest_tools as utt
from theano.tensor.signal.pool import pool_2d, pool_3d from theano.tensor.signal.pool import pool_2d, pool_3d
from theano.tensor.signal.pool import Pool, MaxPoolGrad, AveragePoolGrad from theano.tensor.signal.pool import Pool, MaxPoolGrad, AveragePoolGrad
from theano.tensor.nnet.abstract_conv import get_conv_output_shape
import theano.sandbox.cuda.dnn as dnn import theano.sandbox.cuda.dnn as dnn
from theano.sandbox.cuda.basic_ops import GpuAllocEmpty, gpu_alloc_empty from theano.sandbox.cuda.basic_ops import GpuAllocEmpty, gpu_alloc_empty
from theano.sandbox.cuda import float32_shared_constructor as shared from theano.sandbox.cuda import float32_shared_constructor as shared
...@@ -979,99 +981,105 @@ class TestDnnInferShapes(utt.InferShapeTester): ...@@ -979,99 +981,105 @@ class TestDnnInferShapes(utt.InferShapeTester):
dnn.GpuDnnConv3d dnn.GpuDnnConv3d
) )
def test_conv_gradw(self): def _test_conv_gradw(self, img, topgrad, kerns, img_shape, kerns_shape, border_mode, conv_mode, subsample):
if not dnn.dnn_available(): if not dnn.dnn_available():
raise SkipTest(dnn.dnn_available.msg) raise SkipTest(dnn.dnn_available.msg)
img = T.ftensor4('img')
kerns = T.ftensor4('kerns') topgrad_shape = get_conv_output_shape(img_shape, kerns_shape,
out = T.ftensor4('out') border_mode, subsample)
img_val = numpy.asarray( img_val = numpy.asarray(
numpy.random.rand(2, 5, 6, 8), numpy.random.rand(*img_shape),
dtype='float32' dtype=theano.config.floatX
) )
kern_vals = numpy.asarray( topgrad_vals = numpy.asarray(
numpy.random.rand(2, 1, 5, 6), numpy.random.rand(*topgrad_shape),
dtype='float32' dtype=theano.config.floatX
) )
for params in product( kerns_vals = numpy.zeros(kerns_shape, dtype=theano.config.floatX)
['valid', 'full', 'half'], kerns_shape = theano.shared(numpy.asarray(kerns_shape))
[(1, 1)], # strides besides (1, 1) topgrad_shape = theano.shared(numpy.asarray(topgrad_shape))
['conv', 'cross']
):
temp_img = img.dimshuffle(1, 0, 2, 3)
temp_kerns = kerns
if params[2] == 'conv':
temp_kerns = temp_kerns[:, :, ::-1, ::-1]
temp_kerns = temp_kerns.dimshuffle(1, 0, 2, 3)
shape = (
kern_vals.shape[1], img_val.shape[1],
img_val.shape[2] - kern_vals.shape[2] + 1,
img_val.shape[3] - kern_vals.shape[3] + 1
)
out_vals = numpy.zeros(shape, dtype='float32')
desc = dnn.GpuDnnConvDesc( desc = dnn.GpuDnnConvDesc(
border_mode=params[0], border_mode=border_mode,
subsample=params[1], subsample=subsample,
conv_mode=params[2] conv_mode=conv_mode
)(temp_img.shape, out.shape) )(topgrad_shape, kerns_shape)
conv_grad_w = dnn.GpuDnnConvGradW()( conv_grad_w = dnn.GpuDnnConvGradW()(
temp_img, img,
temp_kerns, topgrad,
out, kerns,
desc, desc,
) )
self._compile_and_check( self._compile_and_check(
[temp_img, temp_kerns, out], [img, topgrad, kerns],
[conv_grad_w], [conv_grad_w],
[img_val, kern_vals, out_vals], [img_val, topgrad_vals, kerns_vals],
dnn.GpuDnnConvGradW dnn.GpuDnnConvGradW
) )
def test_conv3d_gradw(self): border_modes = ['valid', 'full', 'half']
conv_modes = ['conv', 'cross']
@parameterized.expand(product(border_modes, conv_modes), utt.custom_name_func)
def test_conv_gradw(self, border_mode, conv_mode):
self._test_conv_gradw(T.tensor4('img'),
T.tensor4('topgrad'),
T.tensor4('kerns'),
(5, 2, 6, 13),
(1, 2, 3, 7),
border_mode,
conv_mode,
(1, 1))
def _test_conv3d_gradw(self, img, topgrad, kerns, img_shape, kerns_shape, border_mode, conv_mode, subsample):
if not (cuda.dnn.dnn_available() and dnn.version() >= (2000, 2000)): if not (cuda.dnn.dnn_available() and dnn.version() >= (2000, 2000)):
raise SkipTest('"cuDNN 3D convolution requires cuDNN v2') raise SkipTest('"cuDNN 3D convolution requires cuDNN v2')
img = T.ftensor5('img')
kerns = T.ftensor5('kerns') topgrad_shape = get_conv_output_shape(img_shape, kerns_shape,
out = T.ftensor5('out') border_mode, subsample)
img_val = numpy.asarray( img_val = numpy.asarray(
numpy.random.rand(9, 2, 4, 8, 13), numpy.random.rand(*img_shape),
dtype='float32' dtype=theano.config.floatX
) )
kern_vals = numpy.asarray( topgrad_vals = numpy.asarray(
numpy.random.rand(11, 2, 3, 1, 4), numpy.random.rand(*topgrad_shape),
dtype='float32' dtype=theano.config.floatX
) )
for params in product( kerns_vals = numpy.zeros(kerns_shape, dtype=theano.config.floatX)
['valid', 'full', 'half'], kerns_shape = theano.shared(numpy.asarray(kerns_shape))
[(1, 1, 1), (2, 2, 2)], topgrad_shape = theano.shared(numpy.asarray(topgrad_shape))
['conv', 'cross']
):
out_vals = numpy.zeros(
dnn.GpuDnnConv3d.get_out_shape(img_val.shape, kern_vals.shape,
border_mode=params[0],
subsample=params[1]),
dtype='float32')
desc = dnn.GpuDnnConvDesc( desc = dnn.GpuDnnConvDesc(
border_mode=params[0], border_mode=border_mode,
subsample=params[1], subsample=subsample,
conv_mode=params[2] conv_mode=conv_mode
)(img.shape, out.shape) )(topgrad_shape, kerns_shape)
conv_grad_w = dnn.GpuDnnConv3dGradW()( conv_grad_w = dnn.GpuDnnConv3dGradW()(
img, img,
out, topgrad,
kerns, kerns,
desc, desc,
) )
self._compile_and_check( self._compile_and_check(
[img, out, kerns], [img, topgrad, kerns],
[conv_grad_w], [conv_grad_w],
[img_val, out_vals, kern_vals], [img_val, topgrad_vals, kerns_vals],
dnn.GpuDnnConv3dGradW dnn.GpuDnnConv3dGradW
) )
@parameterized.expand(product(border_modes, conv_modes), utt.custom_name_func)
def test_conv3d_gradw(self, border_mode, conv_mode):
self._test_conv3d_gradw(T.tensor5('img'),
T.tensor5('topgrad'),
T.tensor5('kerns'),
(5, 2, 6, 13, 21),
(1, 2, 3, 7, 9),
border_mode,
conv_mode,
(1, 1, 1))
def test_conv_gradi(self): def test_conv_gradi(self):
if not dnn.dnn_available(): if not dnn.dnn_available():
raise SkipTest(dnn.dnn_available.msg) raise SkipTest(dnn.dnn_available.msg)
......
...@@ -11,6 +11,7 @@ import theano ...@@ -11,6 +11,7 @@ import theano
from theano.tensor import as_tensor_variable, patternbroadcast from theano.tensor import as_tensor_variable, patternbroadcast
from theano.tensor import get_scalar_constant_value, NotScalarConstantError from theano.tensor import get_scalar_constant_value, NotScalarConstantError
from theano.tensor.opt import Assert
from theano.gof import Apply, Op from theano.gof import Apply, Op
from six.moves import xrange from six.moves import xrange
...@@ -51,11 +52,11 @@ def get_conv_output_shape(image_shape, kernel_shape, ...@@ -51,11 +52,11 @@ def get_conv_output_shape(image_shape, kernel_shape,
or numeric). If it is a string, it must be 'valid', 'half' or 'full'. or numeric). If it is a string, it must be 'valid', 'half' or 'full'.
If it is a tuple, its two (or three) elements respectively correspond If it is a tuple, its two (or three) elements respectively correspond
to the padding on height and width (and possibly depth) axis. to the padding on height and width (and possibly depth) axis.
subsample: tuple of int (symbolic or numeric). Its or three elements subsample: tuple of int (symbolic or numeric). Its two or three elements
espectively correspond to the subsampling on height and width (and espectively correspond to the subsampling on height and width (and
possibly depth) axis. possibly depth) axis.
filter_dilation: tuple of int (symbolic or numeric). Its two elements filter_dilation: tuple of int (symbolic or numeric). Its two or three
correspond respectively to the dilation on height and width axis. elements correspond respectively to the dilation on height and width axis.
Returns Returns
------- -------
...@@ -137,6 +138,374 @@ def get_conv_shape_1axis(image_shape, kernel_shape, border_mode, ...@@ -137,6 +138,374 @@ def get_conv_shape_1axis(image_shape, kernel_shape, border_mode,
return out_shp return out_shp
def get_conv_gradweights_shape(image_shape, top_shape,
border_mode, subsample,
filter_dilation=None):
"""
This function tries to compute the kernel shape of convolution gradWeights.
The weights shape can only be computed exactly when subsample is 1 and
border_mode is not 'half'. If subsample is not 1 or border_mode is 'half',
this function will return None.
Parameters
----------
image_shape: tuple of int corresponding to the input image shape. Its
four (or five) elements must correspond respectively to: batch size,
number of output channels, height and width of the image. None where
undefined.
top_shape: tuple of int (symbolic or numeric) corresponding to the top
image shape. Its four (or five) element must correspond respectively
to: batch size, number of output channels, height and width (and
possibly depth) of the image. None where undefined.
border_mode: string, int (symbolic or numeric) or tuple of int (symbolic
or numeric). If it is a string, it must be 'valid', 'half' or 'full'.
If it is a tuple, its two (or three) elements respectively correspond
to the padding on height and width (and possibly depth) axis.
subsample: tuple of int (symbolic or numeric). Its two or three elements
respectively correspond to the subsampling on height and width (and
possibly depth) axis.
filter_dilation: tuple of int (symbolic or numeric). Its two or three
elements correspond respectively to the dilation on height and
width axis.
Returns
-------
kernel_shape: tuple of int (symbolic or numeric) corresponding to the
kernel shape. Its four (or five) elements correspond respectively
to: number of output channels, number of input channels, height and
width (and possibly depth) of the kernel. None where undefined.
"""
nkern, imshp = image_shape[1], image_shape[2:]
nchan, topshp = top_shape[1], top_shape[2:]
if filter_dilation is None:
filter_dilation = numpy.ones(len(subsample), dtype='int')
if isinstance(border_mode, tuple):
out_shp = tuple(get_conv_gradweights_shape_1axis(
imshp[i], topshp[i], border_mode[i],
subsample[i], filter_dilation[i]) for i in range(len(subsample)))
else:
out_shp = tuple(get_conv_gradweights_shape_1axis(
imshp[i], topshp[i], border_mode,
subsample[i], filter_dilation[i]) for i in range(len(subsample)))
return (nchan, nkern) + out_shp
def get_conv_gradweights_shape_1axis(image_shape, top_shape, border_mode,
subsample, dilation):
"""
This function tries to compute the image shape of convolution gradWeights.
The weights shape can only be computed exactly when subsample is 1 and
border_mode is not 'half'. If subsample is not 1 or border_mode is 'half',
this function will return None.
Parameters
----------
image_shape: int or None. Corresponds to the input image shape on a
given axis. None if undefined.
top_shape: int or None. Corresponds to the top shape on a given axis.
None if undefined.
border_mode: string or int. If it is a string, it must be
'valid', 'half' or 'full'. If it is an integer, it must correspond to
the padding on the considered axis.
subsample: int. It must correspond to the subsampling on the
considered axis.
dilation: int. It must correspond to the dilation on the
considered axis.
Returns
-------
kernel_shape: int or None. Corresponds to the kernel shape on a given
axis. None if undefined.
"""
if None in [image_shape, top_shape, border_mode,
subsample, dilation]:
return None
if subsample != 1 or border_mode == "half":
return None
if border_mode == "full":
kernel_shape = top_shape - image_shape
elif border_mode == "valid":
kernel_shape = image_shape - top_shape
else:
if border_mode < 0:
raise ValueError("border_mode must be >= 0")
kernel_shape = (image_shape + 2 * border_mode - top_shape)
if dilation > 1:
kernel_shape = kernel_shape / dilation
return kernel_shape + 1
def get_conv_gradinputs_shape(kernel_shape, top_shape,
border_mode, subsample,
filter_dilation=None):
"""
This function tries to compute the image shape of convolution gradInputs.
The image shape can only be computed exactly when subsample is 1.
If subsample for a dimension is not 1, this function will return None for
that dimension.
Parameters
----------
kernel_shape: tuple of int (symbolic or numeric) corresponding to the
kernel shape. Its four (or five) elements must correspond respectively
to: number of output channels, number of input channels, height and
width (and possibly depth) of the kernel. None where undefined.
top_shape: tuple of int (symbolic or numeric) corresponding to the top
image shape. Its four (or five) element must correspond respectively
to: batch size, number of output channels, height and width (and
possibly depth) of the image. None where undefined.
border_mode: string, int (symbolic or numeric) or tuple of int (symbolic
or numeric). If it is a string, it must be 'valid', 'half' or 'full'.
If it is a tuple, its two (or three) elements respectively correspond
to the padding on height and width (and possibly depth) axis.
subsample: tuple of int (symbolic or numeric). Its two or three elements
respectively correspond to the subsampling on height and width (and
possibly depth) axis.
filter_dilation: tuple of int (symbolic or numeric). Its two or three
elements correspond respectively to the dilation on height and
width axis.
Returns
-------
image_shape: tuple of int corresponding to the input image shape. Its
four element must correspond respectively to: batch size, number of
output channels, height and width of the image. None where undefined.
"""
bsize, topshp = top_shape[0], top_shape[2:]
nkern, kshp = kernel_shape[1], kernel_shape[2:]
if filter_dilation is None:
filter_dilation = numpy.ones(len(subsample), dtype='int')
if isinstance(border_mode, tuple):
out_shp = tuple(get_conv_gradinputs_shape_1axis(
kshp[i], topshp[i], border_mode[i],
subsample[i], filter_dilation[i]) for i in range(len(subsample)))
else:
out_shp = tuple(get_conv_gradinputs_shape_1axis(
kshp[i], topshp[i], border_mode,
subsample[i], filter_dilation[i]) for i in range(len(subsample)))
return (bsize, nkern) + out_shp
def get_conv_gradinputs_shape_1axis(kernel_shape, top_shape, border_mode,
                                    subsample, dilation):
    """
    Compute the image shape of convolution gradInputs along a single axis.

    The image shape can only be computed exactly when subsample is 1;
    otherwise None is returned.

    Parameters
    ----------
    kernel_shape: int or None. Kernel size on this axis, None if undefined.
    top_shape: int or None. Top (output) size on this axis, None if
        undefined.
    border_mode: string or int. One of 'valid', 'half' or 'full', or an
        integer giving the padding on this axis.
    subsample: int. Subsampling factor on this axis.
    dilation: int. Dilation factor on this axis.

    Returns
    -------
    image_shape: int or None. Input image size on this axis, or None if it
        cannot be determined.
    """
    args = (kernel_shape, top_shape, border_mode, subsample, dilation)
    if any(arg is None for arg in args):
        return None
    # With subsampling, several image sizes map to the same top size,
    # so the image shape is ambiguous.
    if subsample != 1:
        return None

    # Size of the kernel once implicit dilation is taken into account.
    dil_kernel_shape = (kernel_shape - 1) * dilation + 1

    if border_mode == "half":
        pad = dil_kernel_shape // 2
    elif border_mode == "full":
        pad = dil_kernel_shape - 1
    elif border_mode == "valid":
        pad = 0
    else:
        pad = border_mode
    if pad < 0:
        raise ValueError("border_mode must be >= 0")

    # image_shape = (top_shape - 1) * s - 2 * pad + dil_kernel_shape + a
    # where 0 <= a < subsample; subsample == 1 was checked above, so s = 1
    # and a = 0. For symbolic shapes, skip the "- 2 * pad" term when it is
    # known to be zero to keep the graph small.
    unpadded = top_shape + dil_kernel_shape - 1
    if pad == 0:
        return unpadded
    return unpadded - 2 * pad
def check_conv_gradinputs_shape(image_shape, kernel_shape, output_shape,
                                border_mode, subsample,
                                filter_dilation=None):
    """
    Check that the given convolution shapes are consistent.

    Parameters
    ----------
    image_shape: tuple of int (symbolic or numeric) corresponding to the
        input image shape. Its four (or five) elements must correspond
        respectively to: batch size, number of input channels, height and
        width (and possibly depth) of the image. None where undefined.
    kernel_shape: tuple of int (symbolic or numeric) corresponding to the
        kernel shape. Its four (or five) elements must correspond
        respectively to: number of output channels, number of input
        channels, height and width (and possibly depth) of the kernel.
        None where undefined.
    output_shape: tuple of int (symbolic or numeric) corresponding to the
        output shape. Its four (or five) elements must correspond
        respectively to: batch size, number of output channels, height and
        width (and possibly depth) of the output. None where undefined.
    border_mode: string, int (symbolic or numeric) or tuple of int
        (symbolic or numeric). If it is a string, it must be 'valid',
        'half' or 'full'. If it is a tuple, its two (or three) elements
        respectively correspond to the padding on height and width (and
        possibly depth) axis.
    subsample: tuple of int (symbolic or numeric). Its two or three
        elements respectively correspond to the subsampling on height and
        width (and possibly depth) axis.
    filter_dilation: tuple of int (symbolic or numeric). Its two or three
        elements correspond respectively to the dilation on height and
        width axis.

    Returns
    -------
    Returns False if a convolution with the given input shape, kernel
    shape and parameters would not have produced the given output shape.
    Returns True in all other cases: if the given output shape matches the
    computed output shape, but also if the shape could not be checked
    because the shape contains symbolic values.
    """
    image_shape = tuple(image_shape)
    kernel_shape = tuple(kernel_shape)
    output_shape = tuple(output_shape)

    # All three shapes must have the same rank, and the convolution
    # parameters must cover exactly the non-batch, non-channel axes.
    ndim = len(image_shape)
    if not (ndim == len(kernel_shape) == len(output_shape)):
        return False
    if len(subsample) != ndim - 2:
        return False
    if filter_dilation is not None and len(filter_dilation) != ndim - 2:
        return False

    # Predict the output shape from the input shape and parameters.
    expected_output_shape = get_conv_output_shape(
        image_shape, kernel_shape, border_mode, subsample, filter_dilation)

    def _dim_matches(given, computed):
        # Accept a dimension pair unless both resolve to distinct constants.
        if given is None or computed is None:
            return True
        try:
            given_value = get_scalar_constant_value(given)
            computed_value = get_scalar_constant_value(computed)
        except NotScalarConstantError:
            # symbolic dimension: no definite answer, accept for now
            return True
        return int(given_value) == int(computed_value)

    return all(_dim_matches(given, computed)
               for given, computed in zip(output_shape,
                                          expected_output_shape))
def assert_conv_shape(shape):
    """Add Assert nodes that check if shape is a valid convolution shape.

    The first two dimensions should be larger than or equal to zero. The
    convolution dimensions should be larger than zero.

    Parameters
    ----------
    shape: tuple of int (symbolic or numeric) corresponding to the input,
        output or kernel shape of a convolution. For input and output, the
        first elements should be the batch size and number of channels.
        For kernels, the first and second elements should contain the
        number of input and output channels. The remaining dimensions are
        the convolution dimensions.

    Returns
    -------
    Returns a tuple similar to the given `shape`. For constant elements in
    `shape`, the function checks the value and raises a `ValueError` if
    the dimension is invalid. The elements that are not constant are
    wrapped in an `Assert` op that checks the dimension at run time.
    """
    checked = []
    for dim, size in enumerate(shape):
        # batch size / channel counts may be zero; conv dims must be > 0
        batch_or_channel = dim < 2
        try:
            const_size = get_scalar_constant_value(size)
            if batch_or_channel:
                if const_size < 0:
                    raise ValueError('The convolution would produce an invalid shape (dim[%d]: %d < 0).' % (dim, const_size))
            else:
                if const_size <= 0:
                    raise ValueError('The convolution would produce an invalid shape (dim[%d]: %d <= 0).' % (dim, const_size))
            checked.append(size)
        except NotScalarConstantError:
            # symbolic dimension: defer the check to run time
            if batch_or_channel:
                check = Assert('The convolution would produce an invalid shape (dim[%d] < 0).' % dim)
                checked.append(check(size, theano.tensor.ge(size, 0)))
            else:
                check = Assert('The convolution would produce an invalid shape (dim[%d] <= 0).' % dim)
                checked.append(check(size, theano.tensor.gt(size, 0)))
    return tuple(checked)
def assert_shape(x, expected_shape, msg='Unexpected shape.'):
    """Wrap `x` in an `Assert` to check its shape at run time.

    Parameters
    ----------
    x : Tensor
        x will be wrapped in an `Assert`.
    expected_shape : tuple or list
        The expected shape of `x`. The size of a dimension can be None,
        which means it will not be checked.
    msg : str
        The error message of the `Assert`.

    Returns
    -------
    Tensor
        `x` wrapped in an `Assert`. At execution time, this will throw an
        AssertionError if the shape of `x` does not match `expected_shape`.
        If `expected_shape` is None or contains only Nones, the function
        will return `x` directly.
    """
    if expected_shape is None:
        return x

    actual_shape = x.shape
    checks = []
    for axis in range(x.ndim):
        wanted = expected_shape[axis]
        if wanted is not None:
            # only pin down the dimensions the caller specified
            checks.append(theano.tensor.eq(actual_shape[axis], wanted))

    if not checks:
        return x
    return Assert(msg)(x, *checks)
def conv2d(input, def conv2d(input,
filters, filters,
input_shape=None, input_shape=None,
...@@ -782,7 +1151,7 @@ def conv3d_grad_wrt_weights(input, ...@@ -782,7 +1151,7 @@ def conv3d_grad_wrt_weights(input,
filter_flip=filter_flip, filter_flip=filter_flip,
filter_dilation=filter_dilation) filter_dilation=filter_dilation)
return gradWeight_op(input, output_grad, filter_shape[:-3]) return gradWeight_op(input, output_grad, filter_shape[-3:])
def bilinear_kernel_2D(ratio, normalize=True): def bilinear_kernel_2D(ratio, normalize=True):
...@@ -1209,6 +1578,13 @@ class AbstractConv(BaseAbstractConv): ...@@ -1209,6 +1578,13 @@ class AbstractConv(BaseAbstractConv):
if kern.type.ndim != 2 + self.convdim: if kern.type.ndim != 2 + self.convdim:
raise TypeError('kern must be %dD tensor' % (2 + self.convdim)) raise TypeError('kern must be %dD tensor' % (2 + self.convdim))
img = assert_shape(img, self.imshp,
'AbstractConv shape mismatch: shape of '
'image does not match given imshp.')
kern = assert_shape(kern, self.kshp,
'AbstractConv shape mismatch: shape of '
'filters does not match given kshp.')
broadcastable = [img.broadcastable[0], broadcastable = [img.broadcastable[0],
kern.broadcastable[0]] + ([False] * self.convdim) kern.broadcastable[0]] + ([False] * self.convdim)
output = img.type.clone(broadcastable=broadcastable)() output = img.type.clone(broadcastable=broadcastable)()
...@@ -1420,6 +1796,10 @@ class AbstractConv_gradWeights(BaseAbstractConv): ...@@ -1420,6 +1796,10 @@ class AbstractConv_gradWeights(BaseAbstractConv):
if topgrad.type.ndim != 2 + self.convdim: if topgrad.type.ndim != 2 + self.convdim:
raise TypeError('topgrad must be %dD tensor' % (2 + self.convdim)) raise TypeError('topgrad must be %dD tensor' % (2 + self.convdim))
img = assert_shape(img, self.imshp,
'AbstractConv_gradWeights shape mismatch: shape of '
'image does not match given imshp.')
shape = as_tensor_variable(shape) shape = as_tensor_variable(shape)
broadcastable = [topgrad.broadcastable[1], broadcastable = [topgrad.broadcastable[1],
img.broadcastable[1]] + ([False] * self.convdim) img.broadcastable[1]] + ([False] * self.convdim)
...@@ -1655,6 +2035,10 @@ class AbstractConv_gradInputs(BaseAbstractConv): ...@@ -1655,6 +2035,10 @@ class AbstractConv_gradInputs(BaseAbstractConv):
if topgrad.type.ndim != 2 + self.convdim: if topgrad.type.ndim != 2 + self.convdim:
raise TypeError('topgrad must be %dD tensor' % (2 + self.convdim)) raise TypeError('topgrad must be %dD tensor' % (2 + self.convdim))
kern = assert_shape(kern, self.kshp,
'AbstractConv_gradInputs shape mismatch: shape of '
'filters does not match given kshp.')
shape = as_tensor_variable(shape) shape = as_tensor_variable(shape)
broadcastable = [topgrad.type.broadcastable[0], broadcastable = [topgrad.type.broadcastable[0],
kern.type.broadcastable[1]] + ([False] * self.convdim) kern.type.broadcastable[1]] + ([False] * self.convdim)
...@@ -1675,6 +2059,21 @@ class AbstractConv_gradInputs(BaseAbstractConv): ...@@ -1675,6 +2059,21 @@ class AbstractConv_gradInputs(BaseAbstractConv):
'"valid", "full", "half", an integer or a tuple of' '"valid", "full", "half", an integer or a tuple of'
' integers'.format(mode)) ' integers'.format(mode))
imshp = self.imshp[:] if self.imshp is not None else [None] * (2 + self.convdim)
fallback_imshp = ([topgrad.shape[0], kern.shape[1]] +
[shape[i] for i in range(self.convdim)])
imshp = [fallback_imshp[i] if imshp[i] is None else imshp[i]
for i in range(2 + self.convdim)]
expected_topgrad_shape = get_conv_output_shape(
imshp, kern.shape,
self.border_mode, self.subsample, self.filter_dilation)
if not tuple(expected_topgrad_shape) == tuple(topgrad.shape):
raise ValueError(
'invalid input_shape for gradInputs: the given input_shape '
'would produce an output of shape {}, but the given topgrad '
'has shape {}'.format(tuple(expected_topgrad_shape),
tuple(topgrad.shape)))
dil_kernshp = tuple((kern.shape[i + 2] - 1) * self.filter_dilation[i] + 1 dil_kernshp = tuple((kern.shape[i + 2] - 1) * self.filter_dilation[i] + 1
for i in range(self.convdim)) for i in range(self.convdim))
pad = (0,) * self.convdim pad = (0,) * self.convdim
......
...@@ -123,7 +123,7 @@ class BaseCorrMM(gof.OpenMPOp): ...@@ -123,7 +123,7 @@ class BaseCorrMM(gof.OpenMPOp):
def c_code_cache_version(self): def c_code_cache_version(self):
# raise this whenever modifying any of the support_code_files # raise this whenever modifying any of the support_code_files
return (1, self.openmp, blas_header_version()) return (5, self.openmp, blas_header_version())
def c_support_code_apply(self, node, nodename): def c_support_code_apply(self, node, nodename):
# REMEMBER TO RAISE c_code_cache_version when changing any of # REMEMBER TO RAISE c_code_cache_version when changing any of
...@@ -234,17 +234,17 @@ class BaseCorrMM(gof.OpenMPOp): ...@@ -234,17 +234,17 @@ class BaseCorrMM(gof.OpenMPOp):
# When subsampling, we cannot unambiguously infer the height and width # When subsampling, we cannot unambiguously infer the height and width
# of bottom and weights from top, so we require them to be given. # of bottom and weights from top, so we require them to be given.
# Similarly, when border_mode="half", we cannot infer the weight size. # Similarly, when border_mode="half", we cannot infer the weight size.
if ((direction != 0) and (dH != 1)) or ((direction == 1) and (padH == -1)): if height:
if not height:
raise ValueError("height must be given for backprop with vertical sampling or border_mode='half'")
height = '(*(npy_int64 *)(PyArray_DATA(%s)))' % height height = '(*(npy_int64 *)(PyArray_DATA(%s)))' % height
else: else:
if ((direction != 0) and (dH != 1)) or ((direction == 1) and (padH == -1)):
raise ValueError("height must be given for backprop with vertical sampling or border_mode='half'")
height = '-1' height = '-1'
if ((direction != 0) and (dW != 1)) or ((direction == 1) and (padW == -1)): if width:
if not width:
raise ValueError("width must be given for backprop with horizontal sampling or border_mode='half'")
width = '(*(npy_int64 *)(PyArray_DATA(%s)))' % width width = '(*(npy_int64 *)(PyArray_DATA(%s)))' % width
else: else:
if ((direction != 0) and (dW != 1)) or ((direction == 1) and (padW == -1)):
raise ValueError("width must be given for backprop with horizontal sampling or border_mode='half'")
width = '-1' width = '-1'
sub = sub.copy() sub = sub.copy()
sub.update(locals()) sub.update(locals())
...@@ -268,15 +268,15 @@ class BaseCorrMM(gof.OpenMPOp): ...@@ -268,15 +268,15 @@ class BaseCorrMM(gof.OpenMPOp):
// Obtain or infer kernel width and height // Obtain or infer kernel width and height
// (we need to know it early to be able to handle auto-padding) // (we need to know it early to be able to handle auto-padding)
int kH, kW; int kH, kW, dil_kH, dil_kW;
if (direction != 1) { if (direction != 1) {
// weight is an input variable, we can just read its shape // weight is an input variable, we can just read its shape
kH = PyArray_DIMS(weights)[2]; kH = PyArray_DIMS(weights)[2];
kW = PyArray_DIMS(weights)[3]; kW = PyArray_DIMS(weights)[3];
} }
else { else {
if ((dH != 1) || (padH == -1)) { if (%(height)s != -1) {
// vertical subsampling or half padding, kernel height is specified // kernel height is specified (perhaps vertical subsampling or half padding)
kH = %(height)s; kH = %(height)s;
} }
else if (padH == -2) { else if (padH == -2) {
...@@ -287,7 +287,8 @@ class BaseCorrMM(gof.OpenMPOp): ...@@ -287,7 +287,8 @@ class BaseCorrMM(gof.OpenMPOp):
// explicit padding, we can infer the kernel height // explicit padding, we can infer the kernel height
kH = (PyArray_DIMS(bottom)[2] + 2*padH - (PyArray_DIMS(top)[2] - 1) * dH - 1) / dilH +1; kH = (PyArray_DIMS(bottom)[2] + 2*padH - (PyArray_DIMS(top)[2] - 1) * dH - 1) / dilH +1;
} }
if ((dW != 1) || (padW == -1)) { if (%(width)s != -1) {
// kernel width is specified (perhaps horizontal subsampling or half padding)
kW = %(width)s; kW = %(width)s;
} }
else if (padW == -2) { else if (padW == -2) {
...@@ -299,8 +300,8 @@ class BaseCorrMM(gof.OpenMPOp): ...@@ -299,8 +300,8 @@ class BaseCorrMM(gof.OpenMPOp):
} }
// Implicit dilated kernel size // Implicit dilated kernel size
int dil_kH = (kH - 1) * dilH + 1; dil_kH = (kH - 1) * dilH + 1;
int dil_kW = (kW - 1) * dilW + 1; dil_kW = (kW - 1) * dilW + 1;
// Auto-padding if requested // Auto-padding if requested
if (padH == -1) { // vertical half padding if (padH == -1) { // vertical half padding
...@@ -334,6 +335,21 @@ class BaseCorrMM(gof.OpenMPOp): ...@@ -334,6 +335,21 @@ class BaseCorrMM(gof.OpenMPOp):
out_dim[1] = (npy_intp)PyArray_DIMS(weights)[0]; out_dim[1] = (npy_intp)PyArray_DIMS(weights)[0];
out_dim[2] = (npy_intp)((PyArray_DIMS(bottom)[2] + 2*padH - ((PyArray_DIMS(weights)[2]-1)*dilH + 1)) / dH + 1); out_dim[2] = (npy_intp)((PyArray_DIMS(bottom)[2] + 2*padH - ((PyArray_DIMS(weights)[2]-1)*dilH + 1)) / dH + 1);
out_dim[3] = (npy_intp)((PyArray_DIMS(bottom)[3] + 2*padW - ((PyArray_DIMS(weights)[3]-1)*dilW + 1)) / dW + 1); out_dim[3] = (npy_intp)((PyArray_DIMS(bottom)[3] + 2*padW - ((PyArray_DIMS(weights)[3]-1)*dilW + 1)) / dW + 1);
if (out_dim[0] < 0 || out_dim[1] < 0 || out_dim[2] <= 0 || out_dim[3] <= 0)
{
PyErr_Format(PyExc_ValueError,
"CorrMM: impossible output shape\\n"
" bottom shape: %%ld x %%ld x %%ld x %%ld\\n"
" weights shape: %%ld x %%ld x %%ld x %%ld\\n"
" top shape: %%ld x %%ld x %%ld x %%ld\\n",
(long int)PyArray_DIMS(bottom)[0], (long int)PyArray_DIMS(bottom)[1],
(long int)PyArray_DIMS(bottom)[2], (long int)PyArray_DIMS(bottom)[3],
(long int)PyArray_DIMS(weights)[0], (long int)PyArray_DIMS(weights)[1],
(long int)PyArray_DIMS(weights)[2], (long int)PyArray_DIMS(weights)[3],
(long int)out_dim[0], (long int)out_dim[1], (long int)out_dim[2],
(long int)out_dim[3]);
%(fail)s
}
break; break;
case 1: // backprop wrt. weights case 1: // backprop wrt. weights
// output is weights: (num_filters, num_channels, height, width) // output is weights: (num_filters, num_channels, height, width)
...@@ -342,14 +358,44 @@ class BaseCorrMM(gof.OpenMPOp): ...@@ -342,14 +358,44 @@ class BaseCorrMM(gof.OpenMPOp):
out_dim[1] = (npy_intp)PyArray_DIMS(bottom)[1]; out_dim[1] = (npy_intp)PyArray_DIMS(bottom)[1];
out_dim[2] = (npy_intp)kH; // already inferred further above out_dim[2] = (npy_intp)kH; // already inferred further above
out_dim[3] = (npy_intp)kW; // how convenient out_dim[3] = (npy_intp)kW; // how convenient
if (out_dim[0] < 0 || out_dim[1] < 0 || out_dim[2] <= 0 || out_dim[3] <= 0)
{
PyErr_Format(PyExc_ValueError,
"CorrMM backprop wrt. weights: impossible output shape\\n"
" bottom shape: %%ld x %%ld x %%ld x %%ld\\n"
" weights shape: %%ld x %%ld x %%ld x %%ld\\n"
" top shape: %%ld x %%ld x %%ld x %%ld\\n",
(long int)PyArray_DIMS(bottom)[0], (long int)PyArray_DIMS(bottom)[1],
(long int)PyArray_DIMS(bottom)[2], (long int)PyArray_DIMS(bottom)[3],
(long int)out_dim[0], (long int)out_dim[1], (long int)out_dim[2],
(long int)out_dim[3],
(long int)PyArray_DIMS(top)[0], (long int)PyArray_DIMS(top)[1],
(long int)PyArray_DIMS(top)[2], (long int)PyArray_DIMS(top)[3]);
%(fail)s
}
break; break;
case 2: // backprop wrt. inputs case 2: // backprop wrt. inputs
// output is bottom: (batchsize, num_channels, height, width) // output is bottom: (batchsize, num_channels, height, width)
// height and width: bottom = (top - 1) * sample + (weights-1)*dil + 1 - 2*pad // height and width: bottom = (top - 1) * sample + (weights-1)*dil + 1 - 2*pad
out_dim[0] = (npy_intp)PyArray_DIMS(top)[0]; out_dim[0] = (npy_intp)PyArray_DIMS(top)[0];
out_dim[1] = (npy_intp)PyArray_DIMS(weights)[1]; out_dim[1] = (npy_intp)PyArray_DIMS(weights)[1];
out_dim[2] = (npy_intp)((dH != 1) ? %(height)s : (PyArray_DIMS(top)[2] - 1) * dH + (PyArray_DIMS(weights)[2]-1)*dilH + 1 - 2*padH); out_dim[2] = (npy_intp)((%(height)s != -1) ? %(height)s : (PyArray_DIMS(top)[2] - 1) * dH + (PyArray_DIMS(weights)[2]-1)*dilH + 1 - 2*padH);
out_dim[3] = (npy_intp)((dW != 1) ? %(width)s : (PyArray_DIMS(top)[3] - 1) * dW + (PyArray_DIMS(weights)[3]-1)*dilW + 1 - 2*padW); out_dim[3] = (npy_intp)((%(width)s != -1) ? %(width)s : (PyArray_DIMS(top)[3] - 1) * dW + (PyArray_DIMS(weights)[3]-1)*dilW + 1 - 2*padW);
if (out_dim[0] < 0 || out_dim[1] < 0 || out_dim[2] <= 0 || out_dim[3] <= 0)
{
PyErr_Format(PyExc_ValueError,
"CorrMM backprop wrt. inputs: impossible output shape\\n"
" bottom shape: %%ld x %%ld x %%ld x %%ld\\n"
" weights shape: %%ld x %%ld x %%ld x %%ld\\n"
" top shape: %%ld x %%ld x %%ld x %%ld\\n",
(long int)out_dim[0], (long int)out_dim[1], (long int)out_dim[2],
(long int)out_dim[3],
(long int)PyArray_DIMS(weights)[0], (long int)PyArray_DIMS(weights)[1],
(long int)PyArray_DIMS(weights)[2], (long int)PyArray_DIMS(weights)[3],
(long int)PyArray_DIMS(top)[0], (long int)PyArray_DIMS(top)[1],
(long int)PyArray_DIMS(top)[2], (long int)PyArray_DIMS(top)[3]);
%(fail)s
}
break; break;
default: default:
PyErr_SetString(PyExc_ValueError, "BaseCorrMM: direction must be 0, 1, or 2\\n"); PyErr_SetString(PyExc_ValueError, "BaseCorrMM: direction must be 0, 1, or 2\\n");
...@@ -491,13 +537,13 @@ class CorrMM_gradWeights(BaseCorrMM): ...@@ -491,13 +537,13 @@ class CorrMM_gradWeights(BaseCorrMM):
raise TypeError('img must be 4D tensor') raise TypeError('img must be 4D tensor')
if topgrad.type.ndim != 4: if topgrad.type.ndim != 4:
raise TypeError('topgrad must be 4D tensor') raise TypeError('topgrad must be 4D tensor')
if self.subsample != (1, 1) or self.border_mode == "half":
if shape is None: if shape is None:
if self.subsample != (1, 1) or self.border_mode == "half":
raise ValueError('shape must be given if subsample != (1, 1)' raise ValueError('shape must be given if subsample != (1, 1)'
' or border_mode == "half"') ' or border_mode == "half"')
height_width = [as_tensor_variable(shape[0]).astype('int64'), as_tensor_variable(shape[1]).astype('int64')]
else:
height_width = [] height_width = []
else:
height_width = [as_tensor_variable(shape[0]).astype('int64'), as_tensor_variable(shape[1]).astype('int64')]
broadcastable = [topgrad.type.broadcastable[1], img.type.broadcastable[1], broadcastable = [topgrad.type.broadcastable[1], img.type.broadcastable[1],
False, False] False, False]
...@@ -588,9 +634,13 @@ class CorrMM_gradInputs(BaseCorrMM): ...@@ -588,9 +634,13 @@ class CorrMM_gradInputs(BaseCorrMM):
raise TypeError('kern must be 4D tensor') raise TypeError('kern must be 4D tensor')
if topgrad.type.ndim != 4: if topgrad.type.ndim != 4:
raise TypeError('topgrad must be 4D tensor') raise TypeError('topgrad must be 4D tensor')
if self.subsample != (1, 1) and shape is None: if shape is None:
if self.subsample != (1, 1):
raise ValueError('shape must be given if subsample != (1, 1)') raise ValueError('shape must be given if subsample != (1, 1)')
height_width = [as_tensor_variable(shape[0]).astype('int64'), as_tensor_variable(shape[1]).astype('int64')] if self.subsample != (1, 1) else [] height_width = []
else:
height_width = [as_tensor_variable(shape[0]).astype('int64'),
as_tensor_variable(shape[1]).astype('int64')]
broadcastable = [topgrad.type.broadcastable[0], kern.type.broadcastable[1], broadcastable = [topgrad.type.broadcastable[0], kern.type.broadcastable[1],
False, False] False, False]
......
...@@ -123,7 +123,7 @@ class BaseCorr3dMM(gof.OpenMPOp): ...@@ -123,7 +123,7 @@ class BaseCorr3dMM(gof.OpenMPOp):
def c_code_cache_version(self): def c_code_cache_version(self):
# raise this whenever modifying any of the support_code_files # raise this whenever modifying any of the support_code_files
return (1, self.openmp, blas_header_version()) return (5, self.openmp, blas_header_version())
def c_support_code_apply(self, node, nodename): def c_support_code_apply(self, node, nodename):
# REMEMBER TO RAISE c_code_cache_version when changing any of # REMEMBER TO RAISE c_code_cache_version when changing any of
...@@ -241,23 +241,23 @@ class BaseCorr3dMM(gof.OpenMPOp): ...@@ -241,23 +241,23 @@ class BaseCorr3dMM(gof.OpenMPOp):
# When subsampling, we cannot unambiguously infer the height and width # When subsampling, we cannot unambiguously infer the height and width
# of bottom and weights from top, so we require them to be given. # of bottom and weights from top, so we require them to be given.
# Similarly, when border_mode="half", we cannot infer the weight size. # Similarly, when border_mode="half", we cannot infer the weight size.
if ((direction != 0) and (dH != 1)) or ((direction == 1) and (padH == -1)): if height:
if not height:
raise ValueError("height must be given for backprop with vertical sampling or border_mode='half'")
height = '(*(npy_int64 *)(PyArray_DATA(%s)))' % height height = '(*(npy_int64 *)(PyArray_DATA(%s)))' % height
else: else:
if ((direction != 0) and (dH != 1)) or ((direction == 1) and (padH == -1)):
raise ValueError("height must be given for backprop with vertical sampling or border_mode='half'")
height = '-1' height = '-1'
if ((direction != 0) and (dW != 1)) or ((direction == 1) and (padW == -1)): if width:
if not width:
raise ValueError("width must be given for backprop with horizontal sampling or border_mode='half'")
width = '(*(npy_int64 *)(PyArray_DATA(%s)))' % width width = '(*(npy_int64 *)(PyArray_DATA(%s)))' % width
else: else:
if ((direction != 0) and (dW != 1)) or ((direction == 1) and (padW == -1)):
raise ValueError("width must be given for backprop with horizontal sampling or border_mode='half'")
width = '-1' width = '-1'
if ((direction != 0) and (dD != 1)) or ((direction == 1) and (padD == -1)): if depth:
if not depth:
raise ValueError("depth must be given for backprop with depth sampling or border_mode='half'")
depth = '(*(npy_int64 *)(PyArray_DATA(%s)))' % depth depth = '(*(npy_int64 *)(PyArray_DATA(%s)))' % depth
else: else:
if ((direction != 0) and (dD != 1)) or ((direction == 1) and (padD == -1)):
raise ValueError("depth must be given for backprop with depth sampling or border_mode='half'")
depth = '-1' depth = '-1'
sub = sub.copy() sub = sub.copy()
sub.update(locals()) sub.update(locals())
...@@ -284,7 +284,7 @@ class BaseCorr3dMM(gof.OpenMPOp): ...@@ -284,7 +284,7 @@ class BaseCorr3dMM(gof.OpenMPOp):
// Obtain or infer kernel width, height and depth // Obtain or infer kernel width, height and depth
// (we need to know it early to be able to handle auto-padding) // (we need to know it early to be able to handle auto-padding)
int kH, kW, kD; int kH, kW, kD, dil_kH, dil_kW, dil_kD;
if (direction != 1) { if (direction != 1) {
// weight is an input variable, we can just read its shape // weight is an input variable, we can just read its shape
kH = PyArray_DIMS(weights)[2]; kH = PyArray_DIMS(weights)[2];
...@@ -292,8 +292,8 @@ class BaseCorr3dMM(gof.OpenMPOp): ...@@ -292,8 +292,8 @@ class BaseCorr3dMM(gof.OpenMPOp):
kD = PyArray_DIMS(weights)[4]; kD = PyArray_DIMS(weights)[4];
} }
else { else {
if ((dH != 1) || (padH == -1)) { if (%(height)s != -1) {
// vertical subsampling or half padding, kernel height is specified // kernel height is specified (perhaps vertical subsampling or half padding)
kH = %(height)s; kH = %(height)s;
} }
else if (padH == -2) { else if (padH == -2) {
...@@ -304,7 +304,7 @@ class BaseCorr3dMM(gof.OpenMPOp): ...@@ -304,7 +304,7 @@ class BaseCorr3dMM(gof.OpenMPOp):
// explicit padding, we can infer the kernel height // explicit padding, we can infer the kernel height
kH = (PyArray_DIMS(bottom)[2] + 2*padH - (PyArray_DIMS(top)[2] - 1) * dH - 1) / dilH +1; kH = (PyArray_DIMS(bottom)[2] + 2*padH - (PyArray_DIMS(top)[2] - 1) * dH - 1) / dilH +1;
} }
if ((dW != 1) || (padW == -1)) { if (%(width)s != -1) {
kW = %(width)s; kW = %(width)s;
} }
else if (padW == -2) { else if (padW == -2) {
...@@ -313,7 +313,7 @@ class BaseCorr3dMM(gof.OpenMPOp): ...@@ -313,7 +313,7 @@ class BaseCorr3dMM(gof.OpenMPOp):
else { else {
kW = (PyArray_DIMS(bottom)[3] + 2*padW - (PyArray_DIMS(top)[3] - 1) * dW - 1) / dilW + 1; kW = (PyArray_DIMS(bottom)[3] + 2*padW - (PyArray_DIMS(top)[3] - 1) * dW - 1) / dilW + 1;
} }
if ((dD != 1) || (padD == -1)) { if (%(depth)s != -1) {
kD = %(depth)s; kD = %(depth)s;
} }
else if (padD == -2) { else if (padD == -2) {
...@@ -325,9 +325,9 @@ class BaseCorr3dMM(gof.OpenMPOp): ...@@ -325,9 +325,9 @@ class BaseCorr3dMM(gof.OpenMPOp):
} }
// Implicit dilated kernel size // Implicit dilated kernel size
int dil_kH = (kH - 1) * dilH + 1; dil_kH = (kH - 1) * dilH + 1;
int dil_kW = (kW - 1) * dilW + 1; dil_kW = (kW - 1) * dilW + 1;
int dil_kD = (kD - 1) * dilD + 1; dil_kD = (kD - 1) * dilD + 1;
// Auto-padding if requested // Auto-padding if requested
if (padH == -1) { // vertical half padding if (padH == -1) { // vertical half padding
...@@ -372,6 +372,23 @@ class BaseCorr3dMM(gof.OpenMPOp): ...@@ -372,6 +372,23 @@ class BaseCorr3dMM(gof.OpenMPOp):
out_dim[2] = (npy_intp)((PyArray_DIMS(bottom)[2] + 2*padH - ((PyArray_DIMS(weights)[2]-1)*dilH + 1)) / dH + 1); out_dim[2] = (npy_intp)((PyArray_DIMS(bottom)[2] + 2*padH - ((PyArray_DIMS(weights)[2]-1)*dilH + 1)) / dH + 1);
out_dim[3] = (npy_intp)((PyArray_DIMS(bottom)[3] + 2*padW - ((PyArray_DIMS(weights)[3]-1)*dilW + 1)) / dW + 1); out_dim[3] = (npy_intp)((PyArray_DIMS(bottom)[3] + 2*padW - ((PyArray_DIMS(weights)[3]-1)*dilW + 1)) / dW + 1);
out_dim[4] = (npy_intp)((PyArray_DIMS(bottom)[4] + 2*padD - ((PyArray_DIMS(weights)[4]-1)*dilD + 1)) / dD + 1); out_dim[4] = (npy_intp)((PyArray_DIMS(bottom)[4] + 2*padD - ((PyArray_DIMS(weights)[4]-1)*dilD + 1)) / dD + 1);
if (out_dim[0] < 0 || out_dim[1] < 0 || out_dim[2] <= 0 || out_dim[3] <= 0 || out_dim[4] <= 0)
{
PyErr_Format(PyExc_ValueError,
"Corr3dMM: impossible output shape\\n"
" bottom shape: %%ld x %%ld x %%ld x %%ld x %%ld\\n"
" weights shape: %%ld x %%ld x %%ld x %%ld x %%ld\\n"
" top shape: %%ld x %%ld x %%ld x %%ld x %%ld\\n",
(long int)PyArray_DIMS(bottom)[0], (long int)PyArray_DIMS(bottom)[1],
(long int)PyArray_DIMS(bottom)[2], (long int)PyArray_DIMS(bottom)[3],
(long int)PyArray_DIMS(bottom)[4],
(long int)PyArray_DIMS(weights)[0], (long int)PyArray_DIMS(weights)[1],
(long int)PyArray_DIMS(weights)[2], (long int)PyArray_DIMS(weights)[3],
(long int)PyArray_DIMS(weights)[4],
(long int)out_dim[0], (long int)out_dim[1], (long int)out_dim[2],
(long int)out_dim[3], (long int)out_dim[4]);
%(fail)s
}
break; break;
case 1: // backprop wrt. weights case 1: // backprop wrt. weights
// output is weights: (num_filters, num_channels, height, width, depth) // output is weights: (num_filters, num_channels, height, width, depth)
...@@ -381,15 +398,49 @@ class BaseCorr3dMM(gof.OpenMPOp): ...@@ -381,15 +398,49 @@ class BaseCorr3dMM(gof.OpenMPOp):
out_dim[2] = (npy_intp)kH; // already inferred further above out_dim[2] = (npy_intp)kH; // already inferred further above
out_dim[3] = (npy_intp)kW; // how convenient out_dim[3] = (npy_intp)kW; // how convenient
out_dim[4] = (npy_intp)kD; out_dim[4] = (npy_intp)kD;
if (out_dim[0] < 0 || out_dim[1] < 0 || out_dim[2] <= 0 || out_dim[3] <= 0 || out_dim[4] <= 0)
{
PyErr_Format(PyExc_ValueError,
"Corr3dMM backprop wrt. weights: impossible output shape\\n"
" bottom shape: %%ld x %%ld x %%ld x %%ld x %%ld\\n"
" weights shape: %%ld x %%ld x %%ld x %%ld x %%ld\\n"
" top shape: %%ld x %%ld x %%ld x %%ld x %%ld\\n",
(long int)PyArray_DIMS(bottom)[0], (long int)PyArray_DIMS(bottom)[1],
(long int)PyArray_DIMS(bottom)[2], (long int)PyArray_DIMS(bottom)[3],
(long int)PyArray_DIMS(bottom)[4],
(long int)out_dim[0], (long int)out_dim[1], (long int)out_dim[2],
(long int)out_dim[3], (long int)out_dim[4],
(long int)PyArray_DIMS(top)[0], (long int)PyArray_DIMS(top)[1],
(long int)PyArray_DIMS(top)[2], (long int)PyArray_DIMS(top)[3],
(long int)PyArray_DIMS(top)[4]);
%(fail)s
}
break; break;
case 2: // backprop wrt. inputs case 2: // backprop wrt. inputs
// output is bottom: (batchsize, num_channels, height, width, depth) // output is bottom: (batchsize, num_channels, height, width, depth)
// height and width: bottom = (top - 1) * sample + (weights-1)*dil + 1 - 2*pad // height and width: bottom = (top - 1) * sample + (weights-1)*dil + 1 - 2*pad
out_dim[0] = (npy_intp)PyArray_DIMS(top)[0]; out_dim[0] = (npy_intp)PyArray_DIMS(top)[0];
out_dim[1] = (npy_intp)PyArray_DIMS(weights)[1]; out_dim[1] = (npy_intp)PyArray_DIMS(weights)[1];
out_dim[2] = (npy_intp)((dH != 1) ? %(height)s : (PyArray_DIMS(top)[2] - 1) * dH + (PyArray_DIMS(weights)[2]-1)*dilH + 1 - 2*padH); out_dim[2] = (npy_intp)((%(height)s != -1) ? %(height)s : (PyArray_DIMS(top)[2] - 1) * dH + (PyArray_DIMS(weights)[2]-1)*dilH + 1 - 2*padH);
out_dim[3] = (npy_intp)((dW != 1) ? %(width)s : (PyArray_DIMS(top)[3] - 1) * dW + (PyArray_DIMS(weights)[3]-1)*dilW + 1 - 2*padW); out_dim[3] = (npy_intp)((%(width)s != -1) ? %(width)s : (PyArray_DIMS(top)[3] - 1) * dW + (PyArray_DIMS(weights)[3]-1)*dilW + 1 - 2*padW);
out_dim[4] = (npy_intp)((dD != 1) ? %(depth)s : (PyArray_DIMS(top)[4] - 1) * dD + (PyArray_DIMS(weights)[4]-1)*dilD + 1 - 2*padD); out_dim[4] = (npy_intp)((%(depth)s != -1) ? %(depth)s : (PyArray_DIMS(top)[4] - 1) * dD + (PyArray_DIMS(weights)[4]-1)*dilD + 1 - 2*padD);
if (out_dim[0] < 0 || out_dim[1] < 0 || out_dim[2] <= 0 || out_dim[3] <= 0 || out_dim[4] <= 0)
{
PyErr_Format(PyExc_ValueError,
"Corr3dMM backprop wrt. inputs: impossible output shape\\n"
" bottom shape: %%ld x %%ld x %%ld x %%ld x %%ld\\n"
" weights shape: %%ld x %%ld x %%ld x %%ld x %%ld\\n"
" top shape: %%ld x %%ld x %%ld x %%ld x %%ld\\n",
(long int)out_dim[0], (long int)out_dim[1], (long int)out_dim[2],
(long int)out_dim[3], (long int)out_dim[4],
(long int)PyArray_DIMS(weights)[0], (long int)PyArray_DIMS(weights)[1],
(long int)PyArray_DIMS(weights)[2], (long int)PyArray_DIMS(weights)[3],
(long int)PyArray_DIMS(weights)[4],
(long int)PyArray_DIMS(top)[0], (long int)PyArray_DIMS(top)[1],
(long int)PyArray_DIMS(top)[2], (long int)PyArray_DIMS(top)[3],
(long int)PyArray_DIMS(top)[4]);
%(fail)s
}
break; break;
default: default:
PyErr_SetString(PyExc_ValueError, "BaseCorr3dMM: direction must be 0, 1, or 2\\n"); PyErr_SetString(PyExc_ValueError, "BaseCorr3dMM: direction must be 0, 1, or 2\\n");
...@@ -533,15 +584,15 @@ class Corr3dMM_gradWeights(BaseCorr3dMM): ...@@ -533,15 +584,15 @@ class Corr3dMM_gradWeights(BaseCorr3dMM):
raise TypeError('img must be 5D tensor') raise TypeError('img must be 5D tensor')
if topgrad.type.ndim != 5: if topgrad.type.ndim != 5:
raise TypeError('topgrad must be 5D tensor') raise TypeError('topgrad must be 5D tensor')
if self.subsample != (1, 1, 1) or self.border_mode == "half":
if shape is None: if shape is None:
if self.subsample != (1, 1, 1) or self.border_mode == "half":
raise ValueError('shape must be given if subsample != (1, 1, 1)' raise ValueError('shape must be given if subsample != (1, 1, 1)'
' or border_mode == "half"') ' or border_mode == "half"')
height_width_depth = []
else:
height_width_depth = [as_tensor_variable(shape[0]).astype('int64'), height_width_depth = [as_tensor_variable(shape[0]).astype('int64'),
as_tensor_variable(shape[1]).astype('int64'), as_tensor_variable(shape[1]).astype('int64'),
as_tensor_variable(shape[2]).astype('int64')] as_tensor_variable(shape[2]).astype('int64')]
else:
height_width_depth = []
broadcastable = [topgrad.type.broadcastable[1], img.type.broadcastable[1], broadcastable = [topgrad.type.broadcastable[1], img.type.broadcastable[1],
False, False, False] False, False, False]
...@@ -638,14 +689,14 @@ class Corr3dMM_gradInputs(BaseCorr3dMM): ...@@ -638,14 +689,14 @@ class Corr3dMM_gradInputs(BaseCorr3dMM):
raise TypeError('kern must be 5D tensor') raise TypeError('kern must be 5D tensor')
if topgrad.type.ndim != 5: if topgrad.type.ndim != 5:
raise TypeError('topgrad must be 5D tensor') raise TypeError('topgrad must be 5D tensor')
if self.subsample != (1, 1, 1) and shape is None: if shape is None:
raise ValueError('shape must be given if subsample != (1, 1, 1)')
if self.subsample != (1, 1, 1): if self.subsample != (1, 1, 1):
raise ValueError('shape must be given if subsample != (1, 1, 1)')
height_width_depth = []
else:
height_width_depth = [as_tensor_variable(shape[0]).astype('int64'), height_width_depth = [as_tensor_variable(shape[0]).astype('int64'),
as_tensor_variable(shape[1]).astype('int64'), as_tensor_variable(shape[1]).astype('int64'),
as_tensor_variable(shape[2]).astype('int64')] as_tensor_variable(shape[2]).astype('int64')]
else:
height_width_depth = []
broadcastable = [topgrad.type.broadcastable[0], kern.type.broadcastable[1], broadcastable = [topgrad.type.broadcastable[0], kern.type.broadcastable[1],
False, False, False] False, False, False]
......
...@@ -188,9 +188,17 @@ PyArrayObject* corr3dMM(PyArrayObject* bottom, ...@@ -188,9 +188,17 @@ PyArrayObject* corr3dMM(PyArrayObject* bottom,
const int dil_kW = (kW - 1) * dilW + 1; const int dil_kW = (kW - 1) * dilW + 1;
const int dil_kD = (kD - 1) * dilD + 1; const int dil_kD = (kD - 1) * dilD + 1;
// top: (batchSize, nFilters, topHeight, topWidth, topDepth) // top: (batchSize, nFilters, topHeight, topWidth, topDepth)
const int topHeight = (bottomHeight + 2*padH - dil_kH) / dH + 1; const int topHeightNoDH = (bottomHeight + 2*padH - dil_kH);
const int topWidth = (bottomWidth + 2*padW - dil_kW) / dW + 1; const int topWidthNoDW = (bottomWidth + 2*padW - dil_kW);
const int topDepth = (bottomDepth + 2*padD - dil_kD) / dD + 1; const int topDepthNoDD = (bottomDepth + 2*padD - dil_kD);
// the above values might be negative so we need to use Python-like
// flooring integer division to be compatible with get_conv_output.
// note: this macro implements Python's // for negative x only
#define _CONV_FLOORDIV_X(x,y) ((x < 0) ? (- ((-x) / y) - (((-x) %% y) == 0 ? 0 : 1)) : (x / y))
const int topHeight = _CONV_FLOORDIV_X(topHeightNoDH, dH) + 1;
const int topWidth = _CONV_FLOORDIV_X(topWidthNoDW, dW) + 1;
const int topDepth = _CONV_FLOORDIV_X(topDepthNoDD, dD) + 1;
#undef _CONV_FLOORDIV
if (batchSize != PyArray_DIMS(top)[0] || if (batchSize != PyArray_DIMS(top)[0] ||
nFilters != PyArray_DIMS(top)[1] || nFilters != PyArray_DIMS(top)[1] ||
topHeight != PyArray_DIMS(top)[2] || topHeight != PyArray_DIMS(top)[2] ||
...@@ -245,7 +253,23 @@ PyArrayObject* corr3dMM(PyArrayObject* bottom, ...@@ -245,7 +253,23 @@ PyArrayObject* corr3dMM(PyArrayObject* bottom,
char Trans = 'T'; char Trans = 'T';
PyArrayObject *output; PyArrayObject *output;
if (direction == 0) { // forward pass if (batchSize == 0 || nChannels == 0 || nFilters == 0) {
switch(direction) {
case 0:
output = top;
break;
case 1:
output = weight;
break;
case 2:
output = bottom;
break;
default:
return NULL;
}
PyArray_FILLWBYTE(output, 0);
}
else if (direction == 0) { // forward pass
output = top; output = top;
// valid correlation: im3d2col, then gemm // valid correlation: im3d2col, then gemm
// Iterate over batch // Iterate over batch
......
...@@ -164,8 +164,15 @@ PyArrayObject* corrMM(PyArrayObject* bottom, ...@@ -164,8 +164,15 @@ PyArrayObject* corrMM(PyArrayObject* bottom,
const int dil_kH = (kH - 1) * dilH + 1; const int dil_kH = (kH - 1) * dilH + 1;
const int dil_kW = (kW - 1) * dilW + 1; const int dil_kW = (kW - 1) * dilW + 1;
// top: (batchSize, nFilters, topHeight, topWidth) // top: (batchSize, nFilters, topHeight, topWidth)
const int topHeight = (bottomHeight + 2*padH - dil_kH) / dH + 1; const int topHeightNoDH = (bottomHeight + 2*padH - dil_kH);
const int topWidth = (bottomWidth + 2*padW - dil_kW) / dW + 1; const int topWidthNoDW = (bottomWidth + 2*padW - dil_kW);
// the above values might be negative so we need to use Python-like
// flooring integer division to be compatible with get_conv_output.
// note: this macro implements Python's // for negative x only
#define _CONV_FLOORDIV_X(x,y) ((x < 0) ? (- ((-x) / y) - (((-x) %% y) == 0 ? 0 : 1)) : (x / y))
const int topHeight = _CONV_FLOORDIV_X(topHeightNoDH, dH) + 1;
const int topWidth = _CONV_FLOORDIV_X(topWidthNoDW, dW) + 1;
#undef _CONV_FLOORDIV
if (batchSize != PyArray_DIMS(top)[0] || if (batchSize != PyArray_DIMS(top)[0] ||
nFilters != PyArray_DIMS(top)[1] || nFilters != PyArray_DIMS(top)[1] ||
topHeight != PyArray_DIMS(top)[2] || topHeight != PyArray_DIMS(top)[2] ||
...@@ -219,7 +226,23 @@ PyArrayObject* corrMM(PyArrayObject* bottom, ...@@ -219,7 +226,23 @@ PyArrayObject* corrMM(PyArrayObject* bottom,
char Trans = 'T'; char Trans = 'T';
PyArrayObject *output; PyArrayObject *output;
if (direction == 0) { // forward pass if (batchSize == 0 || nChannels == 0 || nFilters == 0) {
switch(direction) {
case 0:
output = top;
break;
case 1:
output = weight;
break;
case 2:
output = bottom;
break;
default:
return NULL;
}
PyArray_FILLWBYTE(output, 0);
}
else if (direction == 0) { // forward pass
output = top; output = top;
// valid correlation: im2col, then gemm // valid correlation: im2col, then gemm
// Iterate over batch // Iterate over batch
......
...@@ -10,7 +10,12 @@ from theano import tensor ...@@ -10,7 +10,12 @@ from theano import tensor
from theano.gof.opt import check_stack_trace from theano.gof.opt import check_stack_trace
from theano.tests import unittest_tools as utt from theano.tests import unittest_tools as utt
from theano.tensor.nnet import corr, corr3d, abstract_conv as conv from theano.tensor.nnet import corr, corr3d, abstract_conv as conv
from theano.tensor.nnet.abstract_conv import get_conv_output_shape from theano.tensor.nnet.abstract_conv import (get_conv_output_shape,
get_conv_gradweights_shape,
get_conv_gradinputs_shape,
check_conv_gradinputs_shape,
assert_conv_shape,
assert_shape)
from theano.tensor.nnet.abstract_conv import AbstractConv2d from theano.tensor.nnet.abstract_conv import AbstractConv2d
from theano.tensor.nnet.abstract_conv import AbstractConv2d_gradInputs from theano.tensor.nnet.abstract_conv import AbstractConv2d_gradInputs
from theano.tensor.nnet.abstract_conv import AbstractConv2d_gradWeights from theano.tensor.nnet.abstract_conv import AbstractConv2d_gradWeights
...@@ -133,6 +138,198 @@ class TestGetConvOutShape(unittest.TestCase): ...@@ -133,6 +138,198 @@ class TestGetConvOutShape(unittest.TestCase):
self.assertTrue(test4_params == (3, 4, 6, 4, 10)) self.assertTrue(test4_params == (3, 4, 6, 4, 10))
class TestConvGradInputsShape(unittest.TestCase):
def test_check_shape(self):
for i in range(1, 20):
for k in range(1, 10):
for b in ('valid', 'half', 'full', (0, 2)):
for s in (1, 2, 3):
for d in (1, 2, 3):
image_shape = (59, 61, i, i)
kernel_shape = (67, 61, k, k)
# compute the output that these inputs and parameters would produce
computed_shape = get_conv_output_shape(
image_shape, kernel_shape, b, (s, s), (d, d))
# this should be accepted
self.assertTrue(check_conv_gradinputs_shape(
image_shape, kernel_shape, computed_shape, b, (s, s), (d, d)))
# one or more None should also be accepted
trial_shape = (None, None, computed_shape[2], None)
self.assertTrue(check_conv_gradinputs_shape(
image_shape, kernel_shape, trial_shape, b, (s, s), (d, d)))
# the batch size and number of filters are important
trial_shape = (1, 1, computed_shape[2], computed_shape[3])
self.assertFalse(check_conv_gradinputs_shape(
image_shape, kernel_shape, trial_shape, b, (s, s), (d, d)))
# outputs that are too large or too small should be rejected
for o in (-3, -2, -1, 1, 2, 3):
trial_shape = (computed_shape[0], computed_shape[1],
computed_shape[2] + o, computed_shape[3] + o)
self.assertFalse(check_conv_gradinputs_shape(
image_shape, kernel_shape, trial_shape, b, (s, s), (d, d)))
def test_get_shape(self):
for i in range(1, 20):
for k in range(1, 10):
for b in ('valid', 'half', 'full', (0, 2)):
for d in (1, 2, 3):
image_shape = (59, 61, i, i)
kernel_shape = (67, 61, k, k)
# compute the output that these inputs and parameters would produce
output_shape = get_conv_output_shape(
image_shape, kernel_shape, b, (1, 1), (d, d))
# compute the image_shape given this output_shape
computed_image_shape = get_conv_gradinputs_shape(
kernel_shape, output_shape, b, (1, 1), (d, d))
self.assertEqual(computed_image_shape, image_shape)
# if subsample > 1, the shape should be None
computed_image_shape = get_conv_gradinputs_shape(
kernel_shape, output_shape, b, (2, 3), (d, d))
image_shape_with_None = image_shape[:2] + (None, None)
self.assertEqual(computed_image_shape, image_shape_with_None)
# compute the kernel_shape given this output_shape
computed_kernel_shape = get_conv_gradweights_shape(
image_shape, output_shape, b, (1, 1), (d, d))
# if border_mode == 'half', the shape should be None
if b == 'half':
kernel_shape_with_None = kernel_shape[:2] + (None, None)
self.assertEqual(computed_kernel_shape, kernel_shape_with_None)
else:
self.assertEqual(computed_kernel_shape, kernel_shape)
# if subsample > 1, the shape should be None
computed_kernel_shape = get_conv_gradweights_shape(
kernel_shape, output_shape, b, (2, 3), (d, d))
kernel_shape_with_None = kernel_shape[:2] + (None, None)
self.assertEqual(computed_kernel_shape, kernel_shape_with_None)
class TestAssertConvShape(unittest.TestCase):
def test_basic(self):
shape = tuple(tensor.iscalar() for i in range(4))
f = theano.function(shape, assert_conv_shape(shape))
self.assertEqual([1, 2, 3, 4], f(1, 2, 3, 4))
self.assertEqual([0, 0, 1, 1], f(0, 0, 1, 1))
assert_raises(AssertionError, f, 3, 3, 3, 0)
assert_raises(AssertionError, f, 3, 3, 0, 3)
assert_raises(AssertionError, f, 3, 3, -1, 3)
assert_raises(AssertionError, f, 3, -1, 3, 3)
assert_raises(AssertionError, f, -1, 3, 3, 3)
class TestAssertShape(unittest.TestCase):
def test_basic(self):
x = tensor.tensor4()
s1 = tensor.iscalar()
s2 = tensor.iscalar()
expected_shape = [None, s1, s2, None]
f = theano.function([x, s1, s2], assert_shape(x, expected_shape))
v = numpy.zeros((3, 5, 7, 11), dtype='float32')
self.assertEqual(0, numpy.sum(f(v, 5, 7)))
assert_raises(AssertionError, f, v, 5, 0)
assert_raises(AssertionError, f, v, 5, 9)
assert_raises(AssertionError, f, v, 0, 7)
assert_raises(AssertionError, f, v, 7, 7)
def test_shape_check_conv2d(self):
input = tensor.tensor4()
filters = tensor.tensor4()
out = conv.conv2d(input, filters,
input_shape=(3, 5, 7, 11),
filter_shape=(7, 5, 3, 3))
f = theano.function([input, filters], out)
# mismatched input_shape
assert_raises(AssertionError, f,
numpy.zeros((3, 5, 9, 11), dtype='float32'),
numpy.zeros((7, 5, 3, 3), dtype='float32'))
# mismatched filter_shape
assert_raises(AssertionError, f,
numpy.zeros((3, 5, 7, 11), dtype='float32'),
numpy.zeros((7, 5, 2, 2), dtype='float32'))
def test_shape_check_conv3d(self):
input = tensor.tensor5()
filters = tensor.tensor5()
out = conv.conv3d(input, filters,
input_shape=(3, 5, 7, 11, 13),
filter_shape=(7, 5, 3, 3, 3))
f = theano.function([input, filters], out)
# mismatched input_shape
assert_raises(AssertionError, f,
numpy.zeros((3, 5, 9, 11, 13), dtype='float32'),
numpy.zeros((7, 5, 3, 3, 3), dtype='float32'))
# mismatched filter_shape
assert_raises(AssertionError, f,
numpy.zeros((3, 5, 7, 11, 13), dtype='float32'),
numpy.zeros((7, 5, 2, 2, 2), dtype='float32'))
def test_shape_check_conv2d_grad_wrt_inputs(self):
output_grad = tensor.tensor4()
filters = tensor.tensor4()
out = conv.conv2d_grad_wrt_inputs(output_grad, filters,
input_shape=(None, None, 7, 11),
filter_shape=(7, 5, 3, 3))
f = theano.function([output_grad, filters], out)
# mismatched filter_shape
assert_raises(AssertionError, f,
numpy.zeros((3, 6, 5, 9), dtype='float32'),
numpy.zeros((7, 6, 3, 3), dtype='float32'))
def test_shape_check_conv3d_grad_wrt_inputs(self):
output_grad = tensor.tensor5()
filters = tensor.tensor5()
out = conv.conv3d_grad_wrt_inputs(output_grad, filters,
input_shape=(None, None, 7, 11, 13),
filter_shape=(7, 5, 3, 3, 3))
f = theano.function([output_grad, filters], out)
# mismatched filter_shape
assert_raises(AssertionError, f,
numpy.zeros((3, 6, 5, 9, 11), dtype='float32'),
numpy.zeros((7, 6, 3, 3, 3), dtype='float32'))
def test_shape_check_conv2d_grad_wrt_weights(self):
input = tensor.tensor4()
output_grad = tensor.tensor4()
out = conv.conv2d_grad_wrt_weights(input, output_grad,
filter_shape=(None, None, 3, 3),
input_shape=(3, 5, 7, 11))
f = theano.function([input, output_grad], out)
# mismatched filter_shape
assert_raises(AssertionError, f,
numpy.zeros((3, 6, 7, 11), dtype='float32'),
numpy.zeros((3, 7, 5, 9), dtype='float32'))
def test_shape_check_conv3d_grad_wrt_weights(self):
input = tensor.tensor5()
output_grad = tensor.tensor5()
out = conv.conv3d_grad_wrt_weights(input, output_grad,
filter_shape=(None, None, 3, 3, 3),
input_shape=(3, 5, 7, 11, 13))
f = theano.function([input, output_grad], out)
# mismatched filter_shape
assert_raises(AssertionError, f,
numpy.zeros((3, 6, 7, 11, 13), dtype='float32'),
numpy.zeros((3, 7, 5, 9, 11), dtype='float32'))
class BaseTestConv(object): class BaseTestConv(object):
def get_output_shape(self, inputs_shape, filters_shape, def get_output_shape(self, inputs_shape, filters_shape,
subsample, border_mode, filter_dilation): subsample, border_mode, filter_dilation):
...@@ -211,7 +408,7 @@ class BaseTestConv(object): ...@@ -211,7 +408,7 @@ class BaseTestConv(object):
res_ref = numpy.array(f_ref()) res_ref = numpy.array(f_ref())
res = numpy.array(f()) res = numpy.array(f())
utt.assert_allclose(res_ref, res) utt.assert_allclose(res_ref, res)
if verify_grad: if verify_grad and inputs_val.size > 0 and filters_val.size > 0 and res.size > 0:
utt.verify_grad(conv_op(border_mode=border_mode, utt.verify_grad(conv_op(border_mode=border_mode,
imshp=imshp, kshp=kshp, imshp=imshp, kshp=kshp,
subsample=subsample, subsample=subsample,
...@@ -277,7 +474,7 @@ class BaseTestConv(object): ...@@ -277,7 +474,7 @@ class BaseTestConv(object):
filter_dilation=filter_dilation) filter_dilation=filter_dilation)
return conv_op(inputs_val, output_val, filters_shape[2:]) return conv_op(inputs_val, output_val, filters_shape[2:])
if verify_grad: if verify_grad and inputs_val.size > 0 and output_val.size > 0 and res.size > 0:
utt.verify_grad(abstract_conv_gradweight, utt.verify_grad(abstract_conv_gradweight,
[inputs_val, output_val], [inputs_val, output_val],
mode=mode, eps=1) mode=mode, eps=1)
...@@ -314,10 +511,15 @@ class BaseTestConv(object): ...@@ -314,10 +511,15 @@ class BaseTestConv(object):
imshp=imshp, kshp=kshp, imshp=imshp, kshp=kshp,
filter_dilation=filter_dilation) filter_dilation=filter_dilation)
c = c(filters, output, inputs_shape[2:]) c = c(filters, output, inputs_shape[2:])
f = theano.function([], c, mode=mode)
# ref is set to None for the inconsistent-shape tests.
# The reference function also raises an exception, which would
# mask the exception generated by the target implementation.
if ref is not None:
c_ref = ref(filters, output, inputs_shape, c_ref = ref(filters, output, inputs_shape,
border_mode=border_mode, subsample=subsample, border_mode=border_mode, subsample=subsample,
conv_mode=conv_mode, filter_dilation=filter_dilation) conv_mode=conv_mode, filter_dilation=filter_dilation)
f = theano.function([], c, mode=mode)
f_ref = theano.function([], c_ref, mode='FAST_RUN') f_ref = theano.function([], c_ref, mode='FAST_RUN')
if target_op is not None: if target_op is not None:
...@@ -326,8 +528,10 @@ class BaseTestConv(object): ...@@ -326,8 +528,10 @@ class BaseTestConv(object):
if check_trace: if check_trace:
assert_true(check_stack_trace(f, ops_to_check=target_op)) assert_true(check_stack_trace(f, ops_to_check=target_op))
res_ref = numpy.array(f_ref())
res = numpy.array(f()) res = numpy.array(f())
if ref is not None:
res_ref = numpy.array(f_ref())
utt.assert_allclose(res_ref, res) utt.assert_allclose(res_ref, res)
def abstract_conv_gradinputs(filters_val, output_val): def abstract_conv_gradinputs(filters_val, output_val):
...@@ -336,7 +540,7 @@ class BaseTestConv(object): ...@@ -336,7 +540,7 @@ class BaseTestConv(object):
filter_dilation=filter_dilation) filter_dilation=filter_dilation)
return conv_op(filters_val, output_val, inputs_shape[2:]) return conv_op(filters_val, output_val, inputs_shape[2:])
if verify_grad: if verify_grad and filters_val.size > 0 and output_val.size > 0 and res.size > 0:
utt.verify_grad(abstract_conv_gradinputs, utt.verify_grad(abstract_conv_gradinputs,
[filters_val, output_val], [filters_val, output_val],
mode=mode, eps=1) mode=mode, eps=1)
...@@ -351,6 +555,7 @@ class BaseTestConv(object): ...@@ -351,6 +555,7 @@ class BaseTestConv(object):
for (i, f) in zip(self.inputs_shapes, self.filters_shapes): for (i, f) in zip(self.inputs_shapes, self.filters_shapes):
for provide_shape in self.provide_shape: for provide_shape in self.provide_shape:
yield (self.tcase, i, f, ds, db, dflip, provide_shape) yield (self.tcase, i, f, ds, db, dflip, provide_shape)
if min(i) > 0 and min(f) > 0:
for fd in self.filters_dilations: for fd in self.filters_dilations:
for s in self.subsamples: for s in self.subsamples:
for b in self.border_modes: for b in self.border_modes:
...@@ -365,12 +570,15 @@ class BaseTestConv2d(BaseTestConv): ...@@ -365,12 +570,15 @@ class BaseTestConv2d(BaseTestConv):
def setup_class(cls): def setup_class(cls):
# This tests can run even when theano.config.blas.ldflags is empty. # This tests can run even when theano.config.blas.ldflags is empty.
cls.inputs_shapes = [(8, 1, 6, 6), (8, 1, 8, 8), (2, 1, 7, 7), cls.inputs_shapes = [(8, 1, 6, 6), (8, 1, 8, 8), (2, 1, 7, 7),
(6, 1, 10, 11), (2, 1, 6, 5), (1, 5, 9, 9)] (6, 1, 10, 11), (2, 1, 6, 5), (1, 5, 9, 9),
(0, 1, 6, 6), (1, 0, 6, 6), (1, 1, 6, 6)]
cls.filters_shapes = [(5, 1, 2, 2), (4, 1, 3, 3), (2, 1, 3, 3), cls.filters_shapes = [(5, 1, 2, 2), (4, 1, 3, 3), (2, 1, 3, 3),
(1, 1, 2, 3), (4, 1, 1, 3), (4, 5, 3, 2)] (1, 1, 2, 3), (4, 1, 1, 3), (4, 5, 3, 2),
(1, 1, 2, 2), (1, 0, 2, 2), (0, 1, 2, 2)]
cls.subsamples = [(1, 1), (2, 2), (2, 4)] cls.subsamples = [(1, 1), (2, 2), (2, 4)]
cls.default_subsamples = (1, 1) cls.default_subsamples = (1, 1)
cls.filters_dilations = [(1, 1), (1, 2), (2, 1)] cls.filters_dilations = [(1, 1), (1, 2), (2, 1)]
cls.default_filters_dilations = (1, 1)
cls.border_modes = ["valid", "half", "full", (0, 0), (1, 1), (5, 5), (5, 2)] cls.border_modes = ["valid", "half", "full", (0, 0), (1, 1), (5, 5), (5, 2)]
cls.default_border_mode = (0, 0) cls.default_border_mode = (0, 0)
cls.filter_flip = [True, False] cls.filter_flip = [True, False]
...@@ -379,6 +587,62 @@ class BaseTestConv2d(BaseTestConv): ...@@ -379,6 +587,62 @@ class BaseTestConv2d(BaseTestConv):
cls.default_provide_shape = True cls.default_provide_shape = True
cls.shared = staticmethod(theano.compile.shared) cls.shared = staticmethod(theano.compile.shared)
def test_gradinput_arbitrary_output_shapes(self):
# this computes the grad wrt inputs for an output shape
# that the forward convolution would not produce
input_shape = (2, 1, 7, 7)
filter_shape = (2, 1, 3, 3)
for output_shape in [(2, 2, 8, 8), (2, 2, 9, 9), (2, 2, 12, 12)]:
for border_mode in ["valid", "half", "full"]:
computed_shape = get_conv_output_shape(
input_shape, filter_shape, border_mode, self.default_subsamples, self.default_filters_dilations)
# is this a valid combination?
if tuple(computed_shape) == output_shape:
yield (self.tcase_gi,
input_shape,
filter_shape,
output_shape,
self.default_subsamples,
border_mode,
True,
True,
self.default_filters_dilations,
False)
else:
# expect an error
yield (self.tcase_gi,
input_shape,
filter_shape,
output_shape,
self.default_subsamples,
border_mode,
True,
True,
self.default_filters_dilations,
True)
def test_gradinput_impossible_output_shapes(self):
def run_for_output_offsets(image_shape, kernel_shape, s, border_mode, d):
# outputs that are too large or too small should be rejected
for o in (-3, -1, 1, 2):
output_shape = (1, 1, computed_shape[2] + o, computed_shape[3] + o)
# expect an error
self.tcase_gi(image_shape, kernel_shape, output_shape,
(s, s), border_mode, True, True, (d, d), True)
for (i, k) in ((1, 1), (1, 2), (2, 1), (4, 2), (4, 3), (7, 3), (9, 5)):
for border_mode in ('valid', 'half', 'full', (0, 2)):
for (s, d) in ((1, 1), (1, 2), (2, 1), (2, 2), (3, 1), (1, 3)):
image_shape = (1, 1, i, i)
kernel_shape = (1, 1, k, k)
# compute the output that these inputs and parameters would produce
computed_shape = get_conv_output_shape(
image_shape, kernel_shape, border_mode, (s, s), (d, d))
yield (run_for_output_offsets,
image_shape, kernel_shape, s, border_mode, d)
def run_fwd(self, inputs_shape, filters_shape, def run_fwd(self, inputs_shape, filters_shape,
conv_fn=conv.conv2d, conv_op=conv.AbstractConv2d, conv_fn=conv.conv2d, conv_op=conv.AbstractConv2d,
ref=conv2d_corr, **kwargs): ref=conv2d_corr, **kwargs):
...@@ -438,6 +702,26 @@ class TestCorrConv2d(BaseTestConv2d): ...@@ -438,6 +702,26 @@ class TestCorrConv2d(BaseTestConv2d):
filter_flip=flip, target_op=CorrMM_gradInputs, filter_flip=flip, target_op=CorrMM_gradInputs,
check_trace=True, filter_dilation=fd) check_trace=True, filter_dilation=fd)
def tcase_gi(self, i, f, o, s, b, flip, provide_shape, fd=(1, 1), expect_error=False):
# This tests can run even when theano.config.blas.ldflags is empty.
if (not theano.config.cxx or
theano.config.mode == "FAST_COMPILE"):
raise SkipTest("Need blas to test conv2d")
if not expect_error:
self.run_gradinput(inputs_shape=i, filters_shape=f,
output_shape=o, subsample=s, verify_grad=True,
provide_shape=provide_shape, border_mode=b,
filter_flip=flip, target_op=CorrMM_gradInputs,
check_trace=True, filter_dilation=fd)
else:
assert_raises(ValueError,
self.run_gradinput,
inputs_shape=i, filters_shape=f,
output_shape=o, subsample=s, verify_grad=False,
provide_shape=provide_shape, border_mode=b,
filter_flip=flip, target_op=CorrMM_gradInputs,
ref=None, check_trace=True, filter_dilation=fd)
class TestAbstractConvNoOptim(BaseTestConv2d): class TestAbstractConvNoOptim(BaseTestConv2d):
@classmethod @classmethod
...@@ -477,6 +761,25 @@ class TestAbstractConvNoOptim(BaseTestConv2d): ...@@ -477,6 +761,25 @@ class TestAbstractConvNoOptim(BaseTestConv2d):
check_trace=True, filter_dilation=fd, check_trace=True, filter_dilation=fd,
mode=mode) mode=mode)
def tcase_gi(self, i, f, o, s, b, flip, provide_shape, fd=(1, 1), expect_error=False):
mode = theano.Mode(optimizer=None)
if not expect_error:
self.run_gradinput(inputs_shape=i, filters_shape=f,
output_shape=o, subsample=s, verify_grad=True,
provide_shape=provide_shape, border_mode=b,
filter_flip=flip, target_op=None,
check_trace=True, filter_dilation=fd,
mode=mode)
else:
assert_raises(ValueError,
self.run_gradinput,
inputs_shape=i, filters_shape=f,
output_shape=o, subsample=s, verify_grad=False,
provide_shape=provide_shape, border_mode=b,
filter_flip=flip, target_op=None,
check_trace=True, filter_dilation=fd,
ref=None, mode=mode)
class TestCpuConv2d(BaseTestConv2d): class TestCpuConv2d(BaseTestConv2d):
@classmethod @classmethod
...@@ -592,16 +895,47 @@ class TestCpuConv2d(BaseTestConv2d): ...@@ -592,16 +895,47 @@ class TestCpuConv2d(BaseTestConv2d):
check_trace=True, check_trace=True,
filter_dilation=fd) filter_dilation=fd)
def tcase_gi(self, i, f, o, s, b, flip, provide_shape, fd=(1, 1), expect_error=False):
if fd != (1, 1):
raise SkipTest("No dilation implementation for basic cpu ConvOp.")
mode = self.mode
if not flip:
return
if b not in ((0, 0), 'valid', 'full'):
return
if (not provide_shape) and (s != (1, 1)) and (b == 'full'):
return
if ((s[0] not in (1, 2)) or (s[1] not in (1, 2))) and (b == 'full'):
return
if not expect_error:
self.run_gradinput(inputs_shape=i, filters_shape=f,
output_shape=o, subsample=s,
verify_grad=False, mode=mode,
provide_shape=provide_shape, border_mode=b,
filter_flip=flip,
target_op=(ConvOp, ConvTransp3D),
check_trace=True,
filter_dilation=fd)
else:
# we do not check for inconsistent shapes,
# because this older implementation does not check that
raise SkipTest('Inconsistent shapes are not tested for old cpu ConvOp.')
class BaseTestConv3d(BaseTestConv): class BaseTestConv3d(BaseTestConv):
@classmethod @classmethod
def setup_class(cls): def setup_class(cls):
# This tests can run even when theano.config.blas.ldflags is empty. # This tests can run even when theano.config.blas.ldflags is empty.
cls.inputs_shapes = [(2, 1, 5, 5, 5), (1, 2, 7, 5, 6)] cls.inputs_shapes = [(2, 1, 5, 5, 5), (1, 2, 7, 5, 6),
cls.filters_shapes = [(2, 1, 2, 2, 2), (1, 2, 2, 1, 3)] (0, 1, 5, 5, 5), (1, 0, 5, 5, 5), (1, 1, 5, 5, 5)]
cls.filters_shapes = [(2, 1, 2, 2, 2), (1, 2, 2, 1, 3),
(1, 1, 2, 2, 2), (1, 0, 2, 2, 2), (0, 1, 2, 2, 2)]
cls.subsamples = [(1, 1, 1), (2, 2, 2), (1, 2, 3)] cls.subsamples = [(1, 1, 1), (2, 2, 2), (1, 2, 3)]
cls.default_subsamples = (1, 1, 1) cls.default_subsamples = (1, 1, 1)
cls.filters_dilations = [(1, 1, 1), (1, 2, 1), (2, 1, 2)] cls.filters_dilations = [(1, 1, 1), (1, 2, 1), (2, 1, 2)]
cls.default_filters_dilations = (1, 1, 1)
cls.border_modes = ["valid", "half", "full", (0, 0, 0), (2, 2, 3)] cls.border_modes = ["valid", "half", "full", (0, 0, 0), (2, 2, 3)]
cls.default_border_mode = (0, 0, 0) cls.default_border_mode = (0, 0, 0)
cls.filter_flip = [True, False] cls.filter_flip = [True, False]
...@@ -610,6 +944,64 @@ class BaseTestConv3d(BaseTestConv): ...@@ -610,6 +944,64 @@ class BaseTestConv3d(BaseTestConv):
cls.default_provide_shape = True cls.default_provide_shape = True
cls.shared = staticmethod(theano.compile.shared) cls.shared = staticmethod(theano.compile.shared)
def test_gradinput_arbitrary_output_shapes(self):
# this computes the grad wrt inputs for an output shape
# that the forward convolution would not produce
input_shape = (2, 1, 7, 7, 7)
filter_shape = (1, 1, 3, 3, 3)
for output_shape in [(2, 1, 8, 8, 8), (2, 1, 9, 9, 9), (2, 1, 12, 12, 12)]:
for border_mode in ["valid", "half", "full"]:
# compute the output that these inputs and parameters would produce
computed_shape = get_conv_output_shape(
input_shape, filter_shape, border_mode, self.default_subsamples, self.default_filters_dilations)
# is this a valid combination?
if tuple(computed_shape) == output_shape:
yield (self.tcase_gi,
input_shape,
filter_shape,
output_shape,
self.default_subsamples,
border_mode,
True,
True,
self.default_filters_dilations,
False)
else:
# expect an error
yield (self.tcase_gi,
input_shape,
filter_shape,
output_shape,
self.default_subsamples,
border_mode,
True,
True,
self.default_filters_dilations,
True)
def test_gradinput_impossible_output_shapes(self):
def run_for_output_offsets(image_shape, kernel_shape, s, border_mode, d):
# outputs that are too large or too small should be rejected
for o in (-3, -1, 1, 2):
output_shape = (1, 1, computed_shape[2] + o,
computed_shape[3] + o, computed_shape[4] + o)
# expect an error
self.tcase_gi(image_shape, kernel_shape, output_shape,
(s, s), border_mode, True, True, (d, d), True)
for (i, k) in ((1, 1), (1, 2), (2, 1), (4, 2), (4, 3), (7, 3), (9, 5)):
for border_mode in ('valid', 'half', 'full', (0, 2, 1)):
for (s, d) in ((1, 1), (1, 2), (2, 1), (2, 2), (3, 1), (1, 3)):
image_shape = (1, 1, i, i, i)
kernel_shape = (1, 1, k, k, k)
# compute the output that these inputs and parameters would produce
computed_shape = get_conv_output_shape(
image_shape, kernel_shape, border_mode, (s, s, s), (d, d, d))
yield (run_for_output_offsets,
image_shape, kernel_shape, s, border_mode, d)
def run_fwd(self, inputs_shape, filters_shape, def run_fwd(self, inputs_shape, filters_shape,
conv_fn=conv.conv3d, conv_op=conv.AbstractConv3d, conv_fn=conv.conv3d, conv_op=conv.AbstractConv3d,
ref=conv3d_corr, **kwargs): ref=conv3d_corr, **kwargs):
...@@ -669,6 +1061,26 @@ class TestCorrConv3d(BaseTestConv3d): ...@@ -669,6 +1061,26 @@ class TestCorrConv3d(BaseTestConv3d):
filter_flip=flip, target_op=Corr3dMM_gradInputs, filter_flip=flip, target_op=Corr3dMM_gradInputs,
check_trace=True, filter_dilation=fd) check_trace=True, filter_dilation=fd)
def tcase_gi(self, i, f, o, s, b, flip, provide_shape, fd=(1, 1, 1), expect_error=False):
# This test can run even when theano.config.blas.ldflags is empty.
if (not theano.config.cxx or
theano.config.mode == "FAST_COMPILE"):
raise SkipTest("Need blas to test conv3d")
if not expect_error:
self.run_gradinput(inputs_shape=i, filters_shape=f,
output_shape=o, subsample=s, verify_grad=True,
provide_shape=provide_shape, border_mode=b,
filter_flip=flip, target_op=Corr3dMM_gradInputs,
check_trace=True, filter_dilation=fd)
else:
assert_raises(ValueError,
self.run_gradinput,
inputs_shape=i, filters_shape=f,
output_shape=o, subsample=s, verify_grad=False,
provide_shape=provide_shape, border_mode=b,
filter_flip=flip, target_op=Corr3dMM_gradInputs,
ref=None, check_trace=True, filter_dilation=fd)
class TestCpuConv3d(BaseTestConv3d): class TestCpuConv3d(BaseTestConv3d):
@classmethod @classmethod
...@@ -687,6 +1099,9 @@ class TestCpuConv3d(BaseTestConv3d): ...@@ -687,6 +1099,9 @@ class TestCpuConv3d(BaseTestConv3d):
raise SkipTest("No dilation implementation for basic cpu Conv3D.") raise SkipTest("No dilation implementation for basic cpu Conv3D.")
if not theano.config.cxx: if not theano.config.cxx:
raise SkipTest("Need cxx to test conv2d") raise SkipTest("Need cxx to test conv2d")
if min(i) == 0 or min(f) == 0:
raise SkipTest('Not tested for old cpu Conv3D.')
mode = self.mode mode = self.mode
o = self.get_output_shape(i, f, s, b, fd) o = self.get_output_shape(i, f, s, b, fd)
fwd_OK = True fwd_OK = True
...@@ -770,6 +1185,30 @@ class TestCpuConv3d(BaseTestConv3d): ...@@ -770,6 +1185,30 @@ class TestCpuConv3d(BaseTestConv3d):
check_trace=True, check_trace=True,
filter_dilation=fd) filter_dilation=fd)
def tcase_gi(self, i, f, o, s, b, flip, provide_shape, fd=(1, 1, 1), expect_error=False):
if fd != (1, 1, 1):
raise SkipTest("No dilation implementation for basic cpu Conv3D.")
mode = self.mode
if min(i) == 0 or min(f) == 0 or min(o) == 0:
raise SkipTest('Not tested for old cpu Conv3D.')
if b not in ((0, 0, 0), 'valid'):
return
if not expect_error:
self.run_gradinput(inputs_shape=i, filters_shape=f,
output_shape=o, subsample=s,
verify_grad=False, mode=mode,
provide_shape=provide_shape, border_mode=b,
filter_flip=flip,
target_op=ConvTransp3D,
check_trace=True,
filter_dilation=fd)
else:
# we do not check for inconsistent shapes,
# because this older implementation does not check that
raise SkipTest('Inconsistent shapes are not tested for old cpu Conv3D.')
def test_constant_shapes(): def test_constant_shapes():
# Check that the `imshp` and `kshp` parameters of the AbstractConv Ops # Check that the `imshp` and `kshp` parameters of the AbstractConv Ops
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论