提交 9b8847df authored 作者: Pascal Lamblin's avatar Pascal Lamblin

Merge pull request #3644 from laurent-dinh/conv_infer_shape

Factoring inference of convolution output shape
...@@ -12,6 +12,7 @@ from theano.gof.type import CDataType, Generic ...@@ -12,6 +12,7 @@ from theano.gof.type import CDataType, Generic
from theano.compile import optdb from theano.compile import optdb
from theano.compile.ops import shape_i from theano.compile.ops import shape_i
from theano.tensor.nnet import SoftmaxGrad from theano.tensor.nnet import SoftmaxGrad
from theano.tensor.nnet.abstract_conv2d import get_conv_output_shape
from theano.tensor.signal.downsample import ( from theano.tensor.signal.downsample import (
DownsampleFactorMax, MaxPoolGrad, AveragePoolGrad) DownsampleFactorMax, MaxPoolGrad, AveragePoolGrad)
...@@ -473,48 +474,11 @@ class GpuDnnConv(DnnBase): ...@@ -473,48 +474,11 @@ class GpuDnnConv(DnnBase):
or scalar. or scalar.
""" """
b = ishape[0] # Number of inputs return get_conv_output_shape(
h = ishape[2] # Height of input feature maps ishape,
w = ishape[3] # Width of input feature maps kshape,
nb = kshape[0] # Number of output feature maps border_mode,
kh = kshape[2] # Height of each filter subsample)
kw = kshape[3] # Width of each filter
nd = len(subsample)
if nd > 2:
d = ishape[4]
kd = ishape[4]
sh = subsample[0]
sw = subsample[1]
if nd > 2:
sd = subsample[2]
if border_mode == 'full':
padh = kh - 1
padw = kw - 1
if nd > 4:
padd = kd - 1
elif isinstance(border_mode, tuple):
padh = border_mode[0]
padw = border_mode[1]
if nd > 2:
padd = border_mode[2]
else:
assert border_mode == 'valid'
padh = 0
padw = 0
padd = 0
res = [b, nb,
(h + 2 * padh - kh) // sh + 1,
(w + 2 * padw - kw) // sw + 1]
if nd > 2:
res.append(d + 2 * padd - kd // sd + 1)
return res
def infer_shape(self, node, shape): def infer_shape(self, node, shape):
return [shape[2]] return [shape[2]]
......
...@@ -13,6 +13,89 @@ __docformat__ = "restructuredtext en" ...@@ -13,6 +13,89 @@ __docformat__ = "restructuredtext en"
_logger = logging.getLogger("theano.tensor.nnet.conv2d") _logger = logging.getLogger("theano.tensor.nnet.conv2d")
def get_conv_output_shape(image_shape, kernel_shape,
                          border_mode, subsample):
    """
    Compute the output shape of a convolution operation.

    Parameters
    ----------
    image_shape: tuple of int (symbolic or numeric) corresponding to the input
        image shape. Its four (or five) elements must correspond respectively
        to: batch size, number of input channels, height and width (and
        possibly depth) of the image. None where undefined.
    kernel_shape: tuple of int (symbolic or numeric) corresponding to the
        kernel shape. Its four (or five) elements must correspond respectively
        to: number of output channels, number of input channels, height and
        width (and possibly depth) of the kernel. None where undefined.
    border_mode: string, int (symbolic or numeric) or tuple of int (symbolic
        or numeric). If it is a string, it must be 'valid', 'half' or 'full'.
        If it is a tuple (a list is accepted as well), its two (or three)
        elements respectively correspond to the padding on height and width
        (and possibly depth) axis. Any other value is applied unchanged to
        every axis.
    subsample: tuple of int (symbolic or numeric). Its two (or three)
        elements respectively correspond to the subsampling on height and
        width (and possibly depth) axis. Its length determines how many
        spatial axes are computed.

    Returns
    -------
    output_shape: tuple corresponding to the output image shape. Its first
        two elements are the batch size (``image_shape[0]``) and the number
        of output channels (``kernel_shape[0]``); they are followed by one
        element per entry of ``subsample`` (height, width and possibly
        depth). None where undefined.

    """
    bsize, imshp = image_shape[0], list(image_shape[2:])
    nkern, kshp = kernel_shape[0], list(kernel_shape[2:])
    n_axes = len(subsample)

    # Normalise the padding specification to one entry per spatial axis so a
    # single code path handles both the per-axis and the shared form.
    if isinstance(border_mode, (tuple, list)):
        pads = border_mode
    else:
        pads = (border_mode,) * n_axes

    # Index explicitly (rather than zip) so that a shape or padding sequence
    # shorter than `subsample` still fails loudly instead of being truncated.
    out_shp = tuple(
        get_conv_shape_1axis(imshp[i], kshp[i], pads[i], subsample[i])
        for i in range(n_axes))
    return (bsize, nkern) + out_shp
def get_conv_shape_1axis(image_shape, kernel_shape,
                         border_mode, subsample):
    """
    Compute the output shape of a convolution operation along one axis.

    Parameters
    ----------
    image_shape: int or None. Corresponds to the input image shape on a
        given axis. None if undefined.
    kernel_shape: int or None. Corresponds to the kernel shape on a given
        axis. None if undefined.
    border_mode: string or int. If it is a string, it must be
        'valid', 'half' or 'full'. If it is an integer, it must correspond to
        the padding on the considered axis.
    subsample: int. It must correspond to the subsampling on the
        considered axis.

    Returns
    -------
    out_shp: int corresponding to the output image shape on the
        considered axis. None if any of the four arguments is None.

    Raises
    ------
    ValueError
        If `border_mode` is given as a padding amount and is negative.

    """
    # Identity check on purpose: `None in [...]` falls back to `==`, and
    # objects that override equality (e.g. array-like values) may not answer
    # that comparison with a plain boolean.
    if any(arg is None
           for arg in (image_shape, kernel_shape, border_mode, subsample)):
        return None

    if border_mode == "half":
        pad = kernel_shape // 2
    elif border_mode == "full":
        pad = kernel_shape - 1
    elif border_mode == "valid":
        pad = 0
    else:
        # Anything that is not one of the named modes is taken to be the
        # explicit padding amount for this axis.
        pad = border_mode
        if pad < 0:
            raise ValueError("border_mode must be >= 0")

    # Padding is applied on both sides of the axis, hence the factor 2.
    return (image_shape + 2 * pad - kernel_shape) // subsample + 1
def conv2d(input, def conv2d(input,
filters, filters,
input_shape=None, input_shape=None,
......
...@@ -14,12 +14,14 @@ import logging ...@@ -14,12 +14,14 @@ import logging
import numpy import numpy
from six.moves import xrange from six.moves import xrange
import warnings
import theano import theano
from theano import OpenMPOp from theano import OpenMPOp
from theano.tensor import (as_tensor_variable, blas, get_scalar_constant_value, from theano.tensor import (as_tensor_variable, blas, get_scalar_constant_value,
patternbroadcast, NotScalarConstantError) patternbroadcast, NotScalarConstantError)
from theano.gof import Apply from theano.gof import Apply
from theano.tensor.nnet.abstract_conv2d import get_conv_output_shape
try: try:
# TODO: move these back out to global scope when they no longer # TODO: move these back out to global scope when they no longer
...@@ -363,10 +365,13 @@ class ConvOp(OpenMPOp): ...@@ -363,10 +365,13 @@ class ConvOp(OpenMPOp):
# The formula would be ceil((i + s * k - s * 1) / float(d)), # The formula would be ceil((i + s * k - s * 1) / float(d)),
# with s=1 for mode=='full' and s=-1 for mode=='valid'. # with s=1 for mode=='full' and s=-1 for mode=='valid'.
# To support symbolic shapes, we express this with integer arithmetics. # To support symbolic shapes, we express this with integer arithmetics.
return tuple(None if i is None or k is None warnings.warn("The method `getOutputShape` is deprecated use"
else ((i - k) // d + 1) if mode == 'valid' "`get_conv_output_shape` instead.")
else ((i + k + d - 2) // d) return get_conv_output_shape(
for i, k, d in zip(inshp, kshp, stride)) image_shape=(None, None, inshp[0], inshp[1]),
kernel_shape=(None, None, kshp[0], kshp[1]),
border_mode=mode,
subsample=stride)
def __init__(self, imshp=None, kshp=None, nkern=None, bsize=None, def __init__(self, imshp=None, kshp=None, nkern=None, bsize=None,
dx=1, dy=1, dx=1, dy=1,
...@@ -511,12 +516,16 @@ class ConvOp(OpenMPOp): ...@@ -511,12 +516,16 @@ class ConvOp(OpenMPOp):
_logger.warn(warnstr, self.unroll_kern, self.nkern, new) _logger.warn(warnstr, self.unroll_kern, self.nkern, new)
self.unroll_kern = new self.unroll_kern = new
self.outshp = ConvOp.getOutputShape(self.imshp_logical[1:], self.outshp = get_conv_output_shape(
self.kshp_logical, (dx, dy), (None,) + self.imshp_logical,
output_mode) (None, None,) + self.kshp_logical,
self.fulloutshp = ConvOp.getOutputShape(self.imshp_logical[1:], output_mode,
self.kshp_logical, (1, 1), (dx, dy))[2:]
output_mode) self.fulloutshp = get_conv_output_shape(
(None,) + self.imshp_logical,
(None, None,) + self.kshp_logical,
output_mode,
(1, 1))[2:]
self.out_mode = output_mode self.out_mode = output_mode
...@@ -669,9 +678,12 @@ class ConvOp(OpenMPOp): ...@@ -669,9 +678,12 @@ class ConvOp(OpenMPOp):
if self.kshp_logical[i] is not None: if self.kshp_logical[i] is not None:
kshp[i] = self.kshp_logical[i] kshp[i] = self.kshp_logical[i]
# infer output shape from what we have # infer output shape from what we have
outshp = ConvOp.getOutputShape(imshp[1:], kshp, (self.dx, self.dy), res = get_conv_output_shape(
self.out_mode) (bsize,) + tuple(imshp),
return [(bsize, nkern) + outshp] (nkern, None,) + tuple(kshp),
self.out_mode,
(self.dx, self.dy))
return [res]
def perform(self, node, inp, out): def perform(self, node, inp, out):
""" """
...@@ -737,8 +749,11 @@ class ConvOp(OpenMPOp): ...@@ -737,8 +749,11 @@ class ConvOp(OpenMPOp):
if all(shp is not None for shp in self.fulloutshp): if all(shp is not None for shp in self.fulloutshp):
fulloutshp = tuple(self.fulloutshp) fulloutshp = tuple(self.fulloutshp)
else: else:
fulloutshp = tuple(ConvOp.getOutputShape(imshp_logical[ fulloutshp = get_conv_output_shape(
1:], kshp_logical, (1, 1), self.out_mode)) (None,) + imshp_logical,
(None, None,) + kshp_logical,
self.out_mode,
(1, 1))[2:]
if z[0] is None or z[0].shape != (bsize, nkern,) + fulloutshp: if z[0] is None or z[0].shape != (bsize, nkern,) + fulloutshp:
z[0] = numpy.zeros((bsize, nkern,) + fulloutshp, z[0] = numpy.zeros((bsize, nkern,) + fulloutshp,
......
...@@ -5,6 +5,7 @@ import theano ...@@ -5,6 +5,7 @@ import theano
from theano import Apply from theano import Apply
from theano import gof from theano import gof
from theano.tensor import as_tensor_variable, TensorType from theano.tensor import as_tensor_variable, TensorType
from theano.tensor.nnet.abstract_conv2d import get_conv_output_shape
from theano.tensor.blas_headers import blas_header_text from theano.tensor.blas_headers import blas_header_text
from theano.tensor.blas import ldflags from theano.tensor.blas import ldflags
...@@ -370,37 +371,14 @@ class CorrMM(BaseCorrMM): ...@@ -370,37 +371,14 @@ class CorrMM(BaseCorrMM):
return Apply(self, [img, kern], [TensorType(dtype, broadcastable)()]) return Apply(self, [img, kern], [TensorType(dtype, broadcastable)()])
def infer_shape(self, node, input_shape): def infer_shape(self, node, input_shape):
if self.border_mode == "half":
padH = padW = -1
elif self.border_mode == "full":
padH = padW = -2
elif isinstance(self.border_mode, tuple):
padH, padW = self.border_mode
else:
assert self.border_mode == "valid"
padH = padW = 0
dH, dW = self.subsample
imshp = input_shape[0] imshp = input_shape[0]
kshp = input_shape[1] kshp = input_shape[1]
bsize, imshp = imshp[0], list(imshp[2:]) res = get_conv_output_shape(
nkern, kshp = kshp[0], list(kshp[2:]) imshp,
kH, kW = kshp kshp,
if padH == -1: self.border_mode,
padH = kH // 2 self.subsample)
elif padH == -2: return [res]
padH = kH - 1
elif padH < 0:
raise ValueError("CorrMM: border_mode must be >= 0")
if padW == -1:
padW = kW // 2
elif padW == -2:
padW = kW - 1
elif padW < 0:
raise ValueError("CorrMM: border_mode must be >= 0")
out_shp0 = (imshp[0] + 2 * padH - kshp[0]) // dH + 1
out_shp1 = (imshp[1] + 2 * padW - kshp[1]) // dW + 1
out_shp = (out_shp0, out_shp1)
return [(bsize, nkern) + out_shp]
def c_code(self, node, nodename, inp, out_, sub): def c_code(self, node, nodename, inp, out_, sub):
bottom, weights = inp bottom, weights = inp
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论