提交 471a1711 authored 作者: Pascal Lamblin's avatar Pascal Lamblin

Merge pull request #2543 from yaoli/pool_pad

support max pooling with padding
...@@ -4,7 +4,7 @@ Planned: ...@@ -4,7 +4,7 @@ Planned:
DownsampleFactorMax, DownsampleAvg, DownsampleSoftmax. DownsampleFactorMax, DownsampleAvg, DownsampleSoftmax.
""" """
#This file should move along with conv.py # This file should move along with conv.py
import __builtin__ import __builtin__
import numpy import numpy
...@@ -19,7 +19,7 @@ def max_pool2D(*args, **kwargs): ...@@ -19,7 +19,7 @@ def max_pool2D(*args, **kwargs):
return max_pool_2d(*args, **kwargs) return max_pool_2d(*args, **kwargs)
def max_pool_2d(input, ds, ignore_border=False, st=None): def max_pool_2d(input, ds, ignore_border=False, st=None, padding=(0, 0)):
""" """
Takes as input a N-D tensor, where N >= 2. It downscales the input image by Takes as input a N-D tensor, where N >= 2. It downscales the input image by
the specified factor, by keeping only the maximum value of non-overlapping the specified factor, by keeping only the maximum value of non-overlapping
...@@ -39,6 +39,10 @@ def max_pool_2d(input, ds, ignore_border=False, st=None): ...@@ -39,6 +39,10 @@ def max_pool_2d(input, ds, ignore_border=False, st=None):
over rows/cols to get the the next pool region. over rows/cols to get the the next pool region.
if st is None, it is considered equal to ds if st is None, it is considered equal to ds
(no overlap on pooling regions) (no overlap on pooling regions)
:param padding: (pad_h, pad_w), pad zeros to extend beyond four borders
of the images, pad_h is the size of the top and bottom margins,
and pad_w is the size of the left and right margins.
:type padding: tuple of two ints
""" """
if input.ndim < 2: if input.ndim < 2:
...@@ -62,7 +66,7 @@ def max_pool_2d(input, ds, ignore_border=False, st=None): ...@@ -62,7 +66,7 @@ def max_pool_2d(input, ds, ignore_border=False, st=None):
input_4D = tensor.reshape(input, new_shape, ndim=4) input_4D = tensor.reshape(input, new_shape, ndim=4)
# downsample mini-batch of images # downsample mini-batch of images
op = DownsampleFactorMax(ds, ignore_border, st=st) op = DownsampleFactorMax(ds, ignore_border, st=st, padding=padding)
output = op(input_4D) output = op(input_4D)
# restore to original shape # restore to original shape
...@@ -77,10 +81,10 @@ class DownsampleFactorMax(Op): ...@@ -77,10 +81,10 @@ class DownsampleFactorMax(Op):
regions. regions.
""" """
__props__ = ('ds', 'ignore_border', 'st') __props__ = ('ds', 'ignore_border', 'st', 'padding')
@staticmethod @staticmethod
def out_shape(imgshape, ds, ignore_border=False, st=None): def out_shape(imgshape, ds, ignore_border=False, st=None, padding=(0, 0)):
"""Return the shape of the output from this op, for input of given """Return the shape of the output from this op, for input of given
shape and flags. shape and flags.
...@@ -101,6 +105,11 @@ class DownsampleFactorMax(Op): ...@@ -101,6 +105,11 @@ class DownsampleFactorMax(Op):
extra row/col of partial downsampling (False) or ignore it (True). extra row/col of partial downsampling (False) or ignore it (True).
:type ignore_border: bool :type ignore_border: bool
:param padding: (pad_h, pad_w), pad zeros to extend beyond four borders
of the images, pad_h is the size of the top and bottom margins,
and pad_w is the size of the left and right margins.
:type padding: tuple of two ints
:rtype: list :rtype: list
:returns: the shape of the output from this op, for input of given :returns: the shape of the output from this op, for input of given
shape. This will have the same length as imgshape, but with last shape. This will have the same length as imgshape, but with last
...@@ -113,6 +122,8 @@ class DownsampleFactorMax(Op): ...@@ -113,6 +122,8 @@ class DownsampleFactorMax(Op):
if st is None: if st is None:
st = ds st = ds
r, c = imgshape[-2:] r, c = imgshape[-2:]
r += padding[0] * 2
c += padding[1] * 2
if ignore_border: if ignore_border:
out_r = (r - ds[0]) // st[0] + 1 out_r = (r - ds[0]) // st[0] + 1
...@@ -149,7 +160,7 @@ class DownsampleFactorMax(Op): ...@@ -149,7 +160,7 @@ class DownsampleFactorMax(Op):
rval = list(imgshape[:-2]) + [nr, nc] rval = list(imgshape[:-2]) + [nr, nc]
return rval return rval
def __init__(self, ds, ignore_border=False, st=None): def __init__(self, ds, ignore_border=False, st=None, padding=(0, 0)):
""" """
:param ds: downsample factor over rows and column. :param ds: downsample factor over rows and column.
ds indicates the pool region size. ds indicates the pool region size.
...@@ -166,6 +177,11 @@ class DownsampleFactorMax(Op): ...@@ -166,6 +177,11 @@ class DownsampleFactorMax(Op):
(no overlap on pooling regions) (no overlap on pooling regions)
: type st: list or tuple of two ints : type st: list or tuple of two ints
:param padding: (pad_h, pad_w), pad zeros to extend beyond four borders
of the images, pad_h is the size of the top and bottom margins,
and pad_w is the size of the left and right margins.
:type padding: tuple of two ints
""" """
self.ds = tuple(ds) self.ds = tuple(ds)
if not all([isinstance(d, int) for d in ds]): if not all([isinstance(d, int) for d in ds]):
...@@ -176,10 +192,19 @@ class DownsampleFactorMax(Op): ...@@ -176,10 +192,19 @@ class DownsampleFactorMax(Op):
st = ds st = ds
self.st = tuple(st) self.st = tuple(st)
self.ignore_border = ignore_border self.ignore_border = ignore_border
self.padding = tuple(padding)
self.padding = padding
if padding != (0, 0) and not ignore_border:
raise NotImplementedError(
'padding works only with ignore_boarder=True')
if self.padding[0] >= self.ds[0] or self.padding[1] >= self.ds[1]:
raise NotImplementedError(
'padding_h and padding_w must be smaller than strides')
def __str__(self): def __str__(self):
return '%s{%s,%s,%s}' % (self.__class__.__name__, return '%s{%s, %s, %s, %s}' % (
self.ds, self.st, self.ignore_border) self.__class__.__name__,
self.ds, self.st, self.ignore_border, self.padding)
def make_node(self, x): def make_node(self, x):
if x.type.ndim != 4: if x.type.ndim != 4:
...@@ -195,22 +220,33 @@ class DownsampleFactorMax(Op): ...@@ -195,22 +220,33 @@ class DownsampleFactorMax(Op):
if len(x.shape) != 4: if len(x.shape) != 4:
raise NotImplementedError( raise NotImplementedError(
'DownsampleFactorMax requires 4D input for now') 'DownsampleFactorMax requires 4D input for now')
z_shape = self.out_shape(x.shape, self.ds, self.ignore_border, self.st) z_shape = self.out_shape(x.shape, self.ds, self.ignore_border, self.st,
self.padding)
if (z[0] is None) or (z[0].shape != z_shape): if (z[0] is None) or (z[0].shape != z_shape):
z[0] = numpy.empty(self.out_shape(x.shape, self.ds, z[0] = numpy.empty(
self.ignore_border, self.st), self.out_shape(x.shape, self.ds, self.ignore_border,
dtype=x.dtype) self.st, self.padding),
dtype=x.dtype)
zz = z[0] zz = z[0]
# number of pooling output rows
#number of pooling output rows
pr = zz.shape[-2] pr = zz.shape[-2]
#number of pooling output cols # number of pooling output cols
pc = zz.shape[-1] pc = zz.shape[-1]
ds0, ds1 = self.ds ds0, ds1 = self.ds
st0, st1 = self.st st0, st1 = self.st
img_rows = x.shape[-2] pad_h = self.padding[0]
img_cols = x.shape[-1] pad_w = self.padding[1]
img_rows = x.shape[-2] + 2 * pad_h
img_cols = x.shape[-1] + 2 * pad_w
# pad the image
fill = x.min()-1.
y = numpy.zeros(
(x.shape[0], x.shape[1], img_rows, img_cols),
dtype=x.dtype) + fill
y[:, :, pad_h:(img_rows-pad_h), pad_w:(img_cols-pad_w)] = x
# max pooling
for n in xrange(x.shape[0]): for n in xrange(x.shape[0]):
for k in xrange(x.shape[1]): for k in xrange(x.shape[1]):
for r in xrange(pr): for r in xrange(pr):
...@@ -219,7 +255,7 @@ class DownsampleFactorMax(Op): ...@@ -219,7 +255,7 @@ class DownsampleFactorMax(Op):
for c in xrange(pc): for c in xrange(pc):
col_st = c * st1 col_st = c * st1
col_end = __builtin__.min(col_st + ds1, img_cols) col_end = __builtin__.min(col_st + ds1, img_cols)
zz[n, k, r, c] = x[ zz[n, k, r, c] = y[
n, k, row_st:row_end, col_st:col_end].max() n, k, row_st:row_end, col_st:col_end].max()
def infer_shape(self, node, in_shapes): def infer_shape(self, node, in_shapes):
...@@ -233,7 +269,7 @@ class DownsampleFactorMax(Op): ...@@ -233,7 +269,7 @@ class DownsampleFactorMax(Op):
maxout = self(x) maxout = self(x)
return [DownsampleFactorMaxGrad(self.ds, return [DownsampleFactorMaxGrad(self.ds,
ignore_border=self.ignore_border, ignore_border=self.ignore_border,
st=self.st)( st=self.st, padding=self.padding)(
x, maxout, gz)] x, maxout, gz)]
def c_code(self, node, name, inp, out, sub): def c_code(self, node, name, inp, out, sub):
...@@ -241,7 +277,7 @@ class DownsampleFactorMax(Op): ...@@ -241,7 +277,7 @@ class DownsampleFactorMax(Op):
# the stride size and the pooling size are different. # the stride size and the pooling size are different.
# An exception is raised for such a case. # An exception is raised for such a case.
if self.ds != self.st: if self.ds != self.st:
raise theano.gof.utils.MethodNotDefined() raise theano.gof.utils.MethodNotDefined()
x, = inp x, = inp
z, = out z, = out
fail = sub['fail'] fail = sub['fail']
...@@ -318,18 +354,20 @@ class DownsampleFactorMax(Op): ...@@ -318,18 +354,20 @@ class DownsampleFactorMax(Op):
class DownsampleFactorMaxGrad(Op): class DownsampleFactorMaxGrad(Op):
__props__ = ('ds', 'ignore_border', 'st') __props__ = ('ds', 'ignore_border', 'st', 'padding')
def __init__(self, ds, ignore_border, st=None): def __init__(self, ds, ignore_border, st=None, padding=(0, 0)):
self.ds = tuple(ds) self.ds = tuple(ds)
self.ignore_border = ignore_border self.ignore_border = ignore_border
if st is None: if st is None:
st = ds st = ds
self.st = tuple(st) self.st = tuple(st)
self.padding = tuple(padding)
def __str__(self): def __str__(self):
return '%s{%s,%s,%s}' % (self.__class__.__name__, return '%s{%s, %s, %s, %s}' % (
self.ds, self.st, self.ignore_border) self.__class__.__name__,
self.ds, self.st, self.ignore_border, self.padding)
def make_node(self, x, maxout, gz): def make_node(self, x, maxout, gz):
# make_node should only be called by the grad function of # make_node should only be called by the grad function of
...@@ -343,17 +381,23 @@ class DownsampleFactorMaxGrad(Op): ...@@ -343,17 +381,23 @@ class DownsampleFactorMaxGrad(Op):
def perform(self, node, inp, out): def perform(self, node, inp, out):
x, maxout, gz = inp x, maxout, gz = inp
gx_stg, = out gx_stg, = out
gx = numpy.zeros_like(x) # number of pooling output rows
#number of pooling output rows
pr = maxout.shape[-2] pr = maxout.shape[-2]
#number of pooling output cols # number of pooling output cols
pc = maxout.shape[-1] pc = maxout.shape[-1]
ds0, ds1 = self.ds ds0, ds1 = self.ds
st0, st1 = self.st st0, st1 = self.st
img_rows = x.shape[-2] pad_h = self.padding[0]
img_cols = x.shape[-1] pad_w = self.padding[1]
img_rows = x.shape[-2] + 2 * pad_h
img_cols = x.shape[-1] + 2 * pad_w
# pad the image
fill = x.min()-1
y = numpy.zeros(
(x.shape[0], x.shape[1], img_rows, img_cols), dtype=x.dtype) + fill
y[:, :, pad_h:(img_rows-pad_h), pad_w:(img_cols-pad_w)] = x
gx = numpy.zeros_like(y)
for n in xrange(x.shape[0]): for n in xrange(x.shape[0]):
for k in xrange(x.shape[1]): for k in xrange(x.shape[1]):
for r in xrange(pr): for r in xrange(pr):
...@@ -364,8 +408,10 @@ class DownsampleFactorMaxGrad(Op): ...@@ -364,8 +408,10 @@ class DownsampleFactorMaxGrad(Op):
col_end = __builtin__.min(col_st + ds1, img_cols) col_end = __builtin__.min(col_st + ds1, img_cols)
for row_ind in xrange(row_st, row_end): for row_ind in xrange(row_st, row_end):
for col_ind in xrange(col_st, col_end): for col_ind in xrange(col_st, col_end):
if (maxout[n, k, r, c] == x[n, k, row_ind, col_ind]): if (maxout[n, k, r, c] == y[n, k, row_ind, col_ind]):
gx[n, k, row_ind, col_ind] += gz[n, k, r, c] gx[n, k, row_ind, col_ind] += gz[n, k, r, c]
# unpad the image
gx = gx[:, :, pad_h:(img_rows-pad_h), pad_w:(img_cols-pad_w)]
gx_stg[0] = gx gx_stg[0] = gx
def infer_shape(self, node, in_shapes): def infer_shape(self, node, in_shapes):
...@@ -374,14 +420,21 @@ class DownsampleFactorMaxGrad(Op): ...@@ -374,14 +420,21 @@ class DownsampleFactorMaxGrad(Op):
def grad(self, inp, grads): def grad(self, inp, grads):
x, maxout, gz = inp x, maxout, gz = inp
ggx, = grads ggx, = grads
return [theano.tensor.zeros_like(x), if self.padding == (0, 0):
theano.tensor.zeros_like(maxout), return [theano.tensor.zeros_like(x),
DownsampleFactorMaxGradGrad( theano.tensor.zeros_like(maxout),
self.ds, ignore_border=self.ignore_border, st=self.st)(x, maxout, ggx)] DownsampleFactorMaxGradGrad(
self.ds, ignore_border=self.ignore_border,
st=self.st)(x, maxout, ggx)]
else:
return [theano.tensor.zeros_like(x),
theano.tensor.zeros_like(maxout),
theano.gradients.grad_not_implemented(
self, 2, gz, 'Hessian not implemented with padding')]
def c_code(self, node, name, inp, out, sub): def c_code(self, node, name, inp, out, sub):
if self.ds != self.st: if self.ds != self.st:
raise theano.gof.utils.MethodNotDefined() raise theano.gof.utils.MethodNotDefined()
x, z, gz = inp x, z, gz = inp
gx, = out gx, = out
fail = sub['fail'] fail = sub['fail']
...@@ -593,9 +646,9 @@ class DownsampleFactorMaxGradGrad(Op): ...@@ -593,9 +646,9 @@ class DownsampleFactorMaxGradGrad(Op):
dtype=x.dtype) dtype=x.dtype)
ggz = z[0] ggz = z[0]
#number of pooling output rows # number of pooling output rows
pr = ggz.shape[-2] pr = ggz.shape[-2]
#number of pooling output cols # number of pooling output cols
pc = ggz.shape[-1] pc = ggz.shape[-1]
ds0, ds1 = self.ds ds0, ds1 = self.ds
st0, st1 = self.st st0, st1 = self.st
......
...@@ -38,6 +38,49 @@ class TestDownsampleFactorMax(utt.InferShapeTester): ...@@ -38,6 +38,49 @@ class TestDownsampleFactorMax(utt.InferShapeTester):
output_val[k][i, j] = numpy.max(patch) output_val[k][i, j] = numpy.max(patch)
return output_val return output_val
@staticmethod
def numpy_max_pool_2d_stride_padding(
x, ds, ignore_border=True, st=None, padding=(0, 0)):
pad_h = padding[0]
pad_w = padding[1]
h = x.shape[-2]
w = x.shape[-1]
assert ds[0] > pad_h
assert ds[1] > pad_w
def pad_img(x):
fill = x.min()-1
t = numpy.ones((x.shape[0], x.shape[1], 1, 1))
ud_bar = (numpy.zeros((pad_h, w)) + fill)[
numpy.newaxis, numpy.newaxis, :, :] * t
lr_bar = (numpy.zeros((pad_h * 2 + h, pad_w)) + fill)[
numpy.newaxis, numpy.newaxis, :, :] * t
y = numpy.concatenate([ud_bar, x, ud_bar], axis=2)
y = numpy.concatenate([lr_bar, y, lr_bar], axis=3)
return y
img_rows = h + 2 * pad_h
img_cols = w + 2 * pad_w
out_r = (img_rows - ds[0]) // st[0] + 1
out_c = (img_cols - ds[1]) // st[1] + 1
out_shp = list(x.shape[:-2])
out_shp.append(out_r)
out_shp.append(out_c)
ds0, ds1 = ds
st0, st1 = st
output_val = numpy.zeros(out_shp)
tt = []
y = pad_img(x)
for k in numpy.ndindex(*x.shape[:-2]):
for i in range(output_val.shape[-2]):
ii_st = i * st[0]
ii_end = __builtin__.min(ii_st + ds[0], img_rows)
for j in range(output_val.shape[-1]):
jj_st = j * st[1]
jj_end = __builtin__.min(jj_st + ds[1], img_cols)
patch = y[k][ii_st:ii_end, jj_st:jj_end]
output_val[k][i, j] = numpy.max(patch)
return output_val
@staticmethod @staticmethod
def numpy_max_pool_2d_stride(input, ds, ignore_border=False, st=None): def numpy_max_pool_2d_stride(input, ds, ignore_border=False, st=None):
'''Helper function, implementing max_pool_2d in pure numpy '''Helper function, implementing max_pool_2d in pure numpy
...@@ -196,6 +239,53 @@ class TestDownsampleFactorMax(utt.InferShapeTester): ...@@ -196,6 +239,53 @@ class TestDownsampleFactorMax(utt.InferShapeTester):
output_val = f(imval) output_val = f(imval)
utt.assert_allclose(output_val, numpy_output_val) utt.assert_allclose(output_val, numpy_output_val)
def test_DownsampleFactorMaxPaddingStride(self):
    """Forward pass of DownsampleFactorMax with padding matches the
    pure-numpy reference over several pool/stride/padding configs."""
    # padding is only implemented for ignore_border=True
    ignore_border = True
    rng = numpy.random.RandomState(utt.fetch_seed())
    cases = zip(
        [(3, 3), (4, 4), (3, 4), (4, 3)],   # pool sizes
        [(2, 2), (2, 2), (1, 1), (1, 2)],   # strides
        [(2, 2), (1, 2), (2, 1), (0, 0)],   # paddings
        [(5, 5), (5, 5), (5, 6), (6, 5)])   # image sizes
    n_batch = 4   # minibatch
    n_channel = 10   # channel size
    images = tensor.dtensor4()
    for pool_shp, stride, pad, img_shp in cases:
        imval = rng.rand(n_batch, n_channel, img_shp[0], img_shp[1])
        expected = self.numpy_max_pool_2d_stride_padding(
            imval, pool_shp, ignore_border, stride, pad)
        pooled = DownsampleFactorMax(
            pool_shp,
            ignore_border=ignore_border,
            st=stride, padding=pad)(images)
        f = function([images], pooled)
        utt.assert_allclose(f(imval), expected)
def test_DownsampleFactorMaxPaddingStride_grad(self):
    """verify_grad of DownsampleFactorMax with padding over a couple of
    image/pool/stride/padding combinations."""
    rng = numpy.random.RandomState(utt.fetch_seed())
    cases = zip(
        ((10, 10), (10, 5)),   # image sizes
        ((5, 3), (3, 5)),      # pool sizes
        ((3, 2), (2, 3)),      # strides
        ((2, 2), (2, 1)))      # paddings
    for imgsize, poolsize, stride, pad in cases:
        imval = rng.rand(1, 1, imgsize[0], imgsize[1]) * 10.0

        def mp(input):
            # padding currently requires ignore_border=True
            return DownsampleFactorMax(
                poolsize, ignore_border=True,
                st=stride,
                padding=pad,
            )(input)
        utt.verify_grad(mp, [imval], rng=rng)
def test_DownsampleFactorMax_grad(self): def test_DownsampleFactorMax_grad(self):
rng = numpy.random.RandomState(utt.fetch_seed()) rng = numpy.random.RandomState(utt.fetch_seed())
maxpoolshps = ((1, 1), (3, 2), (2, 3)) maxpoolshps = ((1, 1), (3, 2), (2, 3))
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论