提交 471a1711 authored 作者: Pascal Lamblin's avatar Pascal Lamblin

Merge pull request #2543 from yaoli/pool_pad

support max pooling with padding
......@@ -4,7 +4,7 @@ Planned:
DownsampleFactorMax, DownsampleAvg, DownsampleSoftmax.
"""
#This file should move along with conv.py
# This file should move along with conv.py
import __builtin__
import numpy
......@@ -19,7 +19,7 @@ def max_pool2D(*args, **kwargs):
return max_pool_2d(*args, **kwargs)
def max_pool_2d(input, ds, ignore_border=False, st=None):
def max_pool_2d(input, ds, ignore_border=False, st=None, padding=(0, 0)):
"""
Takes as input a N-D tensor, where N >= 2. It downscales the input image by
the specified factor, by keeping only the maximum value of non-overlapping
......@@ -39,6 +39,10 @@ def max_pool_2d(input, ds, ignore_border=False, st=None):
over rows/cols to get the next pool region.
if st is None, it is considered equal to ds
(no overlap on pooling regions)
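:param padding: (pad_h, pad_w), the size of the margin added around each
image before pooling: pad_h rows are added at both the top and the
bottom, and pad_w columns at both the left and the right. The margin
never wins the max (the Python implementation fills it with a value
below the input minimum rather than with zeros).
:type padding: tuple of two ints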
"""
if input.ndim < 2:
......@@ -62,7 +66,7 @@ def max_pool_2d(input, ds, ignore_border=False, st=None):
input_4D = tensor.reshape(input, new_shape, ndim=4)
# downsample mini-batch of images
op = DownsampleFactorMax(ds, ignore_border, st=st)
op = DownsampleFactorMax(ds, ignore_border, st=st, padding=padding)
output = op(input_4D)
# restore to original shape
......@@ -77,10 +81,10 @@ class DownsampleFactorMax(Op):
regions.
"""
__props__ = ('ds', 'ignore_border', 'st')
__props__ = ('ds', 'ignore_border', 'st', 'padding')
@staticmethod
def out_shape(imgshape, ds, ignore_border=False, st=None):
def out_shape(imgshape, ds, ignore_border=False, st=None, padding=(0, 0)):
"""Return the shape of the output from this op, for input of given
shape and flags.
......@@ -101,6 +105,11 @@ class DownsampleFactorMax(Op):
extra row/col of partial downsampling (False) or ignore it (True).
:type ignore_border: bool
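:param padding: (pad_h, pad_w), the size of the margin added around each
image before pooling: pad_h rows are added at both the top and the
bottom, and pad_w columns at both the left and the right, so the
pooled row/column counts are computed from sizes grown by
2 * pad_h and 2 * pad_w respectively.
:type padding: tuple of two ints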
:rtype: list
:returns: the shape of the output from this op, for input of given
shape. This will have the same length as imgshape, but with last
......@@ -113,6 +122,8 @@ class DownsampleFactorMax(Op):
if st is None:
st = ds
r, c = imgshape[-2:]
r += padding[0] * 2
c += padding[1] * 2
if ignore_border:
out_r = (r - ds[0]) // st[0] + 1
......@@ -149,7 +160,7 @@ class DownsampleFactorMax(Op):
rval = list(imgshape[:-2]) + [nr, nc]
return rval
def __init__(self, ds, ignore_border=False, st=None):
def __init__(self, ds, ignore_border=False, st=None, padding=(0, 0)):
"""
:param ds: downsample factor over rows and column.
ds indicates the pool region size.
......@@ -166,6 +177,11 @@ class DownsampleFactorMax(Op):
(no overlap on pooling regions)
:type st: list or tuple of two ints
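:param padding: (pad_h, pad_w), the size of the margin added around each
image before pooling: pad_h rows are added at both the top and the
bottom, and pad_w columns at both the left and the right.
A non-zero padding requires ignore_border=True, and each pad size
must be smaller than the corresponding pool size in ds.
:type padding: tuple of two ints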
"""
self.ds = tuple(ds)
if not all([isinstance(d, int) for d in ds]):
......@@ -176,10 +192,19 @@ class DownsampleFactorMax(Op):
st = ds
self.st = tuple(st)
self.ignore_border = ignore_border
self.padding = tuple(padding)
self.padding = padding
if padding != (0, 0) and not ignore_border:
raise NotImplementedError(
'padding works only with ignore_boarder=True')
if self.padding[0] >= self.ds[0] or self.padding[1] >= self.ds[1]:
raise NotImplementedError(
'padding_h and padding_w must be smaller than strides')
def __str__(self):
return '%s{%s,%s,%s}' % (self.__class__.__name__,
self.ds, self.st, self.ignore_border)
return '%s{%s, %s, %s, %s}' % (
self.__class__.__name__,
self.ds, self.st, self.ignore_border, self.padding)
def make_node(self, x):
if x.type.ndim != 4:
......@@ -195,22 +220,33 @@ class DownsampleFactorMax(Op):
if len(x.shape) != 4:
raise NotImplementedError(
'DownsampleFactorMax requires 4D input for now')
z_shape = self.out_shape(x.shape, self.ds, self.ignore_border, self.st)
z_shape = self.out_shape(x.shape, self.ds, self.ignore_border, self.st,
self.padding)
if (z[0] is None) or (z[0].shape != z_shape):
z[0] = numpy.empty(self.out_shape(x.shape, self.ds,
self.ignore_border, self.st),
dtype=x.dtype)
z[0] = numpy.empty(
self.out_shape(x.shape, self.ds, self.ignore_border,
self.st, self.padding),
dtype=x.dtype)
zz = z[0]
#number of pooling output rows
# number of pooling output rows
pr = zz.shape[-2]
#number of pooling output cols
# number of pooling output cols
pc = zz.shape[-1]
ds0, ds1 = self.ds
st0, st1 = self.st
img_rows = x.shape[-2]
img_cols = x.shape[-1]
pad_h = self.padding[0]
pad_w = self.padding[1]
img_rows = x.shape[-2] + 2 * pad_h
img_cols = x.shape[-1] + 2 * pad_w
# pad the image
fill = x.min()-1.
y = numpy.zeros(
(x.shape[0], x.shape[1], img_rows, img_cols),
dtype=x.dtype) + fill
y[:, :, pad_h:(img_rows-pad_h), pad_w:(img_cols-pad_w)] = x
# max pooling
for n in xrange(x.shape[0]):
for k in xrange(x.shape[1]):
for r in xrange(pr):
......@@ -219,7 +255,7 @@ class DownsampleFactorMax(Op):
for c in xrange(pc):
col_st = c * st1
col_end = __builtin__.min(col_st + ds1, img_cols)
zz[n, k, r, c] = x[
zz[n, k, r, c] = y[
n, k, row_st:row_end, col_st:col_end].max()
def infer_shape(self, node, in_shapes):
......@@ -233,7 +269,7 @@ class DownsampleFactorMax(Op):
maxout = self(x)
return [DownsampleFactorMaxGrad(self.ds,
ignore_border=self.ignore_border,
st=self.st)(
st=self.st, padding=self.padding)(
x, maxout, gz)]
def c_code(self, node, name, inp, out, sub):
......@@ -241,7 +277,7 @@ class DownsampleFactorMax(Op):
# the stride size and the pooling size are different.
# An exception is raised for such a case.
if self.ds != self.st:
raise theano.gof.utils.MethodNotDefined()
raise theano.gof.utils.MethodNotDefined()
x, = inp
z, = out
fail = sub['fail']
......@@ -318,18 +354,20 @@ class DownsampleFactorMax(Op):
class DownsampleFactorMaxGrad(Op):
__props__ = ('ds', 'ignore_border', 'st')
__props__ = ('ds', 'ignore_border', 'st', 'padding')
def __init__(self, ds, ignore_border, st=None):
def __init__(self, ds, ignore_border, st=None, padding=(0, 0)):
self.ds = tuple(ds)
self.ignore_border = ignore_border
if st is None:
st = ds
self.st = tuple(st)
self.padding = tuple(padding)
def __str__(self):
return '%s{%s,%s,%s}' % (self.__class__.__name__,
self.ds, self.st, self.ignore_border)
return '%s{%s, %s, %s, %s}' % (
self.__class__.__name__,
self.ds, self.st, self.ignore_border, self.padding)
def make_node(self, x, maxout, gz):
# make_node should only be called by the grad function of
......@@ -343,17 +381,23 @@ class DownsampleFactorMaxGrad(Op):
def perform(self, node, inp, out):
x, maxout, gz = inp
gx_stg, = out
gx = numpy.zeros_like(x)
#number of pooling output rows
# number of pooling output rows
pr = maxout.shape[-2]
#number of pooling output cols
# number of pooling output cols
pc = maxout.shape[-1]
ds0, ds1 = self.ds
st0, st1 = self.st
img_rows = x.shape[-2]
img_cols = x.shape[-1]
pad_h = self.padding[0]
pad_w = self.padding[1]
img_rows = x.shape[-2] + 2 * pad_h
img_cols = x.shape[-1] + 2 * pad_w
# pad the image
fill = x.min()-1
y = numpy.zeros(
(x.shape[0], x.shape[1], img_rows, img_cols), dtype=x.dtype) + fill
y[:, :, pad_h:(img_rows-pad_h), pad_w:(img_cols-pad_w)] = x
gx = numpy.zeros_like(y)
for n in xrange(x.shape[0]):
for k in xrange(x.shape[1]):
for r in xrange(pr):
......@@ -364,8 +408,10 @@ class DownsampleFactorMaxGrad(Op):
col_end = __builtin__.min(col_st + ds1, img_cols)
for row_ind in xrange(row_st, row_end):
for col_ind in xrange(col_st, col_end):
if (maxout[n, k, r, c] == x[n, k, row_ind, col_ind]):
if (maxout[n, k, r, c] == y[n, k, row_ind, col_ind]):
gx[n, k, row_ind, col_ind] += gz[n, k, r, c]
# unpad the image
gx = gx[:, :, pad_h:(img_rows-pad_h), pad_w:(img_cols-pad_w)]
gx_stg[0] = gx
def infer_shape(self, node, in_shapes):
......@@ -374,14 +420,21 @@ class DownsampleFactorMaxGrad(Op):
def grad(self, inp, grads):
x, maxout, gz = inp
ggx, = grads
return [theano.tensor.zeros_like(x),
theano.tensor.zeros_like(maxout),
DownsampleFactorMaxGradGrad(
self.ds, ignore_border=self.ignore_border, st=self.st)(x, maxout, ggx)]
if self.padding == (0, 0):
return [theano.tensor.zeros_like(x),
theano.tensor.zeros_like(maxout),
DownsampleFactorMaxGradGrad(
self.ds, ignore_border=self.ignore_border,
st=self.st)(x, maxout, ggx)]
else:
return [theano.tensor.zeros_like(x),
theano.tensor.zeros_like(maxout),
theano.gradients.grad_not_implemented(
self, 2, gz, 'Hessian not implemented with padding')]
def c_code(self, node, name, inp, out, sub):
if self.ds != self.st:
raise theano.gof.utils.MethodNotDefined()
raise theano.gof.utils.MethodNotDefined()
x, z, gz = inp
gx, = out
fail = sub['fail']
......@@ -593,9 +646,9 @@ class DownsampleFactorMaxGradGrad(Op):
dtype=x.dtype)
ggz = z[0]
#number of pooling output rows
# number of pooling output rows
pr = ggz.shape[-2]
#number of pooling output cols
# number of pooling output cols
pc = ggz.shape[-1]
ds0, ds1 = self.ds
st0, st1 = self.st
......
......@@ -38,6 +38,49 @@ class TestDownsampleFactorMax(utt.InferShapeTester):
output_val[k][i, j] = numpy.max(patch)
return output_val
@staticmethod
def numpy_max_pool_2d_stride_padding(
        x, ds, ignore_border=True, st=None, padding=(0, 0)):
    """Helper function, implementing padded and strided max pooling
    in pure numpy.

    The last two dimensions of ``x`` are treated as image rows and
    columns. Each image is surrounded by a margin whose cells hold a
    value strictly below ``x.min()``, so a margin cell is never picked
    as the maximum of a pooling window.

    :param x: input array with at least two dimensions.
    :param ds: (pool_h, pool_w), the size of the pooling window.
    :param ignore_border: accepted for signature parity with the other
        helpers; it is not consulted, the output size is always computed
        as ``(padded_size - ds) // st + 1``.
    :param st: (stride_h, stride_w). If None, it is taken equal to ds
        (no overlap between pooling windows).
    :param padding: (pad_h, pad_w), number of margin rows added at both
        the top and the bottom, and margin columns added at both the
        left and the right.

    :returns: array of shape ``x.shape[:-2] + (out_r, out_c)`` holding
        the maximum of every pooling window.
    :raises AssertionError: if a pad size is not smaller than the
        corresponding pool size.
    """
    if st is None:
        # Same convention as the op under test: default stride == pool size.
        st = ds
    pad_h = padding[0]
    pad_w = padding[1]
    ds0, ds1 = ds
    st0, st1 = st
    assert ds0 > pad_h
    assert ds1 > pad_w

    h = x.shape[-2]
    w = x.shape[-1]
    img_rows = h + 2 * pad_h
    img_cols = w + 2 * pad_w

    # Build the padded image in one step: a canvas filled with a value
    # below every input element, with the input copied into its centre.
    fill = x.min() - 1.
    y = numpy.zeros(tuple(x.shape[:-2]) + (img_rows, img_cols)) + fill
    y[..., pad_h:pad_h + h, pad_w:pad_w + w] = x

    out_r = (img_rows - ds0) // st0 + 1
    out_c = (img_cols - ds1) // st1 + 1
    output_val = numpy.zeros(list(x.shape[:-2]) + [out_r, out_c])

    for k in numpy.ndindex(*x.shape[:-2]):
        for i in range(out_r):
            ii_st = i * st0
            for j in range(out_c):
                jj_st = j * st1
                # By construction of out_r / out_c every window lies
                # inside the padded image, so no clamping is required.
                patch = y[k][ii_st:ii_st + ds0, jj_st:jj_st + ds1]
                output_val[k][i, j] = numpy.max(patch)
    return output_val
@staticmethod
def numpy_max_pool_2d_stride(input, ds, ignore_border=False, st=None):
'''Helper function, implementing max_pool_2d in pure numpy
......@@ -196,6 +239,53 @@ class TestDownsampleFactorMax(utt.InferShapeTester):
output_val = f(imval)
utt.assert_allclose(output_val, numpy_output_val)
def test_DownsampleFactorMaxPaddingStride(self):
    """Compare the padded, strided op against the numpy reference.

    Four (pool size, stride, padding, image size) configurations are
    run on a random 4-D batch; the compiled op output must match
    ``numpy_max_pool_2d_stride_padding`` for each of them.
    """
    # padding does not support ignore_border=False
    ignore_border = True
    rng = numpy.random.RandomState(utt.fetch_seed())
    configurations = zip(
        [(3, 3), (4, 4), (3, 4), (4, 3)],  # pool sizes
        [(2, 2), (2, 2), (1, 1), (1, 2)],  # strides
        [(2, 2), (1, 2), (2, 1), (0, 0)],  # paddings
        [(5, 5), (5, 5), (5, 6), (6, 5)],  # image sizes
    )
    batch_size = 4
    n_channels = 10
    images = tensor.dtensor4()
    for maxpoolsize, stridesize, paddingsize, imgsize in configurations:
        imval = rng.rand(batch_size, n_channels, imgsize[0], imgsize[1])
        expected = self.numpy_max_pool_2d_stride_padding(
            imval, maxpoolsize, ignore_border, stridesize, paddingsize)
        pooled = DownsampleFactorMax(
            maxpoolsize,
            ignore_border=ignore_border,
            st=stridesize, padding=paddingsize)(images)
        compiled = function([images], pooled)
        utt.assert_allclose(compiled(imval), expected)
def test_DownsampleFactorMaxPaddingStride_grad(self):
    """Run utt.verify_grad on the padded, strided max-pooling op.

    Two (image size, pool size, stride, padding) configurations are
    checked, each on a single random one-channel image.
    """
    rng = numpy.random.RandomState(utt.fetch_seed())
    configurations = zip(
        ((10, 10), (10, 5)),  # image sizes
        ((5, 3), (3, 5)),     # pool sizes
        ((3, 2), (2, 3)),     # strides
        ((2, 2), (2, 1)),     # paddings
    )
    for imgsize, maxpoolsize, stridesize, paddingsize in configurations:
        imval = rng.rand(1, 1, imgsize[0], imgsize[1]) * 10.0

        # The closure is consumed by verify_grad within this same
        # iteration, so it sees the current configuration.
        def mp(input):
            pool = DownsampleFactorMax(
                maxpoolsize, ignore_border=True,
                st=stridesize,
                padding=paddingsize,
            )
            return pool(input)

        utt.verify_grad(mp, [imval], rng=rng)
def test_DownsampleFactorMax_grad(self):
rng = numpy.random.RandomState(utt.fetch_seed())
maxpoolshps = ((1, 1), (3, 2), (2, 3))
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论