Commit f641f02e authored by Pascal Lamblin

Merge pull request #2917 from yaoli/maxpool_c_code

[MRG] implement padding for max pool grad grad
...@@ -602,12 +602,12 @@ class DownsampleFactorMaxGrad(Op): ...@@ -602,12 +602,12 @@ class DownsampleFactorMaxGrad(Op):
def grad(self, inp, grads): def grad(self, inp, grads):
x, maxout, gz = inp x, maxout, gz = inp
ggx, = grads ggx, = grads
if self.padding == (0, 0) and self.mode == 'max': if self.mode == 'max':
return [theano.tensor.zeros_like(x), return [theano.tensor.zeros_like(x),
theano.tensor.zeros_like(maxout), theano.tensor.zeros_like(maxout),
DownsampleFactorMaxGradGrad( DownsampleFactorMaxGradGrad(
self.ds, ignore_border=self.ignore_border, self.ds, ignore_border=self.ignore_border,
st=self.st)(x, maxout, ggx)] st=self.st, padding=self.padding)(x, maxout, ggx)]
else: else:
return [theano.tensor.zeros_like(x), return [theano.tensor.zeros_like(x),
theano.tensor.zeros_like(maxout), theano.tensor.zeros_like(maxout),
...@@ -733,10 +733,10 @@ class DownsampleFactorMaxGrad(Op): ...@@ -733,10 +733,10 @@ class DownsampleFactorMaxGrad(Op):
return (0, 7) return (0, 7)
class DownsampleFactorMaxGradGrad(Op): class DownsampleFactorMaxGradGrad(Op):
__props__ = ('ds', 'ignore_border', 'st') __props__ = ('ds', 'ignore_border', 'st', 'padding', 'mode')
@staticmethod @staticmethod
def out_shape(imgshape, ds, ignore_border=False, st=None): def out_shape(imgshape, ds, ignore_border=False, st=None, padding=(0, 0)):
"""Return the shape of the output from this op, for input of given """Return the shape of the output from this op, for input of given
shape and flags. shape and flags.
...@@ -757,6 +757,11 @@ class DownsampleFactorMaxGradGrad(Op): ...@@ -757,6 +757,11 @@ class DownsampleFactorMaxGradGrad(Op):
extra row/col of partial downsampling (False) or ignore it (True). extra row/col of partial downsampling (False) or ignore it (True).
:type ignore_border: bool :type ignore_border: bool
:param padding: (pad_h, pad_w), pad zeros to extend beyond four borders
of the images, pad_h is the size of the top and bottom margins,
and pad_w is the size of the left and right margins.
:type padding: tuple of two ints
:rtype: list :rtype: list
:returns: the shape of the output from this op, for input of given :returns: the shape of the output from this op, for input of given
shape. This will have the same length as imgshape, but with last shape. This will have the same length as imgshape, but with last
...@@ -769,6 +774,8 @@ class DownsampleFactorMaxGradGrad(Op): ...@@ -769,6 +774,8 @@ class DownsampleFactorMaxGradGrad(Op):
if st is None: if st is None:
st = ds st = ds
r, c = imgshape[-2:] r, c = imgshape[-2:]
r += padding[0] * 2
c += padding[1] * 2
if ignore_border: if ignore_border:
out_r = (r - ds[0]) // st[0] + 1 out_r = (r - ds[0]) // st[0] + 1
...@@ -805,12 +812,25 @@ class DownsampleFactorMaxGradGrad(Op): ...@@ -805,12 +812,25 @@ class DownsampleFactorMaxGradGrad(Op):
rval = list(imgshape[:-2]) + [nr, nc] rval = list(imgshape[:-2]) + [nr, nc]
return rval return rval
def __init__(self, ds, ignore_border, st=None): def __init__(self, ds, ignore_border, st=None, padding=(0,0), mode='max'):
self.ds = tuple(ds) self.ds = tuple(ds)
self.ignore_border = ignore_border if not all([isinstance(d, int) for d in ds]):
raise ValueError(
"DownsampleFactorMax downsample parameters must be ints."
" Got %s" % str(ds))
if st is None: if st is None:
st = ds st = ds
assert isinstance(st, (tuple, list))
self.st = tuple(st) self.st = tuple(st)
self.ignore_border = ignore_border
self.padding = tuple(padding)
if self.padding != (0, 0) and not ignore_border:
raise NotImplementedError(
'padding works only with ignore_border=True')
if self.padding[0] >= self.ds[0] or self.padding[1] >= self.ds[1]:
raise NotImplementedError(
'padding_h and padding_w must be smaller than strides')
self.mode = mode
def make_node(self, x, maxout, gz): def make_node(self, x, maxout, gz):
# make_node should only be called by the grad function of # make_node should only be called by the grad function of
...@@ -825,28 +845,42 @@ class DownsampleFactorMaxGradGrad(Op): ...@@ -825,28 +845,42 @@ class DownsampleFactorMaxGradGrad(Op):
return Apply(self, [x, maxout, gz], [x.type()]) return Apply(self, [x, maxout, gz], [x.type()])
def perform(self, node, inp, out): def perform(self, node, inp, out):
if self.mode != 'max':
raise theano.gof.utils.MethodNotDefined()
x, maxout, ggx = inp x, maxout, ggx = inp
z, = out z, = out
if len(x.shape) != 4: if len(x.shape) != 4:
raise NotImplementedError( raise NotImplementedError(
'DownsampleFactorMaxGradGrad requires 4D input for now') 'DownsampleFactorMaxGradGrad requires 4D input for now')
z_shape = self.out_shape(x.shape, self.ds, self.ignore_border, self.st) z_shape = self.out_shape(x.shape, self.ds, self.ignore_border,
self.st, self.padding)
if (z[0] is None) or (z[0].shape != z_shape): if (z[0] is None) or (z[0].shape != z_shape):
z[0] = numpy.zeros(self.out_shape(x.shape, self.ds, z[0] = numpy.zeros(z_shape, dtype=x.dtype)
self.ignore_border, self.st), ggz = z[0] # grad wrt maxout_grad has the same shape as maxout
dtype=x.dtype)
ggz = z[0]
# number of pooling output rows # number of pooling output rows
pr = ggz.shape[-2] pr = ggz.shape[-2]
# number of pooling output cols # number of pooling output cols
pc = ggz.shape[-1] pc = ggz.shape[-1]
ds0, ds1 = self.ds ds0, ds1 = self.ds
st0, st1 = self.st st0, st1 = self.st
img_rows = x.shape[-2] pd0, pd1 = self.padding
img_cols = x.shape[-1] img_rows = x.shape[-2] + 2 * pd0
img_cols = x.shape[-1] + 2 * pd1
# pad the image and its gradients
if self.padding != (0, 0):
y_padded = numpy.zeros(
(x.shape[0], x.shape[1], img_rows, img_cols),
dtype=x.dtype) + x.min() - 1
y_padded[:, :, pd0:(img_rows-pd0), pd1:(img_cols-pd1)] = x
ggx_padded = numpy.zeros(
(x.shape[0], x.shape[1], img_rows, img_cols),
dtype=x.dtype)
ggx_padded[:, :, pd0:(img_rows-pd0), pd1:(img_cols-pd1)] = ggx
else:
y_padded = x
ggx_padded = ggx
for n in xrange(x.shape[0]): for n in xrange(x.shape[0]):
for k in xrange(x.shape[1]): for k in xrange(x.shape[1]):
for r in xrange(pr): for r in xrange(pr):
...@@ -857,8 +891,94 @@ class DownsampleFactorMaxGradGrad(Op): ...@@ -857,8 +891,94 @@ class DownsampleFactorMaxGradGrad(Op):
col_end = builtins.min(col_st + ds1, img_cols) col_end = builtins.min(col_st + ds1, img_cols)
for row_ind in xrange(row_st, row_end): for row_ind in xrange(row_st, row_end):
for col_ind in xrange(col_st, col_end): for col_ind in xrange(col_st, col_end):
if (maxout[n, k, r, c] == x[n, k, row_ind, col_ind]): if (maxout[n, k, r, c] == y_padded[n, k, row_ind, col_ind]):
ggz[n, k, r, c] = ggx[n, k, row_ind, col_ind] ggz[n, k, r, c] = ggx_padded[n, k, row_ind, col_ind]
def infer_shape(self, node, in_shapes): def infer_shape(self, node, in_shapes):
return [in_shapes[0]] return [in_shapes[0]]
def c_code(self, node, name, inp, out, sub):
    """Return C code for the max-pool grad-grad computation.

    Only ``mode == 'max'`` has a C implementation; any other mode
    raises MethodNotDefined so Theano falls back to ``perform``.

    :param inp: C variable names for (x, maxout, ggx).
    :param out: C variable name for z, the grad wrt maxout_grad
        (same shape as maxout).
    :param sub: dict of extra substitutions; ``sub['fail']`` is the
        error-return snippet used on allocation failure.
    :returns: the C implementation as a template-substituted string.
    """
    if self.mode != 'max':
        raise theano.gof.utils.MethodNotDefined()
    x, maxout, ggx = inp
    z, = out  # the grad of grad
    fail = sub['fail']
    ignore_border = int(self.ignore_border)
    ds0, ds1 = self.ds
    st0, st1 = self.st
    pd0, pd1 = self.padding
    return """
    int z_typenum = PyArray_ObjectType((PyObject*)%(maxout)s, 0);
    int z_r, z_c;
    z_r = PyArray_DIMS(%(maxout)s)[2];
    z_c = PyArray_DIMS(%(maxout)s)[3];
    int r, c; // shape of the padded_input
    r = PyArray_DIMS(%(x)s)[2];
    c = PyArray_DIMS(%(x)s)[3];
    r += %(pd0)s * 2;
    c += %(pd1)s * 2;
    // allocating memory for output
    // NOTE: the rank test uses PyArray_NDIM; the previous form
    // (*PyArray_DIMS(z) != 4) dereferenced the dims array and compared
    // the FIRST DIMENSION against 4, never the rank.
    if ((!%(z)s)
      || !PyArray_ISCONTIGUOUS(%(z)s)
      || PyArray_NDIM(%(z)s) != 4
      ||(PyArray_DIMS(%(z)s)[0] != PyArray_DIMS(%(maxout)s)[0])
      ||(PyArray_DIMS(%(z)s)[1] != PyArray_DIMS(%(maxout)s)[1])
      ||(PyArray_DIMS(%(z)s)[2] != PyArray_DIMS(%(maxout)s)[2])
      ||(PyArray_DIMS(%(z)s)[3] != PyArray_DIMS(%(maxout)s)[3])
      )
    {
      Py_XDECREF(%(z)s);
      %(z)s = (PyArrayObject*) PyArray_ZEROS(4, PyArray_DIMS(%(maxout)s), z_typenum,0);
      if (!%(z)s)
      {
        PyErr_SetString(PyExc_MemoryError,
                        "DownsampleFactorMaxGradGrad: failed to allocate output");
        %(fail)s;
      }
    }
    else {
      PyArray_FILLWBYTE(%(z)s, 0);
    }
    dtype_%(maxout)s maximum; // temp var for maximum value in a region
    int r_st, r_end, c_st, c_end; // used to index into the input img x
    for(int b=0; b<PyArray_DIMS(%(x)s)[0]; b++){
      for(int k=0; k<PyArray_DIMS(%(x)s)[1]; k++){
        for(int i=0; i< z_r; i++){
          r_st = i * %(st0)s;
          r_end = r_st + %(ds0)s;
          // skip the padding
          r_st = r_st < %(pd0)s ? %(pd0)s : r_st;
          r_end = r_end > (r - %(pd0)s) ? r - %(pd0)s : r_end;
          // from padded_img space to img space
          r_st -= %(pd0)s;
          r_end -= %(pd0)s;
          for(int j=0; j<z_c; j++){
            c_st = j * %(st1)s;
            c_end = c_st + %(ds1)s;
            // skip the padding
            c_st = c_st < %(pd1)s ? %(pd1)s : c_st;
            c_end = c_end > (c - %(pd1)s) ? c - %(pd1)s : c_end;
            // from padding_img space into img space
            c_st -= %(pd1)s;
            c_end -= %(pd1)s;
            // the maximum value
            maximum = ((dtype_%(maxout)s*)(PyArray_GETPTR4(%(maxout)s,b,k,i,j)))[0];
            // z at this position
            dtype_%(z)s * z = ((dtype_%(z)s*)(PyArray_GETPTR4(%(z)s, b, k, i, j)));
            // go through the pooled region in the unpadded input
            for(int m=r_st; m<r_end; m++)
            {
              for(int n=c_st; n<c_end; n++)
              {
                dtype_%(x)s a = ((dtype_%(x)s*)(PyArray_GETPTR4(%(x)s,b,k,m,n)))[0];
                dtype_%(ggx)s * ggx = (
                  (dtype_%(ggx)s*)(PyArray_GETPTR4(%(ggx)s, b, k, m, n)));
                if (a == maximum){
                  z[0] += ggx[0];
                }
              }
            }
          }
        }
      }
    }
    """ % locals()

def c_code_cache_version(self):
    """Version of the generated C code.

    Bumped to (0, 2): the output-rank check was corrected to use
    PyArray_NDIM and an allocation-failure branch was added, so cached
    binaries compiled from the old template must be invalidated.
    """
    return (0, 2)
...@@ -8,7 +8,9 @@ import theano ...@@ -8,7 +8,9 @@ import theano
import theano.tensor as tensor import theano.tensor as tensor
from theano.tests import unittest_tools as utt from theano.tests import unittest_tools as utt
from theano.tensor.signal.downsample import (DownsampleFactorMax, max_pool_2d, from theano.tensor.signal.downsample import (DownsampleFactorMax, max_pool_2d,
DownsampleFactorMaxGrad, max_pool_2d_same_size) DownsampleFactorMaxGrad,
DownsampleFactorMaxGradGrad,
max_pool_2d_same_size)
from theano import function from theano import function
...@@ -482,7 +484,36 @@ class TestDownsampleFactorMax(utt.InferShapeTester): ...@@ -482,7 +484,36 @@ class TestDownsampleFactorMax(utt.InferShapeTester):
if numpy.prod(grad_shape) == 0: if numpy.prod(grad_shape) == 0:
continue continue
utt.verify_grad(mp, [imval, grad_val], rng=rng) utt.verify_grad(mp, [imval, grad_val], rng=rng)
def test_DownsampleFactorMaxPaddingStride_grad_grad(self):
    """Verify the gradient of DownsampleFactorMaxGrad with padding
    via finite differences, over several (image, pool, stride,
    padding) configurations with ignore_border=True."""
    rng = numpy.random.RandomState(utt.fetch_seed())
    # Case-wise parallel configurations: image size, pool window,
    # stride, and zero-padding for each test case.
    cases = zip(((10, 10), (10, 5), (5, 5)),
                ((5, 3), (3, 5), (3, 3)),
                ((3, 2), (2, 3), (3, 3)),
                ((2, 2), (2, 1), (2, 2)))
    for imgsize, maxpoolsize, stridesize, paddingsize in cases:
        imval = rng.rand(1, 1, imgsize[0], imgsize[1]) * 10.0
        # Upstream gradient must match the pooled output's shape.
        grad_shape = DownsampleFactorMaxGradGrad.out_shape(
            imval.shape, maxpoolsize, st=stridesize,
            ignore_border=True, padding=paddingsize)
        grad_val = rng.rand(*grad_shape) * 10.0

        def mp(input, grad):
            out = DownsampleFactorMax(
                maxpoolsize, ignore_border=True,
                st=stridesize,
                padding=paddingsize,
            )(input)
            grad_op = DownsampleFactorMaxGrad(
                maxpoolsize, ignore_border=True,
                st=stridesize, padding=paddingsize)
            return grad_op(input, out, grad)

        utt.verify_grad(mp, [imval, grad_val], rng=rng)
def test_DownsampleFactorMax_hessian(self): def test_DownsampleFactorMax_hessian(self):
# Example provided by Frans Cronje, see # Example provided by Frans Cronje, see
# https://groups.google.com/d/msg/theano-users/qpqUy_3glhw/JMwIvlN5wX4J # https://groups.google.com/d/msg/theano-users/qpqUy_3glhw/JMwIvlN5wX4J
......
Markdown format
0%
You are adding 0 people to this discussion. Please proceed with caution.
Please finish editing this comment first!
Register or sign in to comment