提交 a6388954 authored 作者: sebastien-j's avatar sebastien-j

Grad of grad (with tests)

上级 85f08bfd
......@@ -624,8 +624,6 @@ class MaxPoolGrad(PoolGrad):
return Apply(self, [x, maxout, gz], [x.type()])
def perform(self, node, inp, out):
if self.mode not in ('max', 'sum') and self.padding != (0, 0):
raise NotImplementedError()
x, maxout, gz = inp
gx_stg, = out
# number of pooling output rows
......@@ -638,8 +636,6 @@ class MaxPoolGrad(PoolGrad):
pad_w = self.padding[1]
img_rows = x.shape[-2] + 2 * pad_h
img_cols = x.shape[-1] + 2 * pad_w
inc_pad = self.mode == 'average_inc_pad'
sum_mode = self.mode == 'sum'
# pad the image
if self.padding != (0, 0):
......@@ -676,8 +672,6 @@ class MaxPoolGrad(PoolGrad):
st=self.st, padding=self.padding)(x, maxout, ggx)]
def c_code(self, node, name, inp, out, sub):
if self.mode != 'max':
raise theano.gof.utils.MethodNotDefined()
x, z, gz = inp
gx, = out
fail = sub['fail']
......@@ -795,7 +789,7 @@ class MaxPoolGrad(PoolGrad):
class AveragePoolGrad(PoolGrad):
def __init__(self, ds, ignore_border, st=None, padding=(0, 0), mode='avg_exc_pad'):
def __init__(self, ds, ignore_border, st=None, padding=(0, 0), mode='average_inc_pad'):
PoolGrad.__init__(self, ds, ignore_border, st, padding, mode)
def make_node(self, x, gz):
......@@ -809,7 +803,7 @@ class AveragePoolGrad(PoolGrad):
return Apply(self, [x, gz], [x.type()])
def perform(self, node, inp, out):
if self.mode not in ('max', 'sum') and self.padding != (0, 0):
if self.mode == 'average_exc_pad' and self.padding != (0, 0):
raise NotImplementedError()
x, gz = inp
gx_stg, = out
......@@ -869,8 +863,9 @@ class AveragePoolGrad(PoolGrad):
x, gz = inp
ggx, = grads
return [theano.tensor.zeros_like(x),
theano.gradient.grad_not_implemented(
self, 2, gz, 'Hessian not implemented with padding')]
DownsampleFactorMax(
self.ds, ignore_border=self.ignore_border,
st=self.st, padding=self.padding, mode=self.mode)(ggx)]
class DownsampleFactorMaxGradGrad(Op):
__props__ = ('ds', 'ignore_border', 'st', 'padding', 'mode')
......@@ -974,7 +969,7 @@ class DownsampleFactorMaxGradGrad(Op):
def make_node(self, x, maxout, gz):
# make_node should only be called by the grad function of
# DownsampleFactorMaxGrad, so these asserts should not fail.
# MaxPoolGrad, so these asserts should not fail.
assert isinstance(x, Variable) and x.ndim == 4
assert isinstance(maxout, Variable) and maxout.ndim == 4
assert isinstance(gz, Variable) and gz.ndim == 4
......@@ -1004,7 +999,7 @@ class DownsampleFactorMaxGradGrad(Op):
ds0, ds1 = self.ds
st0, st1 = self.st
pd0, pd1 = self.padding
img_rows = x.shape[-2] + 2 * pd0
img_rows = x.shape[-2] + 2 * pd0
img_cols = x.shape[-1] + 2 * pd1
# pad the image and its gradients
......@@ -1017,7 +1012,7 @@ class DownsampleFactorMaxGradGrad(Op):
(x.shape[0], x.shape[1], img_rows, img_cols),
dtype=x.dtype)
ggx_padded[:, :, pd0:(img_rows-pd0), pd1:(img_cols-pd1)] = ggx
else:
y_padded = x
ggx_padded = ggx
......@@ -1033,7 +1028,7 @@ class DownsampleFactorMaxGradGrad(Op):
for col_ind in xrange(col_st, col_end):
if (maxout[n, k, r, c] == y_padded[n, k, row_ind, col_ind]):
ggz[n, k, r, c] = ggx_padded[n, k, row_ind, col_ind]
def infer_shape(self, node, in_shapes):
return [in_shapes[0]]
......@@ -1041,7 +1036,7 @@ class DownsampleFactorMaxGradGrad(Op):
if self.mode != 'max':
raise theano.gof.utils.MethodNotDefined()
x, maxout, ggx = inp
z, = out # the grad of grad
z, = out # the grad of grad
fail = sub['fail']
ignore_border = int(self.ignore_border)
ds0, ds1 = self.ds
......@@ -1110,7 +1105,7 @@ class DownsampleFactorMaxGradGrad(Op):
(dtype_%(ggx)s*)(PyArray_GETPTR4(%(ggx)s, b, k, m, n)));
if (a == maximum){
z[0] += ggx[0];
}
}
}
}
}
......@@ -1118,7 +1113,7 @@ class DownsampleFactorMaxGradGrad(Op):
}
}
"""%locals()
def c_code_cache_version(self):
return (0,1)
......@@ -423,6 +423,29 @@ class TestDownsampleFactorMax(utt.InferShapeTester):
utt.verify_grad(mp, [imval, grad_val], rng=rng)
def test_AveragePoolGrad_grad(self):
    """Verify the gradient of AveragePoolGrad (i.e. the grad of the
    pooling grad) for every averaging mode and both ignore_border
    settings, using numeric differentiation via utt.verify_grad."""
    rng = numpy.random.RandomState(utt.fetch_seed())
    avgpoolshps = ((1, 1), (3, 2), (2, 3))
    # more variance means numeric gradient will be more accurate
    imval = rng.rand(2, 3, 3, 4) * 10.0

    for avgpoolshp in avgpoolshps:
        for ignore_border in [True, False]:
            for mode in ['sum', 'average_inc_pad', 'average_exc_pad']:
                # The shape of the gradient will be the shape of the output
                grad_shape = DownsampleFactorMax.out_shape(
                    imval.shape, avgpoolshp, ignore_border=ignore_border)
                grad_val = rng.rand(*grad_shape) * 10.0

                def mp(input, grad):
                    grad_op = AveragePoolGrad(
                        avgpoolshp, ignore_border=ignore_border, mode=mode)
                    return grad_op(input, grad)

                utt.verify_grad(mp, [imval, grad_val], rng=rng)
def test_DownsampleFactorMaxGrad_grad_st(self):
"""checks the gradient of the gradient for
the case that stride is used"""
......@@ -450,6 +473,31 @@ class TestDownsampleFactorMax(utt.InferShapeTester):
utt.verify_grad(mp, [imval, grad_val], rng=rng)
def test_AveragePoolGrad_grad_st(self):
    """Check the gradient of AveragePoolGrad for the case where a
    stride is used, across all averaging modes and border settings."""
    rng = numpy.random.RandomState(utt.fetch_seed())
    pool_shapes = ((1, 1), (3, 3), (5, 3))
    strides = ((1, 1), (3, 3), (5, 7))
    imval = rng.rand(1, 2, 16, 16)

    for pool_shape in pool_shapes:
        for ignore_border in [True, False]:
            for mode in ['sum', 'average_inc_pad', 'average_exc_pad']:
                for stride in strides:
                    # Gradient has the shape of the pooled output.
                    out_shape = DownsampleFactorMax.out_shape(
                        imval.shape, pool_shape,
                        ignore_border=ignore_border, st=stride)
                    grad_val = rng.rand(*out_shape)

                    def mp(input, grad):
                        op = AveragePoolGrad(
                            pool_shape, ignore_border=ignore_border,
                            st=stride, mode=mode)
                        return op(input, grad)

                    utt.verify_grad(mp, [imval, grad_val], rng=rng)
def test_DownsampleFactorMaxGrad_grad_st_extra(self):
"""checks the gradient of the gradient for the case that
stride is used for extra examples"""
......@@ -484,14 +532,47 @@ class TestDownsampleFactorMax(utt.InferShapeTester):
if numpy.prod(grad_shape) == 0:
continue
utt.verify_grad(mp, [imval, grad_val], rng=rng)
def test_AveragePoolGrad_grad_st_extra(self):
    """Check the gradient of AveragePoolGrad with stride on extra
    pool-shape / stride / image-size combinations, including ones
    that produce an empty output (which are skipped)."""
    rng = numpy.random.RandomState(utt.fetch_seed())
    avgpoolshps = ((5, 3), (5, 3), (5, 3), (5, 5), (3, 2), (7, 7), (9, 9))
    stridesizes = ((3, 2), (7, 5), (10, 6), (1, 1),
                   (2, 3), (10, 10), (1, 1))
    imvsizes = ((16, 16), (16, 16), (16, 16), (8, 5),
                (8, 5), (8, 5), (8, 5))

    # Iterate the parallel tuples together instead of indexing
    # them with numpy.arange(len(...)).
    for avgpoolshp, stride, imvsize in zip(avgpoolshps, stridesizes,
                                           imvsizes):
        imval = rng.rand(1, 2, imvsize[0], imvsize[1])
        for ignore_border in [True, False]:
            for mode in ['sum', 'average_inc_pad', 'average_exc_pad']:
                grad_shape = DownsampleFactorMax.out_shape(
                    imval.shape, avgpoolshp,
                    ignore_border=ignore_border, st=stride)
                # skip the grad verification when the output is empty
                if numpy.prod(grad_shape) == 0:
                    continue
                grad_val = rng.rand(*grad_shape)

                def mp(input, grad):
                    grad_op = AveragePoolGrad(
                        avgpoolshp, ignore_border=ignore_border,
                        st=stride, mode=mode)
                    return grad_op(input, grad)

                utt.verify_grad(mp, [imval, grad_val], rng=rng)
def test_DownsampleFactorMaxPaddingStride_grad_grad(self):
rng = numpy.random.RandomState(utt.fetch_seed())
rng = numpy.random.RandomState(utt.fetch_seed())
imgsizes = ((10, 10), (10, 5), (5, 5))
maxpoolsizes = ((5, 3), (3, 5), (3, 3))
stridesizes = ((3, 2), (2, 3), (3, 3))
paddingsizes = ((2, 2), (2, 1), (2, 2))
for i in range(len(imgsizes)):
imgsize = imgsizes[i]
imval = rng.rand(1, 1, imgsize[0], imgsize[1]) * 10.0
......@@ -513,7 +594,34 @@ class TestDownsampleFactorMax(utt.InferShapeTester):
st=stridesize, padding=paddingsize)
return grad_op(input, out, grad)
utt.verify_grad(mp, [imval, grad_val], rng=rng)
def test_AveragePoolPaddingStride_grad_grad(self):
    """Check the gradient of AveragePoolGrad when both padding and a
    stride are used (modes where padding is supported only)."""
    rng = numpy.random.RandomState(utt.fetch_seed())
    cases = zip(((10, 10), (10, 5), (5, 5)),   # image sizes
                ((5, 3), (3, 5), (3, 3)),      # pool sizes
                ((3, 2), (2, 3), (3, 3)),      # strides
                ((2, 2), (2, 1), (2, 2)))      # paddings

    for imgsize, avgpoolsize, stridesize, paddingsize in cases:
        imval = rng.rand(1, 1, imgsize[0], imgsize[1]) * 10.0
        # 'average_exc_pad' with non-zero padding is not implemented
        for mode in ['sum', 'average_inc_pad']:
            grad_shape = DownsampleFactorMax.out_shape(
                imval.shape, avgpoolsize, st=stridesize,
                ignore_border=True, padding=paddingsize)
            grad_val = rng.rand(*grad_shape) * 10.0

            def mp(input, grad):
                grad_op = AveragePoolGrad(avgpoolsize, ignore_border=True,
                                          st=stridesize,
                                          padding=paddingsize,
                                          mode=mode)
                return grad_op(input, grad)

            utt.verify_grad(mp, [imval, grad_val], rng=rng)
def test_DownsampleFactorMax_hessian(self):
# Example provided by Frans Cronje, see
# https://groups.google.com/d/msg/theano-users/qpqUy_3glhw/JMwIvlN5wX4J
......@@ -681,7 +789,7 @@ class TestDownsampleFactorMax(utt.InferShapeTester):
padding=padding)(image)],
[image_val], DownsampleFactorMax)
# checking shapes generated by DownsampleFactorMaxGrad
# checking shapes generated by MaxPoolGrad
maxout_val = rng.rand(*out_shapes[k][i][j])
gz_val = rng.rand(*out_shapes[k][i][j])
self._compile_and_check([image, maxout, gz],
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论