提交 9dc07802 authored 作者: abergeron's avatar abergeron

Merge pull request #2783 from nouiz/pool_average

Average pool CPU with python code
...@@ -721,7 +721,8 @@ class GpuDnnPoolDesc(GpuOp): ...@@ -721,7 +721,8 @@ class GpuDnnPoolDesc(GpuOp):
:param ws: windows size :param ws: windows size
:param stride: (dx, dy) :param stride: (dx, dy)
:param mode: 'max' or 'average' :param mode: 'max', 'average_inc_pad' or 'average_exc_pad'
The old deprecated name 'average' corresponds to 'average_inc_pad'
:param pad: (padX, padY) padding information. :param pad: (padX, padY) padding information.
padX is the size of the left and right borders, padX is the size of the left and right borders,
padY is the size of the top and bottom borders. padY is the size of the top and bottom borders.
...@@ -744,7 +745,9 @@ class GpuDnnPoolDesc(GpuOp): ...@@ -744,7 +745,9 @@ class GpuDnnPoolDesc(GpuOp):
return False return False
def __init__(self, ws=(1, 1), stride=(1, 1), mode='max', pad=(0, 0)): def __init__(self, ws=(1, 1), stride=(1, 1), mode='max', pad=(0, 0)):
assert mode in ('max', 'average') if mode == 'average':
mode = 'average_inc_pad'
assert mode in ('max', 'average_inc_pad', 'average_exc_pad')
self.mode = mode self.mode = mode
assert len(ws) == 2 assert len(ws) == 2
self.ws = ws self.ws = ws
...@@ -772,8 +775,12 @@ class GpuDnnPoolDesc(GpuOp): ...@@ -772,8 +775,12 @@ class GpuDnnPoolDesc(GpuOp):
if self.mode == 'max': if self.mode == 'max':
mode_flag = 'CUDNN_POOLING_MAX' mode_flag = 'CUDNN_POOLING_MAX'
elif self.mode == "average": elif self.mode == "average_inc_pad":
mode_flag = 'CUDNN_POOLING_AVERAGE_COUNT_INCLUDE_PADDING' mode_flag = 'CUDNN_POOLING_AVERAGE_COUNT_INCLUDE_PADDING'
elif self.mode == "average_exc_pad":
mode_flag = 'CUDNN_POOLING_AVERAGE_COUNT_EXCLUDE_PADDING'
if version() == -1:
raise Exception("cudnn v1 do not support average_exc_pad")
else: else:
raise NotImplementedError("Unsupported pooling model.") raise NotImplementedError("Unsupported pooling model.")
...@@ -1194,7 +1201,8 @@ def dnn_pool(img, ws, stride=(1, 1), mode='max', pad=(0, 0)): ...@@ -1194,7 +1201,8 @@ def dnn_pool(img, ws, stride=(1, 1), mode='max', pad=(0, 0)):
:param img: images to do the pooling over :param img: images to do the pooling over
:param ws: subsampling window size :param ws: subsampling window size
:param stride: subsampling stride (default: (1, 1)) :param stride: subsampling stride (default: (1, 1))
:param mode: one of 'max', 'average' (default: 'max') :param mode: one of 'max', 'average_inc_pad' or 'average_exc_pad'
(default: 'max')
:param pad: (padX, padY) padding information. :param pad: (padX, padY) padding information.
padX is the size of the left and right borders, padX is the size of the left and right borders,
padY is the size of the top and bottom borders. padY is the size of the top and bottom borders.
...@@ -1625,7 +1633,7 @@ if True: ...@@ -1625,7 +1633,7 @@ if True:
@register_opt('cudnn') @register_opt('cudnn')
@local_optimizer([DownsampleFactorMax]) @local_optimizer([DownsampleFactorMax])
def local_pool_dnn_stride(node): def local_pool_dnn_alternative(node):
if not dnn_available(): if not dnn_available():
return return
if isinstance(node.op, DownsampleFactorMax): if isinstance(node.op, DownsampleFactorMax):
...@@ -1635,9 +1643,10 @@ if True: ...@@ -1635,9 +1643,10 @@ if True:
ds = node.op.ds ds = node.op.ds
stride = node.op.st stride = node.op.st
pad = node.op.padding pad = node.op.padding
mode = node.op.mode
if (img.owner and isinstance(img.owner.op, HostFromGpu)): if (img.owner and isinstance(img.owner.op, HostFromGpu)):
ret = dnn_pool(gpu_contiguous(img.owner.inputs[0]), ret = dnn_pool(gpu_contiguous(img.owner.inputs[0]),
ds, stride=stride, pad=pad) ds, stride=stride, pad=pad, mode=mode)
return [host_from_gpu(ret)] return [host_from_gpu(ret)]
@register_opt('cudnn') @register_opt('cudnn')
...@@ -1667,12 +1676,13 @@ if True: ...@@ -1667,12 +1676,13 @@ if True:
ds = node.op.ds ds = node.op.ds
st = node.op.st st = node.op.st
pad = node.op.padding pad = node.op.padding
mode = node.op.mode
if ((inp.owner and isinstance(inp.owner.op, HostFromGpu)) or if ((inp.owner and isinstance(inp.owner.op, HostFromGpu)) or
(out.owner and isinstance(out.owner.op, HostFromGpu)) or (out.owner and isinstance(out.owner.op, HostFromGpu)) or
(inp_grad.owner and isinstance(inp_grad.owner.op, (inp_grad.owner and isinstance(inp_grad.owner.op,
HostFromGpu))): HostFromGpu))):
desc = GpuDnnPoolDesc(ws=ds, stride=st, mode="max", pad=pad)() desc = GpuDnnPoolDesc(ws=ds, stride=st, mode=mode, pad=pad)()
if not node.op.ignore_border: if not node.op.ignore_border:
return return
ret = GpuDnnPoolGrad()(gpu_contiguous(inp), ret = GpuDnnPoolGrad()(gpu_contiguous(inp),
......
...@@ -1648,8 +1648,9 @@ import theano.tensor.signal.downsample as downsample ...@@ -1648,8 +1648,9 @@ import theano.tensor.signal.downsample as downsample
def local_gpu_downsample_factor_max(node): def local_gpu_downsample_factor_max(node):
if (isinstance(node.op, downsample.DownsampleFactorMax) if (isinstance(node.op, downsample.DownsampleFactorMax)
and node.op.ds == node.op.st): and node.op.ds == node.op.st):
assert node.op.__props__ == ('ds', 'ignore_border', 'st', 'padding') assert node.op.__props__ == ('ds', 'ignore_border', 'st', 'padding',
if node.op.padding != (0, 0): 'mode')
if node.op.padding != (0, 0) or node.op.mode != 'max':
return return
x, = node.inputs x, = node.inputs
if (x.owner and isinstance(x.owner.op, HostFromGpu)): if (x.owner and isinstance(x.owner.op, HostFromGpu)):
...@@ -1662,8 +1663,9 @@ def local_gpu_downsample_factor_max(node): ...@@ -1662,8 +1663,9 @@ def local_gpu_downsample_factor_max(node):
def local_gpu_downsample_factor_max_grad(node): def local_gpu_downsample_factor_max_grad(node):
if (isinstance(node.op, downsample.DownsampleFactorMaxGrad) and if (isinstance(node.op, downsample.DownsampleFactorMaxGrad) and
node.op.ds == node.op.st): node.op.ds == node.op.st):
assert node.op.__props__ == ('ds', 'ignore_border', 'st', 'padding') assert node.op.__props__ == ('ds', 'ignore_border', 'st', 'padding',
if node.op.padding != (0, 0): 'mode')
if node.op.padding != (0, 0) or node.op.mode != 'max':
return return
x, z, gz = node.inputs x, z, gz = node.inputs
if (x.owner and isinstance(x.owner.op, HostFromGpu)): if (x.owner and isinstance(x.owner.op, HostFromGpu)):
...@@ -1678,6 +1680,8 @@ def local_gpu_downsample_factor_max_grad(node): ...@@ -1678,6 +1680,8 @@ def local_gpu_downsample_factor_max_grad(node):
@local_optimizer([downsample.DownsampleFactorMaxGradGrad]) @local_optimizer([downsample.DownsampleFactorMaxGradGrad])
def local_gpu_downsample_factor_max_grad_grad(node): def local_gpu_downsample_factor_max_grad_grad(node):
if isinstance(node.op, downsample.DownsampleFactorMaxGradGrad): if isinstance(node.op, downsample.DownsampleFactorMaxGradGrad):
assert node.op.__props__ == ('ds', 'ignore_border', 'st')
x, z, gx = node.inputs x, z, gx = node.inputs
if (x.owner and isinstance(x.owner.op, HostFromGpu)): if (x.owner and isinstance(x.owner.op, HostFromGpu)):
op = GpuDownsampleFactorMaxGradGrad(node.op.ds, op = GpuDownsampleFactorMaxGradGrad(node.op.ds,
......
...@@ -183,8 +183,12 @@ def test_pooling(): ...@@ -183,8 +183,12 @@ def test_pooling():
raise SkipTest(cuda.dnn.dnn_available.msg) raise SkipTest(cuda.dnn.dnn_available.msg)
x = T.ftensor4() x = T.ftensor4()
for func, pad in product((T.max, T.mean), for mode, pad in product(('max', 'average_inc_pad', 'average_exc_pad'),
((0, 0), (1, 0), (1, 0), (2, 3), (3, 2))): ((0, 0), (1, 0), (1, 0), (2, 3), (3, 2))):
if mode == 'max':
func = T.max
else:
func = T.mean
if pad != (0, 0) and cuda.dnn.version() == -1: if pad != (0, 0) and cuda.dnn.version() == -1:
continue continue
...@@ -195,29 +199,23 @@ def test_pooling(): ...@@ -195,29 +199,23 @@ def test_pooling():
for stride in (2, 3): for stride in (2, 3):
if stride > ws: if stride > ws:
continue continue
if func is T.max: if pad[0] > stride or pad[1] > stride:
if pad[0] > stride or pad[1] > stride: # Not implemented
# Not implemented continue
continue # We will check that the opt introduced it.
# We will check that the opt introduced it. out1 = max_pool_2d(x, (ws, ws),
out1 = max_pool_2d(x, (ws, ws), st=(stride, stride),
st=(stride, stride), ignore_border=True,
ignore_border=True, padding=pad, mode=mode)
padding=pad)
else:
out1 = cuda.dnn.dnn_pool(
x, ws=(ws, ws),
stride=(stride, stride),
pad=pad,
mode='max' if func is T.max else "average")
out2 = pool_2d_i2n(x, ds=(ws, ws), strides=(stride, stride), out2 = pool_2d_i2n(x, ds=(ws, ws), strides=(stride, stride),
pad=pad, pad=pad,
pool_function=func) pool_function=func)
mode_without_gpu2 = mode_without_gpu.including()
mode_without_gpu2.check_isfinite = False
f1 = theano.function([x], out1, mode=mode_with_gpu) f1 = theano.function([x], out1, mode=mode_with_gpu)
assert any([isinstance(node.op, cuda.dnn.GpuDnnPool) assert any([isinstance(node.op, cuda.dnn.GpuDnnPool)
for node in f1.maker.fgraph.apply_nodes]) for node in f1.maker.fgraph.apply_nodes])
f2 = theano.function([x], out2, mode=mode_without_gpu) f2 = theano.function([x], out2, mode=mode_without_gpu2)
assert not any([isinstance(node.op, cuda.dnn.GpuDnnPool) assert not any([isinstance(node.op, cuda.dnn.GpuDnnPool)
for node in f2.maker.fgraph.apply_nodes]) for node in f2.maker.fgraph.apply_nodes])
for shp in [(1, 10, 100, 100), for shp in [(1, 10, 100, 100),
...@@ -245,7 +243,7 @@ def test_pooling(): ...@@ -245,7 +243,7 @@ def test_pooling():
# This tests the CPU grad + opt + GPU implementation # This tests the CPU grad + opt + GPU implementation
def fn(x): def fn(x):
return max_pool_2d(x, (ws, ws), ignore_border=True, return max_pool_2d(x, (ws, ws), ignore_border=True,
padding=pad) padding=pad, mode=mode)
theano.tests.unittest_tools.verify_grad(fn, [data], theano.tests.unittest_tools.verify_grad(fn, [data],
cast_to_output_type=False, cast_to_output_type=False,
mode=mode_with_gpu) mode=mode_with_gpu)
...@@ -261,7 +259,7 @@ def test_pooling(): ...@@ -261,7 +259,7 @@ def test_pooling():
x, ws=(ws, ws), x, ws=(ws, ws),
stride=(stride, stride), stride=(stride, stride),
pad=pad, pad=pad,
mode='max' if func is T.max else "average") mode=mode)
return dnn_op return dnn_op
theano.tests.unittest_tools.verify_grad( theano.tests.unittest_tools.verify_grad(
fn, [data], fn, [data],
...@@ -274,17 +272,16 @@ def test_pooling(): ...@@ -274,17 +272,16 @@ def test_pooling():
for node in fg.maker.fgraph.toposort()]) for node in fg.maker.fgraph.toposort()])
g_out = fg(data) g_out = fg(data)
if func is T.max: # Compare against the CPU result
# Compare against the CPU result out = max_pool_2d(x, (ws, ws),
out = max_pool_2d(x, (ws, ws), padding=pad,
padding=pad, ignore_border=True, mode=mode)
ignore_border=True) fc = theano.function([x], theano.grad(out.sum(), x),
fc = theano.function([x], theano.grad(out.sum(), x), mode=mode_without_gpu)
mode=mode_without_gpu) assert any([isinstance(node.op, DownsampleFactorMaxGrad)
assert any([isinstance(node.op, DownsampleFactorMaxGrad) for node in fc.maker.fgraph.toposort()])
for node in fc.maker.fgraph.toposort()]) c_out = fc(data)
c_out = fc(data) assert numpy.allclose(c_out, g_out)
assert numpy.allclose(c_out, g_out)
def test_pooling_opt(): def test_pooling_opt():
...@@ -523,7 +520,7 @@ class TestDnnInferShapes(utt.InferShapeTester): ...@@ -523,7 +520,7 @@ class TestDnnInferShapes(utt.InferShapeTester):
for params in product( for params in product(
[(1, 1), (2, 2), (3, 3)], [(1, 1), (2, 2), (3, 3)],
[(1, 1), (2, 2), (3, 3)], [(1, 1), (2, 2), (3, 3)],
['max', 'average'] ['max', 'average_inc_pad', 'average_exc_pad']
): ):
desc = dnn.GpuDnnPoolDesc( desc = dnn.GpuDnnPoolDesc(
ws=params[0], ws=params[0],
...@@ -559,7 +556,7 @@ class TestDnnInferShapes(utt.InferShapeTester): ...@@ -559,7 +556,7 @@ class TestDnnInferShapes(utt.InferShapeTester):
for params in product( for params in product(
[(1, 1), (2, 2), (3, 3)], [(1, 1), (2, 2), (3, 3)],
[(1, 1), (2, 2), (3, 3)], [(1, 1), (2, 2), (3, 3)],
['max', 'average'] ['max', 'average_inc_pad']
): ):
desc = dnn.GpuDnnPoolDesc( desc = dnn.GpuDnnPoolDesc(
ws=params[0], ws=params[0],
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论