提交 a16e91f7 authored 作者: Gijs van Tulder's avatar Gijs van Tulder

theano.tensor.signal.Pool with 3D support.

上级 172e699c
...@@ -35,7 +35,7 @@ from .nnet import GpuSoftmax ...@@ -35,7 +35,7 @@ from .nnet import GpuSoftmax
from .opt import (gpu_seqopt, register_opt, from .opt import (gpu_seqopt, register_opt,
op_lifter, register_opt2) op_lifter, register_opt2)
from .opt_util import alpha_merge, output_merge, inplace_allocempty from .opt_util import alpha_merge, output_merge, inplace_allocempty, pad_dims, unpad_dims
from theano.configdefaults import SUPPORTED_DNN_CONV_ALGO_BWD_FILTER from theano.configdefaults import SUPPORTED_DNN_CONV_ALGO_BWD_FILTER
...@@ -1253,7 +1253,7 @@ class GpuDnnPoolGrad(DnnBase): ...@@ -1253,7 +1253,7 @@ class GpuDnnPoolGrad(DnnBase):
return [shape[0]] return [shape[0]]
def dnn_pool(img, ws, stride=(1, 1), mode='max', pad=(0, 0)): def dnn_pool(img, ws, stride=None, mode='max', pad=None):
""" """
GPU pooling using cuDNN from NVIDIA. GPU pooling using cuDNN from NVIDIA.
...@@ -1267,13 +1267,13 @@ def dnn_pool(img, ws, stride=(1, 1), mode='max', pad=(0, 0)): ...@@ -1267,13 +1267,13 @@ def dnn_pool(img, ws, stride=(1, 1), mode='max', pad=(0, 0)):
img img
Images to do the pooling over. Images to do the pooling over.
ws : tuple ws : tuple
Subsampling window size. Subsampling window size. Should have 2 or 3 elements.
stride : tuple stride : tuple
Subsampling stride (default: (1, 1)). Subsampling stride (default: (1, 1) or (1, 1, 1)).
mode : {'max', 'average_inc_pad', 'average_exc_pad', 'sum'} mode : {'max', 'average_inc_pad', 'average_exc_pad', 'sum'}
pad : tuple pad : tuple
(padX, padY) or (padX, padY, padZ) (padX, padY) or (padX, padY, padZ)
default: (0, 0) default: (0, 0) or (0, 0, 0)
.. warning:: The cuDNN library only works with GPU that have a compute .. warning:: The cuDNN library only works with GPU that have a compute
capability of 3.0 or higer. This means that older GPU will not capability of 3.0 or higer. This means that older GPU will not
...@@ -1285,6 +1285,10 @@ def dnn_pool(img, ws, stride=(1, 1), mode='max', pad=(0, 0)): ...@@ -1285,6 +1285,10 @@ def dnn_pool(img, ws, stride=(1, 1), mode='max', pad=(0, 0)):
""" """
img = gpu_contiguous(img) img = gpu_contiguous(img)
if stride is None:
stride = (1,) * len(ws)
if pad is None:
pad = (0,) * len(ws)
if mode == "sum": if mode == "sum":
ret = GpuDnnPool(mode="average_inc_pad")(img, ws, stride, pad) ret = GpuDnnPool(mode="average_inc_pad")(img, ws, stride, pad)
context_name = ret.type.context_name context_name = ret.type.context_name
...@@ -1868,9 +1872,18 @@ def local_gpua_pool_dnn_alternative(op, ctx_name, inputs, outputs): ...@@ -1868,9 +1872,18 @@ def local_gpua_pool_dnn_alternative(op, ctx_name, inputs, outputs):
if not op.ignore_border: if not op.ignore_border:
return return
img, ws, stride, pad = inputs img, ws, stride, pad = inputs
img = as_gpuarray_variable(img, ctx_name) nd = op.ndim if op.ndim else (img.ndim - 2)
if nd not in (2, 3):
return
img = gpu_contiguous(as_gpuarray_variable(img, ctx_name))
mode = op.mode mode = op.mode
return dnn_pool(gpu_contiguous(img), ws, stride=stride, pad=pad, mode=mode) if img.ndim == nd + 2:
return dnn_pool(img, ws, stride=stride, pad=pad, mode=mode)
else:
# reshape to 4D or 5D with 2 non-pooling dimensions
img_padded = pad_dims(img, 2, nd)
ret_padded = dnn_pool(img_padded, ws, stride=stride, pad=pad, mode=mode)
return unpad_dims(ret_padded, img, 2, nd)
@register_opt('cudnn', 'fast_compile') @register_opt('cudnn', 'fast_compile')
...@@ -1882,17 +1895,33 @@ def local_gpua_pool_dnn_grad_stride(op, ctx_name, inputs, outputs): ...@@ -1882,17 +1895,33 @@ def local_gpua_pool_dnn_grad_stride(op, ctx_name, inputs, outputs):
if not op.ignore_border: if not op.ignore_border:
return return
inp, out, out_grad, ws, stride, pad = inputs inp, out, out_grad, ws, stride, pad = inputs
inp = as_gpuarray_variable(inp, ctx_name) nd = op.ndim if op.ndim else (inp.ndim - 2)
out = as_gpuarray_variable(out, ctx_name) if nd not in (2, 3):
out_grad = as_gpuarray_variable(out_grad, ctx_name) return
inp = gpu_contiguous(as_gpuarray_variable(inp, ctx_name))
out = gpu_contiguous(as_gpuarray_variable(out, ctx_name))
out_grad = gpu_contiguous(as_gpuarray_variable(out_grad, ctx_name))
mode = op.mode mode = op.mode
return GpuDnnPoolGrad(mode=mode)(gpu_contiguous(inp), if inp.ndim == nd + 2:
gpu_contiguous(out), return GpuDnnPoolGrad(mode=mode)(inp,
gpu_contiguous(out_grad), out,
ws, out_grad,
stride, ws,
pad) stride,
pad)
else:
# reshape to 4D or 5D with 2 non-pooling dimensions
inp_padded = pad_dims(inp, 2, nd)
out_padded = pad_dims(out, 2, nd)
out_grad_padded = pad_dims(out_grad, 2, nd)
ret_padded = GpuDnnPoolGrad(mode=mode)(inp_padded,
out_padded,
out_grad_padded,
ws,
stride,
pad)
return unpad_dims(ret_padded, inp, 2, nd)
@register_opt('cudnn', 'fast_compile') @register_opt('cudnn', 'fast_compile')
...@@ -1904,16 +1933,28 @@ def local_gpua_avg_pool_dnn_grad_stride(op, ctx_name, inputs, outputs): ...@@ -1904,16 +1933,28 @@ def local_gpua_avg_pool_dnn_grad_stride(op, ctx_name, inputs, outputs):
if not op.ignore_border: if not op.ignore_border:
return return
inp, out_grad, ws, stride, pad = inputs inp, out_grad, ws, stride, pad = inputs
inp = as_gpuarray_variable(inp, ctx_name) nd = op.ndim if op.ndim else (inp.ndim - 2)
out_grad = as_gpuarray_variable(out_grad, ctx_name) if nd not in (2, 3):
return
inp = gpu_contiguous(as_gpuarray_variable(inp, ctx_name))
out_grad = gpu_contiguous(as_gpuarray_variable(out_grad, ctx_name))
mode = op.mode mode = op.mode
cg = gpu_contiguous(out_grad) if inp.ndim == nd + 2:
# We reuse out_grad because cuDNN does not use the value of the `out`
# We reuse cg because cuDNN does not use the value of the `out` # argument but still checks its shape for average pooling. This
# argument but still checks its shape for average pooling. This # has been observed in v2 and v3 as far as I know.
# has been observed in v2 and v3 as far as I know. return GpuDnnPoolGrad(mode=mode)(inp, out_grad, out_grad, ws, stride, pad)
return GpuDnnPoolGrad(mode=mode)(gpu_contiguous(inp), cg, cg, ws, stride, pad) else:
inp_padded = pad_dims(inp, 2, nd)
out_grad_padded = pad_dims(out_grad, 2, nd)
ret_padded = GpuDnnPoolGrad(mode=mode)(inp_padded,
out_grad_padded,
out_grad_padded,
ws,
stride,
pad)
return unpad_dims(ret_padded, inp, 2, nd)
@register_opt('cudnn', 'fast_compile') @register_opt('cudnn', 'fast_compile')
......
...@@ -3,12 +3,12 @@ from functools import wraps ...@@ -3,12 +3,12 @@ from functools import wraps
import numpy import numpy
from theano import scalar as scal, Constant from theano import tensor, scalar as scal, Constant
from theano.gof import local_optimizer from theano.gof import local_optimizer
from theano.tensor import (DimShuffle, get_scalar_constant_value, from theano.tensor import (DimShuffle, get_scalar_constant_value,
NotScalarConstantError) NotScalarConstantError)
from .basic_ops import GpuFromHost, HostFromGpu, GpuAllocEmpty, gpu_alloc_empty from .basic_ops import GpuFromHost, HostFromGpu, GpuAllocEmpty, GpuReshape, gpu_alloc_empty
from .elemwise import GpuDimShuffle, GpuElemwise from .elemwise import GpuDimShuffle, GpuElemwise
_one = scal.constant(numpy.asarray(1.0, dtype='float32')) _one = scal.constant(numpy.asarray(1.0, dtype='float32'))
...@@ -329,3 +329,48 @@ def inplace_allocempty(op, idx): ...@@ -329,3 +329,48 @@ def inplace_allocempty(op, idx):
return maker(node, inputs) return maker(node, inputs)
return opt return opt
return wrapper return wrapper
def pad_dims(input, leftdims, rightdims):
    """Reshape `input` to a (leftdims + rightdims)-dimensional tensor.

    This helper function is used to convert pooling inputs with arbitrary
    non-pooling dimensions to the correct number of dimensions for the
    GPU pooling ops.

    This reduces or expands the number of leading (non-pooling) dimensions
    of the input to exactly `leftdims`, so the result has
    `leftdims + rightdims` dimensions in total, by adding extra dimensions
    on the left or by combining some existing dimensions on the left of
    the input.

    Parameters
    ----------
    input
        Symbolic GPU array variable to reshape.
    leftdims : int
        Number of leading non-pooling dimensions the result should have
        (e.g. 2 for the usual (batch, channel) pair).
    rightdims : int
        Number of trailing pooling dimensions, kept unchanged (2 or 3).

    Returns
    -------
    Symbolic variable with `leftdims + rightdims` dimensions whose last
    `rightdims` dimensions equal those of `input`.
    """
    assert input.ndim >= rightdims
    # Already the requested rank: nothing to do.
    if input.ndim == (leftdims + rightdims):
        return input
    # extract image dimensions (the trailing pooling dimensions)
    img_shape = input.shape[-rightdims:]
    # count the number of "leading" dimensions, store as dmatrix
    batch_size = tensor.prod(input.shape[:-rightdims])
    batch_size = tensor.shape_padright(batch_size, 1)
    # store in the required shape, for example as a 4D tensor
    # with shape: (batch_size,1,height,width) — all leading dimensions
    # are collapsed into the first axis, the other left axes are set to 1
    new_shape = tensor.cast(tensor.join(0, batch_size,
                                        tensor.as_tensor([1] * (leftdims - 1)),
                                        img_shape), 'int64')
    input_ND = GpuReshape(leftdims + rightdims)(input, new_shape)
    return input_ND
def unpad_dims(output, input, leftdims, rightdims):
    """Reshape `output` back to the dimensionality of `input`.

    Inverse of `pad_dims`: restores the original leading (non-pooling)
    dimensions of `input` while keeping the trailing `rightdims`
    dimensions of `output` (whose sizes may have changed due to pooling).
    """
    # If pad_dims was a no-op, there is nothing to undo.
    if input.ndim == output.ndim:
        return output
    # Original leading dims followed by the (possibly pooled) trailing dims.
    leading = input.shape[:-rightdims]
    trailing = output.shape[-rightdims:]
    restored_shape = tensor.join(0, leading, trailing)
    return GpuReshape(input.ndim)(output, restored_shape)
...@@ -11,11 +11,12 @@ from six import StringIO ...@@ -11,11 +11,12 @@ from six import StringIO
import theano.tensor as T import theano.tensor as T
import theano.tests.unittest_tools as utt import theano.tests.unittest_tools as utt
from theano.sandbox.neighbours import images2neibs from theano.sandbox.neighbours import images2neibs
from theano.tensor.signal.pool import pool_2d from theano.tensor.signal.pool import pool_2d, pool_3d
from theano.tensor.signal.pool import MaxPoolGrad, AveragePoolGrad from theano.tensor.signal.pool import Pool, MaxPoolGrad, AveragePoolGrad
from .. import dnn from .. import dnn
from ..basic_ops import GpuAllocEmpty from ..basic_ops import GpuAllocEmpty
from ..type import gpuarray_shared_constructor
from .config import mode_with_gpu, mode_without_gpu, test_ctx_name from .config import mode_with_gpu, mode_without_gpu, test_ctx_name
from . import test_nnet from . import test_nnet
...@@ -166,6 +167,27 @@ def pool_2d_i2n(input, ds=(2, 2), strides=None, ...@@ -166,6 +167,27 @@ def pool_2d_i2n(input, ds=(2, 2), strides=None,
return pooled_output return pooled_output
def pool3d2d(input, ds=(2, 2, 2), strides=None, pad=(0, 0, 0),
             pool_function=T.max, mode='ignore_borders'):
    """Emulate 3D pooling with two consecutive 2D pooling passes.

    The input (batch, channel, d0, d1, d2) is first pooled over the two
    innermost dimensions (d1, d2), then over d0, via `pool_2d_i2n`.
    Serves as a reference implementation for the 3D pooling tests.
    """
    if strides is None:
        strides = ds
    in_shape = input.shape
    # reshape to B, C*0, 1, 2 and do the pooling on 1, 2
    merged_channels = input.reshape((in_shape[0],
                                     in_shape[1] * in_shape[2],
                                     in_shape[3], in_shape[4]))
    pass1 = pool_2d_i2n(merged_channels, ds=ds[1:], strides=strides[1:],
                        pad=pad[1:], pool_function=pool_function, mode=mode)
    pass1_shape = pass1.shape
    # reshape to B, C, 0, 1*2 and do the pooling on 0
    merged_spatial = pass1.reshape((in_shape[0], in_shape[1], in_shape[2],
                                    pass1_shape[2] * pass1_shape[3]))
    pass2 = pool_2d_i2n(merged_spatial, ds=(ds[0], 1),
                        strides=(strides[0], 1), pad=(pad[0], 0),
                        pool_function=pool_function, mode=mode)
    pass2_shape = pass2.shape
    return pass2.reshape((in_shape[0], in_shape[1], pass2_shape[2],
                          pass1_shape[2], pass1_shape[3]))
def test_pooling(): def test_pooling():
if not dnn.dnn_available(test_ctx_name): if not dnn.dnn_available(test_ctx_name):
raise SkipTest(dnn.dnn_available.msg) raise SkipTest(dnn.dnn_available.msg)
...@@ -178,7 +200,7 @@ def test_pooling(): ...@@ -178,7 +200,7 @@ def test_pooling():
x = T.ftensor4() x = T.ftensor4()
for mode, pad in product(modes, for mode, pad in product(modes,
((0, 0), (1, 0), (1, 0), (2, 3), (3, 2))): ((0, 0), (1, 0), (0, 1), (2, 3), (3, 2))):
if mode == 'max': if mode == 'max':
func = T.max func = T.max
else: else:
...@@ -278,6 +300,119 @@ def test_pooling(): ...@@ -278,6 +300,119 @@ def test_pooling():
utt.assert_allclose(c_out, g_out) utt.assert_allclose(c_out, g_out)
def test_pooling_3d():
    """Compare cuDNN 3D pooling (forward and grad) against CPU references.

    Forward pass: `pool_3d` compiled for the GPU (must be lowered to
    GpuDnnPool) is compared with the `pool3d2d` emulation running on the
    CPU, over several modes, paddings, window sizes and strides.
    Backward pass: `verify_grad` plus a direct comparison of the GPU
    gradient against the CPU `MaxPoolGrad`/`AveragePoolGrad` result.
    """
    if not dnn.dnn_available(test_ctx_name):
        raise SkipTest(dnn.dnn_available.msg)
    # 'average_exc_pad' is disabled for versions < 4004
    if dnn.version(raises=False) < 4004:
        modes = ('max', 'average_inc_pad')
    else:
        modes = ('max', 'average_inc_pad', 'average_exc_pad')
    x = T.ftensor5()
    for mode, pad in product(modes,
                             ((0, 0, 0), (1, 0, 0), (0, 1, 0), (0, 0, 1),
                              (3, 2, 2), (2, 3, 2), (2, 2, 3))):
        if mode == 'max':
            func = T.max
        else:
            func = T.mean
        # pool3d2d's padded mean reference does not match cuDNN's
        # average modes, so skip padded averaging comparisons.
        if pad != (0, 0, 0) and func is T.mean:
            continue
        for ws in (4, 2, 5):
            for stride in (2, 3):
                if stride > ws:
                    continue
                if pad[0] > stride or pad[1] > stride or pad[2] > stride:
                    # Not implemented
                    continue
                # We will check that the opt introduced it.
                out1 = pool_3d(x, (ws, ws, ws),
                               st=(stride, stride, stride),
                               ignore_border=True,
                               padding=pad, mode=mode)
                out2 = pool3d2d(x, ds=(ws, ws, ws), strides=(stride, stride, stride),
                                pad=pad,
                                pool_function=func)
                mode_without_gpu2 = mode_without_gpu.including()
                mode_without_gpu2.check_isfinite = False
                # GPU function must contain the cuDNN pooling op ...
                f1 = theano.function([x], out1, mode=mode_with_gpu)
                assert any([isinstance(node.op, dnn.GpuDnnPool)
                            for node in f1.maker.fgraph.apply_nodes])
                # ... and the CPU reference must not.
                f2 = theano.function([x], out2, mode=mode_without_gpu2)
                assert not any([isinstance(node.op, dnn.GpuDnnPool)
                                for node in f2.maker.fgraph.apply_nodes])
                for shp in [(1, 10, 30, 30, 30),
                            (1, 3, 29, 29, 29),
                            (3, 1, 47, 97, 47),
                            ]:
                    data = numpy.random.normal(0, 1, shp).astype("float32")
                    a = f1(data)
                    b = f2(data)
                    utt.assert_allclose(a, b)
        # Test the grad
        for shp in [(1, 1, 2, 2, 2),
                    (1, 1, 3, 3, 3)]:
            data = numpy.random.normal(0, 1, shp).astype("float32") * 10
            ws = 2
            stride = 2
            if pad[0] > stride or pad[1] > stride or pad[2] > stride:
                # Not implemented
                continue
            # This tests the CPU grad + opt + GPU implementation
            def fn(x):
                return pool_3d(x, (ws, ws, ws), ignore_border=True,
                               padding=pad, mode=mode)
            utt.verify_grad(fn, [data],
                            cast_to_output_type=False,
                            mode=mode_with_gpu)
            # Confirm that the opt would have inserted it.
            fg = theano.function([x], theano.grad(fn(x).sum(), x),
                                 mode=mode_with_gpu)
            assert any([isinstance(node.op, dnn.GpuDnnPoolGrad)
                        for node in fg.maker.fgraph.toposort()])
            # Test the GPU grad + GPU implementation
            def fn(x):
                dnn_op = dnn.dnn_pool(
                    x, ws=(ws, ws, ws),
                    stride=(stride, stride, stride),
                    pad=pad,
                    mode=mode)
                return dnn_op
            utt.verify_grad(fn, [data],
                            cast_to_output_type=False,
                            mode=mode_with_gpu)
            # Confirm that we get the good op.
            fg = theano.function([x], theano.grad(fn(x).sum(), x),
                                 mode=mode_with_gpu)
            assert any([isinstance(node.op, dnn.GpuDnnPoolGrad)
                        for node in fg.maker.fgraph.toposort()])
            g_out = fg(data)
            # Compare against the CPU result
            out = pool_3d(x, (ws, ws, ws),
                          padding=pad,
                          ignore_border=True, mode=mode)
            fc = theano.function([x], theano.grad(out.sum(), x),
                                 mode=mode_without_gpu)
            if mode == 'max':
                assert any([isinstance(node.op, MaxPoolGrad)
                            for node in fc.maker.fgraph.toposort()])
            else:
                assert any([isinstance(node.op, AveragePoolGrad)
                            for node in fc.maker.fgraph.toposort()])
            c_out = fc(data)
            utt.assert_allclose(c_out, g_out)
def test_pooling_with_tensor_vars(): def test_pooling_with_tensor_vars():
if not dnn.dnn_available(test_ctx_name): if not dnn.dnn_available(test_ctx_name):
raise SkipTest(dnn.dnn_available.msg) raise SkipTest(dnn.dnn_available.msg)
...@@ -331,6 +466,7 @@ def test_pooling_opt(): ...@@ -331,6 +466,7 @@ def test_pooling_opt():
if not dnn.dnn_available(test_ctx_name): if not dnn.dnn_available(test_ctx_name):
raise SkipTest(dnn.dnn_available.msg) raise SkipTest(dnn.dnn_available.msg)
# 2D pooling
x = T.fmatrix() x = T.fmatrix()
f = theano.function( f = theano.function(
...@@ -344,6 +480,7 @@ def test_pooling_opt(): ...@@ -344,6 +480,7 @@ def test_pooling_opt():
f(numpy.zeros((10, 10), dtype='float32')) f(numpy.zeros((10, 10), dtype='float32'))
# gradient of 2D pooling
f = theano.function( f = theano.function(
[x], [x],
T.grad(pool_2d(x, ds=(2, 2), mode='average_inc_pad', T.grad(pool_2d(x, ds=(2, 2), mode='average_inc_pad',
...@@ -362,11 +499,86 @@ def test_pooling_opt(): ...@@ -362,11 +499,86 @@ def test_pooling_opt():
pool_2d(x, ds=(2, 3), mode='sum', pool_2d(x, ds=(2, 3), mode='sum',
ignore_border=True), ignore_border=True),
mode=mode_with_gpu) mode=mode_with_gpu)
data = numpy.random.rand(10, 10).astype('float32')
f(data)
# 3D pooling
x = T.ftensor3()
f = theano.function(
[x],
pool_3d(x, ds=(2, 2, 2), mode='average_inc_pad',
ignore_border=True),
mode=mode_with_gpu)
assert any([isinstance(n.op, dnn.GpuDnnPool) assert any([isinstance(n.op, dnn.GpuDnnPool)
for n in f.maker.fgraph.toposort()]) for n in f.maker.fgraph.toposort()])
data = numpy.random.rand(10, 10).astype('float32')
f(data) f(numpy.zeros((10, 10, 10), dtype='float32'))
# gradient of 3D pooling
f = theano.function(
[x],
T.grad(pool_3d(x, ds=(2, 2, 2), mode='average_inc_pad',
ignore_border=True).sum(),
x),
mode=mode_with_gpu.including("cudnn"))
assert any([isinstance(n.op, dnn.GpuDnnPoolGrad)
for n in f.maker.fgraph.toposort()])
f(numpy.zeros((10, 10, 10), dtype='float32'))
def test_pooling_opt_arbitrary_dimensions():
    """Check GPU lowering of pooling for inputs with extra leading dims.

    Inputs with an arbitrary number of non-pooling dimensions must be
    correctly reshaped (via pad_dims/unpad_dims) to run on the GPU, and
    the GPU forward/gradient results must match the CPU ops.
    """
    # test if input with an arbitrary number of non-pooling dimensions
    # is correctly reshaped to run on the GPU
    if not dnn.dnn_available(test_ctx_name):
        raise SkipTest(dnn.dnn_available.msg)
    # 'average_exc_pad' is disabled for versions < 4004
    if dnn.version(raises=False) < 4004:
        modes = ('max', 'average_inc_pad')
    else:
        modes = ('max', 'average_inc_pad', 'average_exc_pad')
    for n_non_pool_dims in (0, 1, 2, 3):
        for ws in ((2, 2), (3, 3, 3)):
            # create input shape: non-pooling dimensions
            # followed by 2 or 3 pooling dimensions
            shp = (2,) * n_non_pool_dims + (5,) * len(ws)
            data = numpy.random.normal(0, 1, shp).astype('float32')
            input = gpuarray_shared_constructor(data)
            for mode in modes:
                out_pool = Pool(ndim=len(ws), mode=mode, ignore_border=True)(input, ws)
                out_pool_grad = T.grad(T.sum(out_pool), wrt=input)
                out = [out_pool, out_pool_grad]
                # run on GPU: both forward and grad must be lowered to cuDNN
                fg = theano.function([], out, mode=mode_with_gpu)
                assert any([isinstance(node.op, dnn.GpuDnnPool)
                            for node in fg.maker.fgraph.toposort()])
                assert any([isinstance(node.op, dnn.GpuDnnPoolGrad)
                            for node in fg.maker.fgraph.toposort()])
                res_gpu = fg()
                # run on CPU: must use the reference Pool/grad ops
                fc = theano.function([], out, mode=mode_without_gpu)
                assert any([isinstance(node.op, Pool)
                            for node in fc.maker.fgraph.toposort()])
                if mode == 'max':
                    assert any([isinstance(node.op, MaxPoolGrad)
                                for node in fc.maker.fgraph.toposort()])
                else:
                    assert any([isinstance(node.op, AveragePoolGrad)
                                for node in fc.maker.fgraph.toposort()])
                # BUGFIX: was `fg()` (the GPU function), which made the
                # GPU-vs-CPU comparison below compare the GPU result with
                # itself; the CPU function `fc` must be evaluated here.
                res_cpu = fc()
                # check for similarity
                utt.assert_allclose(res_gpu[0], res_cpu[0])
                utt.assert_allclose(res_gpu[1], res_cpu[1])
def test_dnn_tag(): def test_dnn_tag():
...@@ -625,6 +837,33 @@ class TestDnnInferShapes(utt.InferShapeTester): ...@@ -625,6 +837,33 @@ class TestDnnInferShapes(utt.InferShapeTester):
dnn.GpuDnnPool dnn.GpuDnnPool
) )
    def test_pool_3d(self):
        """Infer-shape test for GpuDnnPool on 5D input (3D pooling).

        Exercises `_compile_and_check` over all combinations of 3D window
        size, 3D stride and supported pooling modes, with zero padding.
        """
        if not dnn.dnn_available(test_ctx_name):
            raise SkipTest(dnn.dnn_available.msg)
        img = T.ftensor5('img')
        # Fixed 5D input value: (batch, channel, d0, d1, d2)
        img_val = numpy.asarray(
            numpy.random.rand(2, 3, 4, 5, 6),
            dtype='float32'
        )
        # 'average_exc_pad' is disabled for versions < 4004
        if dnn.version(raises=False) < 4004:
            modes = ['max', 'average_inc_pad']
        else:
            modes = ['max', 'average_inc_pad', 'average_exc_pad']
        # params = (window size, stride, mode)
        for params in product(
            [(1, 1, 1), (2, 2, 2), (3, 3, 3)],
            [(1, 1, 1), (2, 2, 2), (3, 3, 3)],
            modes
        ):
            self._compile_and_check(
                [img],
                [dnn.GpuDnnPool(mode=params[2])(img, params[0], params[1], (0, 0, 0))],
                [img_val],
                dnn.GpuDnnPool
            )
def test_pool_grad(self): def test_pool_grad(self):
if not dnn.dnn_available(test_ctx_name): if not dnn.dnn_available(test_ctx_name):
raise SkipTest(dnn.dnn_available.msg) raise SkipTest(dnn.dnn_available.msg)
...@@ -664,6 +903,45 @@ class TestDnnInferShapes(utt.InferShapeTester): ...@@ -664,6 +903,45 @@ class TestDnnInferShapes(utt.InferShapeTester):
dnn.GpuDnnPoolGrad dnn.GpuDnnPoolGrad
) )
    def test_pool_3d_grad(self):
        """Infer-shape test for GpuDnnPoolGrad on 5D inputs (3D pooling).

        Exercises `_compile_and_check` over 3D window sizes, strides and
        the 'max'/'average_inc_pad' modes, with zero padding.
        """
        if not dnn.dnn_available(test_ctx_name):
            raise SkipTest(dnn.dnn_available.msg)
        img = T.ftensor5('img')
        img_grad = T.ftensor5('img_grad')
        out = T.ftensor5('out')
        img_val = numpy.asarray(
            numpy.random.rand(2, 3, 4, 5, 6),
            dtype='float32'
        )
        img_grad_val = numpy.asarray(
            numpy.random.rand(2, 3, 4, 5, 6),
            dtype='float32'
        )
        out_val = numpy.asarray(
            numpy.random.rand(2, 3, 4, 5, 6),
            dtype='float32'
        )
        # params = (window size, stride, mode)
        for params in product(
            [(1, 1, 1), (2, 2, 2), (3, 3, 3)],
            [(1, 1, 1), (2, 2, 2), (3, 3, 3)],
            ['max', 'average_inc_pad']
        ):
            pool_grad = dnn.GpuDnnPoolGrad(mode=params[2])(
                img,
                out,
                img_grad,
                params[0],
                params[1],
                (0, 0, 0)
            )
            self._compile_and_check(
                [img, img_grad, out],
                [pool_grad],
                [img_val, img_grad_val, out_val],
                dnn.GpuDnnPoolGrad
            )
# this has been a problem in the past # this has been a problem in the past
def test_dnn_conv_border_mode(): def test_dnn_conv_border_mode():
......
...@@ -30,7 +30,8 @@ from theano.sandbox.cuda.basic_ops import (as_cuda_ndarray_variable, ...@@ -30,7 +30,8 @@ from theano.sandbox.cuda.basic_ops import (as_cuda_ndarray_variable,
from theano.sandbox.cuda.blas import (GpuConv, GpuDownsampleFactorMax, from theano.sandbox.cuda.blas import (GpuConv, GpuDownsampleFactorMax,
GpuDownsampleFactorMaxGrad) GpuDownsampleFactorMaxGrad)
from theano.sandbox.cuda.nnet import GpuSoftmax from theano.sandbox.cuda.nnet import GpuSoftmax
from theano.sandbox.cuda.opt_util import alpha_merge, output_merge from theano.sandbox.cuda.opt_util import (alpha_merge, output_merge,
pad_dims, unpad_dims)
from theano.sandbox.cuda import gpu_seqopt, register_opt from theano.sandbox.cuda import gpu_seqopt, register_opt
from theano.sandbox.cuda.nvcc_compiler import NVCC_compiler from theano.sandbox.cuda.nvcc_compiler import NVCC_compiler
...@@ -1391,20 +1392,23 @@ class GpuDnnPoolDesc(GpuOp): ...@@ -1391,20 +1392,23 @@ class GpuDnnPoolDesc(GpuOp):
def do_constant_folding(self, node): def do_constant_folding(self, node):
return False return False
def __init__(self, ws=(1, 1), stride=(1, 1), mode='max', pad=(0, 0)): def __init__(self, ws=(1, 1), stride=None, mode='max', pad=None):
if mode == 'average': if mode == 'average':
mode = 'average_inc_pad' mode = 'average_inc_pad'
assert mode in ('max', 'average_inc_pad', 'average_exc_pad') assert mode in ('max', 'average_inc_pad', 'average_exc_pad')
self.mode = mode self.mode = mode
if stride is None:
stride = (1,) * len(ws)
if pad is None:
pad = (0,) * len(ws)
assert len(ws) == len(stride) and len(stride) == len(pad) assert len(ws) == len(stride) and len(stride) == len(pad)
assert len(ws) in (2, 3) assert len(ws) in (2, 3)
self.ws = ws self.ws = ws
self.stride = stride self.stride = stride
self.pad = pad self.pad = pad
if (pad[0] != 0 or pad[1] != 0) and version() == -1:
raise RuntimeError("cuDNN pooling with padding requires cuDNN v2")
if self.get_ndim() == 3 and version() < (3000, 3000): if self.get_ndim() == 3 and version() < (3000, 3000):
raise RuntimeError("cuDNN 3d pooling requires cuDNN v3") raise RuntimeError("cuDNN 3d pooling requires cuDNN v3")
if (mode == 'average_exc_pad' and max(pad) > 0 and if (mode == 'average_exc_pad' and max(pad) > 0 and
...@@ -1418,12 +1422,9 @@ class GpuDnnPoolDesc(GpuOp): ...@@ -1418,12 +1422,9 @@ class GpuDnnPoolDesc(GpuOp):
def __setstate__(self, d): def __setstate__(self, d):
self.__dict__.update(d) self.__dict__.update(d)
if not hasattr(self, 'pad'): if not hasattr(self, 'pad'):
self.pad = (0, 0) self.pad = (0,) * self.get_ndim()
def make_node(self): def make_node(self):
if self.pad != (0, 0) and version() == -1:
raise RuntimeError("cuDNN pooling with padding requires cuDNN v2")
node = Apply(self, [], node = Apply(self, [],
[CDataType("cudnnPoolingDescriptor_t", [CDataType("cudnnPoolingDescriptor_t",
freefunc="cudnnDestroyPoolingDescriptor")()]) freefunc="cudnnDestroyPoolingDescriptor")()])
...@@ -1444,8 +1445,6 @@ class GpuDnnPoolDesc(GpuOp): ...@@ -1444,8 +1445,6 @@ class GpuDnnPoolDesc(GpuOp):
mode_flag = 'CUDNN_POOLING_AVERAGE_COUNT_INCLUDE_PADDING' mode_flag = 'CUDNN_POOLING_AVERAGE_COUNT_INCLUDE_PADDING'
elif self.mode == "average_exc_pad": elif self.mode == "average_exc_pad":
mode_flag = 'CUDNN_POOLING_AVERAGE_COUNT_EXCLUDE_PADDING' mode_flag = 'CUDNN_POOLING_AVERAGE_COUNT_EXCLUDE_PADDING'
if version() == -1:
raise Exception("cudnn v1 do not support average_exc_pad")
else: else:
raise NotImplementedError("Unsupported pooling model.") raise NotImplementedError("Unsupported pooling model.")
...@@ -1616,8 +1615,6 @@ if (pool%(name)s != NULL) { cudnnDestroyPoolingDescriptor(pool%(name)s); } ...@@ -1616,8 +1615,6 @@ if (pool%(name)s != NULL) { cudnnDestroyPoolingDescriptor(pool%(name)s); }
mode_flag = 'CUDNN_POOLING_AVERAGE_COUNT_INCLUDE_PADDING' mode_flag = 'CUDNN_POOLING_AVERAGE_COUNT_INCLUDE_PADDING'
elif self.mode == "average_exc_pad": elif self.mode == "average_exc_pad":
mode_flag = 'CUDNN_POOLING_AVERAGE_COUNT_EXCLUDE_PADDING' mode_flag = 'CUDNN_POOLING_AVERAGE_COUNT_EXCLUDE_PADDING'
if version() == -1:
raise Exception("cudnn v1 do not support average_exc_pad")
else: else:
raise NotImplementedError("Unsupported pooling model.") raise NotImplementedError("Unsupported pooling model.")
...@@ -1872,8 +1869,6 @@ if (pool%(name)s != NULL) { cudnnDestroyPoolingDescriptor(pool%(name)s); } ...@@ -1872,8 +1869,6 @@ if (pool%(name)s != NULL) { cudnnDestroyPoolingDescriptor(pool%(name)s); }
mode_flag = 'CUDNN_POOLING_AVERAGE_COUNT_INCLUDE_PADDING' mode_flag = 'CUDNN_POOLING_AVERAGE_COUNT_INCLUDE_PADDING'
elif self.mode == "average_exc_pad": elif self.mode == "average_exc_pad":
mode_flag = 'CUDNN_POOLING_AVERAGE_COUNT_EXCLUDE_PADDING' mode_flag = 'CUDNN_POOLING_AVERAGE_COUNT_EXCLUDE_PADDING'
if version() == -1:
raise Exception("cudnn v1 do not support average_exc_pad")
else: else:
raise NotImplementedError("Unsupported pooling model.") raise NotImplementedError("Unsupported pooling model.")
...@@ -1976,28 +1971,33 @@ if (err%(name)s != CUDNN_STATUS_SUCCESS) { ...@@ -1976,28 +1971,33 @@ if (err%(name)s != CUDNN_STATUS_SUCCESS) {
return [shape[0]] return [shape[0]]
def dnn_pool(img, ws, stride=(1, 1), mode='max', pad=(0, 0)): def dnn_pool(img, ws, stride=None, mode='max', pad=None):
""" """
GPU pooling using cuDNN from NVIDIA. GPU pooling using cuDNN from NVIDIA.
The memory layout to use is 'bc01', that is 'batch', 'channel', For 2D pooling, the memory layout to use is 'bc01', that is 'batch',
'first dim', 'second dim' in that order. 'channel', 'first dim', 'second dim' in that order.
For 3D pooling, the memory layout to use is 'bc012', that is 'batch',
'channel', 'first dim', 'second dim', 'third dim'.
Parameters Parameters
---------- ----------
img img
Images to do the pooling over. Images to do the pooling over.
ws ws
Subsampling window size. Subsampling window size. Should have 2 or 3 elements.
stride stride
Subsampling stride (default: (1, 1)). Subsampling stride (default: (1, 1) or (1, 1, 1)).
mode : {'max', 'average_inc_pad', 'average_exc_pad, 'sum'} mode : {'max', 'average_inc_pad', 'average_exc_pad', 'sum'}
pad : pad
(pad_h, pad_w) padding information. Padding: (pad_h, pad_w) for 2D or (pad_h, pad_w, pad_d) for 3D.
pad_h is the number of zero-valued pixels added to each of the top and pad_h is the number of zero-valued pixels added to each of the top and
bottom borders. bottom borders.
pad_w is the number of zero-valued pixels added to each of the left pad_w is the number of zero-valued pixels added to each of the left
and right borders. and right borders.
pad_d is the number of zero-valued pixels added to each of the front
and back borders (3D pooling only).
.. warning:: The cuDNN library only works with GPU that have a compute .. warning:: The cuDNN library only works with GPU that have a compute
capability of 3.0 or higer. This means that older GPU will not capability of 3.0 or higer. This means that older GPU will not
...@@ -2009,6 +2009,10 @@ def dnn_pool(img, ws, stride=(1, 1), mode='max', pad=(0, 0)): ...@@ -2009,6 +2009,10 @@ def dnn_pool(img, ws, stride=(1, 1), mode='max', pad=(0, 0)):
""" """
img = gpu_contiguous(img) img = gpu_contiguous(img)
if stride is None:
stride = (1,) * len(ws)
if pad is None:
pad = (0,) * len(ws)
if mode == "sum": if mode == "sum":
ret = GpuDnnPool(mode="average_inc_pad")(img, ws, stride, pad) ret = GpuDnnPool(mode="average_inc_pad")(img, ws, stride, pad)
window_elem = theano.tensor.prod(ws).astype(ret.dtype) window_elem = theano.tensor.prod(ws).astype(ret.dtype)
...@@ -2972,10 +2976,21 @@ if True: ...@@ -2972,10 +2976,21 @@ if True:
if not node.op.ignore_border: if not node.op.ignore_border:
return return
img, ws, stride, pad = node.inputs img, ws, stride, pad = node.inputs
nd = node.op.ndim if node.op.ndim else (img.ndim - 2)
mode = node.op.mode mode = node.op.mode
if nd not in (2, 3):
return
if (img.owner and isinstance(img.owner.op, HostFromGpu)): if (img.owner and isinstance(img.owner.op, HostFromGpu)):
ret = dnn_pool(gpu_contiguous(img.owner.inputs[0]), if img.ndim == nd + 2:
ws, stride=stride, pad=pad, mode=mode) ret = dnn_pool(gpu_contiguous(img.owner.inputs[0]),
ws, stride=stride, pad=pad, mode=mode)
else:
input = gpu_contiguous(img.owner.inputs[0])
# reshape to 4D or 5D with 2 non-pooling dimensions
input_padded = pad_dims(input, 2, nd)
ret_padded = dnn_pool(input_padded,
ws, stride=stride, pad=pad, mode=mode)
ret = unpad_dims(ret_padded, input, 2, nd)
return [host_from_gpu(ret)] return [host_from_gpu(ret)]
@register_opt('cudnn') @register_opt('cudnn')
...@@ -3003,17 +3018,30 @@ if True: ...@@ -3003,17 +3018,30 @@ if True:
if not node.op.ignore_border: if not node.op.ignore_border:
return return
inp, out, inp_grad, ws, stride, pad = node.inputs inp, out, inp_grad, ws, stride, pad = node.inputs
nd = node.op.ndim if node.op.ndim else (inp.ndim - 2)
mode = node.op.mode mode = node.op.mode
if nd not in (2, 3):
return
if ((inp.owner and isinstance(inp.owner.op, HostFromGpu)) or if ((inp.owner and isinstance(inp.owner.op, HostFromGpu)) or
(out.owner and isinstance(out.owner.op, HostFromGpu)) or (out.owner and isinstance(out.owner.op, HostFromGpu)) or
(inp_grad.owner and isinstance(inp_grad.owner.op, (inp_grad.owner and isinstance(inp_grad.owner.op,
HostFromGpu))): HostFromGpu))):
if inp.ndim == nd + 2:
ret = GpuDnnPoolGrad(mode=mode)(gpu_contiguous(inp), ret = GpuDnnPoolGrad(mode=mode)(gpu_contiguous(inp),
gpu_contiguous(out), gpu_contiguous(out),
gpu_contiguous(inp_grad), gpu_contiguous(inp_grad),
ws, stride, pad) ws, stride, pad)
else:
# reshape to 4D or 5D with 2 non-pooling dimensions
inp_padded = pad_dims(gpu_contiguous(inp), 2, nd)
out_padded = pad_dims(gpu_contiguous(out), 2, nd)
inp_grad_padded = pad_dims(gpu_contiguous(inp_grad), 2, nd)
ret_padded = GpuDnnPoolGrad(mode=mode)(inp_padded,
out_padded,
inp_grad_padded,
ws, stride, pad)
ret = unpad_dims(ret_padded, inp, 2, nd)
return [host_from_gpu(ret)] return [host_from_gpu(ret)]
@register_opt('cudnn') @register_opt('cudnn')
...@@ -3025,16 +3053,28 @@ if True: ...@@ -3025,16 +3053,28 @@ if True:
if not node.op.ignore_border: if not node.op.ignore_border:
return return
inp, inp_grad, ws, stride, pad = node.inputs inp, inp_grad, ws, stride, pad = node.inputs
nd = node.op.ndim if node.op.ndim else (inp.ndim - 2)
mode = node.op.mode mode = node.op.mode
if nd not in (2, 3):
return
if ((inp.owner and isinstance(inp.owner.op, HostFromGpu)) or if ((inp.owner and isinstance(inp.owner.op, HostFromGpu)) or
(inp_grad.owner and isinstance(inp_grad.owner.op, (inp_grad.owner and isinstance(inp_grad.owner.op,
HostFromGpu))): HostFromGpu))):
contiguous_inp_grad = gpu_contiguous(inp_grad) if inp.ndim == nd + 2:
ret = GpuDnnPoolGrad(mode=mode)(gpu_contiguous(inp), contiguous_inp_grad = gpu_contiguous(inp_grad)
contiguous_inp_grad, ret = GpuDnnPoolGrad(mode=mode)(gpu_contiguous(inp),
contiguous_inp_grad, contiguous_inp_grad,
ws, stride, pad) contiguous_inp_grad,
ws, stride, pad)
else:
inp_padded = pad_dims(gpu_contiguous(inp), 2, nd)
inp_grad_padded = pad_dims(gpu_contiguous(inp_grad), 2, nd)
ret_padded = GpuDnnPoolGrad(mode=mode)(inp_padded,
inp_grad_padded,
inp_grad_padded,
ws, stride, pad)
ret = unpad_dims(ret_padded, inp, 2, nd)
return [host_from_gpu(ret)] return [host_from_gpu(ret)]
@register_opt('cudnn') @register_opt('cudnn')
......
...@@ -40,6 +40,7 @@ from theano.sandbox.cuda.basic_ops import ( ...@@ -40,6 +40,7 @@ from theano.sandbox.cuda.basic_ops import (
GpuSubtensor, GpuAdvancedSubtensor1, GpuSubtensor, GpuAdvancedSubtensor1,
GpuAdvancedIncSubtensor1, GpuAdvancedIncSubtensor1_dev20, GpuAdvancedIncSubtensor1, GpuAdvancedIncSubtensor1_dev20,
GpuIncSubtensor, gpu_alloc, GpuAlloc, gpu_shape, GpuSplit, GpuAllocEmpty) GpuIncSubtensor, gpu_alloc, GpuAlloc, gpu_shape, GpuSplit, GpuAllocEmpty)
from theano.sandbox.cuda.opt_util import pad_dims, unpad_dims
from theano.sandbox.cuda.type import CudaNdarrayType from theano.sandbox.cuda.type import CudaNdarrayType
from theano.sandbox.cuda.blas import ( from theano.sandbox.cuda.blas import (
...@@ -1891,15 +1892,12 @@ def local_convtransp3d_gemm(node): ...@@ -1891,15 +1892,12 @@ def local_convtransp3d_gemm(node):
gpu_optimizer.register("convtransp3d_gemm", local_convtransp3d_gemm) gpu_optimizer.register("convtransp3d_gemm", local_convtransp3d_gemm)
def _check_constant_args_pool(ws, stride, pad, node): def _check_constant_args_pool(ndim, ws, stride, pad, node):
"""Check if the args of pool are constants. Warns if not.""" """Check if the args of pool are constants. Warns if not."""
try: try:
ws_w = tensor.get_scalar_constant_value(ws[0]) ws = tuple(tensor.get_scalar_constant_value(ws[i]) for i in range(ndim))
ws_h = tensor.get_scalar_constant_value(ws[1]) stride = tuple(tensor.get_scalar_constant_value(stride[i]) for i in range(ndim))
stride_w = tensor.get_scalar_constant_value(stride[0]) pad = tuple(tensor.get_scalar_constant_value(pad[i]) for i in range(ndim))
stride_h = tensor.get_scalar_constant_value(stride[1])
pad_w = tensor.get_scalar_constant_value(pad[0])
pad_h = tensor.get_scalar_constant_value(pad[1])
except tensor.NotScalarConstantError: except tensor.NotScalarConstantError:
msg = ("Pool with tensor variable for the window size, stride or " msg = ("Pool with tensor variable for the window size, stride or "
"padding is only supported in the new GPU backend, so this op " "padding is only supported in the new GPU backend, so this op "
...@@ -1909,65 +1907,96 @@ def _check_constant_args_pool(ws, stride, pad, node): ...@@ -1909,65 +1907,96 @@ def _check_constant_args_pool(ws, stride, pad, node):
elif config.assert_no_cpu_op == "raise": elif config.assert_no_cpu_op == "raise":
raise AssertionError(msg) raise AssertionError(msg)
return None return None
ws = (ws_w, ws_h)
stride = (stride_w, stride_h)
pad = (pad_w, pad_h)
return ws, stride, pad return ws, stride, pad
@register_opt() @register_opt()
@local_optimizer([pool.Pool]) @local_optimizer([pool.Pool])
def local_gpu_downsample_factor_max(node): def local_gpu_downsample_factor_max(node):
if isinstance(node.op, pool.Pool): if (isinstance(node.op, pool.Pool)):
assert node.op.__props__ == ('ignore_border', 'mode') assert node.op.__props__ == ('ndim', 'ignore_border', 'mode')
x, ws, stride, pad = node.inputs x, ws, stride, pad = node.inputs
ret = _check_constant_args_pool(ws, stride, pad, node) nd = node.op.ndim if node.op.ndim else (x.ndim - 2)
ret = _check_constant_args_pool(nd, ws, stride, pad, node)
if ret is None: if ret is None:
return return
ws, stride, pad = ret ws, stride, pad = ret
if (pad) != (0, 0) or node.op.mode != 'max' or stride != ws: if (nd != 2 or
max(node.op.padding) != 0 or
node.op.mode != 'max' or
stride != ws):
return return
if (x.owner and isinstance(x.owner.op, HostFromGpu)): if (x.owner and isinstance(x.owner.op, HostFromGpu)):
gpu_ds = GpuDownsampleFactorMax(ws, node.op.ignore_border) gpu_ws = GpuDownsampleFactorMax(ws, node.op.ignore_border)
return [host_from_gpu(gpu_ds(x.owner.inputs[0]))] if node.inputs[0].ndim == 4:
return [host_from_gpu(gpu_ws(x.owner.inputs[0]))]
else:
input_4D = pad_dims(x.owner.inputs[0], 2, 2)
output_4D = gpu_ws(input_4D)
output = unpad_dims(output_4D, x.owner.inputs[0], 2, 2)
return [host_from_gpu(output)]
@register_opt() @register_opt()
@local_optimizer([pool.MaxPoolGrad]) @local_optimizer([pool.MaxPoolGrad])
def local_gpu_downsample_factor_max_grad(node): def local_gpu_downsample_factor_max_grad(node):
if isinstance(node.op, pool.MaxPoolGrad): if (isinstance(node.op, pool.MaxPoolGrad)):
assert node.op.__props__ == ('ignore_border', 'mode') assert node.op.__props__ == ('ndim', 'ignore_border', 'mode')
x, z, gz, ws, stride, pad = node.inputs x, z, gz, ws, stride, pad = node.inputs
ret = _check_constant_args_pool(ws, stride, pad, node) nd = node.op.ndim if node.op.ndim else (x.ndim - 2)
ret = _check_constant_args_pool(nd, ws, stride, pad, node)
if ret is None: if ret is None:
return return
ws, stride, pad = ret ws, stride, pad = ret
if pad != (0, 0) or node.op.mode != 'max' or stride != ws: if (nd != 2 or
max(node.op.padding) != 0 or
node.op.mode != 'max' or
stride != ws):
return return
if (x.owner and isinstance(x.owner.op, HostFromGpu)): if (x.owner and isinstance(x.owner.op, HostFromGpu)):
gpu_ds_grad = GpuDownsampleFactorMaxGrad(ws, node.op.ignore_border) gpu_ws_grad = GpuDownsampleFactorMaxGrad(ws, node.op.ignore_border)
return [host_from_gpu(gpu_ds_grad(x.owner.inputs[0], if node.inputs[0].ndim == 4:
as_cuda_ndarray_variable(z), return [host_from_gpu(gpu_ws_grad(x.owner.inputs[0],
as_cuda_ndarray_variable(gz)))] as_cuda_ndarray_variable(z),
as_cuda_ndarray_variable(gz)))]
else:
x_4D = pad_dims(x.owner.inputs[0], 2, 2)
z_4D = pad_dims(as_cuda_ndarray_variable(z), 2, 2)
gz_4D = pad_dims(as_cuda_ndarray_variable(gz), 2, 2)
output_4D = gpu_ws_grad(x_4D, z_4D, gz_4D)
output = unpad_dims(output_4D, x.owner.inputs[0], 2, 2)
return [host_from_gpu(output)]
@register_opt() @register_opt()
@local_optimizer([pool.DownsampleFactorMaxGradGrad]) @local_optimizer([pool.DownsampleFactorMaxGradGrad])
def local_gpu_downsample_factor_max_grad_grad(node): def local_gpu_downsample_factor_max_grad_grad(node):
if isinstance(node.op, pool.DownsampleFactorMaxGradGrad): if isinstance(node.op, pool.DownsampleFactorMaxGradGrad):
assert node.op.__props__ == ('ignore_border', 'mode') assert node.op.__props__ == ('ndim', 'ignore_border', 'mode')
x, z, gx, ws, stride, pad = node.inputs x, z, gx, ws, stride, pad = node.inputs
ret = _check_constant_args_pool(ws, stride, pad, node) nd = node.op.ndim if node.op.ndim else (x.ndim - 2)
ret = _check_constant_args_pool(nd, ws, stride, pad, node)
if ret is None: if ret is None:
return return
ws, stride, pad = ret ws, stride, pad = ret
if pad != (0, 0) or node.op.mode != 'max' or stride != ws: if (nd != 2 or
max(node.op.padding) != 0 or
node.op.mode != 'max' or
stride != ws):
return return
if (x.owner and isinstance(x.owner.op, HostFromGpu)): if (x.owner and isinstance(x.owner.op, HostFromGpu)):
op = GpuDownsampleFactorMaxGradGrad(ws, node.op.ignore_border) op = GpuDownsampleFactorMaxGradGrad(ws, node.op.ignore_border)
return [host_from_gpu(op(x.owner.inputs[0], if node.inputs[0].ndim == 4:
as_cuda_ndarray_variable(z), return [host_from_gpu(op(x.owner.inputs[0],
as_cuda_ndarray_variable(gx)))] as_cuda_ndarray_variable(z),
as_cuda_ndarray_variable(gx)))]
else:
x_4D = pad_dims(x.owner.inputs[0], 2, 2)
z_4D = pad_dims(as_cuda_ndarray_variable(z), 2, 2)
gx_4D = pad_dims(as_cuda_ndarray_variable(gx), 2, 2)
output_4D = op(x_4D, z_4D, gx_4D)
output = unpad_dims(output_4D, x.owner.inputs[0], 2, 2)
return [host_from_gpu(output)]
@register_opt() @register_opt()
......
...@@ -3,13 +3,13 @@ from functools import wraps ...@@ -3,13 +3,13 @@ from functools import wraps
import numpy import numpy
from theano import scalar as scal, Constant from theano import tensor, scalar as scal, Constant
from theano.gof import local_optimizer from theano.gof import local_optimizer
from theano.tensor import (DimShuffle, get_scalar_constant_value, from theano.tensor import (DimShuffle, get_scalar_constant_value,
NotScalarConstantError) NotScalarConstantError)
from theano.sandbox.cuda.basic_ops import ( from theano.sandbox.cuda.basic_ops import (
GpuFromHost, HostFromGpu, host_from_gpu, GpuDimShuffle, GpuElemwise) GpuFromHost, HostFromGpu, host_from_gpu, GpuDimShuffle, GpuElemwise, GpuReshape)
_one = scal.constant(numpy.asarray(1.0, dtype='float32')) _one = scal.constant(numpy.asarray(1.0, dtype='float32'))
...@@ -126,3 +126,48 @@ def output_merge(cls, alpha_in, beta_in, out_in): ...@@ -126,3 +126,48 @@ def output_merge(cls, alpha_in, beta_in, out_in):
return maker(targ, *inputs) return maker(targ, *inputs)
return opt return opt
return wrapper return wrapper
def pad_dims(input, leftdims, rightdims):
    """Reshape `input` to exactly ``leftdims + rightdims`` dimensions.

    The last `rightdims` axes (the pooling axes) are preserved; all
    leading axes are collapsed into a single batch axis followed by
    ``leftdims - 1`` size-1 axes.  This lets pooling inputs with an
    arbitrary number of non-pooling dimensions be fed to GPU pooling
    ops that expect a fixed rank.  Reverted by `unpad_dims`.
    """
    assert input.ndim >= rightdims

    target_ndim = leftdims + rightdims
    if input.ndim == target_ndim:
        # Already the expected rank; nothing to do.
        return input

    # Shape of the trailing (pooling) axes, kept unchanged.
    pool_shape = input.shape[-rightdims:]

    # Collapse every leading axis into one batch axis; keep it as a
    # length-1 vector so it can be joined with the other shape pieces.
    batch = tensor.shape_padright(tensor.prod(input.shape[:-rightdims]), 1)

    # Full target shape, e.g. (batch_size, 1, height, width) for 2D pooling.
    padded_shape = tensor.cast(
        tensor.join(0, batch,
                    tensor.as_tensor([1] * (leftdims - 1)),
                    pool_shape),
        'int64')
    return GpuReshape(target_ndim)(input, padded_shape)
def unpad_dims(output, input, leftdims, rightdims):
    """Undo the reshape performed by `pad_dims`.

    Restores the leading (non-pooling) axes of `input` onto `output`,
    while keeping the trailing `rightdims` axes of `output`, which may
    have been downsampled by the pooling op.
    """
    if output.ndim == input.ndim:
        # pad_dims was a no-op, so there is nothing to undo.
        return output
    # Original leading axes + the (possibly downsampled) trailing axes.
    restored_shape = tensor.join(0,
                                 input.shape[:-rightdims],
                                 output.shape[-rightdims:])
    return GpuReshape(input.ndim)(output, restored_shape)
...@@ -326,7 +326,9 @@ if 0: ...@@ -326,7 +326,9 @@ if 0:
def test_downsample(): def test_downsample():
shps = [(1, 1, 1, 12), shps = [(1, 12),
(1, 1, 12),
(1, 1, 1, 12),
(1, 1, 2, 2), (1, 1, 2, 2),
(1, 1, 1, 1), (1, 1, 1, 1),
(1, 1, 4, 4), (1, 1, 4, 4),
...@@ -359,17 +361,17 @@ def test_downsample(): ...@@ -359,17 +361,17 @@ def test_downsample():
for shp in shps: for shp in shps:
for ds in (2, 2), (3, 2), (1, 1): for ds in (2, 2), (3, 2), (1, 1):
if ds[0] > shp[2]: if ds[0] > shp[-2]:
continue continue
if ds[1] > shp[3]: if ds[1] > shp[-1]:
continue continue
# GpuDownsampleFactorMax doesn't like having more than 512 columns # GpuDownsampleFactorMax doesn't like having more than 512 columns
# in the output tensor. # in the output tensor.
if float(shp[3]) / ds[1] > 512: if float(shp[-1]) / ds[1] > 512:
continue continue
for ignore_border in (True, False): for ignore_border in (True, False):
# print 'test_downsample', shp, ds, ignore_border # print 'test_downsample', shp, ds, ignore_border
ds_op = Pool(ignore_border=ignore_border) ds_op = Pool(ndim=len(ds), ignore_border=ignore_border)
a = tcn.shared_constructor(my_rand(*shp), 'a') a = tcn.shared_constructor(my_rand(*shp), 'a')
f = pfunc([], ds_op(tensor.as_tensor_variable(a), ds), f = pfunc([], ds_op(tensor.as_tensor_variable(a), ds),
......
...@@ -15,8 +15,8 @@ import theano ...@@ -15,8 +15,8 @@ import theano
import theano.tensor as T import theano.tensor as T
import theano.tests.unittest_tools as utt import theano.tests.unittest_tools as utt
from theano.sandbox.neighbours import images2neibs from theano.sandbox.neighbours import images2neibs
from theano.tensor.signal.pool import pool_2d from theano.tensor.signal.pool import pool_2d, pool_3d
from theano.tensor.signal.pool import MaxPoolGrad, AveragePoolGrad from theano.tensor.signal.pool import Pool, MaxPoolGrad, AveragePoolGrad
import theano.sandbox.cuda.dnn as dnn import theano.sandbox.cuda.dnn as dnn
from theano.sandbox.cuda.basic_ops import GpuAllocEmpty, gpu_alloc_empty from theano.sandbox.cuda.basic_ops import GpuAllocEmpty, gpu_alloc_empty
from theano.sandbox.cuda import float32_shared_constructor as shared from theano.sandbox.cuda import float32_shared_constructor as shared
...@@ -170,7 +170,7 @@ def test_dnn_conv_inplace(): ...@@ -170,7 +170,7 @@ def test_dnn_conv_inplace():
def pool3d2d(input, ds=(2, 2, 2), strides=None, pad=(0, 0, 0), def pool3d2d(input, ds=(2, 2, 2), strides=None, pad=(0, 0, 0),
pool_func=T.max, mode='ignore_borders'): pool_function=T.max, mode='ignore_borders'):
if strides is None: if strides is None:
strides = ds strides = ds
...@@ -179,13 +179,13 @@ def pool3d2d(input, ds=(2, 2, 2), strides=None, pad=(0, 0, 0), ...@@ -179,13 +179,13 @@ def pool3d2d(input, ds=(2, 2, 2), strides=None, pad=(0, 0, 0),
# resahpe to B, C*0, 1, 2 and do the pooling on 1, 2 # resahpe to B, C*0, 1, 2 and do the pooling on 1, 2
first = input.reshape((shape[0], shape[1] * shape[2], shape[3], shape[4])) first = input.reshape((shape[0], shape[1] * shape[2], shape[3], shape[4]))
pooled1 = pool_2d_i2n(first, ds=ds[1:], strides=strides[1:], pad=pad[1:], pooled1 = pool_2d_i2n(first, ds=ds[1:], strides=strides[1:], pad=pad[1:],
pool_function=pool_func, mode=mode) pool_function=pool_function, mode=mode)
shp1 = pooled1.shape shp1 = pooled1.shape
# reshape to B, C, 0, 1*2 and do the pooling on 0 # reshape to B, C, 0, 1*2 and do the pooling on 0
second = pooled1.reshape((shape[0], shape[1], shape[2], shp1[2] * shp1[3])) second = pooled1.reshape((shape[0], shape[1], shape[2], shp1[2] * shp1[3]))
pooled2 = pool_2d_i2n(second, ds=(ds[0], 1), strides=(strides[0], 1), pooled2 = pool_2d_i2n(second, ds=(ds[0], 1), strides=(strides[0], 1),
pad=(pad[0], 0), pool_function=pool_func, mode=mode) pad=(pad[0], 0), pool_function=pool_function, mode=mode)
shp2 = pooled2.shape shp2 = pooled2.shape
return pooled2.reshape((shape[0], shape[1], shp2[2], shp1[2], shp1[3])) return pooled2.reshape((shape[0], shape[1], shp2[2], shp1[2], shp1[3]))
...@@ -241,8 +241,6 @@ def test_pooling(): ...@@ -241,8 +241,6 @@ def test_pooling():
func = T.max func = T.max
else: else:
func = T.mean func = T.mean
if pad != (0, 0) and cuda.dnn.version() == -1:
continue
if pad != (0, 0) and func is T.mean: if pad != (0, 0) and func is T.mean:
continue continue
...@@ -418,6 +416,7 @@ def test_pooling3d(): ...@@ -418,6 +416,7 @@ def test_pooling3d():
if not cuda.dnn.dnn_available() or cuda.dnn.version() < (3000, 3000): if not cuda.dnn.dnn_available() or cuda.dnn.version() < (3000, 3000):
raise SkipTest(cuda.dnn.dnn_available.msg) raise SkipTest(cuda.dnn.dnn_available.msg)
# We force the FAST_RUN as we don't want the reference to run in DebugMode.
mode_without_gpu_ref = theano.compile.mode.get_mode( mode_without_gpu_ref = theano.compile.mode.get_mode(
'FAST_RUN').excluding('gpu') 'FAST_RUN').excluding('gpu')
...@@ -427,8 +426,7 @@ def test_pooling3d(): ...@@ -427,8 +426,7 @@ def test_pooling3d():
else: else:
modes = ('max', 'average_inc_pad', 'average_exc_pad') modes = ('max', 'average_inc_pad', 'average_exc_pad')
x = T.TensorType(broadcastable=(False, False, False, False, False), x = T.ftensor5()
dtype='float32')()
for mode, pad in product(modes, for mode, pad in product(modes,
((0, 0, 0), (1, 0, 0), (0, 1, 0), (0, 0, 1), ((0, 0, 0), (1, 0, 0), (0, 1, 0), (0, 0, 1),
(2, 3, 2), (3, 2, 2), (2, 2, 3))): (2, 3, 2), (3, 2, 2), (2, 2, 3))):
...@@ -436,8 +434,6 @@ def test_pooling3d(): ...@@ -436,8 +434,6 @@ def test_pooling3d():
func = T.max func = T.max
else: else:
func = T.mean func = T.mean
if pad != (0, 0, 0) and cuda.dnn.version() == -1:
continue
if pad != (0, 0, 0) and func is T.mean: if pad != (0, 0, 0) and func is T.mean:
continue continue
...@@ -449,13 +445,13 @@ def test_pooling3d(): ...@@ -449,13 +445,13 @@ def test_pooling3d():
if pad[0] > stride or pad[1] > stride or pad[2] > stride: if pad[0] > stride or pad[1] > stride or pad[2] > stride:
# Not implemented # Not implemented
continue continue
out1 = cuda.dnn.dnn_pool(x, (ws, ws, ws), out1 = pool_3d(x, (ws, ws, ws),
stride=(stride, stride, stride), st=(stride, stride, stride),
pad=pad, mode=mode) ignore_border=True,
out2 = pool3d2d(x, ds=(ws, ws, ws), padding=pad, mode=mode)
strides=(stride, stride, stride), out2 = pool3d2d(x, ds=(ws, ws, ws), strides=(stride, stride, stride),
pad=pad, pool_func=func) pad=pad,
pool_function=func)
f1 = theano.function([x], out1, mode=mode_with_gpu) f1 = theano.function([x], out1, mode=mode_with_gpu)
assert any([isinstance(node.op, cuda.dnn.GpuDnnPool) assert any([isinstance(node.op, cuda.dnn.GpuDnnPool)
for node in f1.maker.fgraph.apply_nodes]) for node in f1.maker.fgraph.apply_nodes])
...@@ -510,11 +506,17 @@ def test_pooling3d(): ...@@ -510,11 +506,17 @@ def test_pooling3d():
g_out = fg(data) g_out = fg(data)
# Compare again the CPU result # Compare again the CPU result
out = pool3d2d(x, (ws, ws, ws), out = pool_3d(x, (ws, ws, ws),
strides=(stride, stride, stride), padding=pad,
pad=pad, pool_func=func) ignore_border=True, mode=mode)
fc = theano.function([x], theano.grad(out.sum(), x), fc = theano.function([x], theano.grad(out.sum(), x),
mode=mode_without_gpu_ref) mode=mode_without_gpu_ref)
if mode == 'max':
assert any([isinstance(node.op, MaxPoolGrad)
for node in fc.maker.fgraph.toposort()])
else:
assert any([isinstance(node.op, AveragePoolGrad)
for node in fc.maker.fgraph.toposort()])
c_out = fc(data) c_out = fc(data)
utt.assert_allclose(c_out, g_out) utt.assert_allclose(c_out, g_out)
...@@ -523,6 +525,7 @@ def test_pooling_opt(): ...@@ -523,6 +525,7 @@ def test_pooling_opt():
if not cuda.dnn.dnn_available(): if not cuda.dnn.dnn_available():
raise SkipTest(cuda.dnn.dnn_available.msg) raise SkipTest(cuda.dnn.dnn_available.msg)
# 2D pooling
x = T.fmatrix() x = T.fmatrix()
f = theano.function( f = theano.function(
...@@ -535,6 +538,7 @@ def test_pooling_opt(): ...@@ -535,6 +538,7 @@ def test_pooling_opt():
f(numpy.zeros((10, 10), dtype='float32')) f(numpy.zeros((10, 10), dtype='float32'))
# gradient of 2D pooling
f = theano.function( f = theano.function(
[x], [x],
T.grad(pool_2d(x, ds=(2, 2), mode='average_inc_pad', T.grad(pool_2d(x, ds=(2, 2), mode='average_inc_pad',
...@@ -545,6 +549,7 @@ def test_pooling_opt(): ...@@ -545,6 +549,7 @@ def test_pooling_opt():
for n in f.maker.fgraph.toposort()]) for n in f.maker.fgraph.toposort()])
f(numpy.zeros((10, 10), dtype='float32')) f(numpy.zeros((10, 10), dtype='float32'))
# Test sum pooling # Test sum pooling
f = theano.function( f = theano.function(
[x], [x],
...@@ -557,6 +562,82 @@ def test_pooling_opt(): ...@@ -557,6 +562,82 @@ def test_pooling_opt():
data = numpy.random.rand(10, 10).astype('float32') data = numpy.random.rand(10, 10).astype('float32')
f(data) f(data)
# 3D pooling
x = T.ftensor3()
f = theano.function(
[x],
pool_3d(x, ds=(2, 2, 2), mode='average_inc_pad', ignore_border=True),
mode=mode_with_gpu)
assert any([isinstance(n.op, cuda.dnn.GpuDnnPool)
for n in f.maker.fgraph.toposort()])
f(numpy.zeros((10, 10, 10), dtype='float32'))
# gradient of 3D pooling
f = theano.function(
[x],
T.grad(pool_3d(x, ds=(2, 2, 2), mode='average_inc_pad',
ignore_border=True).sum(), x),
mode=mode_with_gpu.including("cudnn"))
assert any([isinstance(n.op, cuda.dnn.GpuDnnPoolGrad)
for n in f.maker.fgraph.toposort()])
f(numpy.zeros((10, 10, 10), dtype='float32'))
def test_pooling_opt_arbitrary_dimensions():
    """Check that pooling inputs with an arbitrary number of non-pooling
    dimensions are correctly reshaped (via pad_dims/unpad_dims) so the
    cuDNN pooling ops can run on them, and that GPU and CPU results agree.
    """
    if not cuda.dnn.dnn_available():
        raise SkipTest(cuda.dnn.dnn_available.msg)

    # 'average_exc_pad' is disabled for versions < 4004
    if cuda.dnn.version() < (4004, 4004):
        modes = ('max', 'average_inc_pad')
    else:
        modes = ('max', 'average_inc_pad', 'average_exc_pad')

    for n_non_pool_dims in (0, 1, 2, 3):
        for ws in ((2, 2), (3, 3, 3)):
            # create input shape: non-pooling dimensions
            # followed by 2 or 3 pooling dimensions
            shp = (2,) * n_non_pool_dims + (5,) * len(ws)
            data = numpy.random.normal(0, 1, shp).astype('float32')
            input = shared(data)

            for mode in modes:
                out_pool = Pool(ndim=len(ws), mode=mode, ignore_border=True)(input, ws)
                out_pool_grad = T.grad(T.sum(out_pool), wrt=input)
                out = [out_pool, out_pool_grad]

                # run on GPU: the cuDNN pool (and its grad) must appear
                # in the compiled graph for the opt to have worked
                fg = theano.function([], out, mode=mode_with_gpu)
                assert any(isinstance(node.op, cuda.dnn.GpuDnnPool)
                           for node in fg.maker.fgraph.toposort())
                assert any(isinstance(node.op, cuda.dnn.GpuDnnPoolGrad)
                           for node in fg.maker.fgraph.toposort())
                res_gpu = fg()

                # run on CPU as the reference implementation
                fc = theano.function([], out, mode=mode_without_gpu)
                assert any(isinstance(node.op, Pool)
                           for node in fc.maker.fgraph.toposort())
                if mode == 'max':
                    assert any(isinstance(node.op, MaxPoolGrad)
                               for node in fc.maker.fgraph.toposort())
                else:
                    assert any(isinstance(node.op, AveragePoolGrad)
                               for node in fc.maker.fgraph.toposort())
                # BUG FIX: the original called fg() again here, comparing
                # the GPU result with itself; call the CPU function instead.
                res_cpu = fc()

                # check for similarity
                utt.assert_allclose(res_gpu[0], res_cpu[0])
                utt.assert_allclose(res_gpu[1], res_cpu[1])
class test_DnnSoftMax(test_nnet.test_SoftMax): class test_DnnSoftMax(test_nnet.test_SoftMax):
gpu_op = dnn.GpuDnnSoftmax gpu_op = dnn.GpuDnnSoftmax
......
...@@ -7,6 +7,7 @@ Pool, DownsampleAvg, DownsampleSoftmax. ...@@ -7,6 +7,7 @@ Pool, DownsampleAvg, DownsampleSoftmax.
from __future__ import absolute_import, print_function, division from __future__ import absolute_import, print_function, division
# This file should move along with conv.py # This file should move along with conv.py
import warnings import warnings
import itertools
import numpy import numpy
from six.moves import xrange from six.moves import xrange
...@@ -84,67 +85,101 @@ def pool_2d(input, ds, ignore_border=None, st=None, padding=(0, 0), ...@@ -84,67 +85,101 @@ def pool_2d(input, ds, ignore_border=None, st=None, padding=(0, 0),
" Otherwise, the convolution will be executed on CPU.", " Otherwise, the convolution will be executed on CPU.",
stacklevel=2) stacklevel=2)
ignore_border = False ignore_border = False
if input.ndim == 4: op = Pool(ignore_border, ndim=2, mode=mode)
op = Pool(ignore_border, mode=mode) output = op(input, ds, st, padding)
output = op(input, ds, st, padding) return output
return output
# extract image dimensions
img_shape = input.shape[-2:]
# count the number of "leading" dimensions, store as dmatrix def pool_3d(input, ds, ignore_border=None, st=None, padding=(0, 0, 0),
batch_size = tensor.prod(input.shape[:-2]) mode='max'):
batch_size = tensor.shape_padright(batch_size, 1) """Downscale the input by a specified factor
# store as 4D tensor with shape: (batch_size,1,height,width) Takes as input a N-D tensor, where N >= 3. It downscales the input image by
new_shape = tensor.cast(tensor.join(0, batch_size, the specified factor, by keeping only the maximum value of non-overlapping
tensor.as_tensor([1]), patches of size (ds[0],ds[1],ds[2])
img_shape), 'int64')
input_4D = tensor.reshape(input, new_shape, ndim=4)
# downsample mini-batch of images Parameters
op = Pool(ignore_border, mode=mode) ----------
output = op(input_4D, ds, st, padding) input : N-D theano tensor of input images
Input images. Max pooling will be done over the 3 last dimensions.
ds : tuple of length 3 or theano vector of ints of size 3
Factor by which to downscale (vertical ds, horizontal ds, depth ds).
(2,2,2) will halve the image in each dimension.
ignore_border : bool (default None, will print a warning and set to False)
When True, (5,5,5) input with ds=(2,2,2) will generate a (2,2,2) output.
(3,3,3) otherwise.
st : tuple of three ints or theano vector of ints of size 3
Stride size, which is the number of shifts over rows/cols/slices to get
the next pool region. If st is None, it is considered equal to ds
(no overlap on pooling regions).
padding : tuple of two ints or theano vector of ints of size 3
(pad_h, pad_w, pad_d), pad zeros to extend beyond six borders of the
images, pad_h is the size of the top and bottom margins,
pad_w is the size of the left and right margins, and pad_d is the size
of the front and back margins
mode : {'max', 'sum', 'average_inc_pad', 'average_exc_pad'}
Operation executed on each window. `max` and `sum` always exclude
the padding in the computation. `average` gives you the choice to
include or exclude it.
# restore to original shape """
outshp = tensor.join(0, input.shape[:-2], output.shape[-2:]) if input.ndim < 3:
return tensor.reshape(output, outshp, ndim=input.ndim) raise NotImplementedError('pool_3d requires a dimension >= 3')
if ignore_border is None:
warnings.warn(
"pool_3d() will have the parameter ignore_border"
" default value changed to True (currently"
" False). To have consistent behavior with all Theano"
" version, explicitly add the parameter ignore_border=True."
" On the GPU, using ignore_border=True is needed to use cuDNN."
" When using ignore_border=False and not using cuDNN, the only"
" GPU combination supported is when"
" `ds == st and padding == (0, 0, 0) and mode == 'max'`."
" Otherwise, the convolution will be executed on CPU.",
stacklevel=2)
ignore_border = False
op = Pool(ignore_border, ndim=3, mode=mode)
output = op(input, ds, st, padding)
return output
class Pool(OpenMPOp): class Pool(OpenMPOp):
""" """
For N-dimensional tensors, consider that the last two dimensions span This Op downsamples the last N dimensions of the input by taking the max,
images. This Op downsamples these images by taking the max, sum or average sum or average over different patches.
over different patch.
The constructor takes the max, sum or average or different input patches.
Parameters Parameters
---------- ----------
ds : list or tuple of two ints ds : list or tuple of N ints
Downsample factor over rows and column. Downsample factor over rows, columns etc.
ds indicates the pool region size. ds indicates the size of the pooling region.
ignore_border : bool ignore_border : bool
If ds doesn't divide imgshape, do we include an extra row/col If ds doesn't divide imgshape, do we include an extra row/col/slice
of partial downsampling (False) or ignore it (True). of partial downsampling (False) or ignore it (True).
st : list or tuple of two ints or None st : list or tuple of N ints or None
Stride size, which is the number of shifts over rows/cols to get the Stride size, which is the number of shifts over rows/cols/slices to get the
next pool region. If st is None, it is considered equal to ds next pool region. If st is None, it is considered equal to ds
(no overlap on pooling regions). (no overlap on pooling regions).
padding: tuple of two ints padding : tuple of N ints or None
(pad_h, pad_w), pad zeros to extend beyond four borders of the images, For each downsampling dimension, this specifies the number of zeros to
pad_h is the size of the top and bottom margins, and pad_w is the size add as padding on both sides. For 2D and (pad_h, pad_w), pad_h specifies the
of the left and right margins. size of the top and bottom margins, pad_w specifies the size of the left and
right margins. No padding is added if padding is None.
mode : {'max', 'sum', 'average_inc_pad', 'average_exc_pad'} mode : {'max', 'sum', 'average_inc_pad', 'average_exc_pad'}
('average_inc_pad' excludes the padding from the count, ('average_inc_pad' excludes the padding from the count,
'average_exc_pad' include it) 'average_exc_pad' include it)
ndim : int
The number of pooling dimensions N.
If this number is not specified, the default is set to the
(input.ndim - 2), assuming that the first two dimensions of the input
are non-pooling dimensions.
""" """
__props__ = ('ignore_border', 'mode') __props__ = ('ndim', 'ignore_border', 'mode')
@staticmethod @staticmethod
def out_shape(imgshape, ds, ignore_border=False, st=None, padding=(0, 0)): def out_shape(imgshape, ds, ignore_border=False, st=None, padding=None, ndim=None):
""" """
Return the shape of the output from this op, for input of given Return the shape of the output from this op, for input of given
shape and flags. shape and flags.
...@@ -152,88 +187,79 @@ class Pool(OpenMPOp): ...@@ -152,88 +187,79 @@ class Pool(OpenMPOp):
Parameters Parameters
---------- ----------
imgshape : tuple, list, or similar of integer or scalar Theano variable imgshape : tuple, list, or similar of integer or scalar Theano variable
The shape of a tensor of images. The last two elements are The shape of a tensor of images. The last N elements are
interpreted as the number of rows, and the number of cols. interpreted as the number of rows, and the number of cols.
ds : list or tuple of two ints ds : list or tuple of N ints
Downsample factor over rows and columns this parameter indicates Downsample factor over rows and column.
the size of the pooling region. ds indicates the pool region size.
st : list or tuple of two ints
The stride size. This is the distance between the pooling regions.
If it's set to None, it equals ds.
ignore_border : bool ignore_border : bool
If ds doesn't divide imgshape, do we include an extra row/col of If ds doesn't divide imgshape, do we include an extra row/col/slice
partial downsampling (False) or ignore it (True). of partial downsampling (False) or ignore it (True).
padding : tuple of two ints st : list or tuple of N ints or None
(pad_h, pad_w), pad zeros to extend beyond four borders Stride size, which is the number of shifts over rows/cols/slices to get the
of the images, pad_h is the size of the top and bottom margins, next pool region. If st is None, it is considered equal to ds
and pad_w is the size of the left and right margins. (no overlap on pooling regions).
padding : tuple of N ints or None
For each downsampling dimension, this specifies the number of zeros to
add as padding on both sides. For 2D and (pad_h, pad_w), pad_h specifies the
size of the top and bottom margins, pad_w specifies the size of the left and
right margins. No padding is added if padding is None.
ndim : int
The number of pooling dimensions N.
If this number is not specified, the default is set to the
(input.ndim - 2), assuming that the first two dimensions of the input
are non-pooling dimensions.
Returns Returns
------- -------
list list
The shape of the output from this op, for input of given shape. The shape of the output from this op, for input of given shape.
This will have the same length as imgshape, but with last two This will have the same length as imgshape, but with last N
elements reduced as per the downsampling & ignore_border flags. elements reduced as per the downsampling & ignore_border flags.
""" """
if len(imgshape) < 2: if ndim is None:
raise TypeError('imgshape must have at least two elements ' ndim = len(imgshape) - 2
'(rows, cols)')
if len(imgshape) < ndim:
raise TypeError('imgshape must have at least {} dimensions'.format(ndim))
if st is None: if st is None:
st = ds st = ds
r, c = imgshape[-2:] if padding is None:
r = tensor.extract_constant(r) padding = (0,) * ndim
c = tensor.extract_constant(c) patch_shape = tuple(tensor.extract_constant(imgshape[-ndim + i]) + padding[i] * 2
if padding[0]: for i in xrange(ndim))
r = r + padding[0] * 2
if padding[1]: def compute_out(v, downsample, stride):
c = c + padding[1] * 2 if ignore_border:
if downsample == stride:
if ignore_border: return v // stride
if ds[0] == st[0]:
nr = r // st[0]
else:
out_r = (r - ds[0]) // st[0] + 1
if isinstance(r, theano.Variable):
nr = tensor.maximum(out_r, 0)
else: else:
nr = numpy.maximum(out_r, 0) out = (v - downsample) // stride + 1
if isinstance(out, theano.Variable):
if ds[1] == st[1]: return tensor.maximum(out, 0)
nc = c // st[1] else:
return numpy.maximum(out, 0)
else: else:
out_c = (c - ds[1]) // st[1] + 1 if isinstance(v, theano.Variable):
if isinstance(c, theano.Variable): return tensor.switch(tensor.ge(stride, downsample),
nc = tensor.maximum(out_c, 0) (v - 1) // stride + 1,
tensor.maximum(0, (v - 1 - downsample) //
stride + 1) + 1)
elif stride >= downsample:
return (v - 1) // stride + 1
else: else:
nc = numpy.maximum(out_c, 0) return max(0, (v - 1 - downsample + stride) // stride) + 1
else:
if isinstance(r, theano.Variable): out_shape = [compute_out(patch_shape[i], ds[i], st[i]) for i in xrange(ndim)]
nr = tensor.switch(tensor.ge(st[0], ds[0]),
(r - 1) // st[0] + 1,
tensor.maximum(0, (r - 1 - ds[0]) //
st[0] + 1) + 1)
elif st[0] >= ds[0]:
nr = (r - 1) // st[0] + 1
else:
nr = max(0, (r - 1 - ds[0] + st[0]) // st[0]) + 1
if isinstance(c, theano.Variable):
nc = tensor.switch(tensor.ge(st[1], ds[1]),
(c - 1) // st[1] + 1,
tensor.maximum(0, (c - 1 - ds[1]) //
st[1] + 1) + 1)
elif st[1] >= ds[1]:
nc = (c - 1) // st[1] + 1
else:
nc = max(0, (c - 1 - ds[1] + st[1]) // st[1]) + 1
rval = list(imgshape[:-2]) + [nr, nc] rval = list(imgshape[:-ndim]) + out_shape
return rval return rval
def __init__(self, ignore_border=False, mode='max', openmp=None): def __init__(self, ignore_border=False, mode='max', ndim=None, openmp=None):
super(Pool, self).__init__(openmp=openmp) super(Pool, self).__init__(openmp=openmp)
self.ndim = ndim
self.ignore_border = ignore_border self.ignore_border = ignore_border
if mode not in ['max', 'average_inc_pad', 'average_exc_pad', 'sum']: if mode not in ['max', 'average_inc_pad', 'average_exc_pad', 'sum']:
raise ValueError( raise ValueError(
...@@ -244,6 +270,7 @@ class Pool(OpenMPOp): ...@@ -244,6 +270,7 @@ class Pool(OpenMPOp):
def prepare_node(self, node, storage_map, compute_map): def prepare_node(self, node, storage_map, compute_map):
if len(node.inputs) == 1: if len(node.inputs) == 1:
# Old interface # Old interface
self.ndim = len(node.op.ds)
self.mode = node.op.mode self.mode = node.op.mode
ws = theano.tensor.constant(node.op.ds) ws = theano.tensor.constant(node.op.ds)
st = theano.tensor.constant(node.op.st) st = theano.tensor.constant(node.op.st)
...@@ -270,17 +297,22 @@ class Pool(OpenMPOp): ...@@ -270,17 +297,22 @@ class Pool(OpenMPOp):
storage_map[pad] = [None] storage_map[pad] = [None]
compute_map[pad] = [False] compute_map[pad] = [False]
def make_node(self, x, ws, stride=None, pad=(0, 0)): def make_node(self, x, ws, stride=None, pad=None):
# TODO: consider restricting the dtype? # TODO: consider restricting the dtype?
x = tensor.as_tensor_variable(x) x = tensor.as_tensor_variable(x)
nd = self.ndim
if nd is None:
nd = x.type.ndim - 2
if stride is None: if stride is None:
stride = ws stride = ws
if isinstance(pad, (tuple, list)): if pad is None:
if tuple(pad) != (0, 0) and not self.ignore_border: pad = (0,) * nd
elif isinstance(pad, (tuple, list)):
if max(pad) != 0 and not self.ignore_border:
raise NotImplementedError( raise NotImplementedError(
'padding works only with ignore_border=True') 'padding works only with ignore_border=True')
if isinstance(ws, (tuple, list)): if isinstance(ws, (tuple, list)):
if pad[0] >= ws[0] or pad[1] >= ws[1]: if any(pad[i] >= ws[i] for i in range(nd)):
raise NotImplementedError( raise NotImplementedError(
'padding_h and padding_w must be smaller than strides') 'padding_h and padding_w must be smaller than strides')
ws = tensor.as_tensor_variable(ws) ws = tensor.as_tensor_variable(ws)
...@@ -289,7 +321,7 @@ class Pool(OpenMPOp): ...@@ -289,7 +321,7 @@ class Pool(OpenMPOp):
assert ws.ndim == 1 assert ws.ndim == 1
assert stride.ndim == 1 assert stride.ndim == 1
assert pad.ndim == 1 assert pad.ndim == 1
if x.type.ndim != 4: if x.type.ndim < nd:
raise TypeError() raise TypeError()
if not ws.dtype.startswith('int'): if not ws.dtype.startswith('int'):
raise TypeError('Pool downsample parameters must be ints.') raise TypeError('Pool downsample parameters must be ints.')
...@@ -298,42 +330,36 @@ class Pool(OpenMPOp): ...@@ -298,42 +330,36 @@ class Pool(OpenMPOp):
if not pad.dtype.startswith('int'): if not pad.dtype.startswith('int'):
raise TypeError('Padding parameters must be ints.') raise TypeError('Padding parameters must be ints.')
# If the input shape are broadcastable we can have 0 in the output shape # If the input shape are broadcastable we can have 0 in the output shape
broad = x.broadcastable[:2] + (False, False) broad = x.broadcastable[:-nd] + (False,) * nd
out = tensor.TensorType(x.dtype, broad) out = tensor.TensorType(x.dtype, broad)
return gof.Apply(self, [x, ws, stride, pad], [out()]) return gof.Apply(self, [x, ws, stride, pad], [out()])
def perform(self, node, inp, out): def perform(self, node, inp, out):
x, ws, stride, pad = inp x, ws, stride, pad = inp
z, = out z, = out
assert ws.shape == stride.shape == pad.shape == (2,) nd = self.ndim
if len(x.shape) != 4: if nd is None:
nd = len(x.shape) - 2
assert ws.shape == stride.shape == pad.shape == (nd,)
if len(x.shape) < nd:
raise NotImplementedError( raise NotImplementedError(
'Pool requires 4D input for now') 'Pool requires input with {} or more dimensions'.format(nd))
z_shape = self.out_shape(x.shape, ws, self.ignore_border, stride, pad) z_shape = self.out_shape(x.shape, ws, self.ignore_border, stride, pad, nd)
if not self.ignore_border: if not self.ignore_border:
assert z_shape[2] > 0 assert all(z > 0 for z in z_shape[-nd:])
assert z_shape[3] > 0
if (z[0] is None) or (z[0].shape != z_shape): if (z[0] is None) or (z[0].shape != z_shape):
z[0] = numpy.empty(z_shape, dtype=x.dtype) z[0] = numpy.empty(z_shape, dtype=x.dtype)
zz = z[0] zz = z[0]
# number of pooling output rows # size of pooling output
pr = zz.shape[-2] pool_out_shp = zz.shape[-nd:]
# number of pooling output cols img_shp = tuple(x.shape[-nd + i] + 2 * pad[i] for i in xrange(nd))
pc = zz.shape[-1]
ws0, ws1 = ws
st0, st1 = stride
pad_h = pad[0]
pad_w = pad[1]
img_rows = x.shape[-2] + 2 * pad_h
img_cols = x.shape[-1] + 2 * pad_w
inc_pad = self.mode == 'average_inc_pad' inc_pad = self.mode == 'average_inc_pad'
# pad the image # pad the image
if (pad_h, pad_w) != (0, 0): if max(pad) != 0:
y = numpy.zeros( y = numpy.zeros(x.shape[:-nd] + img_shp, dtype=x.dtype)
(x.shape[0], x.shape[1], img_rows, img_cols), y[(slice(None),) * (len(x.shape) - nd) +
dtype=x.dtype) tuple(slice(pad[i], img_shp[i] - pad[i]) for i in xrange(nd))] = x
y[:, :, pad_h:(img_rows - pad_h), pad_w:(img_cols - pad_w)] = x
else: else:
y = x y = x
func = numpy.max func = numpy.max
...@@ -342,28 +368,30 @@ class Pool(OpenMPOp): ...@@ -342,28 +368,30 @@ class Pool(OpenMPOp):
elif self.mode != 'max': elif self.mode != 'max':
func = numpy.average func = numpy.average
for n in xrange(x.shape[0]): # precompute the region boundaries for each dimension
for k in xrange(x.shape[1]): region_slices = [[] for i in xrange(nd)]
for r in xrange(pr): for i in xrange(nd):
row_st = r * st0 for j in xrange(pool_out_shp[i]):
row_end = builtins.min(row_st + ws0, img_rows) start = j * stride[i]
if not inc_pad: end = builtins.min(start + ws[i], img_shp[i])
row_st = builtins.max(row_st, pad_h) if not inc_pad:
row_end = builtins.min(row_end, x.shape[-2] + pad_h) start = builtins.max(start, pad[i])
for c in xrange(pc): end = builtins.min(end, img_shp[i] - pad[i])
col_st = c * st1 region_slices[i].append(slice(start, end))
col_end = builtins.min(col_st + ws1, img_cols)
if not inc_pad: # iterate over non-pooling dimensions
col_st = builtins.max(col_st, pad_w) for k in numpy.ndindex(*x.shape[:-nd]):
col_end = builtins.min(col_end, zzk = zz[k]
x.shape[-1] + pad_w) yk = y[k]
zz[n, k, r, c] = func(y[ # iterate over pooling regions
n, k, row_st:row_end, col_st:col_end]) for r in numpy.ndindex(*pool_out_shp):
zzk[r] = func(
yk[[region_slices[i][r[i]] for i in xrange(nd)]])
def infer_shape(self, node, in_shapes): def infer_shape(self, node, in_shapes):
ws, stride, pad = [node.inputs[1], node.inputs[2], node.inputs[3]] ws, stride, pad = [node.inputs[1], node.inputs[2], node.inputs[3]]
shp = self.out_shape(in_shapes[0], ws, self.ignore_border, stride, shp = self.out_shape(in_shapes[0], ws, self.ignore_border, stride,
pad) pad, self.ndim)
return [shp] return [shp]
def grad(self, inp, grads): def grad(self, inp, grads):
...@@ -372,10 +400,12 @@ class Pool(OpenMPOp): ...@@ -372,10 +400,12 @@ class Pool(OpenMPOp):
disc = [DisconnectedType()() for i in inp[1:]] disc = [DisconnectedType()() for i in inp[1:]]
if self.mode == 'max': if self.mode == 'max':
maxout = self(x, ws, stride, pad) maxout = self(x, ws, stride, pad)
return [MaxPoolGrad(ignore_border=self.ignore_border)( return [MaxPoolGrad(ndim=self.ndim,
ignore_border=self.ignore_border)(
x, maxout, gz, ws=ws, stride=stride, pad=pad)] + disc x, maxout, gz, ws=ws, stride=stride, pad=pad)] + disc
else: else:
return [AveragePoolGrad(ignore_border=self.ignore_border, return [AveragePoolGrad(ndim=self.ndim,
ignore_border=self.ignore_border,
mode=self.mode)( mode=self.mode)(
x, gz, ws=ws, stride=stride, pad=pad)] + disc x, gz, ws=ws, stride=stride, pad=pad)] + disc
...@@ -392,292 +422,386 @@ class Pool(OpenMPOp): ...@@ -392,292 +422,386 @@ class Pool(OpenMPOp):
raise theano.gof.utils.MethodNotDefined() raise theano.gof.utils.MethodNotDefined()
x, ws, stride, pad = inp x, ws, stride, pad = inp
z, = out z, = out
nd = self.ndim
if nd is None:
nd = node.inputs[0].ndim - 2
total_ndim = node.inputs[0].ndim
non_pool_ndim = total_ndim - nd
fail = sub['fail'] fail = sub['fail']
ignore_border = int(self.ignore_border) ignore_border = int(self.ignore_border)
if self.openmp: if self.openmp:
omp_parallel = '#pragma omp parallel for private(r_st, r_end, c_st, c_end, collector) schedule(static)' # run in parallel over each pooling block
omp_parallel = '#pragma omp parallel for private(r_st, r_end, r_idx, i_idx, o_idx, collector) schedule(static)'
else: else:
omp_parallel = '' omp_parallel = ''
ccode = """ ccode = """
int ws0, ws1, st0, st1, pd0, pd1;
int typenum = PyArray_ObjectType((PyObject*)%(x)s, 0); int typenum = PyArray_ObjectType((PyObject*)%(x)s, 0);
int z_r, z_c; // shape of the output if(PyArray_NDIM(%(x)s)!=%(total_ndim)s)
int r, c; // shape of the padded_input
if(PyArray_DIM(%(ws)s, 0)!=2)
{ {
PyErr_SetString(PyExc_ValueError, "ws must be a vector of size 2"); PyErr_SetString(PyExc_ValueError, "x must be a %(total_ndim)sD ndarray");
%(fail)s; %(fail)s;
} }
if(PyArray_DIM(%(stride)s, 0)!=2) if(PyArray_DIM(%(ws)s, 0)!=%(nd)s)
{ {
PyErr_SetString(PyExc_ValueError, "stride must be a vector of size 2"); PyErr_SetString(PyExc_ValueError, "ws must be a vector of size %(nd)s");
%(fail)s; %(fail)s;
} }
if(PyArray_DIM(%(pad)s, 0)!=2) if(PyArray_DIM(%(stride)s, 0)!=%(nd)s)
{ {
PyErr_SetString(PyExc_ValueError, "pad must be a vector of size 2"); PyErr_SetString(PyExc_ValueError, "stride must be a vector of size %(nd)s");
%(fail)s; %(fail)s;
} }
// Getting ws, stride and pad if(PyArray_DIM(%(pad)s, 0)!=%(nd)s)
ws0 = *((npy_intp*)PyArray_GETPTR1(%(ws)s, 0));
ws1 = *((npy_intp*)PyArray_GETPTR1(%(ws)s, 1));
st0 = *((npy_intp*)PyArray_GETPTR1(%(stride)s, 0));
st1 = *((npy_intp*)PyArray_GETPTR1(%(stride)s, 1));
pd0 = *((npy_intp*)PyArray_GETPTR1(%(pad)s, 0));
pd1 = *((npy_intp*)PyArray_GETPTR1(%(pad)s, 1));
if(PyArray_NDIM(%(x)s)!=4)
{ {
PyErr_SetString(PyExc_ValueError, "x must be a 4d ndarray"); PyErr_SetString(PyExc_ValueError, "pad must be a vector of size %(nd)s");
%(fail)s;
}
int z[%(nd)s]; // shape of the output
int r[%(nd)s]; // shape of the padded_input
int ws[%(nd)s];
int st[%(nd)s];
int pd[%(nd)s];
int nonzero_padding;
nonzero_padding = 0;
for (int i=0; i<%(nd)s; i++)
{
ws[i] = *((npy_intp*)PyArray_GETPTR1(%(ws)s, i));
st[i] = *((npy_intp*)PyArray_GETPTR1(%(stride)s, i));
pd[i] = *((npy_intp*)PyArray_GETPTR1(%(pad)s, i));
r[i] = PyArray_DIMS(%(x)s)[%(non_pool_ndim)s + i] + 2 * pd[i];
if (pd[i]>0)
nonzero_padding = 1;
}
if (!%(ignore_border)s && nonzero_padding)
{
PyErr_SetString(PyExc_ValueError,
"padding must be zero when ignore border is False");
%(fail)s; %(fail)s;
} }
r = PyArray_DIMS(%(x)s)[2];
c = PyArray_DIMS(%(x)s)[3];
r += pd0 * 2;
c += pd1 * 2;
if (pd0 != 0 && pd1 != 0 && !%(ignore_border)s)
{
PyErr_SetString(PyExc_ValueError,
"padding must be (0,0) when ignore border is False");
%(fail)s;
}
if (%(ignore_border)s) if (%(ignore_border)s)
{ {
// '/' in C is different from '/' in python for (int i=0; i<%(nd)s; i++)
if (r - ws0 < 0)
{ {
z_r = 0; // '/' in C is different from '/' in python
} if (r[i] - ws[i] < 0)
else {
{ z[i] = 0;
z_r = (r - ws0) / st0 + 1; }
} else
if (c - ws1 < 0) {
{ z[i] = (r[i] - ws[i]) / st[i] + 1;
z_c = 0; }
}
else
{
z_c = (c - ws1) / st1 + 1;
} }
} }
else else
{ {
// decide how many rows the output has for (int i=0; i<%(nd)s; i++)
if (st0 >= ws0)
{ {
z_r = (r - 1) / st0 + 1; // decide how many rows/cols the output has
} if (st[i] >= ws[i])
else {
{ z[i] = (r[i] - 1) / st[i] + 1;
z_r = std::max(0, (r - 1 - ws0 + st0) / st0) + 1; }
else
{
z[i] = std::max(0, (r[i] - 1 - ws[i] + st[i]) / st[i]) + 1;
}
assert(z[i] > 0);
} }
// decide how many columns the output has }
if (st1 >= ws1) // memory allocation of z if necessary
int mem_nec;
mem_nec = 0;
if ((!%(z)s) || *PyArray_DIMS(%(z)s)!=%(total_ndim)s)
{
mem_nec = 1;
}
if (!mem_nec)
{
for (int i=0; i<%(non_pool_ndim)s; i++)
{ {
z_c = (c - 1) / st1 + 1; if (PyArray_DIMS(%(z)s)[i] != PyArray_DIMS(%(x)s)[i])
{
mem_nec = 1;
break;
}
} }
else }
if (!mem_nec)
{
for (int i=0; i<%(nd)s; i++)
{ {
z_c = std::max(0, (c - 1 - ws1 + st0) / st1) + 1; if (PyArray_DIMS(%(z)s)[%(non_pool_ndim)s + i] != z[i])
{
mem_nec = 1;
break;
}
} }
assert(z_r > 0);
assert(z_c > 0);
} }
// memory allocation of z if necessary if (mem_nec)
if ((!%(z)s)
|| *PyArray_DIMS(%(z)s)!=4
||(PyArray_DIMS(%(z)s)[0] != PyArray_DIMS(%(x)s)[0])
||(PyArray_DIMS(%(z)s)[1] != PyArray_DIMS(%(x)s)[1])
||(PyArray_DIMS(%(z)s)[2] != z_r)
||(PyArray_DIMS(%(z)s)[3] != z_c)
)
{ {
if (%(z)s) Py_XDECREF(%(z)s); if (%(z)s) Py_XDECREF(%(z)s);
npy_intp dims[4] = {0,0,0,0}; npy_intp dims[%(total_ndim)s];
dims[0]=PyArray_DIMS(%(x)s)[0]; for (int i=0; i<%(non_pool_ndim)s; i++)
dims[1]=PyArray_DIMS(%(x)s)[1]; {
dims[2]=z_r; dims[i] = PyArray_DIMS(%(x)s)[i];
dims[3]=z_c; }
for (int i=0; i<%(nd)s; i++)
{
dims[%(non_pool_ndim)s + i] = z[i];
}
//TODO: zeros not necessary //TODO: zeros not necessary
%(z)s = (PyArrayObject*) PyArray_ZEROS(4, dims, typenum,0); %(z)s = (PyArrayObject*) PyArray_ZEROS(%(total_ndim)s, dims, typenum,0);
}
// initialize temp var for the value in a region
dtype_%(x)s collector;
int z_prod;
// do not run if any z[i] is zero
z_prod = 1;
for (int i=0; i<%(nd)s; i++)
{
z_prod *= z[i];
} }
// used for indexing a pool region inside the input if (z_prod)
dtype_%(x)s collector; // temp var for the value in a region
if (z_r && z_c)
{ {
int r_st, r_end, c_st, c_end; // will be used to hold start and end index of a region
int r_st[%(nd)s];
int r_end[%(nd)s];
// index for iterating over the pooling regions
int r_idx[%(nd)s];
// placeholder for PyArray indexing (output)
long int o_idx[%(total_ndim)s];
// placeholder for PyArray indexing (input)
long int i_idx[%(total_ndim)s];
// loop over non-pooling dimensions
int non_pooling_prod = 1;
for (int i=0; i<%(non_pool_ndim)s; i++)
{
non_pooling_prod *= PyArray_DIMS(%(x)s)[i];
}
%(omp_parallel)s %(omp_parallel)s
for(int t = 0; t < PyArray_DIMS(%(x)s)[0] * PyArray_DIMS(%(x)s)[1]; t++){ // first loop over non-pooling dimensions
int b = t %% PyArray_DIMS(%(x)s)[0]; for (int t=0; t<non_pooling_prod; t++)
int k = t / PyArray_DIMS(%(x)s)[0]; {
for(int i=0; i < z_r; i++){ // compute the non-pooling index in each dimension
r_st = i * st0; if (%(non_pool_ndim)s!=0)
r_end = r_st + ws0; {
o_idx[0] = t;
i_idx[0] = t;
for (int i=1; i<%(non_pool_ndim)s; i++)
{
o_idx[i] = o_idx[i - 1] / PyArray_DIMS(%(x)s)[i - 1];
o_idx[i - 1] = o_idx[i - 1] %% PyArray_DIMS(%(x)s)[i - 1];
i_idx[i] = o_idx[i];
i_idx[i - 1] = o_idx[i - 1];
}
}
// then loop over each region in each pooling dimension
"""
for i in xrange(nd):
ccode += """
for (r_idx[%(i)s]=0; r_idx[%(i)s] < z[%(i)s]; r_idx[%(i)s]++) {
r_st[%(i)s] = r_idx[%(i)s] * st[%(i)s];
r_end[%(i)s] = r_st[%(i)s] + ws[%(i)s];
// skip the padding // skip the padding
r_st = r_st < pd0 ? pd0 : r_st; r_st[%(i)s] = r_st[%(i)s] < pd[%(i)s] ? pd[%(i)s] : r_st[%(i)s];
r_end = r_end > (r - pd0) ? r - pd0 : r_end; r_end[%(i)s] = r_end[%(i)s] > (r[%(i)s] - pd[%(i)s]) ? r[%(i)s] - pd[%(i)s] : r_end[%(i)s];
// from padded_img space to img space // from padded_img space to img space
r_st -= pd0; r_st[%(i)s] -= pd[%(i)s];
r_end -= pd0; r_end[%(i)s] -= pd[%(i)s];
// handle the case where no padding, ignore border is True // handle the case where no padding, ignore border is True
if (%(ignore_border)s) if (%(ignore_border)s)
{ {
r_end = r_end > r ? r : r_end; r_end[%(i)s] = r_end[%(i)s] > r[%(i)s] ? r[%(i)s] : r_end[%(i)s];
} }
for(int j=0; j<z_c; j++){ // use the index to find the correct position in the output
c_st = j * st1; o_idx[%(non_pool_ndim)s + %(i)s] = r_idx[%(i)s];
c_end = c_st + ws1; """ % dict(i=i, ignore_border=ignore_border, non_pool_ndim=non_pool_ndim)
// skip the padding
c_st = c_st < pd1 ? pd1 : c_st; ccode += """
c_end = c_end > (c - pd1) ? c - pd1 : c_end; // get a pointer to the correct position in the output
dtype_%(z)s * z = ( dtype_%(z)s * z;
(dtype_%(z)s*)(PyArray_GETPTR4(%(z)s, b, k, i, j))); if (%(total_ndim)s == 4)
// change coordinates from padding_img space into img space z = ((dtype_%(z)s*)(PyArray_GETPTR4(%(z)s, o_idx[0], o_idx[1], o_idx[2], o_idx[3])));
c_st -= pd1; else
c_end -= pd1; z = ((dtype_%(z)s*)(PyArray_GetPtr(%(z)s, o_idx)));
// handle the case where no padding, ignore border is True
if (%(ignore_border)s)
{
c_end = c_end > c ? c : c_end;
}
""" """
if self.mode == 'max': if self.mode == 'max':
for i in xrange(nd):
ccode += """
// set the first index of dimension %(i)s
i_idx[%(non_pool_ndim)s + %(i)s] = r_st[%(i)s];
""" % dict(i=i, non_pool_ndim=non_pool_ndim)
ccode += """ ccode += """
// use the first element as the initial value of collector // use the first element as the initial value of collector
collector = ((dtype_%(x)s*)(PyArray_GETPTR4(%(x)s,b,k,r_st,c_st)))[0]; if (%(total_ndim)s == 4)
// go through the pooled region in the unpadded input collector = ((dtype_%(x)s*)(PyArray_GETPTR4(%(x)s,i_idx[0],i_idx[1],i_idx[2],i_idx[3])))[0];
for(int m=r_st; m<r_end; m++) else
{ collector = ((dtype_%(x)s*)(PyArray_GetPtr(%(x)s,i_idx)))[0];
for(int n=c_st; n<c_end; n++) """
{ for i in xrange(nd):
dtype_%(x)s a = ((dtype_%(x)s*)(PyArray_GETPTR4(%(x)s,b,k,m,n)))[0]; ccode += """
collector = (a > collector) ? a : collector; // go through the pooled region in the unpadded input
} for(int m%(i)s=r_st[%(i)s]; m%(i)s<r_end[%(i)s]; m%(i)s++)
} {
z[0] = collector; i_idx[%(non_pool_ndim)s + %(i)s] = m%(i)s;
""" % dict(i=i, non_pool_ndim=non_pool_ndim)
ccode += """
// update maximum
dtype_%(x)s a;
if (%(total_ndim)s == 4)
a = ((dtype_%(x)s*)(PyArray_GETPTR4(%(x)s,i_idx[0],i_idx[1],i_idx[2],i_idx[3])))[0];
else
a = ((dtype_%(x)s*)(PyArray_GetPtr(%(x)s,i_idx)))[0];
collector = (a > collector) ? a : collector;
"""
for i in xrange(nd):
ccode += """
} // for loop over region
"""
ccode += """
z[0] = collector;
""" """
elif self.mode in ('sum', 'average_exc_pad', 'average_inc_pad'): elif self.mode in ('sum', 'average_exc_pad', 'average_inc_pad'):
ccode += """ ccode += """
// initialize the sum at zero // initialize the sum at zero
collector = ((dtype_%(x)s)(0)); collector = ((dtype_%(x)s)(0));
// go through the pooled region in the unpadded input
for(int m=r_st; m<r_end; m++)
{
for(int n=c_st; n<c_end; n++)
{
dtype_%(x)s a = ((dtype_%(x)s*)(PyArray_GETPTR4(%(x)s,b,k,m,n)))[0];
collector += a;
}
}
""" """
for i in xrange(nd):
ccode += """
// go through the pooled region in the unpadded input
for(int m%(i)s=r_st[%(i)s]; m%(i)s<r_end[%(i)s]; m%(i)s++)
{
i_idx[%(non_pool_ndim)s + %(i)s] = m%(i)s;
""" % dict(i=i, non_pool_ndim=non_pool_ndim)
ccode += """
// update sum
dtype_%(x)s a;
if (%(total_ndim)s == 4)
a = ((dtype_%(x)s*)(PyArray_GETPTR4(%(x)s,i_idx[0],i_idx[1],i_idx[2],i_idx[3])))[0];
else
a = ((dtype_%(x)s*)(PyArray_GetPtr(%(x)s,i_idx)))[0];
collector += a;
"""
for i in xrange(nd):
ccode += """
} // for loop over region
"""
if self.mode == "sum": if self.mode == "sum":
ccode += """ ccode += """
z[0] = collector; z[0] = collector;
""" """
elif self.mode == 'average_inc_pad' and self.ignore_border: elif self.mode == 'average_inc_pad' and self.ignore_border:
# region size = product over all pooling dimensions
region_size = ' * '.join('ws[%d]' % i for i in xrange(nd))
ccode += """ ccode += """
z[0] = collector / (ws0 * ws1); z[0] = collector / (%(region_size)s);
""" """ % dict(region_size=region_size)
else: else:
# region size = number elements of in this region
region_size = ' * '.join('(r_end[%d]-r_st[%d])' % (i, i) for i in xrange(nd))
ccode += """ ccode += """
z[0] = collector / ((r_end-r_st)*(c_end-c_st)); z[0] = collector / (%(region_size)s);
""" """ % dict(region_size=region_size)
for i in xrange(nd):
ccode += """
} // loop over pooling dimension
"""
ccode += """ ccode += """
} } // for loop over non-pooling dimensions
} } // if z_prod
}
}
""" """
return ccode % locals() return ccode % locals()
def c_code_cache_version(self): def c_code_cache_version(self):
return (0, 6, 8, 6, self.openmp) return (0, 6, 8, 7, self.openmp)
class PoolGrad(OpenMPOp): class PoolGrad(OpenMPOp):
__props__ = ('ignore_border', 'mode') __props__ = ('ndim', 'ignore_border', 'mode')
@staticmethod @staticmethod
def out_shape(imgshape, ds, ignore_border=False, st=None, padding=(0, 0)): def out_shape(imgshape, ds, ignore_border=False, st=None, padding=None, ndim=None):
"""Return the shape of the output from this op, for input of given """Return the shape of the output from this op, for input of given
shape and flags. shape and flags.
Parameters Parameters
---------- ----------
imgshape : tuple of integers or scalar Theano variables imgshape : tuple of integers or scalar Theano variables
the shape of a tensor of images. The last two elements are the shape of a tensor of images. The last N elements are
interpreted as the number of rows, and the number of cols. interpreted as the downsampling dimensions.
ds : tuple of two ints ds : tuple of N ints
downsample factor over rows and columns this parameter downsample factor over rows and columns this parameter
indicates the size of the pooling region indicates the size of the pooling region
st : tuple of two ints
the stride size. This is the distance between the pooling
regions. If it's set to None, in which case it equlas ds.
ignore_border : bool ignore_border : bool
if ds doesn't divide imgshape, do we include an extra If ds doesn't divide imgshape, do we include an extra row/col/slice
row/col of partial downsampling (False) or ignore it of partial downsampling (False) or ignore it (True).
(True). st : list or tuple of N ints or None
padding : tuple of two ints Stride size, which is the number of shifts over rows/cols/slices to get the
(pad_h, pad_w), pad zeros to extend beyond four borders of next pool region. If st is None, it is considered equal to ds
the images, pad_h is the size of the top and bottom (no overlap on pooling regions).
margins, and pad_w is the size of the left and right padding : tuple of N ints or None
margins. For each downsampling dimension, this specifies the number of zeros to
add as padding on both sides. For 2D and (pad_h, pad_w), pad_h specifies the
size of the top and bottom margins, pad_w specifies the size of the left and
right margins. No padding is added if padding is None.
ndim : int
The number of pooling dimensions N.
If this number is not specified, the default is set to the
(input.ndim - 2), assuming that the first two dimensions of the input
are non-pooling dimensions.
Returns Returns
------- -------
list : list :
the shape of the output from this op, for input of given the shape of the output from this op, for input of given
shape. This will have the same length as imgshape, but shape. This will have the same length as imgshape, but
with last two elements reduced as per the downsampling & with last N elements reduced as per the downsampling &
ignore_border flags. ignore_border flags.
""" """
if len(imgshape) < 2: if ndim is None:
raise TypeError('imgshape must have at least two elements ' ndim = len(imgshape) - 2
'(rows, cols)')
if len(imgshape) < ndim:
raise TypeError('imgshape must have at least {} dimensions'.format(ndim))
if st is None: if st is None:
st = ds st = ds
r, c = imgshape[-2:] if padding is None:
r += padding[0] * 2 padding = (0,) * ndim
c += padding[1] * 2 patch_shape = tuple(tensor.extract_constant(imgshape[-ndim + i]) + padding[i] * 2
for i in xrange(ndim))
if ignore_border:
out_r = (r - ds[0]) // st[0] + 1 def compute_out(v, downsample, stride):
out_c = (c - ds[1]) // st[1] + 1 if ignore_border:
if isinstance(r, theano.Variable): out = (v - downsample) // stride + 1
nr = tensor.maximum(out_r, 0) if isinstance(out, theano.Variable):
else: return tensor.maximum(out, 0)
nr = numpy.maximum(out_r, 0) else:
if isinstance(c, theano.Variable): return numpy.maximum(out, 0)
nc = tensor.maximum(out_c, 0)
else:
nc = numpy.maximum(out_c, 0)
else:
if isinstance(r, theano.Variable):
nr = tensor.switch(tensor.ge(st[0], ds[0]),
(r - 1) // st[0] + 1,
tensor.maximum(0, (r - 1 - ds[0]) //
st[0] + 1) + 1)
elif st[0] >= ds[0]:
nr = (r - 1) // st[0] + 1
else:
nr = max(0, (r - 1 - ds[0]) // st[0] + 1) + 1
if isinstance(c, theano.Variable):
nc = tensor.switch(tensor.ge(st[1], ds[1]),
(c - 1) // st[1] + 1,
tensor.maximum(0, (c - 1 - ds[1]) //
st[1] + 1) + 1)
elif st[1] >= ds[1]:
nc = (c - 1) // st[1] + 1
else: else:
nc = max(0, (c - 1 - ds[1]) // st[1] + 1) + 1 if isinstance(v, theano.Variable):
return tensor.switch(tensor.ge(stride, downsample),
(v - 1) // stride + 1,
tensor.maximum(0, (v - 1 - downsample) //
stride + 1) + 1)
elif stride >= downsample:
return (v - 1) // stride + 1
else:
return max(0, (v - 1 - downsample) // stride + 1) + 1
out_shape = [compute_out(patch_shape[i], ds[i], st[i]) for i in xrange(ndim)]
rval = list(imgshape[:-2]) + [nr, nc] rval = list(imgshape[:-ndim]) + out_shape
return rval return rval
def __init__(self, ignore_border, mode='max', openmp=None): def __init__(self, ignore_border, mode='max', ndim=None, openmp=None):
self.ndim = ndim
self.ignore_border = ignore_border self.ignore_border = ignore_border
if mode not in ['max', 'sum', 'average_inc_pad', 'average_exc_pad']: if mode not in ['max', 'sum', 'average_inc_pad', 'average_exc_pad']:
raise ValueError( raise ValueError(
...@@ -689,6 +813,7 @@ class PoolGrad(OpenMPOp): ...@@ -689,6 +813,7 @@ class PoolGrad(OpenMPOp):
def prepare_node(self, node, storage_map, compute_map): def prepare_node(self, node, storage_map, compute_map):
if len(node.inputs) < 5: # 5 for AveragePoolGrad, 6 for MaxPoolGrad if len(node.inputs) < 5: # 5 for AveragePoolGrad, 6 for MaxPoolGrad
# Old interface # Old interface
self.ndim = len(node.op.ds)
self.mode = node.op.mode self.mode = node.op.mode
ws = theano.tensor.constant(node.op.ds) ws = theano.tensor.constant(node.op.ds)
st = theano.tensor.constant(node.op.st) st = theano.tensor.constant(node.op.st)
...@@ -720,26 +845,32 @@ class PoolGrad(OpenMPOp): ...@@ -720,26 +845,32 @@ class PoolGrad(OpenMPOp):
class MaxPoolGrad(PoolGrad): class MaxPoolGrad(PoolGrad):
def __init__(self, ignore_border, openmp=None): def __init__(self, ignore_border, ndim=None, openmp=None):
PoolGrad.__init__(self, ignore_border, mode='max', openmp=openmp) PoolGrad.__init__(self, ignore_border, mode='max', ndim=ndim, openmp=openmp)
def make_node(self, x, maxout, gz, ws, stride=None, pad=(0, 0)): def make_node(self, x, maxout, gz, ws, stride=None, pad=None):
# make_node should only be called by the grad function of # make_node should only be called by the grad function of
# Pool, so these asserts should not fail. # Pool, so these asserts should not fail.
x = tensor.as_tensor_variable(x) x = tensor.as_tensor_variable(x)
maxout = tensor.as_tensor_variable(maxout) maxout = tensor.as_tensor_variable(maxout)
gz = tensor.as_tensor_variable(gz) gz = tensor.as_tensor_variable(gz)
nd = self.ndim
if nd is None:
nd = x.ndim - 2
if stride is None: if stride is None:
stride = ws stride = ws
if pad is None:
pad = (0,) * nd
ws = tensor.as_tensor_variable(ws) ws = tensor.as_tensor_variable(ws)
stride = tensor.as_tensor_variable(stride) stride = tensor.as_tensor_variable(stride)
pad = tensor.as_tensor_variable(pad) pad = tensor.as_tensor_variable(pad)
assert isinstance(x, Variable) and x.ndim == 4 assert isinstance(x, Variable) and x.ndim >= nd
assert isinstance(maxout, Variable) and maxout.ndim == 4 assert isinstance(maxout, Variable) and maxout.ndim >= nd
assert isinstance(gz, Variable) and gz.ndim == 4 assert isinstance(gz, Variable) and gz.ndim >= nd
assert isinstance(ws, Variable) and ws.ndim == 1 assert isinstance(ws, Variable) and ws.ndim == 1
assert isinstance(stride, Variable) and stride.ndim == 1 assert isinstance(stride, Variable) and stride.ndim == 1
assert isinstance(pad, Variable) and pad.ndim == 1 assert isinstance(pad, Variable) and pad.ndim == 1
assert x.ndim == maxout.ndim == gz.ndim >= nd
if not ws.dtype.startswith('int'): if not ws.dtype.startswith('int'):
raise TypeError('Pool downsample parameters must be ints.') raise TypeError('Pool downsample parameters must be ints.')
if not stride.dtype.startswith('int'): if not stride.dtype.startswith('int'):
...@@ -752,41 +883,51 @@ class MaxPoolGrad(PoolGrad): ...@@ -752,41 +883,51 @@ class MaxPoolGrad(PoolGrad):
assert self.mode == 'max' assert self.mode == 'max'
x, maxout, gz, ws, stride, pad = inp x, maxout, gz, ws, stride, pad = inp
gx_stg, = out gx_stg, = out
assert ws.shape == stride.shape == pad.shape == (2,) nd = self.ndim
# number of pooling output rows if nd is None:
pr = maxout.shape[-2] nd = len(x.shape) - 2
# number of pooling output cols assert ws.shape == stride.shape == pad.shape == (nd,)
pc = maxout.shape[-1] if len(x.shape) < nd:
ws0, ws1 = ws raise NotImplementedError(
st0, st1 = stride 'MaxPoolGrad requires input with {} or more dimensions'.format(nd))
pad_h = pad[0] pool_out_shp = maxout.shape[-nd:]
pad_w = pad[1] img_shp = tuple(x.shape[-nd + i] + 2 * pad[i] for i in xrange(nd))
img_rows = x.shape[-2] + 2 * pad_h
img_cols = x.shape[-1] + 2 * pad_w
# pad the image # pad the image
if (pad_h, pad_w) != (0, 0): if max(pad) != 0:
y = numpy.zeros( y = numpy.zeros(x.shape[:-nd] + img_shp, dtype=x.dtype)
(x.shape[0], x.shape[1], img_rows, img_cols), y[(slice(None),) * (len(x.shape) - nd) +
dtype=x.dtype) tuple(slice(pad[i], img_shp[i] - pad[i]) for i in xrange(nd))] = x
y[:, :, pad_h:(img_rows - pad_h), pad_w:(img_cols - pad_w)] = x
else: else:
y = x y = x
gx = numpy.zeros_like(y) gx = numpy.zeros_like(y)
for n in xrange(x.shape[0]):
for k in xrange(x.shape[1]): # precompute the region boundaries for each dimension
for r in xrange(pr): region_ranges = [[] for i in xrange(nd)]
row_st = builtins.max(r * st0, pad_h) for i in xrange(nd):
row_end = builtins.min(row_st + ws0, img_rows) for j in xrange(pool_out_shp[i]):
for c in xrange(pc): start = builtins.max(j * stride[i], pad[i])
col_st = builtins.max(c * st1, pad_w) end = builtins.min(start + ws[i], img_shp[i])
col_end = builtins.min(col_st + ws1, img_cols) region_ranges[i].append(xrange(start, end))
for row_ind in xrange(row_st, row_end):
for col_ind in xrange(col_st, col_end): # iterate over non-pooling dimensions
if (maxout[n, k, r, c] == y[n, k, row_ind, col_ind]): for k in numpy.ndindex(*x.shape[:-nd]):
gx[n, k, row_ind, col_ind] += gz[n, k, r, c] gxk = gx[k]
gzk = gz[k]
yk = y[k]
maxoutk = maxout[k]
# iterate over pooling regions
for r in numpy.ndindex(*pool_out_shp):
maxout_value = maxoutk[r]
# iterate inside region
for c in itertools.product(*[region_ranges[i][r[i]]
for i in xrange(nd)]):
if maxout_value == yk[c]:
gxk[c] += gzk[r]
# unpad the image # unpad the image
gx = gx[:, :, pad_h:(img_rows - pad_h), pad_w:(img_cols - pad_w)] gx = gx[(slice(None),) * (len(x.shape) - nd) +
tuple(slice(pad[i], img_shp[i] - pad[i]) for i in xrange(nd))]
gx_stg[0] = gx gx_stg[0] = gx
def grad(self, inp, grads): def grad(self, inp, grads):
...@@ -794,7 +935,8 @@ class MaxPoolGrad(PoolGrad): ...@@ -794,7 +935,8 @@ class MaxPoolGrad(PoolGrad):
ggx, = grads ggx, = grads
return ([theano.tensor.zeros_like(x), return ([theano.tensor.zeros_like(x),
theano.tensor.zeros_like(maxout), theano.tensor.zeros_like(maxout),
DownsampleFactorMaxGradGrad(ignore_border=self.ignore_border)( DownsampleFactorMaxGradGrad(ndim=self.ndim,
ignore_border=self.ignore_border)(
x, maxout, ggx, ws, stride, pad)] + x, maxout, ggx, ws, stride, pad)] +
[DisconnectedType()() for i in inp[3:]]) [DisconnectedType()() for i in inp[3:]])
...@@ -805,161 +947,253 @@ class MaxPoolGrad(PoolGrad): ...@@ -805,161 +947,253 @@ class MaxPoolGrad(PoolGrad):
assert self.mode == 'max' assert self.mode == 'max'
x, z, gz, ws, stride, pad = inp x, z, gz, ws, stride, pad = inp
gx, = out gx, = out
nd = self.ndim
if nd is None:
nd = node.inputs[0].ndim - 2
total_ndim = node.inputs[0].ndim
non_pool_ndim = total_ndim - nd
fail = sub['fail'] fail = sub['fail']
ignore_border = int(self.ignore_border) ignore_border = int(self.ignore_border)
if self.openmp: if self.openmp:
omp_parallel = '#pragma omp parallel for private(r_st, r_end, c_st, c_end, maximum) schedule(static)' # run in parallel over each pooling block
omp_parallel = '#pragma omp parallel for private(r_st, r_end, r_idx, i_idx, o_idx, maximum) schedule(static)'
else: else:
omp_parallel = '' omp_parallel = ''
return """
ccode = """
// sanity checks // sanity checks
int x_typenum = PyArray_ObjectType((PyObject*)%(x)s, 0); int x_typenum = PyArray_ObjectType((PyObject*)%(x)s, 0);
int z_typenum = PyArray_ObjectType((PyObject*)%(z)s, 0); int z_typenum = PyArray_ObjectType((PyObject*)%(z)s, 0);
int gz_typenum = PyArray_ObjectType((PyObject*)%(gz)s, 0); int gz_typenum = PyArray_ObjectType((PyObject*)%(gz)s, 0);
int ws0, ws1, st0, st1, pd0, pd1;
int z_r, z_c;
int r, c; // shape of the padded_input
if ((x_typenum != z_typenum) || (x_typenum != gz_typenum)) if ((x_typenum != z_typenum) || (x_typenum != gz_typenum))
{ {
PyErr_SetString(PyExc_ValueError, "input types must all match"); PyErr_SetString(PyExc_ValueError, "input types must all match");
%(fail)s; %(fail)s;
} }
if(PyArray_NDIM(%(x)s)!=4) if(PyArray_NDIM(%(x)s)!=%(total_ndim)s)
{ {
PyErr_SetString(PyExc_ValueError, "x must be a 4d ndarray"); PyErr_SetString(PyExc_ValueError, "x must be a %(total_ndim)sD ndarray");
%(fail)s; %(fail)s;
} }
if(PyArray_NDIM(%(z)s)!=4) if(PyArray_NDIM(%(z)s)!=%(total_ndim)s)
{ {
PyErr_SetString(PyExc_ValueError, "z must be a 4d ndarray"); PyErr_SetString(PyExc_ValueError, "z must be a %(total_ndim)sD ndarray");
%(fail)s; %(fail)s;
} }
if(PyArray_NDIM(%(gz)s)!=4) if(PyArray_NDIM(%(gz)s)!=%(total_ndim)s)
{ {
PyErr_SetString(PyExc_ValueError, "gz must be a 4d ndarray"); PyErr_SetString(PyExc_ValueError, "gz must be a %(total_ndim)sD ndarray");
%(fail)s; %(fail)s;
} }
if(PyArray_DIM(%(ws)s, 0)!=2) if(PyArray_DIM(%(ws)s, 0)!=%(nd)s)
{ {
PyErr_SetString(PyExc_ValueError, "ws must be a vector of size 2"); PyErr_SetString(PyExc_ValueError, "ws must be a vector of size %(nd)s");
%(fail)s; %(fail)s;
} }
if(PyArray_DIM(%(stride)s, 0)!=2) if(PyArray_DIM(%(stride)s, 0)!=%(nd)s)
{ {
PyErr_SetString(PyExc_ValueError, "stride must be a vector of size 2"); PyErr_SetString(PyExc_ValueError, "stride must be a vector of size %(nd)s");
%(fail)s; %(fail)s;
} }
if(PyArray_DIM(%(pad)s, 0)!=2) if(PyArray_DIM(%(pad)s, 0)!=%(nd)s)
{ {
PyErr_SetString(PyExc_ValueError, "pad must be a vector of size 2"); PyErr_SetString(PyExc_ValueError, "pad must be a vector of size %(nd)s");
%(fail)s; %(fail)s;
} }
// Getting ws, stride and pad int z[%(nd)s]; // shape of the output
ws0 = *((npy_intp*)PyArray_GETPTR1(%(ws)s, 0)); int r[%(nd)s]; // shape of the padded_input
ws1 = *((npy_intp*)PyArray_GETPTR1(%(ws)s, 1)); int ws[%(nd)s];
st0 = *((npy_intp*)PyArray_GETPTR1(%(stride)s, 0)); int st[%(nd)s];
st1 = *((npy_intp*)PyArray_GETPTR1(%(stride)s, 1)); int pd[%(nd)s];
pd0 = *((npy_intp*)PyArray_GETPTR1(%(pad)s, 0)); int nonzero_padding;
pd1 = *((npy_intp*)PyArray_GETPTR1(%(pad)s, 1)); nonzero_padding = 0;
z_r = PyArray_DIMS(%(z)s)[2]; for (int i=0; i<%(nd)s; i++)
z_c = PyArray_DIMS(%(z)s)[3]; {
r = PyArray_DIMS(%(x)s)[2]; ws[i] = *((npy_intp*)PyArray_GETPTR1(%(ws)s, i));
c = PyArray_DIMS(%(x)s)[3]; st[i] = *((npy_intp*)PyArray_GETPTR1(%(stride)s, i));
r += pd0 * 2; pd[i] = *((npy_intp*)PyArray_GETPTR1(%(pad)s, i));
c += pd1 * 2; z[i] = PyArray_DIMS(%(z)s)[%(non_pool_ndim)s + i];
// allocating memory for gx r[i] = PyArray_DIMS(%(x)s)[%(non_pool_ndim)s + i] + 2 * pd[i];
if ((!%(gx)s) if (pd[i]>0)
|| !PyArray_ISCONTIGUOUS(%(gx)s) nonzero_padding = 1;
|| *PyArray_DIMS(%(gx)s)!=4 }
||(PyArray_DIMS(%(gx)s)[0] != PyArray_DIMS(%(x)s)[0]) // allocating memory for output, if necessary
||(PyArray_DIMS(%(gx)s)[1] != PyArray_DIMS(%(x)s)[1]) int mem_nec;
||(PyArray_DIMS(%(gx)s)[2] != PyArray_DIMS(%(x)s)[2]) mem_nec = 0;
||(PyArray_DIMS(%(gx)s)[3] != PyArray_DIMS(%(x)s)[3]) if ((!%(gx)s) || !PyArray_ISCONTIGUOUS(%(gx)s)
) || *PyArray_DIMS(%(gx)s)!=%(total_ndim)s)
{
mem_nec = 1;
}
if (!mem_nec)
{
for (int i=0; i<%(total_ndim)s; i++)
{
if (PyArray_DIMS(%(gx)s)[i] != PyArray_DIMS(%(x)s)[i])
{
mem_nec = 1;
break;
}
}
}
if (mem_nec)
{ {
Py_XDECREF(%(gx)s); Py_XDECREF(%(gx)s);
%(gx)s = (PyArrayObject*) PyArray_ZEROS(4, PyArray_DIMS(%(x)s), x_typenum,0); %(gx)s = (PyArrayObject*) PyArray_ZEROS(%(total_ndim)s, PyArray_DIMS(%(x)s), x_typenum,0);
} }
else { else {
PyArray_FILLWBYTE(%(gx)s, 0); PyArray_FILLWBYTE(%(gx)s, 0);
} }
dtype_%(z)s maximum; // temp var for maximum value in a region dtype_%(z)s maximum; // temp var for maximum value in a region
if (z_r && z_c) int z_prod;
// do not run if any z[i] is zero
z_prod = 1;
for (int i=0; i<%(nd)s; i++)
{ {
int r_st, r_end, c_st, c_end; z_prod *= z[i];
}
if (z_prod)
{
// will be used to hold start and end index of a region
int r_st[%(nd)s];
int r_end[%(nd)s];
// index for iterating over the pooling regions
int r_idx[%(nd)s];
// placeholder for PyArray indexing (output)
long int o_idx[%(total_ndim)s];
// placeholder for PyArray indexing (input)
long int i_idx[%(total_ndim)s];
// loop over non-pooling dimensions
int non_pooling_prod = 1;
for (int i=0; i<%(non_pool_ndim)s; i++)
{
non_pooling_prod *= PyArray_DIMS(%(x)s)[i];
}
%(omp_parallel)s %(omp_parallel)s
for(int t = 0; t < PyArray_DIMS(%(x)s)[0] * PyArray_DIMS(%(x)s)[1]; t++){ // first loop over non-pooling dimensions
int b = t %% PyArray_DIMS(%(x)s)[0]; for (int t=0; t<non_pooling_prod; t++)
int k = t / PyArray_DIMS(%(x)s)[0]; {
for(int i=0; i < z_r; i++){ // compute the non-pooling index in each dimension
r_st = i * st0; if (%(non_pool_ndim)s!=0)
r_end = r_st + ws0; {
o_idx[0] = t;
i_idx[0] = t;
for (int i=1; i<%(non_pool_ndim)s; i++)
{
o_idx[i] = o_idx[i - 1] / PyArray_DIMS(%(x)s)[i - 1];
o_idx[i - 1] =o_idx[i - 1] %% PyArray_DIMS(%(x)s)[i - 1];
i_idx[i] = o_idx[i];
i_idx[i - 1] = o_idx[i - 1];
}
}
// then loop over each region in each pooling dimension
"""
for i in xrange(nd):
ccode += """
for (r_idx[%(i)s]=0; r_idx[%(i)s] < z[%(i)s]; r_idx[%(i)s]++) {
r_st[%(i)s] = r_idx[%(i)s] * st[%(i)s];
r_end[%(i)s] = r_st[%(i)s] + ws[%(i)s];
// skip the padding // skip the padding
r_st = r_st < pd0 ? pd0 : r_st; r_st[%(i)s] = r_st[%(i)s] < pd[%(i)s] ? pd[%(i)s] : r_st[%(i)s];
r_end = r_end > (r - pd0) ? r - pd0 : r_end; r_end[%(i)s] = r_end[%(i)s] > (r[%(i)s] - pd[%(i)s]) ? r[%(i)s] - pd[%(i)s] : r_end[%(i)s];
// from padded_img space to img space // from padded_img space to img space
r_st -= pd0; r_st[%(i)s] -= pd[%(i)s];
r_end -= pd0; r_end[%(i)s] -= pd[%(i)s];
for(int j=0; j<z_c; j++){ // use the index to find the correct position in the output
c_st = j * st1; o_idx[%(non_pool_ndim)s + %(i)s] = r_idx[%(i)s];
c_end = c_st + ws1; """ % dict(i=i, non_pool_ndim=non_pool_ndim)
// skip the padding
c_st = c_st < pd1 ? pd1 : c_st; ccode += """
c_end = c_end > (c - pd1) ? c - pd1 : c_end; dtype_%(gz)s * gz;
// change coordinates from padding_img space into img space if (%(total_ndim)s == 4)
c_st -= pd1; {
c_end -= pd1; // the maximum value
maximum = ((dtype_%(z)s*)(PyArray_GETPTR4(%(z)s,o_idx[0],o_idx[1],o_idx[2],o_idx[3])))[0];
// the gradient corresponding to this maximum value in z
gz = ((dtype_%(gz)s*)(PyArray_GETPTR4(%(gz)s, o_idx[0],o_idx[1],o_idx[2],o_idx[3])));
}
else
{
// the maximum value // the maximum value
maximum = ((dtype_%(z)s*)(PyArray_GETPTR4(%(z)s,b,k,i,j)))[0]; maximum = ((dtype_%(z)s*)(PyArray_GetPtr(%(z)s,o_idx)))[0];
// the gradient corresponding to this maximum value in z // the gradient corresponding to this maximum value in z
dtype_%(gz)s * gz = ( gz = ((dtype_%(gz)s*)(PyArray_GetPtr(%(gz)s, o_idx)));
(dtype_%(gz)s*)(PyArray_GETPTR4(%(gz)s, b, k, i, j))); }
// go through the pooled region in the unpadded input """
for(int m=r_st; m<r_end; m++) for i in xrange(nd):
ccode += """
// go through the pooled region in the unpadded input
for(int m%(i)s=r_st[%(i)s]; m%(i)s<r_end[%(i)s]; m%(i)s++)
{
i_idx[%(non_pool_ndim)s + %(i)s] = m%(i)s;
""" % dict(i=i, non_pool_ndim=non_pool_ndim)
ccode += """
dtype_%(x)s a;
dtype_%(gx)s * gx;
if (%(total_ndim)s == 4)
{ {
for(int n=c_st; n<c_end; n++) a = ((dtype_%(x)s*)(PyArray_GETPTR4(%(x)s,i_idx[0],i_idx[1],i_idx[2],i_idx[3])))[0];
{ gx = ((dtype_%(gx)s*)(PyArray_GETPTR4(%(gx)s, i_idx[0],i_idx[1],i_idx[2],i_idx[3])));
dtype_%(x)s a = ((dtype_%(x)s*)(PyArray_GETPTR4(%(x)s,b,k,m,n)))[0];
dtype_%(gx)s * gx = (
(dtype_%(gx)s*)(PyArray_GETPTR4(%(gx)s, b, k, m, n)));
if (a == maximum){
gx[0] = gx[0] + gz[0];
}
}
} }
} else
} {
} a = ((dtype_%(x)s*)(PyArray_GetPtr(%(x)s,i_idx)))[0];
} gx = ((dtype_%(gx)s*)(PyArray_GetPtr(%(gx)s, i_idx)));
""" % locals() }
if (a == maximum){
gx[0] = gx[0] + gz[0];
}
"""
for i in xrange(nd):
ccode += """
} // for loop over region
"""
for i in xrange(nd):
ccode += """
} // loop over pooling dimension
"""
ccode += """
} // for loop over non-pooling dimensions
} // if z_prod
"""
return ccode % locals()
def c_code_cache_version(self): def c_code_cache_version(self):
return (0, 9, self.openmp) return (0, 10, self.openmp)
class AveragePoolGrad(PoolGrad): class AveragePoolGrad(PoolGrad):
def __init__(self, ignore_border, mode='average_inc_pad'): def __init__(self, ignore_border, mode='average_inc_pad', ndim=None):
assert mode in ['sum', 'average_inc_pad', 'average_exc_pad'] assert mode in ['sum', 'average_inc_pad', 'average_exc_pad']
PoolGrad.__init__(self, ignore_border, mode) PoolGrad.__init__(self, ignore_border, mode, ndim)
# There is an extra dummy parameter to match the parameter count # There is an extra dummy parameter to match the parameter count
# of MaxPoolGrad. They have to keep the same interface because of # of MaxPoolGrad. They have to keep the same interface because of
# the DownsampleFactorMaxGrad trick to keep old scripts working # the DownsampleFactorMaxGrad trick to keep old scripts working
# (see downsample.py for details on this). # (see downsample.py for details on this).
def make_node(self, x, gz, ws, stride=None, pad=(0, 0), dummy=None): def make_node(self, x, gz, ws, stride=None, pad=None, dummy=None):
# make_node should only be called by the grad function of # make_node should only be called by the grad function of
# Pool, so these asserts should not fail. # Pool, so these asserts should not fail.
x = tensor.as_tensor_variable(x) x = tensor.as_tensor_variable(x)
gz = tensor.as_tensor_variable(gz) gz = tensor.as_tensor_variable(gz)
nd = self.ndim
if nd is None:
nd = x.ndim - 2
if stride is None: if stride is None:
stride = ws stride = ws
if pad is None:
pad = (0,) * nd
ws = tensor.as_tensor_variable(ws) ws = tensor.as_tensor_variable(ws)
stride = tensor.as_tensor_variable(stride) stride = tensor.as_tensor_variable(stride)
pad = tensor.as_tensor_variable(pad) pad = tensor.as_tensor_variable(pad)
assert isinstance(x, Variable) and x.ndim == 4 assert isinstance(x, Variable) and x.ndim >= nd
assert isinstance(gz, Variable) and gz.ndim == 4 assert isinstance(gz, Variable) and gz.ndim >= nd
assert isinstance(ws, Variable) and ws.ndim == 1 assert isinstance(ws, Variable) and ws.ndim == 1
assert isinstance(stride, Variable) and stride.ndim == 1 assert isinstance(stride, Variable) and stride.ndim == 1
assert x.ndim == gz.ndim >= nd
assert isinstance(pad, Variable) and pad.ndim == 1 assert isinstance(pad, Variable) and pad.ndim == 1
if not ws.dtype.startswith('int'): if not ws.dtype.startswith('int'):
raise TypeError('Pool downsample parameters must be ints.') raise TypeError('Pool downsample parameters must be ints.')
...@@ -972,96 +1206,333 @@ class AveragePoolGrad(PoolGrad): ...@@ -972,96 +1206,333 @@ class AveragePoolGrad(PoolGrad):
def perform(self, node, inp, out): def perform(self, node, inp, out):
x, gz, ws, stride, pad = inp x, gz, ws, stride, pad = inp
gx_stg, = out gx_stg, = out
assert ws.shape == stride.shape == pad.shape == (2,) nd = self.ndim
if self.mode == 'average_exc_pad' and pad[0] != 0 and pad[1] != 0: if nd is None:
nd = len(x.shape) - 2
assert ws.shape == stride.shape == pad.shape == (nd,)
if len(x.shape) < nd:
raise NotImplementedError(
'AveragePoolGrad requires input with {} or more dimensions'.format(nd))
if self.mode == 'average_exc_pad' and max(pad) != 0:
raise NotImplementedError() raise NotImplementedError()
z_shape = self.out_shape(x.shape, ws, self.ignore_border, stride, pad) z_shape = self.out_shape(x.shape, ws, self.ignore_border, stride, pad, nd)
if (gx_stg[0] is None) or (gx_stg[0].shape != z_shape): if (gx_stg[0] is None) or (gx_stg[0].shape != z_shape):
gx_stg[0] = numpy.empty(z_shape, dtype=x.dtype) gx_stg[0] = numpy.empty(z_shape, dtype=x.dtype)
zz = gx_stg[0] zz = gx_stg[0]
# number of pooling output rows # size of pooling output
pr = zz.shape[-2] pool_out_shp = zz.shape[-nd:]
# number of pooling output cols img_shp = tuple(x.shape[-nd + i] + 2 * pad[i] for i in xrange(nd))
pc = zz.shape[-1]
ws0, ws1 = ws
st0, st1 = stride
pad_h = pad[0]
pad_w = pad[1]
img_rows = x.shape[-2] + 2 * pad_h
img_cols = x.shape[-1] + 2 * pad_w
inc_pad = self.mode == 'average_inc_pad' inc_pad = self.mode == 'average_inc_pad'
sum_mode = self.mode == 'sum' sum_mode = self.mode == 'sum'
# pad the image # initialize the padded output
if (pad_h, pad_w) != (0, 0): gx = numpy.zeros((x.shape[:-nd] + img_shp), dtype=x.dtype)
y = numpy.zeros(
(x.shape[0], x.shape[1], img_rows, img_cols), # precompute the region boundaries and sizes for each dimension
dtype=x.dtype) region_slices = [[] for i in xrange(nd)]
y[:, :, pad_h:(img_rows - pad_h), pad_w:(img_cols - pad_w)] = x region_sizes = [[] for i in xrange(nd)]
else: for i in xrange(nd):
y = x for j in xrange(pool_out_shp[i]):
gx = numpy.zeros_like(y) if sum_mode or inc_pad:
for n in xrange(x.shape[0]): start = j * stride[i]
for k in xrange(x.shape[1]): else:
for r in xrange(pr): start = builtins.max(j * stride[i], pad[i])
if sum_mode or inc_pad: end = builtins.min(start + ws[i], img_shp[i])
row_st = r * st0 region_slices[i].append(slice(start, end))
else: region_sizes[i].append(end - start)
row_st = builtins.max(r * st0, pad_h)
row_end = builtins.min(row_st + ws0, img_rows) # iterate over non-pooling dimensions
for c in xrange(pc): region_slice = [None] * nd
if sum_mode or inc_pad: for k in numpy.ndindex(*x.shape[:-nd]):
col_st = c * st1 gzk = gz[k]
else: gxk = gx[k]
col_st = builtins.max(c * st1, pad_w) # iterate over pooling regions
col_end = builtins.min(col_st + ws1, img_cols) for r in numpy.ndindex(*pool_out_shp):
if sum_mode: region_size = 1
val = gz[n, k, r, c] for i in xrange(nd):
else: region_slice[i] = region_slices[i][r[i]]
val = gz[n, k, r, c] / ((row_end - row_st) * region_size *= region_sizes[i][r[i]]
(col_end - col_st)) if sum_mode:
gx[n, k, row_st:row_end, col_st:col_end] += val val = gzk[r]
else:
# divide by region size
val = gzk[r] / region_size
gxk[region_slice] += val
# unpad the image # unpad the image
gx = gx[:, :, pad_h:(img_rows - pad_h), pad_w:(img_cols - pad_w)] gx = gx[(slice(None),) * (len(x.shape) - nd) +
tuple(slice(pad[i], img_shp[i] - pad[i]) for i in xrange(nd))]
gx_stg[0] = gx gx_stg[0] = gx
def grad(self, inp, grads): def grad(self, inp, grads):
x, gz, ws, stride, pad = inp x, gz, ws, stride, pad = inp
ggx, = grads ggx, = grads
return ([theano.tensor.zeros_like(x), return ([theano.tensor.zeros_like(x),
Pool(ignore_border=self.ignore_border, mode=self.mode)(ggx, Pool(ignore_border=self.ignore_border,
ndim=self.ndim, mode=self.mode)(ggx,
ws, stride, pad)] + [DisconnectedType()() for i in inp[2:]]) ws, stride, pad)] + [DisconnectedType()() for i in inp[2:]])
def connection_pattern(self, node): def connection_pattern(self, node):
return [[1], [1], [0], [0], [0]] return [[1], [1], [0], [0], [0]]
def c_code(self, node, name, inp, out, sub):
x, gz, ws, stride, pad = inp
gx, = out
nd = self.ndim
if nd is None:
nd = node.inputs[0].ndim - 2
total_ndim = node.inputs[0].ndim
non_pool_ndim = total_ndim - nd
fail = sub['fail']
inc_pad = int(self.mode == 'average_inc_pad')
sum_mode = int(self.mode == 'sum')
if self.openmp:
# run in parallel over each pooling block
omp_parallel = '#pragma omp parallel for private(r_st, r_end, r_pad_width, r_idx, i_idx, o_idx) schedule(static)'
else:
omp_parallel = ''
ccode = """
// sanity checks
int x_typenum = PyArray_ObjectType((PyObject*)%(x)s, 0);
int gz_typenum = PyArray_ObjectType((PyObject*)%(gz)s, 0);
if (x_typenum != gz_typenum)
{
PyErr_SetString(PyExc_ValueError, "input types must all match");
%(fail)s;
}
if(PyArray_NDIM(%(x)s)!=%(total_ndim)s)
{
PyErr_SetString(PyExc_ValueError, "x must be a %(total_ndim)sD ndarray");
%(fail)s;
}
if(PyArray_NDIM(%(gz)s)!=%(total_ndim)s)
{
PyErr_SetString(PyExc_ValueError, "gz must be a %(total_ndim)sD ndarray");
%(fail)s;
}
if(PyArray_DIM(%(ws)s, 0)!=%(nd)s)
{
PyErr_SetString(PyExc_ValueError, "ws must be a vector of size %(nd)s");
%(fail)s;
}
if(PyArray_DIM(%(stride)s, 0)!=%(nd)s)
{
PyErr_SetString(PyExc_ValueError, "stride must be a vector of size %(nd)s");
%(fail)s;
}
if(PyArray_DIM(%(pad)s, 0)!=%(nd)s)
{
PyErr_SetString(PyExc_ValueError, "pad must be a vector of size %(nd)s");
%(fail)s;
}
int z[%(nd)s]; // shape of the output
int r[%(nd)s]; // shape of the padded_input
int ws[%(nd)s];
int st[%(nd)s];
int pd[%(nd)s];
int nonzero_padding;
nonzero_padding = 0;
for (int i=0; i<%(nd)s; i++)
{
ws[i] = *((npy_intp*)PyArray_GETPTR1(%(ws)s, i));
st[i] = *((npy_intp*)PyArray_GETPTR1(%(stride)s, i));
pd[i] = *((npy_intp*)PyArray_GETPTR1(%(pad)s, i));
z[i] = PyArray_DIMS(%(gz)s)[%(non_pool_ndim)s + i];
r[i] = PyArray_DIMS(%(x)s)[%(non_pool_ndim)s + i] + 2 * pd[i];
if (pd[i]>0)
nonzero_padding = 1;
}
if (!%(inc_pad)s && !%(sum_mode)s && nonzero_padding)
{
PyErr_SetString(PyExc_ValueError,
"padding must be zero for average_exc_pad");
%(fail)s;
}
// allocating memory for output, if necessary
int mem_nec;
mem_nec = 0;
if ((!%(gx)s) || !PyArray_ISCONTIGUOUS(%(gx)s)
|| *PyArray_DIMS(%(gx)s)!=%(total_ndim)s)
{
mem_nec = 1;
}
if (!mem_nec)
{
for (int i=0; i<%(total_ndim)s; i++)
{
if (PyArray_DIMS(%(gx)s)[i] != PyArray_DIMS(%(x)s)[i])
{
mem_nec = 1;
break;
}
}
}
if (mem_nec)
{
Py_XDECREF(%(gx)s);
%(gx)s = (PyArrayObject*) PyArray_ZEROS(%(total_ndim)s, PyArray_DIMS(%(x)s), x_typenum,0);
}
else {
PyArray_FILLWBYTE(%(gx)s, 0);
}
int z_prod;
// do not run if any z[i] is zero
z_prod = 1;
for (int i=0; i<%(nd)s; i++)
{
z_prod *= z[i];
}
if (z_prod)
{
// will be used to hold start and end index of a region
int r_st[%(nd)s];
int r_end[%(nd)s];
// padded region size
int r_pad_width[%(nd)s];
// index for iterating over the pooling regions
int r_idx[%(nd)s];
// placeholder for PyArray indexing (output)
long int o_idx[%(total_ndim)s];
// placeholder for PyArray indexing (input)
long int i_idx[%(total_ndim)s];
// loop over non-pooling dimensions
int non_pooling_prod = 1;
for (int i=0; i<%(non_pool_ndim)s; i++)
{
non_pooling_prod *= PyArray_DIMS(%(x)s)[i];
}
%(omp_parallel)s
// first loop over non-pooling dimensions
for (int t=0; t<non_pooling_prod; t++)
{
// compute the non-pooling index in each dimension
if (%(non_pool_ndim)s!=0)
{
o_idx[0] = t;
i_idx[0] = t;
for (int i=1; i<%(non_pool_ndim)s; i++)
{
o_idx[i] = o_idx[i - 1] / PyArray_DIMS(%(x)s)[i - 1];
o_idx[i - 1] =o_idx[i - 1] %% PyArray_DIMS(%(x)s)[i - 1];
i_idx[i] = o_idx[i];
i_idx[i - 1] = o_idx[i - 1];
}
}
// then loop over each region in each pooling dimension
"""
for i in xrange(nd):
ccode += """
for (r_idx[%(i)s]=0; r_idx[%(i)s] < z[%(i)s]; r_idx[%(i)s]++) {
r_st[%(i)s] = r_idx[%(i)s] * st[%(i)s];
if (!%(sum_mode)s && !%(inc_pad)s && r_st[%(i)s] < pd[%(i)s])
{
r_st[%(i)s] = pd[%(i)s];
}
r_end[%(i)s] = r_st[%(i)s] + ws[%(i)s];
r_end[%(i)s] = r_end[%(i)s] > r[%(i)s] ? r[%(i)s] : r_end[%(i)s];
r_pad_width[%(i)s] = r_end[%(i)s] - r_st[%(i)s];
// from padded_img space to img space
r_st[%(i)s] = r_st[%(i)s] - pd[%(i)s] > 0 ? r_st[%(i)s] - pd[%(i)s] : 0;
r_end[%(i)s] = r_end[%(i)s] > r[%(i)s] - pd[%(i)s] ? r[%(i)s] - 2 * pd[%(i)s] : r_end[%(i)s] - pd[%(i)s];
// use the index to find the correct position in the output
o_idx[%(non_pool_ndim)s + %(i)s] = r_idx[%(i)s];
""" % dict(i=i, sum_mode=sum_mode, inc_pad=inc_pad, non_pool_ndim=non_pool_ndim)
ccode += """
dtype_%(gz)s * gz;
dtype_%(gz)s val;
if (%(total_ndim)s == 4)
{
// the gradient for this region
gz = ((dtype_%(gz)s*)(PyArray_GETPTR4(%(gz)s, o_idx[0],o_idx[1],o_idx[2],o_idx[3])));
}
else
{
// the gradient for this region
gz = ((dtype_%(gz)s*)(PyArray_GetPtr(%(gz)s, o_idx)));
}
// compute the contribution
if (%(sum_mode)s)
{
val = gz[0];
}
else
{
val = gz[0] / (%(region_size)s);
}
"""
region_size = ' * '.join('r_pad_width[%d]' % i for i in xrange(nd))
for i in xrange(nd):
ccode += """
// go through the pooled region in the unpadded input
for(int m%(i)s=r_st[%(i)s]; m%(i)s<r_end[%(i)s]; m%(i)s++)
{
i_idx[%(non_pool_ndim)s + %(i)s] = m%(i)s;
""" % dict(i=i, non_pool_ndim=non_pool_ndim)
ccode += """
dtype_%(gx)s * gx;
if (%(total_ndim)s == 4)
{
gx = ((dtype_%(gx)s*)(PyArray_GETPTR4(%(gx)s, i_idx[0],i_idx[1],i_idx[2],i_idx[3])));
}
else
{
gx = ((dtype_%(gx)s*)(PyArray_GetPtr(%(gx)s, i_idx)));
}
gx[0] = gx[0] + val;
"""
for i in xrange(nd):
ccode += """
} // for loop over region
"""
for i in xrange(nd):
ccode += """
} // loop over pooling dimension
"""
ccode += """
} // for loop over non-pooling dimensions
} // if z_prod
"""
return ccode % locals()
def c_code_cache_version(self):
return (0, 3, self.openmp)
class DownsampleFactorMaxGradGrad(OpenMPOp): class DownsampleFactorMaxGradGrad(OpenMPOp):
__props__ = ('ignore_border', 'mode') __props__ = ('ndim', 'ignore_border', 'mode')
def __init__(self, ignore_border, mode='max', openmp=None): def __init__(self, ignore_border, mode='max', ndim=None, openmp=None):
self.ndim = ndim
self.ignore_border = ignore_border self.ignore_border = ignore_border
self.mode = mode self.mode = mode
super(DownsampleFactorMaxGradGrad, self).__init__(openmp=openmp) super(DownsampleFactorMaxGradGrad, self).__init__(openmp=openmp)
assert self.mode == 'max' assert self.mode == 'max'
def make_node(self, x, maxout, gz, ws, stride=None, pad=(0, 0)): def make_node(self, x, maxout, gz, ws, stride=None, pad=None):
# make_node should only be called by the grad function of # make_node should only be called by the grad function of
# MaxPoolGrad, so these asserts should not fail. # MaxPoolGrad, so these asserts should not fail.
x = tensor.as_tensor_variable(x) x = tensor.as_tensor_variable(x)
maxout = tensor.as_tensor_variable(maxout) maxout = tensor.as_tensor_variable(maxout)
gz = tensor.as_tensor_variable(gz) gz = tensor.as_tensor_variable(gz)
assert x.ndim == 4 nd = self.ndim
assert maxout.ndim == 4 if nd is None:
assert gz.ndim == 4 nd = x.ndim - 2
if stride is None: if stride is None:
stride = ws stride = ws
if isinstance(pad, (tuple, list)): if pad is None:
if tuple(pad) != (0, 0) and not self.ignore_border: pad = (0,) * nd
elif isinstance(pad, (tuple, list)):
if max(pad) != 0 and not self.ignore_border:
raise NotImplementedError( raise NotImplementedError(
'padding works only with ignore_border=True') 'padding works only with ignore_border=True')
if isinstance(ws, (tuple, list)): if isinstance(ws, (tuple, list)):
if pad[0] >= ws[0] or pad[1] >= ws[1]: if any(pad[i] >= ws[i] for i in range(nd)):
raise NotImplementedError( raise NotImplementedError(
'padding_h and padding_w must be smaller than strides') 'padding_h and padding_w must be smaller than strides')
ws = tensor.as_tensor_variable(ws) ws = tensor.as_tensor_variable(ws)
...@@ -1070,6 +1541,7 @@ class DownsampleFactorMaxGradGrad(OpenMPOp): ...@@ -1070,6 +1541,7 @@ class DownsampleFactorMaxGradGrad(OpenMPOp):
assert ws.ndim == 1 assert ws.ndim == 1
assert stride.ndim == 1 assert stride.ndim == 1
assert pad.ndim == 1 assert pad.ndim == 1
assert x.ndim == maxout.ndim == gz.ndim >= nd
if not ws.dtype.startswith('int'): if not ws.dtype.startswith('int'):
raise TypeError('Pool downsample parameters must be ints.') raise TypeError('Pool downsample parameters must be ints.')
if not stride.dtype.startswith('int'): if not stride.dtype.startswith('int'):
...@@ -1081,49 +1553,56 @@ class DownsampleFactorMaxGradGrad(OpenMPOp): ...@@ -1081,49 +1553,56 @@ class DownsampleFactorMaxGradGrad(OpenMPOp):
def perform(self, node, inp, out): def perform(self, node, inp, out):
x, maxout, ggx, ws, stride, pad = inp x, maxout, ggx, ws, stride, pad = inp
z, = out z, = out
assert ws.shape == stride.shape == pad.shape == (2,) nd = self.ndim
if len(x.shape) != 4: if nd is None:
nd = len(x.shape) - 2
assert ws.shape == stride.shape == pad.shape == (nd,)
if len(x.shape) < nd:
raise NotImplementedError( raise NotImplementedError(
'DownsampleFactorMaxGradGrad requires 4D input for now') 'DownsampleFactorMaxGradGrad requires input '
'with {} or more dimensions'.format(nd))
if (z[0] is None) or (z[0].shape != maxout.shape): if (z[0] is None) or (z[0].shape != maxout.shape):
z[0] = numpy.zeros(maxout.shape, dtype=x.dtype) z[0] = numpy.zeros(maxout.shape, dtype=x.dtype)
ggz = z[0] # grad wrt maxout_grad has the same shape as maxout ggz = z[0] # grad wrt maxout_grad has the same shape as maxout
# number of pooling output rows # size of pooling output
pr = ggz.shape[-2] pool_out_shp = ggz.shape[-nd:]
# number of pooling output cols img_shp = tuple(x.shape[-nd + i] + 2 * pad[i] for i in xrange(nd))
pc = ggz.shape[-1]
ws0, ws1 = ws
st0, st1 = stride
pd0, pd1 = pad
img_rows = x.shape[-2] + 2 * pd0
img_cols = x.shape[-1] + 2 * pd1
# pad the image and its gradients # pad the image and its gradients
if pd0 != 0 and pd1 != 0: if max(pad) > 0:
y_padded = numpy.zeros( y_padded = numpy.zeros(x.shape[:-nd] + img_shp, dtype=x.dtype)
(x.shape[0], x.shape[1], img_rows, img_cols), y_padded[(slice(None),) * (len(x.shape) - nd) +
dtype=x.dtype) + x.min() - 1 tuple(slice(pad[i], img_shp[i] - pad[i]) for i in xrange(nd))] = x
y_padded[:, :, pd0:(img_rows - pd0), pd1:(img_cols - pd1)] = x ggx_padded = numpy.zeros(x.shape[:-nd] + img_shp, dtype=x.dtype)
ggx_padded = numpy.zeros( ggx_padded[(slice(None),) * (len(x.shape) - nd) +
(x.shape[0], x.shape[1], img_rows, img_cols), tuple(slice(pad[i], img_shp[i] - pad[i]) for i in xrange(nd))] = ggx
dtype=x.dtype)
ggx_padded[:, :, pd0:(img_rows - pd0), pd1:(img_cols - pd1)] = ggx
else: else:
y_padded = x y_padded = x
ggx_padded = ggx ggx_padded = ggx
for n in xrange(x.shape[0]):
for k in xrange(x.shape[1]): # precompute the region boundaries for each dimension
for r in xrange(pr): region_ranges = [[] for i in xrange(nd)]
row_st = r * st0 for i in xrange(nd):
row_end = builtins.min(row_st + ws0, img_rows) for j in xrange(pool_out_shp[i]):
for c in xrange(pc): start = j * stride[i]
col_st = c * st1 end = builtins.min(start + ws[i], img_shp[i])
col_end = builtins.min(col_st + ws1, img_cols) region_ranges[i].append(xrange(start, end))
for row_ind in xrange(row_st, row_end):
for col_ind in xrange(col_st, col_end): # iterate over non-pooling dimensions
if (maxout[n, k, r, c] == y_padded[n, k, row_ind, col_ind]): for k in numpy.ndindex(*x.shape[:-nd]):
ggz[n, k, r, c] = ggx_padded[n, k, row_ind, col_ind] ggxk = ggx_padded[k]
ggzk = ggz[k]
yk = y_padded[k]
maxoutk = maxout[k]
# iterate over pooling regions
for r in numpy.ndindex(*pool_out_shp):
# iterate inside region
maxout_value = maxoutk[r]
for c in itertools.product(*[region_ranges[i][r[i]]
for i in xrange(nd)]):
if maxout_value == yk[c]:
ggzk[r] += ggxk[c]
def infer_shape(self, node, in_shapes): def infer_shape(self, node, in_shapes):
return [in_shapes[1]] return [in_shapes[1]]
...@@ -1133,8 +1612,9 @@ class DownsampleFactorMaxGradGrad(OpenMPOp): ...@@ -1133,8 +1612,9 @@ class DownsampleFactorMaxGradGrad(OpenMPOp):
gz, = grads gz, = grads
return [theano.tensor.zeros_like(x), return [theano.tensor.zeros_like(x),
theano.tensor.zeros_like(maxout), theano.tensor.zeros_like(maxout),
MaxPoolGrad(ignore_border=self.ignore_border)(x, maxout, gz, MaxPoolGrad(ignore_border=self.ignore_border,
ws, stride, pad), ndim=self.ndim)(x, maxout, gz,
ws, stride, pad),
DisconnectedType()(), DisconnectedType()(),
DisconnectedType()(), DisconnectedType()(),
DisconnectedType()()] DisconnectedType()()]
...@@ -1147,106 +1627,182 @@ class DownsampleFactorMaxGradGrad(OpenMPOp): ...@@ -1147,106 +1627,182 @@ class DownsampleFactorMaxGradGrad(OpenMPOp):
raise theano.gof.utils.MethodNotDefined() raise theano.gof.utils.MethodNotDefined()
x, maxout, ggx, ws, stride, pad = inp x, maxout, ggx, ws, stride, pad = inp
z, = out # the grad of grad z, = out # the grad of grad
nd = self.ndim
if nd is None:
nd = node.inputs[0].ndim - 2
total_ndim = node.inputs[0].ndim
non_pool_ndim = total_ndim - nd
fail = sub['fail'] fail = sub['fail']
ignore_border = int(self.ignore_border) ignore_border = int(self.ignore_border)
if self.openmp: if self.openmp:
omp_parallel = '#pragma omp parallel for private(r_st, r_end, c_st, c_end, maximum) schedule(static)' # run in parallel over each pooling block
omp_parallel = '#pragma omp parallel for private(r_st, r_end, r_idx, i_idx, o_idx, maximum) schedule(static)'
else: else:
omp_parallel = '' omp_parallel = ''
return """ ccode = """
int ws0, ws1, st0, st1, pd0, pd1;
int z_typenum = PyArray_ObjectType((PyObject*)%(maxout)s, 0); int z_typenum = PyArray_ObjectType((PyObject*)%(maxout)s, 0);
int z_r, z_c; int z[%(nd)s]; // shape of the output
int r, c; // shape of the padded_input int r[%(nd)s]; // shape of the padded_input
if(PyArray_DIM(%(ws)s, 0)!=2) int ws[%(nd)s];
int st[%(nd)s];
int pd[%(nd)s];
if(PyArray_DIM(%(ws)s, 0)!=%(nd)s)
{ {
PyErr_SetString(PyExc_ValueError, "ws must be a vector of size 2"); PyErr_SetString(PyExc_ValueError, "ws must be a vector of size %(nd)s");
%(fail)s; %(fail)s;
} }
if(PyArray_DIM(%(stride)s, 0)!=2) if(PyArray_DIM(%(stride)s, 0)!=%(nd)s)
{ {
PyErr_SetString(PyExc_ValueError, "stride must be a vector of size 2"); PyErr_SetString(PyExc_ValueError, "stride must be a vector of size %(nd)s");
%(fail)s; %(fail)s;
} }
if(PyArray_DIM(%(pad)s, 0)!=2) if(PyArray_DIM(%(pad)s, 0)!=%(nd)s)
{ {
PyErr_SetString(PyExc_ValueError, "pad must be a vector of size 2"); PyErr_SetString(PyExc_ValueError, "pad must be a vector of size %(nd)s");
%(fail)s; %(fail)s;
} }
// Getting ws, stride and pad for (int i=0; i<%(nd)s; i++)
ws0 = *((npy_intp*)PyArray_GETPTR1(%(ws)s, 0)); {
ws1 = *((npy_intp*)PyArray_GETPTR1(%(ws)s, 1)); ws[i] = *((npy_intp*)PyArray_GETPTR1(%(ws)s, i));
st0 = *((npy_intp*)PyArray_GETPTR1(%(stride)s, 0)); st[i] = *((npy_intp*)PyArray_GETPTR1(%(stride)s, i));
st1 = *((npy_intp*)PyArray_GETPTR1(%(stride)s, 1)); pd[i] = *((npy_intp*)PyArray_GETPTR1(%(pad)s, i));
pd0 = *((npy_intp*)PyArray_GETPTR1(%(pad)s, 0)); z[i] = PyArray_DIMS(%(maxout)s)[%(non_pool_ndim)s + i];
pd1 = *((npy_intp*)PyArray_GETPTR1(%(pad)s, 1)); r[i] = PyArray_DIMS(%(x)s)[%(non_pool_ndim)s + i] + 2 * pd[i];
z_r = PyArray_DIMS(%(maxout)s)[2]; }
z_c = PyArray_DIMS(%(maxout)s)[3]; // allocating memory for output, if necessary
r = PyArray_DIMS(%(x)s)[2]; int mem_nec;
c = PyArray_DIMS(%(x)s)[3]; mem_nec = 0;
r += pd0 * 2; if ((!%(z)s) || !PyArray_ISCONTIGUOUS(%(z)s)
c += pd1 * 2; || *PyArray_DIMS(%(z)s)!=%(total_ndim)s)
// allocating memory for output {
if ((!%(z)s) mem_nec = 1;
|| !PyArray_ISCONTIGUOUS(%(z)s) }
|| *PyArray_DIMS(%(z)s)!=4 if (!mem_nec)
||(PyArray_DIMS(%(z)s)[0] != PyArray_DIMS(%(maxout)s)[0]) {
||(PyArray_DIMS(%(z)s)[1] != PyArray_DIMS(%(maxout)s)[1]) for (int i=0; i<%(total_ndim)s; i++)
||(PyArray_DIMS(%(z)s)[2] != PyArray_DIMS(%(maxout)s)[2]) {
||(PyArray_DIMS(%(z)s)[3] != PyArray_DIMS(%(maxout)s)[3]) if (PyArray_DIMS(%(z)s)[i] != PyArray_DIMS(%(maxout)s)[i])
) {
mem_nec = 1;
break;
}
}
}
if (mem_nec)
{ {
Py_XDECREF(%(z)s); Py_XDECREF(%(z)s);
%(z)s = (PyArrayObject*) PyArray_ZEROS(4, PyArray_DIMS(%(maxout)s), z_typenum,0); %(z)s = (PyArrayObject*) PyArray_ZEROS(%(total_ndim)s, PyArray_DIMS(%(maxout)s), z_typenum,0);
} }
else { else {
PyArray_FILLWBYTE(%(z)s, 0); PyArray_FILLWBYTE(%(z)s, 0);
} }
dtype_%(maxout)s maximum; // temp var for maximum value in a region dtype_%(maxout)s maximum; // temp var for maximum value in a region
int r_st, r_end, c_st, c_end; // will be used to hold start and end index of a region
int r_st[%(nd)s];
int r_end[%(nd)s];
// index for iterating over the pooling regions
int r_idx[%(nd)s];
// placeholder for PyArray indexing (output)
long int o_idx[%(total_ndim)s];
// placeholder for PyArray indexing (input)
long int i_idx[%(total_ndim)s];
// loop over non-pooling dimensions
int non_pooling_prod;
non_pooling_prod = 1;
for (int i=0; i<%(non_pool_ndim)s; i++)
{
non_pooling_prod *= PyArray_DIMS(%(x)s)[i];
}
%(omp_parallel)s %(omp_parallel)s
for(int t = 0; t < PyArray_DIMS(%(x)s)[0] * PyArray_DIMS(%(x)s)[1]; t++){ // first loop over non-pooling dimensions
int b = t %% PyArray_DIMS(%(x)s)[0]; for (int t=0; t<non_pooling_prod; t++)
int k = t / PyArray_DIMS(%(x)s)[0]; {
for(int i=0; i < z_r; i++){ // compute the non-pooling index in each dimension
r_st = i * st0; if (%(non_pool_ndim)s!=0)
r_end = r_st + ws0; {
o_idx[0] = t;
i_idx[0] = t;
for (int i=1; i<%(non_pool_ndim)s; i++)
{
o_idx[i] = o_idx[i - 1] / PyArray_DIMS(%(x)s)[i - 1];
o_idx[i - 1] = o_idx[i - 1] %% PyArray_DIMS(%(x)s)[i - 1];
i_idx[i] = o_idx[i];
i_idx[i - 1] = o_idx[i - 1];
}
}
// then loop over each region in each pooling dimension
"""
for i in xrange(nd):
ccode += """
for (r_idx[%(i)s]=0; r_idx[%(i)s] < z[%(i)s]; r_idx[%(i)s]++) {
r_st[%(i)s] = r_idx[%(i)s] * st[%(i)s];
r_end[%(i)s] = r_st[%(i)s] + ws[%(i)s];
// skip the padding // skip the padding
r_st = r_st < pd0 ? pd0 : r_st; r_st[%(i)s] = r_st[%(i)s] < pd[%(i)s] ? pd[%(i)s] : r_st[%(i)s];
r_end = r_end > (r - pd0) ? r - pd0 : r_end; r_end[%(i)s] = r_end[%(i)s] > (r[%(i)s] - pd[%(i)s]) ? r[%(i)s] - pd[%(i)s] : r_end[%(i)s];
// from padded_img space to img space // from padded_img space to img space
r_st -= pd0; r_st[%(i)s] -= pd[%(i)s];
r_end -= pd0; r_end[%(i)s] -= pd[%(i)s];
for(int j=0; j<z_c; j++){ // use the index to find the correct position in the output
c_st = j * st1; o_idx[%(non_pool_ndim)s + %(i)s] = r_idx[%(i)s];
c_end = c_st + ws1; """ % dict(i=i, non_pool_ndim=non_pool_ndim)
// skip the padding
c_st = c_st < pd1 ? pd1 : c_st; ccode += """
c_end = c_end > (c - pd1) ? c - pd1 : c_end; dtype_%(z)s * z;
// from padding_img space into img space if (%(total_ndim)s == 4)
c_st -= pd1; {
c_end -= pd1; // the maximum value
maximum = ((dtype_%(maxout)s*)(PyArray_GETPTR4(%(maxout)s,o_idx[0],o_idx[1],o_idx[2],o_idx[3])))[0];
// z at this position
z = ((dtype_%(z)s*)(PyArray_GETPTR4(%(z)s,o_idx[0],o_idx[1],o_idx[2],o_idx[3])));
}
else
{
// the maximum value // the maximum value
maximum = ((dtype_%(maxout)s*)(PyArray_GETPTR4(%(maxout)s,b,k,i,j)))[0]; maximum = ((dtype_%(maxout)s*)(PyArray_GetPtr(%(maxout)s,o_idx)))[0];
// z at this position // z at this position
dtype_%(z)s * z = ((dtype_%(z)s*)(PyArray_GETPTR4(%(z)s, b, k, i, j))); z = ((dtype_%(z)s*)(PyArray_GetPtr(%(z)s,o_idx)));
// go through the pooled region in the unpadded input }
for(int m=r_st; m<r_end; m++) """
for i in xrange(nd):
ccode += """
// go through the pooled region in the unpadded input
for(int m%(i)s=r_st[%(i)s]; m%(i)s<r_end[%(i)s]; m%(i)s++)
{
i_idx[%(non_pool_ndim)s + %(i)s] = m%(i)s;
""" % dict(i=i, non_pool_ndim=non_pool_ndim)
ccode += """
dtype_%(x)s a;
dtype_%(ggx)s * ggx;
if (%(total_ndim)s == 4)
{ {
for(int n=c_st; n<c_end; n++) a = ((dtype_%(x)s*)(PyArray_GETPTR4(%(x)s,i_idx[0],i_idx[1],i_idx[2],i_idx[3])))[0];
{ ggx = ((dtype_%(ggx)s*)(PyArray_GETPTR4(%(ggx)s,i_idx[0],i_idx[1],i_idx[2],i_idx[3])));
dtype_%(x)s a = ((dtype_%(x)s*)(PyArray_GETPTR4(%(x)s,b,k,m,n)))[0];
dtype_%(ggx)s * ggx = (
(dtype_%(ggx)s*)(PyArray_GETPTR4(%(ggx)s, b, k, m, n)));
if (a == maximum){
z[0] += ggx[0];
}
}
} }
} else
} {
} a = ((dtype_%(x)s*)(PyArray_GetPtr(%(x)s,i_idx)))[0];
""" % locals() ggx = ((dtype_%(ggx)s*)(PyArray_GetPtr(%(ggx)s,i_idx)));
}
if (a == maximum){
z[0] += ggx[0];
}
"""
for i in xrange(nd):
ccode += """
} // for loop over region
"""
for i in xrange(nd):
ccode += """
} // loop over pooling dimension
"""
ccode += """
} // for loop over non-pooling dimensions
"""
return ccode % locals()
def c_code_cache_version(self): def c_code_cache_version(self):
return (0, 3, self.openmp) return (0, 4, self.openmp)
from __future__ import absolute_import, print_function, division from __future__ import absolute_import, print_function, division
from nose.plugins.skip import SkipTest from nose.plugins.skip import SkipTest
from nose_parameterized import parameterized
from itertools import product from itertools import product
import os import os
import unittest import unittest
...@@ -14,7 +15,7 @@ import numpy ...@@ -14,7 +15,7 @@ import numpy
import theano import theano
import theano.tensor as tensor import theano.tensor as tensor
from theano.tests import unittest_tools as utt from theano.tests import unittest_tools as utt
from theano.tensor.signal.pool import (Pool, pool_2d, from theano.tensor.signal.pool import (Pool, pool_2d, pool_3d,
MaxPoolGrad, AveragePoolGrad, MaxPoolGrad, AveragePoolGrad,
max_pool_2d_same_size, max_pool_2d_same_size,
DownsampleFactorMaxGradGrad) DownsampleFactorMaxGradGrad)
...@@ -57,6 +58,36 @@ class TestDownsampleFactorMax(utt.InferShapeTester): ...@@ -57,6 +58,36 @@ class TestDownsampleFactorMax(utt.InferShapeTester):
output_val[k][i, j] = func(patch) output_val[k][i, j] = func(patch)
return output_val return output_val
@staticmethod
def numpy_max_pool_nd(input, ds, ignore_border=False, mode='max'):
    '''Pure-numpy reference implementation of N-dimensional pooling.

    The last ``len(ds)`` axes of `input` are pooled with non-overlapping
    windows of shape `ds`; any leading axes are treated as independent
    batch dimensions.

    Parameters
    ----------
    input : numpy.ndarray
        Array with at least ``len(ds)`` dimensions.
    ds : tuple of int
        Pooling window shape, one entry per pooled (trailing) axis.
    ignore_border : bool
        When False, a partial window at the end of a pooled axis
        contributes one extra output element.
    mode : str
        'max' and 'sum' select those reductions; any other value
        averages the window.

    Returns
    -------
    numpy.ndarray
        The pooled array (float dtype, from numpy.zeros).
    '''
    if len(input.shape) < len(ds):
        raise NotImplementedError('input should have at least %s dim,'
                                  ' shape is %s'
                                  % (str(ds), str(input.shape)))
    nd = len(ds)
    # one extra (partial) output element per axis with a remainder,
    # unless borders are ignored
    if ignore_border:
        extra = [0] * nd
    else:
        extra = [int(input.shape[-nd + i] % ds[i] != 0) for i in range(nd)]
    out_shp = (list(input.shape[:-nd]) +
               [input.shape[-nd + i] // ds[i] + extra[i] for i in range(nd)])
    # reduction used over each window; unknown modes fall back to average,
    # matching the behavior of the other numpy_max_pool_* helpers
    reduce_fn = {'max': numpy.max, 'sum': numpy.sum}.get(mode, numpy.average)
    output_val = numpy.zeros(out_shp)
    for outer in numpy.ndindex(*input.shape[:-nd]):
        for pos in numpy.ndindex(*output_val.shape[-nd:]):
            window = input[outer][tuple(slice(pos[i] * ds[i],
                                              (pos[i] + 1) * ds[i])
                                        for i in range(nd))]
            output_val[outer][pos] = reduce_fn(window)
    return output_val
@staticmethod @staticmethod
def numpy_max_pool_2d_stride_padding( def numpy_max_pool_2d_stride_padding(
x, ds, ignore_border=True, st=None, padding=(0, 0), mode='max'): x, ds, ignore_border=True, st=None, padding=(0, 0), mode='max'):
...@@ -111,6 +142,60 @@ class TestDownsampleFactorMax(utt.InferShapeTester): ...@@ -111,6 +142,60 @@ class TestDownsampleFactorMax(utt.InferShapeTester):
output_val[k][i, j] = func(patch) output_val[k][i, j] = func(patch)
return output_val return output_val
@staticmethod
def numpy_max_pool_nd_stride_padding(
        input, ds, ignore_border=True, st=None, padding=None, mode='max'):
    '''Pure-numpy N-dimensional pooling with stride and zero padding.

    The last ``len(ds)`` axes of `input` are zero-padded by `padding`
    on both sides, then pooled with windows of shape `ds` moved by
    stride `st`.

    Parameters
    ----------
    input : numpy.ndarray
        Array with at least ``len(ds)`` dimensions.
    ds : tuple of int
        Pooling window shape, one entry per pooled (trailing) axis.
    ignore_border : bool
        Must be True; padding does not support ignore_border=False.
    st : tuple of int or None
        Stride per pooled axis.  Defaults to `ds` (non-overlapping
        windows); a zero stride would make the output shape undefined.
    padding : tuple of int or None
        Zero padding per pooled axis (defaults to no padding).  Each
        entry must be smaller than the matching window size.
    mode : str
        'max', 'sum', 'average_inc_pad' (padding counts toward the
        average) or 'average_exc_pad'/other (padding excluded).

    Returns
    -------
    numpy.ndarray
        The pooled array (float dtype, from numpy.zeros).
    '''
    assert ignore_border
    nd = len(ds)
    if padding is None:
        padding = (0,) * nd
    if st is None:
        # default to non-overlapping windows, like the other helpers;
        # a zero default stride would divide by zero below
        st = ds
    assert len(padding) == len(ds) == len(st)
    assert all(ds[i] > padding[i] for i in range(nd))

    def pad_img(x):
        '''Zero-pad the last nd axes of x by `padding` on both sides.'''
        # initialize padded input
        y = numpy.zeros(
            x.shape[0:-nd] +
            tuple(x.shape[-nd + i] + padding[i] * 2 for i in range(nd)),
            dtype=x.dtype)
        # place the unpadded input in the center
        block = ((slice(None),) * (len(x.shape) - nd) +
                 tuple(slice(padding[i], x.shape[-nd + i] + padding[i])
                       for i in range(nd)))
        y[block] = x
        return y

    pad_img_shp = list(input.shape[:-nd])
    out_shp = list(input.shape[:-nd])
    for i in range(nd):
        padded_size = input.shape[-nd + i] + 2 * padding[i]
        pad_img_shp.append(padded_size)
        out_shp.append((padded_size - ds[i]) // st[i] + 1)
    output_val = numpy.zeros(out_shp)
    padded_input = pad_img(input)
    func = numpy.max
    if mode == 'sum':
        func = numpy.sum
    elif mode != 'max':
        func = numpy.average
    inc_pad = mode == 'average_inc_pad'
    for l in numpy.ndindex(*input.shape[:-nd]):
        for r in numpy.ndindex(*output_val.shape[-nd:]):
            region = []
            for i in range(nd):
                r_st = r[i] * st[i]
                r_end = builtins.min(r_st + ds[i], pad_img_shp[-nd + i])
                if not inc_pad:
                    # clip the window to the unpadded data so the
                    # padding does not take part in the reduction
                    r_st = builtins.max(r_st, padding[i])
                    r_end = builtins.min(r_end,
                                         input.shape[-nd + i] + padding[i])
                region.append(slice(r_st, r_end))
            # index with a tuple: indexing with a list of slices is an
            # error in modern numpy
            patch = padded_input[l][tuple(region)]
            output_val[l][r] = func(patch)
    return output_val
@staticmethod @staticmethod
def numpy_max_pool_2d_stride(input, ds, ignore_border=False, st=None, def numpy_max_pool_2d_stride(input, ds, ignore_border=False, st=None,
mode='max'): mode='max'):
...@@ -174,84 +259,167 @@ class TestDownsampleFactorMax(utt.InferShapeTester): ...@@ -174,84 +259,167 @@ class TestDownsampleFactorMax(utt.InferShapeTester):
output_val[k][i, j] = func(patch) output_val[k][i, j] = func(patch)
return output_val return output_val
@staticmethod
def numpy_max_pool_nd_stride(input, ds, ignore_border=False, st=None,
                             mode='max'):
    '''Pure-numpy N-dimensional pooling with an explicit stride.

    The last ``len(ds)`` axes of `input` are pooled with windows of
    shape `ds` moved by stride `st`.

    Parameters
    ----------
    input : numpy.ndarray
        Array with at least ``len(ds)`` dimensions.
    ds : tuple of int
        Pooling window shape, one entry per pooled (trailing) axis.
    ignore_border : bool
        When False, partial windows at the end of a pooled axis add an
        extra output element.
    st : tuple of int or None
        Stride per pooled axis; defaults to `ds` (non-overlapping
        windows).
    mode : str
        'max' and 'sum' select those reductions; any other value
        averages the window.

    Returns
    -------
    numpy.ndarray
        The pooled array (float dtype, from numpy.zeros).
    '''
    nd = len(ds)
    if st is None:
        st = ds
    assert len(st) == len(ds)
    out_shp = list(input.shape[:-nd])
    for i in range(nd):
        out = 0
        if input.shape[-nd + i] - ds[i] >= 0:
            out = (input.shape[-nd + i] - ds[i]) // st[i] + 1
        if not ignore_border:
            # add one partial window if the full windows do not cover
            # the whole axis and the partial window is non-empty
            if out > 0:
                if input.shape[-nd + i] - ((out - 1) * st[i] + ds[i]) > 0:
                    if input.shape[-nd + i] - out * st[i] > 0:
                        out += 1
            else:
                if input.shape[-nd + i] > 0:
                    out += 1
        out_shp.append(out)
    func = numpy.max
    if mode == 'sum':
        func = numpy.sum
    elif mode != 'max':
        func = numpy.average
    output_val = numpy.zeros(out_shp)
    for l in numpy.ndindex(*input.shape[:-nd]):
        for r in numpy.ndindex(*output_val.shape[-nd:]):
            region = []
            for i in range(nd):
                r_st = r[i] * st[i]
                # clip the window to the input extent
                r_end = builtins.min(r_st + ds[i], input.shape[-nd + i])
                region.append(slice(r_st, r_end))
            # index with a tuple: indexing with a list of slices is an
            # error in modern numpy
            patch = input[l][tuple(region)]
            output_val[l][r] = func(patch)
    return output_val
def test_DownsampleFactorMax(self): def test_DownsampleFactorMax(self):
rng = numpy.random.RandomState(utt.fetch_seed()) rng = numpy.random.RandomState(utt.fetch_seed())
# generate random images # maxpool, input size
maxpoolshps = ((1, 1), (2, 2), (3, 3), (2, 3)) examples = (
imval = rng.rand(4, 2, 16, 16) ((2,), (16,)),
images = tensor.dtensor4() ((2,), (4, 16,)),
for maxpoolshp, ignore_border, mode in product(maxpoolshps, ((2,), (4, 2, 16,)),
[True, False], ((1, 1), (4, 2, 16, 16)),
['max', ((2, 2), (4, 2, 16, 16)),
'sum', ((3, 3), (4, 2, 16, 16)),
'average_inc_pad', ((3, 2), (4, 2, 16, 16)),
'average_exc_pad']): ((3, 2, 2), (3, 2, 16, 16, 16)),
# print 'maxpoolshp =', maxpoolshp ((2, 3, 2), (3, 2, 16, 16, 16)),
# print 'ignore_border =', ignore_border ((2, 2, 3), (3, 2, 16, 16, 16)),
((2, 2, 3, 2), (3, 2, 6, 6, 6, 5)),
# Pure Numpy computation )
numpy_output_val = self.numpy_max_pool_2d(imval, maxpoolshp,
ignore_border, for example, ignore_border, mode in product(examples,
mode=mode) [True, False],
['max',
'sum',
'average_inc_pad',
'average_exc_pad']):
(maxpoolshp, inputsize) = example
imval = rng.rand(*inputsize)
images = theano.shared(imval)
# Pure Numpy computation
numpy_output_val = self.numpy_max_pool_nd(imval, maxpoolshp,
ignore_border,
mode=mode)
# The pool_2d or pool_3d helper methods
if len(maxpoolshp) == 2:
output = pool_2d(images, maxpoolshp, ignore_border, output = pool_2d(images, maxpoolshp, ignore_border,
mode=mode) mode=mode)
f = function([images, ], [output, ]) f = function([], [output, ])
output_val = f(imval) output_val = f()
utt.assert_allclose(output_val, numpy_output_val) utt.assert_allclose(output_val, numpy_output_val)
elif len(maxpoolshp) == 3:
# Pool op output = pool_3d(images, maxpoolshp, ignore_border,
maxpool_op = Pool(ignore_border=ignore_border, mode=mode)
mode=mode)(images, maxpoolshp) f = function([], [output, ])
output_val = f()
output_shape = Pool.out_shape(imval.shape, maxpoolshp,
ignore_border=ignore_border)
utt.assert_allclose(numpy.asarray(output_shape), numpy_output_val.shape)
f = function([images], maxpool_op)
output_val = f(imval)
utt.assert_allclose(output_val, numpy_output_val) utt.assert_allclose(output_val, numpy_output_val)
# Pool op
maxpool_op = Pool(ndim=len(maxpoolshp),
ignore_border=ignore_border,
mode=mode)(images, maxpoolshp)
output_shape = Pool.out_shape(imval.shape, maxpoolshp,
ndim=len(maxpoolshp),
ignore_border=ignore_border)
utt.assert_allclose(numpy.asarray(output_shape), numpy_output_val.shape)
f = function([], maxpool_op)
output_val = f()
utt.assert_allclose(output_val, numpy_output_val)
def test_DownsampleFactorMaxStride(self): def test_DownsampleFactorMaxStride(self):
rng = numpy.random.RandomState(utt.fetch_seed()) rng = numpy.random.RandomState(utt.fetch_seed())
maxpoolshps = ((1, 1), (3, 3), (5, 3), (16, 16)) # maxpool, stride, ignore_border, input, output sizes
stridesizes = ((1, 1), (3, 3), (5, 7),) examples = (
# generate random images ((1, 1), (1, 1), True, (4, 10, 16, 16), (4, 10, 16, 16)),
imval = rng.rand(4, 10, 16, 16) ((1, 1), (3, 3), True, (4, 10, 16, 16), (4, 10, 6, 6)),
# The same for each mode ((1, 1), (5, 7), True, (4, 10, 16, 16), (4, 10, 4, 3)),
outputshps = ( ((1, 1), (1, 1), False, (4, 10, 16, 16), (4, 10, 16, 16)),
(4, 10, 16, 16), (4, 10, 6, 6), (4, 10, 4, 3), ((1, 1), (3, 3), False, (4, 10, 16, 16), (4, 10, 6, 6)),
(4, 10, 16, 16), (4, 10, 6, 6), (4, 10, 4, 3), ((1, 1), (5, 7), False, (4, 10, 16, 16), (4, 10, 4, 3)),
(4, 10, 14, 14), (4, 10, 5, 5), (4, 10, 3, 2), ((3, 3), (1, 1), True, (4, 10, 16, 16), (4, 10, 14, 14)),
(4, 10, 14, 14), (4, 10, 6, 6), (4, 10, 4, 3), ((3, 3), (3, 3), True, (4, 10, 16, 16), (4, 10, 5, 5)),
(4, 10, 12, 14), (4, 10, 4, 5), (4, 10, 3, 2), ((3, 3), (5, 7), True, (4, 10, 16, 16), (4, 10, 3, 2)),
(4, 10, 12, 14), (4, 10, 5, 6), (4, 10, 4, 3), ((3, 3), (1, 1), False, (4, 10, 16, 16), (4, 10, 14, 14)),
(4, 10, 1, 1), (4, 10, 1, 1), (4, 10, 1, 1), ((3, 3), (3, 3), False, (4, 10, 16, 16), (4, 10, 6, 6)),
(4, 10, 1, 1), (4, 10, 1, 1), (4, 10, 1, 1),) ((3, 3), (5, 7), False, (4, 10, 16, 16), (4, 10, 4, 3)),
((5, 3), (1, 1), True, (4, 10, 16, 16), (4, 10, 12, 14)),
images = tensor.dtensor4() ((5, 3), (3, 3), True, (4, 10, 16, 16), (4, 10, 4, 5)),
indx = 0 ((5, 3), (5, 7), True, (4, 10, 16, 16), (4, 10, 3, 2)),
for mode, maxpoolshp, ignore_border in product(['max', ((5, 3), (1, 1), False, (4, 10, 16, 16), (4, 10, 12, 14)),
'sum', ((5, 3), (3, 3), False, (4, 10, 16, 16), (4, 10, 5, 6)),
'average_inc_pad', ((5, 3), (5, 7), False, (4, 10, 16, 16), (4, 10, 4, 3)),
'average_exc_pad'], ((16, 16), (1, 1), True, (4, 10, 16, 16), (4, 10, 1, 1)),
maxpoolshps, ((16, 16), (3, 3), True, (4, 10, 16, 16), (4, 10, 1, 1)),
[True, False]): ((16, 16), (5, 7), True, (4, 10, 16, 16), (4, 10, 1, 1)),
for stride in stridesizes: ((16, 16), (1, 1), False, (4, 10, 16, 16), (4, 10, 1, 1)),
outputshp = outputshps[indx % len(outputshps)] ((16, 16), (3, 3), False, (4, 10, 16, 16), (4, 10, 1, 1)),
indx += 1 ((16, 16), (5, 7), False, (4, 10, 16, 16), (4, 10, 1, 1)),
# Pool op ((3,), (5,), True, (16,), (3,)),
numpy_output_val = \ ((3,), (5,), True, (2, 16,), (2, 3,)),
self.numpy_max_pool_2d_stride(imval, maxpoolshp, ((5,), (3,), True, (2, 3, 16,), (2, 3, 4,)),
ignore_border, stride, ((5, 1, 3), (3, 3, 3), True, (2, 16, 16, 16), (2, 4, 6, 5)),
mode) ((5, 1, 3), (3, 3, 3), True, (4, 2, 16, 16, 16), (4, 2, 4, 6, 5)),
assert numpy_output_val.shape == outputshp, ( )
"outshape is %s, calculated shape is %s"
% (outputshp, numpy_output_val.shape)) for example, mode in product(examples, ['max',
maxpool_op = \ 'sum',
Pool(ignore_border=ignore_border, mode=mode)( 'average_inc_pad',
images, maxpoolshp, stride) 'average_exc_pad']):
f = function([images], maxpool_op) (maxpoolshp, stride, ignore_border, inputshp, outputshp) = example
output_val = f(imval) # generate random images
utt.assert_allclose(output_val, numpy_output_val) imval = rng.rand(*inputshp)
images = theano.shared(imval)
# Pool op
numpy_output_val = \
self.numpy_max_pool_nd_stride(imval, maxpoolshp,
ignore_border, stride,
mode)
assert numpy_output_val.shape == outputshp, (
"outshape is %s, calculated shape is %s"
% (outputshp, numpy_output_val.shape))
maxpool_op = \
Pool(ndim=len(maxpoolshp),
ignore_border=ignore_border,
mode=mode)(images, maxpoolshp, stride)
f = function([], maxpool_op)
output_val = f()
utt.assert_allclose(output_val, numpy_output_val)
def test_DownsampleFactorMaxStrideExtra(self): def test_DownsampleFactorMaxStrideExtra(self):
rng = numpy.random.RandomState(utt.fetch_seed()) rng = numpy.random.RandomState(utt.fetch_seed())
...@@ -287,7 +455,8 @@ class TestDownsampleFactorMax(utt.InferShapeTester): ...@@ -287,7 +455,8 @@ class TestDownsampleFactorMax(utt.InferShapeTester):
"outshape is %s, calculated shape is %s" "outshape is %s, calculated shape is %s"
% (outputshp, numpy_output_val.shape)) % (outputshp, numpy_output_val.shape))
maxpool_op = \ maxpool_op = \
Pool(ignore_border=ignore_border, mode=mode)( Pool(ignore_border=ignore_border,
ndim=len(maxpoolshp), mode=mode)(
images, maxpoolshp, stride) images, maxpoolshp, stride)
f = function([images], maxpool_op) f = function([images], maxpool_op)
output_val = f(imval) output_val = f(imval)
...@@ -296,319 +465,351 @@ class TestDownsampleFactorMax(utt.InferShapeTester): ...@@ -296,319 +465,351 @@ class TestDownsampleFactorMax(utt.InferShapeTester):
def test_DownsampleFactorMaxPaddingStride(self): def test_DownsampleFactorMaxPaddingStride(self):
ignore_border = True # padding does not support ignore_border=False ignore_border = True # padding does not support ignore_border=False
rng = numpy.random.RandomState(utt.fetch_seed()) rng = numpy.random.RandomState(utt.fetch_seed())
maxpoolsizes = [(3, 3), (4, 4), (3, 4), (4, 3), (2, 2)] # maxpool, stride, padding, input sizes
stridesizes = [(2, 2), (2, 2), (1, 1), (1, 2), (2, 2)] examples = (
paddingsizes = [(2, 2), (1, 2), (2, 1), (0, 0), (1, 1)] ((3,), (2,), (2,), (5,)),
imgsizes = [(5, 5), (5, 5), (5, 6), (6, 5), (5, 5)] ((3,), (2,), (2,), (4, 5)),
m = 4 # minibatch ((3,), (2,), (2,), (4, 2, 5)),
c = 2 # channel size ((3,), (2,), (2,), (4, 2, 5, 5)),
images = tensor.dtensor4() ((3, 3), (2, 2), (2, 2), (4, 2, 5, 5)),
for indx, mode in product(numpy.arange(len(maxpoolsizes)), ((4, 4), (2, 2), (1, 2), (4, 2, 5, 5)),
['max', 'sum', 'average_inc_pad', ((3, 4), (1, 1), (2, 1), (4, 2, 5, 6)),
'average_exc_pad']): ((4, 3), (1, 2), (0, 0), (4, 2, 6, 5)),
imgsize = imgsizes[indx] ((2, 2), (2, 2), (1, 1), (4, 2, 5, 5)),
imval = rng.rand(m, c, imgsize[0], imgsize[1]) - 0.5 ((4, 3, 2), (1, 2, 2), (0, 2, 1), (4, 6, 6, 5)),
((4, 3, 2), (1, 2, 2), (0, 2, 1), (4, 2, 6, 5, 5)),
stridesize = stridesizes[indx] )
maxpoolsize = maxpoolsizes[indx] for example, mode in product(examples,
paddingsize = paddingsizes[indx] ['max', 'sum', 'average_inc_pad',
numpy_output_val = self.numpy_max_pool_2d_stride_padding( 'average_exc_pad']):
imval, maxpoolsize, ignore_border, (maxpoolshp, stridesize, paddingsize, inputsize) = example
imval = rng.rand(*inputsize) - 0.5
images = theano.shared(imval)
numpy_output_val = self.numpy_max_pool_nd_stride_padding(
imval, maxpoolshp, ignore_border,
stridesize, paddingsize, mode) stridesize, paddingsize, mode)
maxpool_op = Pool(ignore_border=ignore_border, mode=mode)( maxpool_op = Pool(
images, maxpoolsize, stridesize, paddingsize) ndim=len(maxpoolshp),
f = function([images], maxpool_op) ignore_border=ignore_border,
output_val = f(imval) mode=mode
)(images, maxpoolshp, stridesize, paddingsize)
f = function([], maxpool_op)
output_val = f()
utt.assert_allclose(output_val, numpy_output_val) utt.assert_allclose(output_val, numpy_output_val)
def test_DownsampleFactorMaxPaddingStride_grad(self): def test_DownsampleFactorMaxPaddingStride_grad(self):
rng = numpy.random.RandomState(utt.fetch_seed()) rng = numpy.random.RandomState(utt.fetch_seed())
imgsizes = ((10, 10), (10, 5), (5, 5)) # maxpool, stride, padding, input sizes
maxpoolsizes = ((5, 3), (3, 5), (3, 3)) examples = (
stridesizes = ((3, 2), (2, 3), (3, 3)) ((10,), (5,), (3,), (2,)),
paddingsizes = ((2, 2), (2, 1), (2, 2)) ((10,), (5,), (3,), (2, 2)),
((10,), (5,), (3,), (1, 1, 2)),
((10, 10), (5, 3), (3, 2), (1, 1, 2, 2)),
((10, 5), (3, 5), (2, 3), (1, 1, 2, 1)),
((5, 5), (3, 3), (3, 3), (1, 1, 2, 2)),
((5, 5, 5), (3, 3, 3), (3, 3, 3), (1, 1, 2, 2, 2)),
)
# average_inc_pad and average_exc_pad do not # average_inc_pad and average_exc_pad do not
# support grad with padding # support grad with padding
for mode in ['max', 'sum']: for mode in ['max', 'sum']:
for i in range(len(imgsizes)): for example in examples:
imgsize = imgsizes[i] (maxpoolshp, stridesize, paddingsize, inputsize) = example
imval = rng.rand(1, 1, imgsize[0], imgsize[1]) * 10.0 imval = rng.rand(*inputsize) * 10.0
maxpoolsize = maxpoolsizes[i]
stridesize = stridesizes[i]
paddingsize = paddingsizes[i]
def mp(input): def mp(input):
return Pool(ignore_border=True, mode=mode)( return Pool(
input, maxpoolsize, stridesize, paddingsize) ndim=len(maxpoolshp),
ignore_border=True,
mode=mode,
)(input, maxpoolshp, stridesize, paddingsize)
utt.verify_grad(mp, [imval], rng=rng) utt.verify_grad(mp, [imval], rng=rng)
def test_DownsampleFactorMax_grad(self): def test_DownsampleFactorMax_grad(self):
rng = numpy.random.RandomState(utt.fetch_seed()) rng = numpy.random.RandomState(utt.fetch_seed())
maxpoolshps = ((1, 1), (3, 2), (2, 3)) # maxpool, input sizes
imval = rng.rand(2, 3, 3, 4) * 10.0 examples = (
# more variance means numeric gradient will be more accurate ((2,), (3,)),
((2,), (2, 3)),
for maxpoolshp, ignore_border, mode in product(maxpoolshps, ((2,), (2, 3, 3)),
[True, False], ((1, 1), (2, 3, 3, 4)),
['max', ((3, 2), (2, 3, 3, 4)),
'sum', ((2, 3), (2, 3, 3, 4)),
'average_inc_pad', ((1, 1, 1), (2, 3, 3)),
'average_exc_pad']): ((3, 2, 2), (2, 3, 3, 4)),
((2, 3, 2), (2, 3, 3, 4, 4)),
((2, 2, 3), (2, 3, 3, 4, 4)),
)
for example, ignore_border, mode in product(examples,
[True, False],
['max',
'sum',
'average_inc_pad',
'average_exc_pad']):
(maxpoolshp, inputsize) = example
imval = rng.rand(*inputsize) * 10.0
# more variance means numeric gradient will be more accurate
def mp(input): def mp(input):
return Pool(ignore_border=ignore_border, mode=mode)( return Pool(ndim=len(maxpoolshp),
input, maxpoolshp) ignore_border=ignore_border,
mode=mode)(input, maxpoolshp)
utt.verify_grad(mp, [imval], rng=rng) utt.verify_grad(mp, [imval], rng=rng)
def test_DownsampleFactorMax_grad_st(self): # pool, stride, input sizes
# (pool window shape, stride, input shape) triples shared by the
# stride-based gradient tests; covers 1-D, 2-D and 3-D pooling,
# strides smaller/equal/larger than the window, and inputs smaller
# than the window.
pool_grad_st_examples = (
    ((1,), (1,), (16,)),
    ((1,), (3,), (1, 16)),
    ((1,), (5,), (1, 2, 16)),
    ((2,), (1,), (16,)),
    ((2,), (3,), (1, 16)),
    ((2,), (5,), (1, 2, 16)),
    ((1, 1), (1, 1), (1, 2, 16, 16)),
    ((1, 1), (3, 3), (1, 2, 16, 16)),
    ((1, 1), (5, 7), (1, 2, 16, 16)),
    ((3, 3), (1, 1), (1, 2, 16, 16)),
    ((3, 3), (3, 3), (1, 2, 16, 16)),
    ((3, 3), (5, 7), (1, 2, 16, 16)),
    ((5, 3), (1, 1), (1, 2, 16, 16)),
    ((5, 3), (3, 3), (1, 2, 16, 16)),
    ((5, 3), (5, 7), (1, 2, 16, 16)),
    ((5, 1, 2), (1, 1, 1), (16, 3, 16)),
    ((5, 1, 2), (3, 1, 2), (1, 16, 3, 16)),
    ((5, 1, 2), (5, 1, 4), (1, 2, 16, 3, 16)),
    ((5, 3), (3, 2), (1, 2, 16, 16)),
    ((5, 3), (7, 5), (1, 2, 16, 16)),
    ((5, 3), (10, 6), (1, 2, 16, 16)),
    ((5, 5), (1, 1), (1, 2, 8, 5)),
    ((3, 2), (2, 3), (1, 2, 8, 5)),
    ((7, 7), (10, 10), (1, 2, 8, 5)),
    ((9, 9), (1, 1), (1, 2, 8, 5)),
)
@parameterized.expand(product(pool_grad_st_examples,
[True, False],
['max',
'sum',
'average_inc_pad',
'average_exc_pad']),
testcase_func_name=utt.custom_name_func)
def test_DownsampleFactorMax_grad_st(self, example, ignore_border, mode):
# checks the gradient for the case that stride is used # checks the gradient for the case that stride is used
rng = numpy.random.RandomState(utt.fetch_seed()) rng = numpy.random.RandomState(utt.fetch_seed())
maxpoolshps = ((1, 1), (3, 3), (5, 3))
stridesizes = ((1, 1), (3, 3), (5, 7))
imval = rng.rand(1, 2, 16, 16)
for maxpoolshp, ignore_border, mode, stride in product(maxpoolshps,
[True, False],
['max',
'sum',
'average_inc_pad',
'average_exc_pad'],
stridesizes):
def mp(input):
return Pool(ignore_border=ignore_border, mode=mode)(
input, maxpoolshp, stride)
utt.verify_grad(mp, [imval], rng=rng)
def test_DownsampleFactorMax_grad_st_extra(self): (maxpoolshp, stridesize, inputsize) = example
# checks the gradient for the case imval = rng.rand(*inputsize)
# that stride is used for extra examples
rng = numpy.random.RandomState(utt.fetch_seed())
maxpoolshps = ((5, 3), (5, 3), (5, 3), (5, 5), (3, 2), (7, 7), (9, 9))
stridesizes = ((3, 2), (7, 5), (10, 6), (1, 1),
(2, 3), (10, 10), (1, 1))
imvsizs = ((16, 16), (16, 16), (16, 16), (8, 5),
(8, 5), (8, 5), (8, 5))
for mode in ['max', 'sum', 'average_inc_pad', 'average_exc_pad']: def mp(input):
for indx in numpy.arange(len(maxpoolshps)): return Pool(ndim=len(maxpoolshp),
imvsize = imvsizs[indx] ignore_border=ignore_border,
imval = rng.rand(1, 2, imvsize[0], imvsize[1]) mode=mode)(input, maxpoolshp, stridesize)
stride = stridesizes[indx] utt.verify_grad(mp, [imval], rng=rng)
maxpoolshp = maxpoolshps[indx]
for ignore_border in [True, False]:
def mp(input):
return Pool(ignore_border=ignore_border, mode=mode)(
input, maxpoolshp, stride)
utt.verify_grad(mp, [imval], rng=rng)
def test_DownsampleFactorMaxGrad_grad(self): def test_DownsampleFactorMaxGrad_grad(self):
rng = numpy.random.RandomState(utt.fetch_seed()) rng = numpy.random.RandomState(utt.fetch_seed())
maxpoolshps = ((1, 1), (3, 2), (2, 3)) # maxpool, input sizes
imval = rng.rand(2, 3, 3, 4) * 10.0 examples = (
# more variance means numeric gradient will be more accurate ((2,), (2,)),
((2,), (2, 3)),
for maxpoolshp in maxpoolshps: ((1, 1), (2, 3, 3, 4)),
((3, 2), (2, 3, 3, 4)),
((2, 3), (2, 3, 3, 4)),
((1, 1, 1), (2, 3, 3, 4)),
((3, 2, 2), (2, 3, 3, 4)),
((2, 3, 2), (2, 3, 3, 4)),
((2, 2, 3), (2, 3, 3, 4)),
((2, 2, 3), (2, 1, 3, 3, 4)),
)
for (maxpoolshp, inputsize) in examples:
imval = rng.rand(*inputsize) * 10.0
# more variance means numeric gradient will be more accurate
for ignore_border in [True, False]: for ignore_border in [True, False]:
# print 'maxpoolshp =', maxpoolshp # print 'maxpoolshp =', maxpoolshp
# print 'ignore_border =', ignore_border # print 'ignore_border =', ignore_border
# The shape of the gradient will be the shape of the output # The shape of the gradient will be the shape of the output
grad_shape = Pool.out_shape( grad_shape = Pool.out_shape(
imval.shape, maxpoolshp, ignore_border=ignore_border) imval.shape, maxpoolshp, ndim=len(maxpoolshp), ignore_border=ignore_border)
grad_val = rng.rand(*grad_shape) * 10.0 grad_val = rng.rand(*grad_shape) * 10.0
def mp(input, grad): def mp(input, grad):
out = Pool(ignore_border=ignore_border)(input, maxpoolshp) out = Pool(
grad_op = MaxPoolGrad(ignore_border=ignore_border) ndim=len(maxpoolshp),
ignore_border=ignore_border)(input, maxpoolshp)
grad_op = MaxPoolGrad(
ndim=len(maxpoolshp),
ignore_border=ignore_border)
return grad_op(input, out, grad, maxpoolshp) return grad_op(input, out, grad, maxpoolshp)
utt.verify_grad(mp, [imval, grad_val], rng=rng) utt.verify_grad(mp, [imval, grad_val], rng=rng)
def test_AveragePoolGrad_grad(self): def test_AveragePoolGrad_grad(self):
rng = numpy.random.RandomState(utt.fetch_seed()) rng = numpy.random.RandomState(utt.fetch_seed())
avgpoolshps = ((1, 1), (3, 2), (2, 3)) # avgpool, input sizes
imval = rng.rand(2, 3, 3, 4) * 10.0 examples = (
# more variance means numeric gradient will be more accurate ((2,), (2,)),
((2,), (2, 3)),
for avgpoolshp in avgpoolshps: ((1, 1), (2, 3, 3, 4)),
((3, 2), (2, 3, 3, 4)),
((2, 3), (2, 3, 3, 4)),
((1, 1, 1), (2, 3, 3, 4)),
((3, 2, 2), (2, 3, 3, 4)),
((2, 3, 2), (2, 3, 3, 4)),
((2, 2, 3), (2, 3, 3, 4)),
((2, 2, 3), (2, 1, 3, 3, 4)),
)
for (avgpoolshp, inputsize) in examples:
imval = rng.rand(*inputsize) * 10.0
# more variance means numeric gradient will be more accurate
for ignore_border in [True, False]: for ignore_border in [True, False]:
for mode in ['sum', 'average_inc_pad', 'average_exc_pad']: for mode in ['sum', 'average_inc_pad', 'average_exc_pad']:
# print 'maxpoolshp =', maxpoolshp # print 'maxpoolshp =', maxpoolshp
# print 'ignore_border =', ignore_border # print 'ignore_border =', ignore_border
# The shape of the gradient will be the shape of the output # The shape of the gradient will be the shape of the output
grad_shape = Pool.out_shape( grad_shape = Pool.out_shape(
imval.shape, avgpoolshp, ignore_border=ignore_border) imval.shape, avgpoolshp, ndim=len(avgpoolshp),
ignore_border=ignore_border)
grad_val = rng.rand(*grad_shape) * 10.0 grad_val = rng.rand(*grad_shape) * 10.0
def mp(input, grad): def mp(input, grad):
grad_op = AveragePoolGrad(ignore_border=ignore_border, grad_op = AveragePoolGrad(
mode=mode) ndim=len(avgpoolshp),
ignore_border=ignore_border, mode=mode)
return grad_op(input, grad, avgpoolshp) return grad_op(input, grad, avgpoolshp)
utt.verify_grad(mp, [imval, grad_val], rng=rng) utt.verify_grad(mp, [imval, grad_val], rng=rng)
def test_DownsampleFactorMaxGrad_grad_st(self): @parameterized.expand(product(pool_grad_st_examples,
[True, False]),
testcase_func_name=utt.custom_name_func)
def test_DownsampleFactorMaxGrad_grad_st(self, example, ignore_border):
# checks the gradient of the gradient for # checks the gradient of the gradient for
# the case that stride is used # the case that stride is used
rng = numpy.random.RandomState(utt.fetch_seed()) rng = numpy.random.RandomState(utt.fetch_seed())
maxpoolshps = ((1, 1), (3, 3), (5, 3)) (maxpoolshp, stride, inputsize) = example
stridesizes = ((1, 1), (3, 3), (5, 7)) imval = rng.rand(*inputsize)
imval = rng.rand(1, 2, 16, 16) grad_shape = Pool.out_shape(
imval.shape, maxpoolshp, ndim=len(maxpoolshp),
ignore_border=ignore_border, st=stride)
for maxpoolshp in maxpoolshps: # skip the grad verification when the output is empty
for ignore_border in [True, False]: if numpy.prod(grad_shape) != 0:
for stride in stridesizes: grad_val = rng.rand(*grad_shape)
grad_shape = Pool.out_shape(
imval.shape, maxpoolshp,
ignore_border=ignore_border, st=stride)
grad_val = rng.rand(*grad_shape)
def mp(input, grad): def mp(input, grad):
out = Pool(ignore_border=ignore_border)( out = Pool(
input, maxpoolshp, stride) ndim=len(maxpoolshp),
grad_op = MaxPoolGrad(ignore_border=ignore_border) ignore_border=ignore_border)(input, maxpoolshp, stride)
return grad_op(input, out, grad, maxpoolshp, stride) grad_op = MaxPoolGrad(
ndim=len(maxpoolshp),
ignore_border=ignore_border)
return grad_op(input, out, grad, maxpoolshp, stride)
utt.verify_grad(mp, [imval, grad_val], rng=rng) utt.verify_grad(mp, [imval, grad_val], rng=rng)
def test_AveragePoolGrad_grad_st(self): @parameterized.expand(product(pool_grad_st_examples,
[True, False],
['sum',
'average_inc_pad',
'average_exc_pad']),
testcase_func_name=utt.custom_name_func)
def test_AveragePoolGrad_grad_st(self, example, ignore_border, mode):
# checks the gradient of the gradient for # checks the gradient of the gradient for
# the case that stride is used # the case that stride is used
rng = numpy.random.RandomState(utt.fetch_seed()) rng = numpy.random.RandomState(utt.fetch_seed())
avgpoolshps = ((1, 1), (3, 3), (5, 3)) (avgpoolshp, stride, inputsize) = example
stridesizes = ((1, 1), (3, 3), (5, 7)) imval = rng.rand(*inputsize)
imval = rng.rand(1, 2, 16, 16) grad_shape = Pool.out_shape(
imval.shape, avgpoolshp,
for avgpoolshp in avgpoolshps: ndim=len(avgpoolshp),
for ignore_border in [True, False]: ignore_border=ignore_border, st=stride)
for mode in ['sum', 'average_inc_pad', 'average_exc_pad']:
for stride in stridesizes:
grad_shape = Pool.out_shape(
imval.shape, avgpoolshp,
ignore_border=ignore_border, st=stride)
grad_val = rng.rand(*grad_shape)
def mp(input, grad):
grad_op = AveragePoolGrad(
ignore_border=ignore_border, mode=mode)
return grad_op(input, grad, avgpoolshp, stride)
utt.verify_grad(mp, [imval, grad_val], rng=rng)
def test_DownsampleFactorMaxGrad_grad_st_extra(self):
# checks the gradient of the gradient for the case that
# stride is used for extra examples
rng = numpy.random.RandomState(utt.fetch_seed())
maxpoolshps = ((5, 3), (5, 3), (5, 3), (5, 5), (3, 2), (7, 7), (9, 9))
stridesizes = ((3, 2), (7, 5), (10, 6), (1, 1),
(2, 3), (10, 10), (1, 1))
imvsizs = ((16, 16), (16, 16), (16, 16), (8, 5),
(8, 5), (8, 5), (8, 5))
for indx in numpy.arange(len(maxpoolshps)):
imvsize = imvsizs[indx]
imval = rng.rand(1, 2, imvsize[0], imvsize[1])
stride = stridesizes[indx]
maxpoolshp = maxpoolshps[indx]
for ignore_border in [True, False]:
grad_shape = Pool.out_shape(
imval.shape, maxpoolshp,
ignore_border=ignore_border, st=stride)
grad_val = rng.rand(*grad_shape)
def mp(input, grad):
out = Pool(ignore_border=ignore_border)(input, maxpoolshp,
stride)
grad_op = MaxPoolGrad(ignore_border=ignore_border)
return grad_op(input, out, grad, maxpoolshp, stride)
# skip the grad verification when the output is empty
if numpy.prod(grad_shape) == 0:
continue
utt.verify_grad(mp, [imval, grad_val], rng=rng)
def test_AveragePoolGrad_grad_st_extra(self):
# checks the gradient of the gradient for the case that
# stride is used for extra examples
rng = numpy.random.RandomState(utt.fetch_seed())
avgpoolshps = ((5, 3), (5, 3), (5, 3), (5, 5), (3, 2), (7, 7), (9, 9))
stridesizes = ((3, 2), (7, 5), (10, 6), (1, 1),
(2, 3), (10, 10), (1, 1))
imvsizs = ((16, 16), (16, 16), (16, 16), (8, 5),
(8, 5), (8, 5), (8, 5))
for indx in numpy.arange(len(avgpoolshps)): # skip the grad verification when the output is empty
imvsize = imvsizs[indx] if numpy.prod(grad_shape) != 0:
imval = rng.rand(1, 2, imvsize[0], imvsize[1]) grad_val = rng.rand(*grad_shape)
stride = stridesizes[indx]
avgpoolshp = avgpoolshps[indx]
for ignore_border in [True, False]:
for mode in ['sum', 'average_inc_pad', 'average_exc_pad']:
grad_shape = Pool.out_shape(
imval.shape, avgpoolshp,
ignore_border=ignore_border, st=stride)
grad_val = rng.rand(*grad_shape)
def mp(input, grad): def mp(input, grad):
grad_op = AveragePoolGrad(ignore_border=ignore_border, grad_op = AveragePoolGrad(
mode=mode) ndim=len(avgpoolshp),
return grad_op(input, grad, avgpoolshp, stride) ignore_border=ignore_border,
mode=mode)
return grad_op(input, grad, avgpoolshp, stride)
# skip the grad verification when the output is empty utt.verify_grad(mp, [imval, grad_val], rng=rng)
if numpy.prod(grad_shape) == 0:
continue
utt.verify_grad(mp, [imval, grad_val], rng=rng)
def test_DownsampleFactorMaxPaddingStride_grad_grad(self): def test_DownsampleFactorMaxPaddingStride_grad_grad(self):
rng = numpy.random.RandomState(utt.fetch_seed()) rng = numpy.random.RandomState(utt.fetch_seed())
imgsizes = ((10, 10), (10, 5), (5, 5)) # maxpool, stride, padding, input sizes
maxpoolsizes = ((5, 3), (3, 5), (3, 3)) examples = (
stridesizes = ((3, 2), (2, 3), (3, 3)) ((3,), (2,), (2,), (10,)),
paddingsizes = ((2, 2), (2, 1), (2, 2)) ((3,), (2,), (2,), (2, 10,)),
((3,), (2,), (2,), (2, 1, 10,)),
for i in range(len(imgsizes)): ((5, 3), (3, 2), (2, 2), (1, 1, 10, 10)),
imgsize = imgsizes[i] ((5, 3), (3, 2), (2, 2), (1, 1, 10, 10)),
imval = rng.rand(1, 1, imgsize[0], imgsize[1]) * 10.0 ((3, 5), (2, 3), (2, 1), (1, 1, 10, 5)),
maxpoolsize = maxpoolsizes[i] ((3, 3), (3, 3), (2, 2), (1, 1, 5, 5)),
stridesize = stridesizes[i] ((5, 3, 3), (3, 2, 2), (2, 2, 2), (1, 1, 10, 5, 5)),
paddingsize = paddingsizes[i] ((3, 5, 3), (2, 3, 2), (2, 1, 2), (1, 1, 5, 10, 5)),
((3, 3, 5), (2, 2, 3), (2, 2, 1), (1, 1, 5, 5, 10)),
)
for (maxpoolshp, stridesize, paddingsize, inputsize) in examples:
imval = rng.rand(*inputsize) * 10.0
grad_shape = Pool.out_shape(imval.shape, grad_shape = Pool.out_shape(imval.shape,
maxpoolsize, st=stridesize, maxpoolshp,
ndim=len(maxpoolshp),
st=stridesize,
ignore_border=True, ignore_border=True,
padding=paddingsize) padding=paddingsize)
grad_val = rng.rand(*grad_shape) * 10.0 grad_val = rng.rand(*grad_shape) * 10.0
def mp(input, grad): def mp(input, grad):
out = Pool(ignore_border=True)(input, maxpoolsize, stridesize, out = Pool(
paddingsize) ndim=len(maxpoolshp),
grad_op = MaxPoolGrad(ignore_border=True) ignore_border=True,
return grad_op(input, out, grad, maxpoolsize, stridesize, )(input, maxpoolshp, stridesize, paddingsize)
paddingsize) grad_op = MaxPoolGrad(ndim=len(maxpoolshp),
ignore_border=True)
return grad_op(input, out, grad, maxpoolshp, stridesize, paddingsize)
utt.verify_grad(mp, [imval, grad_val], rng=rng) utt.verify_grad(mp, [imval, grad_val], rng=rng)
def test_AveragePoolPaddingStride_grad_grad(self): def test_AveragePoolPaddingStride_grad_grad(self):
rng = numpy.random.RandomState(utt.fetch_seed()) rng = numpy.random.RandomState(utt.fetch_seed())
imgsizes = ((10, 10), (10, 5), (5, 5)) # avgpool, stride, padding, input sizes
avgpoolsizes = ((5, 3), (3, 5), (3, 3)) examples = (
stridesizes = ((3, 2), (2, 3), (3, 3)) ((3,), (2,), (2,), (10,)),
paddingsizes = ((2, 2), (2, 1), (2, 2)) ((3,), (2,), (2,), (2, 10,)),
((3,), (2,), (2,), (2, 1, 10,)),
for i in range(len(imgsizes)): ((5, 3), (3, 2), (2, 2), (1, 1, 10, 10)),
imgsize = imgsizes[i] ((5, 3), (3, 2), (2, 2), (1, 1, 10, 10)),
imval = rng.rand(1, 1, imgsize[0], imgsize[1]) * 10.0 ((3, 5), (2, 3), (2, 1), (1, 1, 10, 5)),
avgpoolsize = avgpoolsizes[i] ((3, 3), (3, 3), (2, 2), (1, 1, 5, 5)),
stridesize = stridesizes[i] ((5, 3, 3), (3, 2, 2), (2, 2, 2), (1, 1, 10, 5, 5)),
paddingsize = paddingsizes[i] ((3, 5, 3), (2, 3, 2), (2, 1, 2), (1, 1, 5, 10, 5)),
((3, 3, 5), (2, 2, 3), (2, 2, 1), (1, 1, 5, 5, 10)),
)
for (avgpoolshp, stridesize, paddingsize, inputsize) in examples:
imval = rng.rand(*inputsize) * 10.0
# 'average_exc_pad' with non-zero padding is not implemented # 'average_exc_pad' with non-zero padding is not implemented
for mode in ['sum', 'average_inc_pad']: for mode in ['sum', 'average_inc_pad']:
grad_shape = Pool.out_shape(imval.shape, grad_shape = Pool.out_shape(imval.shape,
avgpoolsize, st=stridesize, avgpoolshp,
ignore_border=True, padding=paddingsize) ndim=len(avgpoolshp),
st=stridesize,
ignore_border=True,
padding=paddingsize)
grad_val = rng.rand(*grad_shape) * 10.0 grad_val = rng.rand(*grad_shape) * 10.0
def mp(input, grad): def mp(input, grad):
grad_op = AveragePoolGrad(ignore_border=True, mode=mode) grad_op = AveragePoolGrad(ndim=len(avgpoolshp),
return grad_op(input, grad, avgpoolsize, stridesize, paddingsize) ignore_border=True,
mode=mode)
return grad_op(input, grad, avgpoolshp, stridesize, paddingsize)
utt.verify_grad(mp, [imval, grad_val], rng=rng) utt.verify_grad(mp, [imval, grad_val], rng=rng)
def test_DownsampleFactorMax_hessian(self): def test_DownsampleFactorMax_hessian(self):
...@@ -630,26 +831,31 @@ class TestDownsampleFactorMax(utt.InferShapeTester): ...@@ -630,26 +831,31 @@ class TestDownsampleFactorMax(utt.InferShapeTester):
def test_DownsampleFactorMaxGradGrad_grad(self): def test_DownsampleFactorMaxGradGrad_grad(self):
rng = numpy.random.RandomState(utt.fetch_seed()) rng = numpy.random.RandomState(utt.fetch_seed())
imgsizes = ((10, 10), (10, 5), (5, 5)) # maxpool, stride, padding, input sizes
maxpoolsizes = ((5, 3), (3, 5), (3, 3)) examples = (
stridesizes = ((3, 2), (2, 3), (3, 3)) ((3,), (2,), (2,), (10,)),
paddingsizes = ((2, 2), (2, 1), (2, 2)) ((3,), (2,), (2,), (2, 10,)),
((3,), (2,), (2,), (2, 1, 10,)),
for i in range(len(imgsizes)): ((5, 3), (3, 2), (2, 2), (1, 1, 10, 10)),
imgsize = imgsizes[i] ((5, 3), (3, 2), (2, 2), (1, 1, 10, 10)),
imval1 = rng.rand(1, 1, imgsize[0], imgsize[1]) * 10.0 ((3, 5), (2, 3), (2, 1), (1, 1, 10, 5)),
imval2 = rng.rand(1, 1, imgsize[0], imgsize[1]) * 10.0 ((3, 3), (3, 3), (2, 2), (1, 1, 5, 5)),
maxpoolsize = maxpoolsizes[i] ((5, 3, 3), (3, 2, 2), (2, 2, 2), (1, 1, 10, 5, 5)),
stridesize = stridesizes[i] ((3, 5, 3), (2, 3, 2), (2, 1, 2), (1, 1, 5, 10, 5)),
paddingsize = paddingsizes[i] ((3, 3, 5), (2, 2, 3), (2, 2, 1), (1, 1, 5, 5, 10)),
)
for (maxpoolshp, stridesize, paddingsize, inputsize) in examples:
imval1 = rng.rand(*inputsize) * 10.0
imval2 = rng.rand(*inputsize) * 10.0
def mp(input1, input2): def mp(input1, input2):
op1 = Pool(ignore_border=True) op1 = Pool(ndim=len(maxpoolshp), ignore_border=True)
pooled_out = op1(input1, maxpoolsize, stride=stridesize, pooled_out = op1(input1, maxpoolshp, stridesize, paddingsize)
pad=paddingsize) op2 = DownsampleFactorMaxGradGrad(
op2 = DownsampleFactorMaxGradGrad(ignore_border=True) ndim=len(maxpoolshp),
out = op2(input1, pooled_out, input2, ws=maxpoolsize, ignore_border=True)
stride=stridesize, pad=paddingsize) out = op2(input1, pooled_out, input2, maxpoolshp, stridesize, paddingsize)
return out return out
utt.verify_grad(mp, [imval1, imval2], rng=rng) utt.verify_grad(mp, [imval1, imval2], rng=rng)
...@@ -679,6 +885,32 @@ class TestDownsampleFactorMax(utt.InferShapeTester): ...@@ -679,6 +885,32 @@ class TestDownsampleFactorMax(utt.InferShapeTester):
mode=mode) mode=mode)
utt.verify_grad(mp, [imval], rng=rng) utt.verify_grad(mp, [imval], rng=rng)
def test_max_pool_3d_3D(self):
rng = numpy.random.RandomState(utt.fetch_seed())
maxpoolshps = ((1, 1, 1), (3, 2, 1))
imval = rng.rand(4, 5, 6)
images = tensor.dtensor3()
for maxpoolshp, ignore_border, mode in product(maxpoolshps,
[True, False],
['max', 'sum',
'average_inc_pad',
'average_exc_pad']):
# print 'maxpoolshp =', maxpoolshp
# print 'ignore_border =', ignore_border
numpy_output_val = self.numpy_max_pool_nd(imval, maxpoolshp,
ignore_border,
mode=mode)
output = pool_3d(images, maxpoolshp, ignore_border,
mode=mode)
output_val = function([images], output)(imval)
utt.assert_allclose(output_val, numpy_output_val)
def mp(input):
return pool_3d(input, maxpoolshp, ignore_border,
mode=mode)
utt.verify_grad(mp, [imval], rng=rng)
def test_max_pool_2d_2D_same_size(self): def test_max_pool_2d_2D_same_size(self):
rng = numpy.random.RandomState(utt.fetch_seed()) rng = numpy.random.RandomState(utt.fetch_seed())
test_input_array = numpy.array([[[ test_input_array = numpy.array([[[
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论