Commit a16e91f7 authored by Gijs van Tulder

theano.tensor.signal.Pool with 3D support.

Parent: 172e699c
......@@ -35,7 +35,7 @@ from .nnet import GpuSoftmax
from .opt import (gpu_seqopt, register_opt,
op_lifter, register_opt2)
from .opt_util import alpha_merge, output_merge, inplace_allocempty
from .opt_util import alpha_merge, output_merge, inplace_allocempty, pad_dims, unpad_dims
from theano.configdefaults import SUPPORTED_DNN_CONV_ALGO_BWD_FILTER
......@@ -1253,7 +1253,7 @@ class GpuDnnPoolGrad(DnnBase):
return [shape[0]]
def dnn_pool(img, ws, stride=(1, 1), mode='max', pad=(0, 0)):
def dnn_pool(img, ws, stride=None, mode='max', pad=None):
"""
GPU pooling using cuDNN from NVIDIA.
......@@ -1267,13 +1267,13 @@ def dnn_pool(img, ws, stride=(1, 1), mode='max', pad=(0, 0)):
img
Images to do the pooling over.
ws : tuple
Subsampling window size.
Subsampling window size. Should have 2 or 3 elements.
stride : tuple
Subsampling stride (default: (1, 1)).
Subsampling stride (default: (1, 1) or (1, 1, 1)).
mode : {'max', 'average_inc_pad', 'average_exc_pad', 'sum'}
pad : tuple
(padX, padY) or (padX, padY, padZ)
default: (0, 0)
default: (0, 0) or (0, 0, 0)
.. warning:: The cuDNN library only works with GPU that have a compute
capability of 3.0 or higer. This means that older GPU will not
......@@ -1285,6 +1285,10 @@ def dnn_pool(img, ws, stride=(1, 1), mode='max', pad=(0, 0)):
"""
img = gpu_contiguous(img)
if stride is None:
stride = (1,) * len(ws)
if pad is None:
pad = (0,) * len(ws)
if mode == "sum":
ret = GpuDnnPool(mode="average_inc_pad")(img, ws, stride, pad)
context_name = ret.type.context_name
......@@ -1868,9 +1872,18 @@ def local_gpua_pool_dnn_alternative(op, ctx_name, inputs, outputs):
if not op.ignore_border:
return
img, ws, stride, pad = inputs
img = as_gpuarray_variable(img, ctx_name)
nd = op.ndim if op.ndim else (img.ndim - 2)
if nd not in (2, 3):
return
img = gpu_contiguous(as_gpuarray_variable(img, ctx_name))
mode = op.mode
return dnn_pool(gpu_contiguous(img), ws, stride=stride, pad=pad, mode=mode)
if img.ndim == nd + 2:
return dnn_pool(img, ws, stride=stride, pad=pad, mode=mode)
else:
# reshape to 4D or 5D with 2 non-pooling dimensions
img_padded = pad_dims(img, 2, nd)
ret_padded = dnn_pool(img_padded, ws, stride=stride, pad=pad, mode=mode)
return unpad_dims(ret_padded, img, 2, nd)
@register_opt('cudnn', 'fast_compile')
......@@ -1882,17 +1895,33 @@ def local_gpua_pool_dnn_grad_stride(op, ctx_name, inputs, outputs):
if not op.ignore_border:
return
inp, out, out_grad, ws, stride, pad = inputs
inp = as_gpuarray_variable(inp, ctx_name)
out = as_gpuarray_variable(out, ctx_name)
out_grad = as_gpuarray_variable(out_grad, ctx_name)
nd = op.ndim if op.ndim else (inp.ndim - 2)
if nd not in (2, 3):
return
inp = gpu_contiguous(as_gpuarray_variable(inp, ctx_name))
out = gpu_contiguous(as_gpuarray_variable(out, ctx_name))
out_grad = gpu_contiguous(as_gpuarray_variable(out_grad, ctx_name))
mode = op.mode
return GpuDnnPoolGrad(mode=mode)(gpu_contiguous(inp),
gpu_contiguous(out),
gpu_contiguous(out_grad),
ws,
stride,
pad)
if inp.ndim == nd + 2:
return GpuDnnPoolGrad(mode=mode)(inp,
out,
out_grad,
ws,
stride,
pad)
else:
# reshape to 4D or 5D with 2 non-pooling dimensions
inp_padded = pad_dims(inp, 2, nd)
out_padded = pad_dims(out, 2, nd)
out_grad_padded = pad_dims(out_grad, 2, nd)
ret_padded = GpuDnnPoolGrad(mode=mode)(inp_padded,
out_padded,
out_grad_padded,
ws,
stride,
pad)
return unpad_dims(ret_padded, inp, 2, nd)
@register_opt('cudnn', 'fast_compile')
......@@ -1904,16 +1933,28 @@ def local_gpua_avg_pool_dnn_grad_stride(op, ctx_name, inputs, outputs):
if not op.ignore_border:
return
inp, out_grad, ws, stride, pad = inputs
inp = as_gpuarray_variable(inp, ctx_name)
out_grad = as_gpuarray_variable(out_grad, ctx_name)
nd = op.ndim if op.ndim else (inp.ndim - 2)
if nd not in (2, 3):
return
inp = gpu_contiguous(as_gpuarray_variable(inp, ctx_name))
out_grad = gpu_contiguous(as_gpuarray_variable(out_grad, ctx_name))
mode = op.mode
cg = gpu_contiguous(out_grad)
# We reuse cg because cuDNN does not use the value of the `out`
# argument but still checks its shape for average pooling. This
# has been observed in v2 and v3 as far as I know.
return GpuDnnPoolGrad(mode=mode)(gpu_contiguous(inp), cg, cg, ws, stride, pad)
if inp.ndim == nd + 2:
# We reuse out_grad because cuDNN does not use the value of the `out`
# argument but still checks its shape for average pooling. This
# has been observed in v2 and v3 as far as I know.
return GpuDnnPoolGrad(mode=mode)(inp, out_grad, out_grad, ws, stride, pad)
else:
inp_padded = pad_dims(inp, 2, nd)
out_grad_padded = pad_dims(out_grad, 2, nd)
ret_padded = GpuDnnPoolGrad(mode=mode)(inp_padded,
out_grad_padded,
out_grad_padded,
ws,
stride,
pad)
return unpad_dims(ret_padded, inp, 2, nd)
@register_opt('cudnn', 'fast_compile')
......
......@@ -3,12 +3,12 @@ from functools import wraps
import numpy
from theano import scalar as scal, Constant
from theano import tensor, scalar as scal, Constant
from theano.gof import local_optimizer
from theano.tensor import (DimShuffle, get_scalar_constant_value,
NotScalarConstantError)
from .basic_ops import GpuFromHost, HostFromGpu, GpuAllocEmpty, gpu_alloc_empty
from .basic_ops import GpuFromHost, HostFromGpu, GpuAllocEmpty, GpuReshape, gpu_alloc_empty
from .elemwise import GpuDimShuffle, GpuElemwise
_one = scal.constant(numpy.asarray(1.0, dtype='float32'))
......@@ -329,3 +329,48 @@ def inplace_allocempty(op, idx):
return maker(node, inputs)
return opt
return wrapper
def pad_dims(input, leftdims, rightdims):
    """Reshapes the input to a (leftdims + rightdims) tensor.

    This helper function is used to convert pooling inputs with arbitrary
    non-pooling dimensions to the correct number of dimensions for the
    GPU pooling ops.

    This reduces or expands the number of leading (non-image) dimensions
    to exactly `leftdims`, by adding extra dimensions on the left or by
    combining some existing dimensions on the left of the input.

    Parameters
    ----------
    input
        Symbolic tensor with at least `rightdims` dimensions.
    leftdims : int
        Number of leading dimensions the result should have (e.g. 2 for
        the usual batch and channel axes of the pooling ops).
    rightdims : int
        Number of trailing (image/pooling) dimensions to keep unchanged.
    """
    assert input.ndim >= rightdims
    # already has the target dimensionality: nothing to do
    if input.ndim == (leftdims + rightdims):
        return input
    # extract image dimensions
    img_shape = input.shape[-rightdims:]
    # collapse all leading dimensions into one "batch" size and store it
    # as a 1-element vector so it can be joined with the other shapes
    batch_size = tensor.prod(input.shape[:-rightdims])
    batch_size = tensor.shape_padright(batch_size, 1)
    # store in the required shape, for example as a 4D tensor
    # with shape: (batch_size, 1, height, width)
    new_shape = tensor.cast(tensor.join(0, batch_size,
                                        tensor.as_tensor([1] * (leftdims - 1)),
                                        img_shape), 'int64')
    input_ND = GpuReshape(leftdims + rightdims)(input, new_shape)
    return input_ND
def unpad_dims(output, input, leftdims, rightdims):
    """Reshapes the output after pad_dims.

    This reverts the padding by `pad_dims`: the `rightdims` trailing
    dimensions of `output` are kept, and the leading dimensions are
    restored from the original `input`.
    """
    # nothing was padded: output already matches the caller's shape
    if output.ndim == input.ndim:
        return output
    # restore the output to the original shape
    outshp = tensor.join(0, input.shape[:-rightdims], output.shape[-rightdims:])
    return GpuReshape(input.ndim)(output, outshp)
......@@ -11,11 +11,12 @@ from six import StringIO
import theano.tensor as T
import theano.tests.unittest_tools as utt
from theano.sandbox.neighbours import images2neibs
from theano.tensor.signal.pool import pool_2d
from theano.tensor.signal.pool import MaxPoolGrad, AveragePoolGrad
from theano.tensor.signal.pool import pool_2d, pool_3d
from theano.tensor.signal.pool import Pool, MaxPoolGrad, AveragePoolGrad
from .. import dnn
from ..basic_ops import GpuAllocEmpty
from ..type import gpuarray_shared_constructor
from .config import mode_with_gpu, mode_without_gpu, test_ctx_name
from . import test_nnet
......@@ -166,6 +167,27 @@ def pool_2d_i2n(input, ds=(2, 2), strides=None,
return pooled_output
def pool3d2d(input, ds=(2, 2, 2), strides=None, pad=(0, 0, 0),
             pool_function=T.max, mode='ignore_borders'):
    """Reference 3d pooling built from two 2d pooling passes.

    Test helper: emulates pooling over three trailing dimensions of a 5d
    input (batch, channel, dim0, dim1, dim2) by first pooling over
    (dim1, dim2) with channel and dim0 merged, then pooling over dim0
    with (dim1, dim2) merged. When `strides` is None, non-overlapping
    windows of size `ds` are used.
    """
    if strides is None:
        strides = ds
    shape = input.shape
    # reshape to B, C*0, 1, 2 and do the pooling on 1, 2
    first = input.reshape((shape[0], shape[1] * shape[2], shape[3], shape[4]))
    pooled1 = pool_2d_i2n(first, ds=ds[1:], strides=strides[1:], pad=pad[1:],
                          pool_function=pool_function, mode=mode)
    shp1 = pooled1.shape
    # reshape to B, C, 0, 1*2 and do the pooling on 0
    second = pooled1.reshape((shape[0], shape[1], shape[2], shp1[2] * shp1[3]))
    pooled2 = pool_2d_i2n(second, ds=(ds[0], 1), strides=(strides[0], 1),
                          pad=(pad[0], 0), pool_function=pool_function, mode=mode)
    shp2 = pooled2.shape
    # split the merged axes back into the original 5d layout
    return pooled2.reshape((shape[0], shape[1], shp2[2], shp1[2], shp1[3]))
def test_pooling():
if not dnn.dnn_available(test_ctx_name):
raise SkipTest(dnn.dnn_available.msg)
......@@ -178,7 +200,7 @@ def test_pooling():
x = T.ftensor4()
for mode, pad in product(modes,
((0, 0), (1, 0), (1, 0), (2, 3), (3, 2))):
((0, 0), (1, 0), (0, 1), (2, 3), (3, 2))):
if mode == 'max':
func = T.max
else:
......@@ -278,6 +300,119 @@ def test_pooling():
utt.assert_allclose(c_out, g_out)
def test_pooling_3d():
    """Compare cuDNN 3d pooling (forward and grad) against CPU references."""
    if not dnn.dnn_available(test_ctx_name):
        raise SkipTest(dnn.dnn_available.msg)
    # 'average_exc_pad' is disabled for versions < 4004
    if dnn.version(raises=False) < 4004:
        modes = ('max', 'average_inc_pad')
    else:
        modes = ('max', 'average_inc_pad', 'average_exc_pad')
    x = T.ftensor5()
    for mode, pad in product(modes,
                             ((0, 0, 0), (1, 0, 0), (0, 1, 0), (0, 0, 1),
                              (3, 2, 2), (2, 3, 2), (2, 2, 3))):
        if mode == 'max':
            func = T.max
        else:
            func = T.mean
        # skip: the reference comparison below is not run for padded
        # mean pooling
        if pad != (0, 0, 0) and func is T.mean:
            continue
        for ws in (4, 2, 5):
            for stride in (2, 3):
                if stride > ws:
                    continue
                if pad[0] > stride or pad[1] > stride or pad[2] > stride:
                    # Not implemented
                    continue
                # We will check that the opt introduced it.
                out1 = pool_3d(x, (ws, ws, ws),
                               st=(stride, stride, stride),
                               ignore_border=True,
                               padding=pad, mode=mode)
                out2 = pool3d2d(x, ds=(ws, ws, ws), strides=(stride, stride, stride),
                                pad=pad,
                                pool_function=func)
                mode_without_gpu2 = mode_without_gpu.including()
                mode_without_gpu2.check_isfinite = False
                # f1 must contain the cuDNN pool op; the reference f2 must not
                f1 = theano.function([x], out1, mode=mode_with_gpu)
                assert any([isinstance(node.op, dnn.GpuDnnPool)
                            for node in f1.maker.fgraph.apply_nodes])
                f2 = theano.function([x], out2, mode=mode_without_gpu2)
                assert not any([isinstance(node.op, dnn.GpuDnnPool)
                                for node in f2.maker.fgraph.apply_nodes])
                for shp in [(1, 10, 30, 30, 30),
                            (1, 3, 29, 29, 29),
                            (3, 1, 47, 97, 47),
                            ]:
                    data = numpy.random.normal(0, 1, shp).astype("float32")
                    a = f1(data)
                    b = f2(data)
                    utt.assert_allclose(a, b)
        # Test the grad
        for shp in [(1, 1, 2, 2, 2),
                    (1, 1, 3, 3, 3)]:
            data = numpy.random.normal(0, 1, shp).astype("float32") * 10
            ws = 2
            stride = 2
            if pad[0] > stride or pad[1] > stride or pad[2] > stride:
                # Not implemented
                continue
            # This tests the CPU grad + opt + GPU implementation
            def fn(x):
                return pool_3d(x, (ws, ws, ws), ignore_border=True,
                               padding=pad, mode=mode)
            utt.verify_grad(fn, [data],
                            cast_to_output_type=False,
                            mode=mode_with_gpu)
            # Confirm that the opt would have inserted it.
            fg = theano.function([x], theano.grad(fn(x).sum(), x),
                                 mode=mode_with_gpu)
            assert any([isinstance(node.op, dnn.GpuDnnPoolGrad)
                        for node in fg.maker.fgraph.toposort()])
            # Test the GPU grad + GPU implementation
            def fn(x):
                dnn_op = dnn.dnn_pool(
                    x, ws=(ws, ws, ws),
                    stride=(stride, stride, stride),
                    pad=pad,
                    mode=mode)
                return dnn_op
            utt.verify_grad(fn, [data],
                            cast_to_output_type=False,
                            mode=mode_with_gpu)
            # Confirm that we get the good op.
            fg = theano.function([x], theano.grad(fn(x).sum(), x),
                                 mode=mode_with_gpu)
            assert any([isinstance(node.op, dnn.GpuDnnPoolGrad)
                        for node in fg.maker.fgraph.toposort()])
            g_out = fg(data)
            # Compare against the CPU result
            out = pool_3d(x, (ws, ws, ws),
                          padding=pad,
                          ignore_border=True, mode=mode)
            fc = theano.function([x], theano.grad(out.sum(), x),
                                 mode=mode_without_gpu)
            if mode == 'max':
                assert any([isinstance(node.op, MaxPoolGrad)
                            for node in fc.maker.fgraph.toposort()])
            else:
                assert any([isinstance(node.op, AveragePoolGrad)
                            for node in fc.maker.fgraph.toposort()])
            c_out = fc(data)
            utt.assert_allclose(c_out, g_out)
def test_pooling_with_tensor_vars():
if not dnn.dnn_available(test_ctx_name):
raise SkipTest(dnn.dnn_available.msg)
......@@ -331,6 +466,7 @@ def test_pooling_opt():
if not dnn.dnn_available(test_ctx_name):
raise SkipTest(dnn.dnn_available.msg)
# 2D pooling
x = T.fmatrix()
f = theano.function(
......@@ -344,6 +480,7 @@ def test_pooling_opt():
f(numpy.zeros((10, 10), dtype='float32'))
# gradient of 2D pooling
f = theano.function(
[x],
T.grad(pool_2d(x, ds=(2, 2), mode='average_inc_pad',
......@@ -362,11 +499,86 @@ def test_pooling_opt():
pool_2d(x, ds=(2, 3), mode='sum',
ignore_border=True),
mode=mode_with_gpu)
data = numpy.random.rand(10, 10).astype('float32')
f(data)
# 3D pooling
x = T.ftensor3()
f = theano.function(
[x],
pool_3d(x, ds=(2, 2, 2), mode='average_inc_pad',
ignore_border=True),
mode=mode_with_gpu)
assert any([isinstance(n.op, dnn.GpuDnnPool)
for n in f.maker.fgraph.toposort()])
data = numpy.random.rand(10, 10).astype('float32')
f(data)
f(numpy.zeros((10, 10, 10), dtype='float32'))
# gradient of 3D pooling
f = theano.function(
[x],
T.grad(pool_3d(x, ds=(2, 2, 2), mode='average_inc_pad',
ignore_border=True).sum(),
x),
mode=mode_with_gpu.including("cudnn"))
assert any([isinstance(n.op, dnn.GpuDnnPoolGrad)
for n in f.maker.fgraph.toposort()])
f(numpy.zeros((10, 10, 10), dtype='float32'))
def test_pooling_opt_arbitrary_dimensions():
    """Check that pooling inputs with an arbitrary number of non-pooling
    dimensions are correctly reshaped (pad_dims/unpad_dims) to run on
    the GPU, and that the GPU result matches the CPU result."""
    if not dnn.dnn_available(test_ctx_name):
        raise SkipTest(dnn.dnn_available.msg)
    # 'average_exc_pad' is disabled for versions < 4004
    if dnn.version(raises=False) < 4004:
        modes = ('max', 'average_inc_pad')
    else:
        modes = ('max', 'average_inc_pad', 'average_exc_pad')
    for n_non_pool_dims in (0, 1, 2, 3):
        for ws in ((2, 2), (3, 3, 3)):
            # create input shape: non-pooling dimensions
            # followed by 2 or 3 pooling dimensions
            shp = (2,) * n_non_pool_dims + (5,) * len(ws)
            data = numpy.random.normal(0, 1, shp).astype('float32')
            input = gpuarray_shared_constructor(data)
            for mode in modes:
                out_pool = Pool(ndim=len(ws), mode=mode, ignore_border=True)(input, ws)
                out_pool_grad = T.grad(T.sum(out_pool), wrt=input)
                out = [out_pool, out_pool_grad]
                # run on GPU
                fg = theano.function([], out, mode=mode_with_gpu)
                assert any([isinstance(node.op, dnn.GpuDnnPool)
                            for node in fg.maker.fgraph.toposort()])
                assert any([isinstance(node.op, dnn.GpuDnnPoolGrad)
                            for node in fg.maker.fgraph.toposort()])
                res_gpu = fg()
                # run on CPU
                fc = theano.function([], out, mode=mode_without_gpu)
                assert any([isinstance(node.op, Pool)
                            for node in fc.maker.fgraph.toposort()])
                if mode == 'max':
                    assert any([isinstance(node.op, MaxPoolGrad)
                                for node in fc.maker.fgraph.toposort()])
                else:
                    assert any([isinstance(node.op, AveragePoolGrad)
                                for node in fc.maker.fgraph.toposort()])
                # BUG FIX: this previously called fg() again, so the GPU
                # result was compared against itself and the CPU path was
                # never checked; the CPU function fc must be evaluated.
                res_cpu = fc()
                # check for similarity
                utt.assert_allclose(res_gpu[0], res_cpu[0])
                utt.assert_allclose(res_gpu[1], res_cpu[1])
def test_dnn_tag():
......@@ -625,6 +837,33 @@ class TestDnnInferShapes(utt.InferShapeTester):
dnn.GpuDnnPool
)
def test_pool_3d(self):
    """Infer-shape check of GpuDnnPool on 5d (3d-pooling) inputs."""
    if not dnn.dnn_available(test_ctx_name):
        raise SkipTest(dnn.dnn_available.msg)

    img = T.ftensor5('img')
    img_val = numpy.asarray(numpy.random.rand(2, 3, 4, 5, 6),
                            dtype='float32')

    # 'average_exc_pad' is disabled for versions < 4004
    if dnn.version(raises=False) < 4004:
        modes = ['max', 'average_inc_pad']
    else:
        modes = ['max', 'average_inc_pad', 'average_exc_pad']

    sizes = [(1, 1, 1), (2, 2, 2), (3, 3, 3)]
    # every combination of window size, stride and pooling mode
    for ws, stride, mode in product(sizes, sizes, modes):
        pooled = dnn.GpuDnnPool(mode=mode)(img, ws, stride, (0, 0, 0))
        self._compile_and_check([img],
                                [pooled],
                                [img_val],
                                dnn.GpuDnnPool)
def test_pool_grad(self):
if not dnn.dnn_available(test_ctx_name):
raise SkipTest(dnn.dnn_available.msg)
......@@ -664,6 +903,45 @@ class TestDnnInferShapes(utt.InferShapeTester):
dnn.GpuDnnPoolGrad
)
def test_pool_3d_grad(self):
    """Infer-shape check of GpuDnnPoolGrad for 3d pooling."""
    if not dnn.dnn_available(test_ctx_name):
        raise SkipTest(dnn.dnn_available.msg)

    img = T.ftensor5('img')
    img_grad = T.ftensor5('img_grad')
    out = T.ftensor5('out')

    def make_val():
        # fresh random 5d value matching the symbolic inputs
        return numpy.asarray(numpy.random.rand(2, 3, 4, 5, 6),
                             dtype='float32')

    img_val = make_val()
    img_grad_val = make_val()
    out_val = make_val()

    sizes = [(1, 1, 1), (2, 2, 2), (3, 3, 3)]
    for ws, stride, mode in product(sizes, sizes,
                                    ['max', 'average_inc_pad']):
        pool_grad = dnn.GpuDnnPoolGrad(mode=mode)(
            img,
            out,
            img_grad,
            ws,
            stride,
            (0, 0, 0)
        )
        self._compile_and_check(
            [img, img_grad, out],
            [pool_grad],
            [img_val, img_grad_val, out_val],
            dnn.GpuDnnPoolGrad
        )
# this has been a problem in the past
def test_dnn_conv_border_mode():
......
......@@ -30,7 +30,8 @@ from theano.sandbox.cuda.basic_ops import (as_cuda_ndarray_variable,
from theano.sandbox.cuda.blas import (GpuConv, GpuDownsampleFactorMax,
GpuDownsampleFactorMaxGrad)
from theano.sandbox.cuda.nnet import GpuSoftmax
from theano.sandbox.cuda.opt_util import alpha_merge, output_merge
from theano.sandbox.cuda.opt_util import (alpha_merge, output_merge,
pad_dims, unpad_dims)
from theano.sandbox.cuda import gpu_seqopt, register_opt
from theano.sandbox.cuda.nvcc_compiler import NVCC_compiler
......@@ -1391,20 +1392,23 @@ class GpuDnnPoolDesc(GpuOp):
def do_constant_folding(self, node):
return False
def __init__(self, ws=(1, 1), stride=(1, 1), mode='max', pad=(0, 0)):
def __init__(self, ws=(1, 1), stride=None, mode='max', pad=None):
if mode == 'average':
mode = 'average_inc_pad'
assert mode in ('max', 'average_inc_pad', 'average_exc_pad')
self.mode = mode
if stride is None:
stride = (1,) * len(ws)
if pad is None:
pad = (0,) * len(ws)
assert len(ws) == len(stride) and len(stride) == len(pad)
assert len(ws) in (2, 3)
self.ws = ws
self.stride = stride
self.pad = pad
if (pad[0] != 0 or pad[1] != 0) and version() == -1:
raise RuntimeError("cuDNN pooling with padding requires cuDNN v2")
if self.get_ndim() == 3 and version() < (3000, 3000):
raise RuntimeError("cuDNN 3d pooling requires cuDNN v3")
if (mode == 'average_exc_pad' and max(pad) > 0 and
......@@ -1418,12 +1422,9 @@ class GpuDnnPoolDesc(GpuOp):
def __setstate__(self, d):
self.__dict__.update(d)
if not hasattr(self, 'pad'):
self.pad = (0, 0)
self.pad = (0,) * self.get_ndim()
def make_node(self):
if self.pad != (0, 0) and version() == -1:
raise RuntimeError("cuDNN pooling with padding requires cuDNN v2")
node = Apply(self, [],
[CDataType("cudnnPoolingDescriptor_t",
freefunc="cudnnDestroyPoolingDescriptor")()])
......@@ -1444,8 +1445,6 @@ class GpuDnnPoolDesc(GpuOp):
mode_flag = 'CUDNN_POOLING_AVERAGE_COUNT_INCLUDE_PADDING'
elif self.mode == "average_exc_pad":
mode_flag = 'CUDNN_POOLING_AVERAGE_COUNT_EXCLUDE_PADDING'
if version() == -1:
raise Exception("cudnn v1 do not support average_exc_pad")
else:
raise NotImplementedError("Unsupported pooling model.")
......@@ -1616,8 +1615,6 @@ if (pool%(name)s != NULL) { cudnnDestroyPoolingDescriptor(pool%(name)s); }
mode_flag = 'CUDNN_POOLING_AVERAGE_COUNT_INCLUDE_PADDING'
elif self.mode == "average_exc_pad":
mode_flag = 'CUDNN_POOLING_AVERAGE_COUNT_EXCLUDE_PADDING'
if version() == -1:
raise Exception("cudnn v1 do not support average_exc_pad")
else:
raise NotImplementedError("Unsupported pooling model.")
......@@ -1872,8 +1869,6 @@ if (pool%(name)s != NULL) { cudnnDestroyPoolingDescriptor(pool%(name)s); }
mode_flag = 'CUDNN_POOLING_AVERAGE_COUNT_INCLUDE_PADDING'
elif self.mode == "average_exc_pad":
mode_flag = 'CUDNN_POOLING_AVERAGE_COUNT_EXCLUDE_PADDING'
if version() == -1:
raise Exception("cudnn v1 do not support average_exc_pad")
else:
raise NotImplementedError("Unsupported pooling model.")
......@@ -1976,28 +1971,33 @@ if (err%(name)s != CUDNN_STATUS_SUCCESS) {
return [shape[0]]
def dnn_pool(img, ws, stride=(1, 1), mode='max', pad=(0, 0)):
def dnn_pool(img, ws, stride=None, mode='max', pad=None):
"""
GPU pooling using cuDNN from NVIDIA.
The memory layout to use is 'bc01', that is 'batch', 'channel',
'first dim', 'second dim' in that order.
For 2D pooling, the memory layout to use is 'bc01', that is 'batch',
'channel', 'first dim', 'second dim' in that order.
For 3D pooling, the memory layout to use is 'bc012', that is 'batch',
'channel', 'first dim', 'second dim', 'third dim'.
Parameters
----------
img
Images to do the pooling over.
ws
Subsampling window size.
Subsampling window size. Should have 2 or 3 elements.
stride
Subsampling stride (default: (1, 1)).
mode : {'max', 'average_inc_pad', 'average_exc_pad, 'sum'}
pad :
(pad_h, pad_w) padding information.
Subsampling stride (default: (1, 1) or (1, 1, 1)).
mode : {'max', 'average_inc_pad', 'average_exc_pad', 'sum'}
pad
Padding: (pad_h, pad_w) for 2D or (pad_h, pad_w, pad_d) for 3D.
pad_h is the number of zero-valued pixels added to each of the top and
bottom borders.
pad_w is the number of zero-valued pixels added to each of the left
and right borders.
pad_d is the number of zero-valued pixels added to each of the front
and back borders (3D pooling only).
.. warning:: The cuDNN library only works with GPU that have a compute
capability of 3.0 or higer. This means that older GPU will not
......@@ -2009,6 +2009,10 @@ def dnn_pool(img, ws, stride=(1, 1), mode='max', pad=(0, 0)):
"""
img = gpu_contiguous(img)
if stride is None:
stride = (1,) * len(ws)
if pad is None:
pad = (0,) * len(ws)
if mode == "sum":
ret = GpuDnnPool(mode="average_inc_pad")(img, ws, stride, pad)
window_elem = theano.tensor.prod(ws).astype(ret.dtype)
......@@ -2972,10 +2976,21 @@ if True:
if not node.op.ignore_border:
return
img, ws, stride, pad = node.inputs
nd = node.op.ndim if node.op.ndim else (img.ndim - 2)
mode = node.op.mode
if nd not in (2, 3):
return
if (img.owner and isinstance(img.owner.op, HostFromGpu)):
ret = dnn_pool(gpu_contiguous(img.owner.inputs[0]),
ws, stride=stride, pad=pad, mode=mode)
if img.ndim == nd + 2:
ret = dnn_pool(gpu_contiguous(img.owner.inputs[0]),
ws, stride=stride, pad=pad, mode=mode)
else:
input = gpu_contiguous(img.owner.inputs[0])
# reshape to 4D or 5D with 2 non-pooling dimensions
input_padded = pad_dims(input, 2, nd)
ret_padded = dnn_pool(input_padded,
ws, stride=stride, pad=pad, mode=mode)
ret = unpad_dims(ret_padded, input, 2, nd)
return [host_from_gpu(ret)]
@register_opt('cudnn')
......@@ -3003,17 +3018,30 @@ if True:
if not node.op.ignore_border:
return
inp, out, inp_grad, ws, stride, pad = node.inputs
nd = node.op.ndim if node.op.ndim else (inp.ndim - 2)
mode = node.op.mode
if nd not in (2, 3):
return
if ((inp.owner and isinstance(inp.owner.op, HostFromGpu)) or
(out.owner and isinstance(out.owner.op, HostFromGpu)) or
(inp_grad.owner and isinstance(inp_grad.owner.op,
HostFromGpu))):
ret = GpuDnnPoolGrad(mode=mode)(gpu_contiguous(inp),
gpu_contiguous(out),
gpu_contiguous(inp_grad),
ws, stride, pad)
if inp.ndim == nd + 2:
ret = GpuDnnPoolGrad(mode=mode)(gpu_contiguous(inp),
gpu_contiguous(out),
gpu_contiguous(inp_grad),
ws, stride, pad)
else:
# reshape to 4D or 5D with 2 non-pooling dimensions
inp_padded = pad_dims(gpu_contiguous(inp), 2, nd)
out_padded = pad_dims(gpu_contiguous(out), 2, nd)
inp_grad_padded = pad_dims(gpu_contiguous(inp_grad), 2, nd)
ret_padded = GpuDnnPoolGrad(mode=mode)(inp_padded,
out_padded,
inp_grad_padded,
ws, stride, pad)
ret = unpad_dims(ret_padded, inp, 2, nd)
return [host_from_gpu(ret)]
@register_opt('cudnn')
......@@ -3025,16 +3053,28 @@ if True:
if not node.op.ignore_border:
return
inp, inp_grad, ws, stride, pad = node.inputs
nd = node.op.ndim if node.op.ndim else (inp.ndim - 2)
mode = node.op.mode
if nd not in (2, 3):
return
if ((inp.owner and isinstance(inp.owner.op, HostFromGpu)) or
(inp_grad.owner and isinstance(inp_grad.owner.op,
HostFromGpu))):
contiguous_inp_grad = gpu_contiguous(inp_grad)
ret = GpuDnnPoolGrad(mode=mode)(gpu_contiguous(inp),
contiguous_inp_grad,
contiguous_inp_grad,
ws, stride, pad)
if inp.ndim == nd + 2:
contiguous_inp_grad = gpu_contiguous(inp_grad)
ret = GpuDnnPoolGrad(mode=mode)(gpu_contiguous(inp),
contiguous_inp_grad,
contiguous_inp_grad,
ws, stride, pad)
else:
inp_padded = pad_dims(gpu_contiguous(inp), 2, nd)
inp_grad_padded = pad_dims(gpu_contiguous(inp_grad), 2, nd)
ret_padded = GpuDnnPoolGrad(mode=mode)(inp_padded,
inp_grad_padded,
inp_grad_padded,
ws, stride, pad)
ret = unpad_dims(ret_padded, inp, 2, nd)
return [host_from_gpu(ret)]
@register_opt('cudnn')
......
......@@ -40,6 +40,7 @@ from theano.sandbox.cuda.basic_ops import (
GpuSubtensor, GpuAdvancedSubtensor1,
GpuAdvancedIncSubtensor1, GpuAdvancedIncSubtensor1_dev20,
GpuIncSubtensor, gpu_alloc, GpuAlloc, gpu_shape, GpuSplit, GpuAllocEmpty)
from theano.sandbox.cuda.opt_util import pad_dims, unpad_dims
from theano.sandbox.cuda.type import CudaNdarrayType
from theano.sandbox.cuda.blas import (
......@@ -1891,15 +1892,12 @@ def local_convtransp3d_gemm(node):
gpu_optimizer.register("convtransp3d_gemm", local_convtransp3d_gemm)
def _check_constant_args_pool(ws, stride, pad, node):
def _check_constant_args_pool(ndim, ws, stride, pad, node):
"""Check if the args of pool are constants. Warns if not."""
try:
ws_w = tensor.get_scalar_constant_value(ws[0])
ws_h = tensor.get_scalar_constant_value(ws[1])
stride_w = tensor.get_scalar_constant_value(stride[0])
stride_h = tensor.get_scalar_constant_value(stride[1])
pad_w = tensor.get_scalar_constant_value(pad[0])
pad_h = tensor.get_scalar_constant_value(pad[1])
ws = tuple(tensor.get_scalar_constant_value(ws[i]) for i in range(ndim))
stride = tuple(tensor.get_scalar_constant_value(stride[i]) for i in range(ndim))
pad = tuple(tensor.get_scalar_constant_value(pad[i]) for i in range(ndim))
except tensor.NotScalarConstantError:
msg = ("Pool with tensor variable for the window size, stride or "
"padding is only supported in the new GPU backend, so this op "
......@@ -1909,65 +1907,96 @@ def _check_constant_args_pool(ws, stride, pad, node):
elif config.assert_no_cpu_op == "raise":
raise AssertionError(msg)
return None
ws = (ws_w, ws_h)
stride = (stride_w, stride_h)
pad = (pad_w, pad_h)
return ws, stride, pad
@register_opt()
@local_optimizer([pool.Pool])
def local_gpu_downsample_factor_max(node):
if isinstance(node.op, pool.Pool):
assert node.op.__props__ == ('ignore_border', 'mode')
if (isinstance(node.op, pool.Pool)):
assert node.op.__props__ == ('ndim', 'ignore_border', 'mode')
x, ws, stride, pad = node.inputs
ret = _check_constant_args_pool(ws, stride, pad, node)
nd = node.op.ndim if node.op.ndim else (x.ndim - 2)
ret = _check_constant_args_pool(nd, ws, stride, pad, node)
if ret is None:
return
ws, stride, pad = ret
if (pad) != (0, 0) or node.op.mode != 'max' or stride != ws:
if (nd != 2 or
max(node.op.padding) != 0 or
node.op.mode != 'max' or
stride != ws):
return
if (x.owner and isinstance(x.owner.op, HostFromGpu)):
gpu_ds = GpuDownsampleFactorMax(ws, node.op.ignore_border)
return [host_from_gpu(gpu_ds(x.owner.inputs[0]))]
gpu_ws = GpuDownsampleFactorMax(ws, node.op.ignore_border)
if node.inputs[0].ndim == 4:
return [host_from_gpu(gpu_ws(x.owner.inputs[0]))]
else:
input_4D = pad_dims(x.owner.inputs[0], 2, 2)
output_4D = gpu_ws(input_4D)
output = unpad_dims(output_4D, x.owner.inputs[0], 2, 2)
return [host_from_gpu(output)]
@register_opt()
@local_optimizer([pool.MaxPoolGrad])
def local_gpu_downsample_factor_max_grad(node):
if isinstance(node.op, pool.MaxPoolGrad):
assert node.op.__props__ == ('ignore_border', 'mode')
if (isinstance(node.op, pool.MaxPoolGrad)):
assert node.op.__props__ == ('ndim', 'ignore_border', 'mode')
x, z, gz, ws, stride, pad = node.inputs
ret = _check_constant_args_pool(ws, stride, pad, node)
nd = node.op.ndim if node.op.ndim else (x.ndim - 2)
ret = _check_constant_args_pool(nd, ws, stride, pad, node)
if ret is None:
return
ws, stride, pad = ret
if pad != (0, 0) or node.op.mode != 'max' or stride != ws:
if (nd != 2 or
max(node.op.padding) != 0 or
node.op.mode != 'max' or
stride != ws):
return
if (x.owner and isinstance(x.owner.op, HostFromGpu)):
gpu_ds_grad = GpuDownsampleFactorMaxGrad(ws, node.op.ignore_border)
return [host_from_gpu(gpu_ds_grad(x.owner.inputs[0],
as_cuda_ndarray_variable(z),
as_cuda_ndarray_variable(gz)))]
gpu_ws_grad = GpuDownsampleFactorMaxGrad(ws, node.op.ignore_border)
if node.inputs[0].ndim == 4:
return [host_from_gpu(gpu_ws_grad(x.owner.inputs[0],
as_cuda_ndarray_variable(z),
as_cuda_ndarray_variable(gz)))]
else:
x_4D = pad_dims(x.owner.inputs[0], 2, 2)
z_4D = pad_dims(as_cuda_ndarray_variable(z), 2, 2)
gz_4D = pad_dims(as_cuda_ndarray_variable(gz), 2, 2)
output_4D = gpu_ws_grad(x_4D, z_4D, gz_4D)
output = unpad_dims(output_4D, x.owner.inputs[0], 2, 2)
return [host_from_gpu(output)]
@register_opt()
@local_optimizer([pool.DownsampleFactorMaxGradGrad])
def local_gpu_downsample_factor_max_grad_grad(node):
if isinstance(node.op, pool.DownsampleFactorMaxGradGrad):
assert node.op.__props__ == ('ignore_border', 'mode')
assert node.op.__props__ == ('ndim', 'ignore_border', 'mode')
x, z, gx, ws, stride, pad = node.inputs
ret = _check_constant_args_pool(ws, stride, pad, node)
nd = node.op.ndim if node.op.ndim else (x.ndim - 2)
ret = _check_constant_args_pool(nd, ws, stride, pad, node)
if ret is None:
return
ws, stride, pad = ret
if pad != (0, 0) or node.op.mode != 'max' or stride != ws:
if (nd != 2 or
max(node.op.padding) != 0 or
node.op.mode != 'max' or
stride != ws):
return
if (x.owner and isinstance(x.owner.op, HostFromGpu)):
op = GpuDownsampleFactorMaxGradGrad(ws, node.op.ignore_border)
return [host_from_gpu(op(x.owner.inputs[0],
as_cuda_ndarray_variable(z),
as_cuda_ndarray_variable(gx)))]
if node.inputs[0].ndim == 4:
return [host_from_gpu(op(x.owner.inputs[0],
as_cuda_ndarray_variable(z),
as_cuda_ndarray_variable(gx)))]
else:
x_4D = pad_dims(x.owner.inputs[0], 2, 2)
z_4D = pad_dims(as_cuda_ndarray_variable(z), 2, 2)
gx_4D = pad_dims(as_cuda_ndarray_variable(gx), 2, 2)
output_4D = op(x_4D, z_4D, gx_4D)
output = unpad_dims(output_4D, x.owner.inputs[0], 2, 2)
return [host_from_gpu(output)]
@register_opt()
......
......@@ -3,13 +3,13 @@ from functools import wraps
import numpy
from theano import scalar as scal, Constant
from theano import tensor, scalar as scal, Constant
from theano.gof import local_optimizer
from theano.tensor import (DimShuffle, get_scalar_constant_value,
NotScalarConstantError)
from theano.sandbox.cuda.basic_ops import (
GpuFromHost, HostFromGpu, host_from_gpu, GpuDimShuffle, GpuElemwise)
GpuFromHost, HostFromGpu, host_from_gpu, GpuDimShuffle, GpuElemwise, GpuReshape)
_one = scal.constant(numpy.asarray(1.0, dtype='float32'))
......@@ -126,3 +126,48 @@ def output_merge(cls, alpha_in, beta_in, out_in):
return maker(targ, *inputs)
return opt
return wrapper
def pad_dims(input, leftdims, rightdims):
    """Reshapes the input to a (leftdims + rightdims) tensor.

    This helper function is used to convert pooling inputs with arbitrary
    non-pooling dimensions to the correct number of dimensions for the
    GPU pooling ops.

    This reduces or expands the number of leading (non-image) dimensions
    to exactly `leftdims`, by adding extra dimensions on the left or by
    combining some existing dimensions on the left of the input.

    Parameters
    ----------
    input
        Symbolic tensor with at least `rightdims` dimensions.
    leftdims : int
        Number of leading dimensions the result should have (e.g. 2 for
        the usual batch and channel axes of the pooling ops).
    rightdims : int
        Number of trailing (image/pooling) dimensions to keep unchanged.
    """
    assert input.ndim >= rightdims
    # already has the target dimensionality: nothing to do
    if input.ndim == (leftdims + rightdims):
        return input
    # extract image dimensions
    img_shape = input.shape[-rightdims:]
    # collapse all leading dimensions into one "batch" size and store it
    # as a 1-element vector so it can be joined with the other shapes
    batch_size = tensor.prod(input.shape[:-rightdims])
    batch_size = tensor.shape_padright(batch_size, 1)
    # store in the required shape, for example as a 4D tensor
    # with shape: (batch_size, 1, height, width)
    new_shape = tensor.cast(tensor.join(0, batch_size,
                                        tensor.as_tensor([1] * (leftdims - 1)),
                                        img_shape), 'int64')
    input_ND = GpuReshape(leftdims + rightdims)(input, new_shape)
    return input_ND
def unpad_dims(output, input, leftdims, rightdims):
    """Reshapes the output after pad_dims.

    This reverts the padding by `pad_dims`: the `rightdims` trailing
    dimensions of `output` are kept, and the leading dimensions are
    restored from the original `input`.
    """
    # nothing was padded: output already matches the caller's shape
    if output.ndim == input.ndim:
        return output
    # restore the output to the original shape
    outshp = tensor.join(0, input.shape[:-rightdims], output.shape[-rightdims:])
    return GpuReshape(input.ndim)(output, outshp)
......@@ -326,7 +326,9 @@ if 0:
def test_downsample():
shps = [(1, 1, 1, 12),
shps = [(1, 12),
(1, 1, 12),
(1, 1, 1, 12),
(1, 1, 2, 2),
(1, 1, 1, 1),
(1, 1, 4, 4),
......@@ -359,17 +361,17 @@ def test_downsample():
for shp in shps:
for ds in (2, 2), (3, 2), (1, 1):
if ds[0] > shp[2]:
if ds[0] > shp[-2]:
continue
if ds[1] > shp[3]:
if ds[1] > shp[-1]:
continue
# GpuDownsampleFactorMax doesn't like having more than 512 columns
# in the output tensor.
if float(shp[3]) / ds[1] > 512:
if float(shp[-1]) / ds[1] > 512:
continue
for ignore_border in (True, False):
# print 'test_downsample', shp, ds, ignore_border
ds_op = Pool(ignore_border=ignore_border)
ds_op = Pool(ndim=len(ds), ignore_border=ignore_border)
a = tcn.shared_constructor(my_rand(*shp), 'a')
f = pfunc([], ds_op(tensor.as_tensor_variable(a), ds),
......
......@@ -15,8 +15,8 @@ import theano
import theano.tensor as T
import theano.tests.unittest_tools as utt
from theano.sandbox.neighbours import images2neibs
from theano.tensor.signal.pool import pool_2d
from theano.tensor.signal.pool import MaxPoolGrad, AveragePoolGrad
from theano.tensor.signal.pool import pool_2d, pool_3d
from theano.tensor.signal.pool import Pool, MaxPoolGrad, AveragePoolGrad
import theano.sandbox.cuda.dnn as dnn
from theano.sandbox.cuda.basic_ops import GpuAllocEmpty, gpu_alloc_empty
from theano.sandbox.cuda import float32_shared_constructor as shared
......@@ -170,7 +170,7 @@ def test_dnn_conv_inplace():
def pool3d2d(input, ds=(2, 2, 2), strides=None, pad=(0, 0, 0),
pool_func=T.max, mode='ignore_borders'):
pool_function=T.max, mode='ignore_borders'):
if strides is None:
strides = ds
......@@ -179,13 +179,13 @@ def pool3d2d(input, ds=(2, 2, 2), strides=None, pad=(0, 0, 0),
# resahpe to B, C*0, 1, 2 and do the pooling on 1, 2
first = input.reshape((shape[0], shape[1] * shape[2], shape[3], shape[4]))
pooled1 = pool_2d_i2n(first, ds=ds[1:], strides=strides[1:], pad=pad[1:],
pool_function=pool_func, mode=mode)
pool_function=pool_function, mode=mode)
shp1 = pooled1.shape
# reshape to B, C, 0, 1*2 and do the pooling on 0
second = pooled1.reshape((shape[0], shape[1], shape[2], shp1[2] * shp1[3]))
pooled2 = pool_2d_i2n(second, ds=(ds[0], 1), strides=(strides[0], 1),
pad=(pad[0], 0), pool_function=pool_func, mode=mode)
pad=(pad[0], 0), pool_function=pool_function, mode=mode)
shp2 = pooled2.shape
return pooled2.reshape((shape[0], shape[1], shp2[2], shp1[2], shp1[3]))
......@@ -241,8 +241,6 @@ def test_pooling():
func = T.max
else:
func = T.mean
if pad != (0, 0) and cuda.dnn.version() == -1:
continue
if pad != (0, 0) and func is T.mean:
continue
......@@ -418,6 +416,7 @@ def test_pooling3d():
if not cuda.dnn.dnn_available() or cuda.dnn.version() < (3000, 3000):
raise SkipTest(cuda.dnn.dnn_available.msg)
# We force the FAST_RUN as we don't want the reference to run in DebugMode.
mode_without_gpu_ref = theano.compile.mode.get_mode(
'FAST_RUN').excluding('gpu')
......@@ -427,8 +426,7 @@ def test_pooling3d():
else:
modes = ('max', 'average_inc_pad', 'average_exc_pad')
x = T.TensorType(broadcastable=(False, False, False, False, False),
dtype='float32')()
x = T.ftensor5()
for mode, pad in product(modes,
((0, 0, 0), (1, 0, 0), (0, 1, 0), (0, 0, 1),
(2, 3, 2), (3, 2, 2), (2, 2, 3))):
......@@ -436,8 +434,6 @@ def test_pooling3d():
func = T.max
else:
func = T.mean
if pad != (0, 0, 0) and cuda.dnn.version() == -1:
continue
if pad != (0, 0, 0) and func is T.mean:
continue
......@@ -449,13 +445,13 @@ def test_pooling3d():
if pad[0] > stride or pad[1] > stride or pad[2] > stride:
# Not implemented
continue
out1 = cuda.dnn.dnn_pool(x, (ws, ws, ws),
stride=(stride, stride, stride),
pad=pad, mode=mode)
out2 = pool3d2d(x, ds=(ws, ws, ws),
strides=(stride, stride, stride),
pad=pad, pool_func=func)
out1 = pool_3d(x, (ws, ws, ws),
st=(stride, stride, stride),
ignore_border=True,
padding=pad, mode=mode)
out2 = pool3d2d(x, ds=(ws, ws, ws), strides=(stride, stride, stride),
pad=pad,
pool_function=func)
f1 = theano.function([x], out1, mode=mode_with_gpu)
assert any([isinstance(node.op, cuda.dnn.GpuDnnPool)
for node in f1.maker.fgraph.apply_nodes])
......@@ -510,11 +506,17 @@ def test_pooling3d():
g_out = fg(data)
# Compare again the CPU result
out = pool3d2d(x, (ws, ws, ws),
strides=(stride, stride, stride),
pad=pad, pool_func=func)
out = pool_3d(x, (ws, ws, ws),
padding=pad,
ignore_border=True, mode=mode)
fc = theano.function([x], theano.grad(out.sum(), x),
mode=mode_without_gpu_ref)
if mode == 'max':
assert any([isinstance(node.op, MaxPoolGrad)
for node in fc.maker.fgraph.toposort()])
else:
assert any([isinstance(node.op, AveragePoolGrad)
for node in fc.maker.fgraph.toposort()])
c_out = fc(data)
utt.assert_allclose(c_out, g_out)
......@@ -523,6 +525,7 @@ def test_pooling_opt():
if not cuda.dnn.dnn_available():
raise SkipTest(cuda.dnn.dnn_available.msg)
# 2D pooling
x = T.fmatrix()
f = theano.function(
......@@ -535,6 +538,7 @@ def test_pooling_opt():
f(numpy.zeros((10, 10), dtype='float32'))
# gradient of 2D pooling
f = theano.function(
[x],
T.grad(pool_2d(x, ds=(2, 2), mode='average_inc_pad',
......@@ -545,6 +549,7 @@ def test_pooling_opt():
for n in f.maker.fgraph.toposort()])
f(numpy.zeros((10, 10), dtype='float32'))
# Test sum pooling
f = theano.function(
[x],
......@@ -557,6 +562,82 @@ def test_pooling_opt():
data = numpy.random.rand(10, 10).astype('float32')
f(data)
# 3D pooling
x = T.ftensor3()
f = theano.function(
[x],
pool_3d(x, ds=(2, 2, 2), mode='average_inc_pad', ignore_border=True),
mode=mode_with_gpu)
assert any([isinstance(n.op, cuda.dnn.GpuDnnPool)
for n in f.maker.fgraph.toposort()])
f(numpy.zeros((10, 10, 10), dtype='float32'))
# gradient of 3D pooling
f = theano.function(
[x],
T.grad(pool_3d(x, ds=(2, 2, 2), mode='average_inc_pad',
ignore_border=True).sum(), x),
mode=mode_with_gpu.including("cudnn"))
assert any([isinstance(n.op, cuda.dnn.GpuDnnPoolGrad)
for n in f.maker.fgraph.toposort()])
f(numpy.zeros((10, 10, 10), dtype='float32'))
def test_pooling_opt_arbitrary_dimensions():
    # test if input with an arbitrary number of non-pooling dimensions
    # is correctly reshaped to run on the GPU
    if not cuda.dnn.dnn_available():
        raise SkipTest(cuda.dnn.dnn_available.msg)
    # 'average_exc_pad' is disabled for versions < 4004
    if cuda.dnn.version() < (4004, 4004):
        modes = ('max', 'average_inc_pad')
    else:
        modes = ('max', 'average_inc_pad', 'average_exc_pad')
    for n_non_pool_dims in (0, 1, 2, 3):
        for ws in ((2, 2), (3, 3, 3)):
            # create input shape: non-pooling dimensions
            # followed by 2 or 3 pooling dimensions
            shp = (2,) * n_non_pool_dims + (5,) * len(ws)
            data = numpy.random.normal(0, 1, shp).astype('float32')
            input = shared(data)
            for mode in modes:
                out_pool = Pool(ndim=len(ws), mode=mode, ignore_border=True)(input, ws)
                out_pool_grad = T.grad(T.sum(out_pool), wrt=input)
                out = [out_pool, out_pool_grad]
                # run on GPU and check the cuDNN ops were inserted
                fg = theano.function([], out, mode=mode_with_gpu)
                assert any([isinstance(node.op, cuda.dnn.GpuDnnPool)
                            for node in fg.maker.fgraph.toposort()])
                assert any([isinstance(node.op, cuda.dnn.GpuDnnPoolGrad)
                            for node in fg.maker.fgraph.toposort()])
                res_gpu = fg()
                # run on CPU and check the reference ops are used
                fc = theano.function([], out, mode=mode_without_gpu)
                assert any([isinstance(node.op, Pool)
                            for node in fc.maker.fgraph.toposort()])
                if mode == 'max':
                    assert any([isinstance(node.op, MaxPoolGrad)
                                for node in fc.maker.fgraph.toposort()])
                else:
                    assert any([isinstance(node.op, AveragePoolGrad)
                                for node in fc.maker.fgraph.toposort()])
                # BUG FIX: this previously called fg() again, comparing the
                # GPU result against itself and never exercising the CPU
                # reference path; call the CPU function instead.
                res_cpu = fc()
                # check for similarity between GPU and CPU results
                utt.assert_allclose(res_gpu[0], res_cpu[0])
                utt.assert_allclose(res_gpu[1], res_cpu[1])
class test_DnnSoftMax(test_nnet.test_SoftMax):
gpu_op = dnn.GpuDnnSoftmax
......
......@@ -7,6 +7,7 @@ Pool, DownsampleAvg, DownsampleSoftmax.
from __future__ import absolute_import, print_function, division
# This file should move along with conv.py
import warnings
import itertools
import numpy
from six.moves import xrange
......@@ -84,67 +85,101 @@ def pool_2d(input, ds, ignore_border=None, st=None, padding=(0, 0),
" Otherwise, the convolution will be executed on CPU.",
stacklevel=2)
ignore_border = False
if input.ndim == 4:
op = Pool(ignore_border, mode=mode)
output = op(input, ds, st, padding)
return output
op = Pool(ignore_border, ndim=2, mode=mode)
output = op(input, ds, st, padding)
return output
# extract image dimensions
img_shape = input.shape[-2:]
# count the number of "leading" dimensions, store as dmatrix
batch_size = tensor.prod(input.shape[:-2])
batch_size = tensor.shape_padright(batch_size, 1)
def pool_3d(input, ds, ignore_border=None, st=None, padding=(0, 0, 0),
mode='max'):
"""Downscale the input by a specified factor
# store as 4D tensor with shape: (batch_size,1,height,width)
new_shape = tensor.cast(tensor.join(0, batch_size,
tensor.as_tensor([1]),
img_shape), 'int64')
input_4D = tensor.reshape(input, new_shape, ndim=4)
Takes as input a N-D tensor, where N >= 3. It downscales the input image by
the specified factor, by keeping only the maximum value of non-overlapping
patches of size (ds[0],ds[1],ds[2])
# downsample mini-batch of images
op = Pool(ignore_border, mode=mode)
output = op(input_4D, ds, st, padding)
Parameters
----------
input : N-D theano tensor of input images
Input images. Max pooling will be done over the 3 last dimensions.
ds : tuple of length 3 or theano vector of ints of size 3
Factor by which to downscale (vertical ds, horizontal ds, depth ds).
(2,2,2) will halve the image in each dimension.
ignore_border : bool (default None, will print a warning and set to False)
When True, (5,5,5) input with ds=(2,2,2) will generate a (2,2,2) output.
(3,3,3) otherwise.
st : tuple of three ints or theano vector of ints of size 3
Stride size, which is the number of shifts over rows/cols/slices to get
the next pool region. If st is None, it is considered equal to ds
(no overlap on pooling regions).
padding : tuple of two ints or theano vector of ints of size 3
(pad_h, pad_w, pad_d), pad zeros to extend beyond six borders of the
images, pad_h is the size of the top and bottom margins,
pad_w is the size of the left and right margins, and pad_d is the size
of the front and back margins
mode : {'max', 'sum', 'average_inc_pad', 'average_exc_pad'}
Operation executed on each window. `max` and `sum` always exclude
the padding in the computation. `average` gives you the choice to
include or exclude it.
# restore to original shape
outshp = tensor.join(0, input.shape[:-2], output.shape[-2:])
return tensor.reshape(output, outshp, ndim=input.ndim)
"""
if input.ndim < 3:
raise NotImplementedError('pool_3d requires a dimension >= 3')
if ignore_border is None:
warnings.warn(
"pool_3d() will have the parameter ignore_border"
" default value changed to True (currently"
" False). To have consistent behavior with all Theano"
" version, explicitly add the parameter ignore_border=True."
" On the GPU, using ignore_border=True is needed to use cuDNN."
" When using ignore_border=False and not using cuDNN, the only"
" GPU combination supported is when"
" `ds == st and padding == (0, 0, 0) and mode == 'max'`."
" Otherwise, the convolution will be executed on CPU.",
stacklevel=2)
ignore_border = False
op = Pool(ignore_border, ndim=3, mode=mode)
output = op(input, ds, st, padding)
return output
class Pool(OpenMPOp):
"""
For N-dimensional tensors, consider that the last two dimensions span
images. This Op downsamples these images by taking the max, sum or average
over different patch.
The constructor takes the max, sum or average or different input patches.
This Op downsamples the last N dimensions of the input by taking the max,
sum or average over different patches.
Parameters
----------
ds : list or tuple of two ints
Downsample factor over rows and column.
ds indicates the pool region size.
ds : list or tuple of N ints
Downsample factor over rows, columns etc.
ds indicates the size of the pooling region.
ignore_border : bool
If ds doesn't divide imgshape, do we include an extra row/col
If ds doesn't divide imgshape, do we include an extra row/col/slice
of partial downsampling (False) or ignore it (True).
st : list or tuple of two ints or None
Stride size, which is the number of shifts over rows/cols to get the
st : list or tuple of N ints or None
Stride size, which is the number of shifts over rows/cols/slices to get the
next pool region. If st is None, it is considered equal to ds
(no overlap on pooling regions).
padding: tuple of two ints
(pad_h, pad_w), pad zeros to extend beyond four borders of the images,
pad_h is the size of the top and bottom margins, and pad_w is the size
of the left and right margins.
padding : tuple of N ints or None
For each downsampling dimension, this specifies the number of zeros to
add as padding on both sides. For 2D and (pad_h, pad_w), pad_h specifies the
size of the top and bottom margins, pad_w specifies the size of the left and
right margins. No padding is added if padding is None.
mode : {'max', 'sum', 'average_inc_pad', 'average_exc_pad'}
('average_inc_pad' excludes the padding from the count,
'average_exc_pad' include it)
ndim : int
The number of pooling dimensions N.
If this number is not specified, the default is set to the
(input.ndim - 2), assuming that the first two dimensions of the input
are non-pooling dimensions.
"""
__props__ = ('ignore_border', 'mode')
__props__ = ('ndim', 'ignore_border', 'mode')
@staticmethod
def out_shape(imgshape, ds, ignore_border=False, st=None, padding=(0, 0)):
def out_shape(imgshape, ds, ignore_border=False, st=None, padding=None, ndim=None):
"""
Return the shape of the output from this op, for input of given
shape and flags.
......@@ -152,88 +187,79 @@ class Pool(OpenMPOp):
Parameters
----------
imgshape : tuple, list, or similar of integer or scalar Theano variable
The shape of a tensor of images. The last two elements are
The shape of a tensor of images. The last N elements are
interpreted as the number of rows, and the number of cols.
ds : list or tuple of two ints
Downsample factor over rows and columns this parameter indicates
the size of the pooling region.
st : list or tuple of two ints
The stride size. This is the distance between the pooling regions.
If it's set to None, it equals ds.
ds : list or tuple of N ints
Downsample factor over rows and column.
ds indicates the pool region size.
ignore_border : bool
If ds doesn't divide imgshape, do we include an extra row/col of
partial downsampling (False) or ignore it (True).
padding : tuple of two ints
(pad_h, pad_w), pad zeros to extend beyond four borders
of the images, pad_h is the size of the top and bottom margins,
and pad_w is the size of the left and right margins.
If ds doesn't divide imgshape, do we include an extra row/col/slice
of partial downsampling (False) or ignore it (True).
st : list or tuple of N ints or None
Stride size, which is the number of shifts over rows/cols/slices to get the
next pool region. If st is None, it is considered equal to ds
(no overlap on pooling regions).
padding : tuple of N ints or None
For each downsampling dimension, this specifies the number of zeros to
add as padding on both sides. For 2D and (pad_h, pad_w), pad_h specifies the
size of the top and bottom margins, pad_w specifies the size of the left and
right margins. No padding is added if padding is None.
ndim : int
The number of pooling dimensions N.
If this number is not specified, the default is set to the
(input.ndim - 2), assuming that the first two dimensions of the input
are non-pooling dimensions.
Returns
-------
list
The shape of the output from this op, for input of given shape.
This will have the same length as imgshape, but with last two
This will have the same length as imgshape, but with last N
elements reduced as per the downsampling & ignore_border flags.
"""
if len(imgshape) < 2:
raise TypeError('imgshape must have at least two elements '
'(rows, cols)')
if ndim is None:
ndim = len(imgshape) - 2
if len(imgshape) < ndim:
raise TypeError('imgshape must have at least {} dimensions'.format(ndim))
if st is None:
st = ds
r, c = imgshape[-2:]
r = tensor.extract_constant(r)
c = tensor.extract_constant(c)
if padding[0]:
r = r + padding[0] * 2
if padding[1]:
c = c + padding[1] * 2
if ignore_border:
if ds[0] == st[0]:
nr = r // st[0]
else:
out_r = (r - ds[0]) // st[0] + 1
if isinstance(r, theano.Variable):
nr = tensor.maximum(out_r, 0)
if padding is None:
padding = (0,) * ndim
patch_shape = tuple(tensor.extract_constant(imgshape[-ndim + i]) + padding[i] * 2
for i in xrange(ndim))
def compute_out(v, downsample, stride):
if ignore_border:
if downsample == stride:
return v // stride
else:
nr = numpy.maximum(out_r, 0)
if ds[1] == st[1]:
nc = c // st[1]
out = (v - downsample) // stride + 1
if isinstance(out, theano.Variable):
return tensor.maximum(out, 0)
else:
return numpy.maximum(out, 0)
else:
out_c = (c - ds[1]) // st[1] + 1
if isinstance(c, theano.Variable):
nc = tensor.maximum(out_c, 0)
if isinstance(v, theano.Variable):
return tensor.switch(tensor.ge(stride, downsample),
(v - 1) // stride + 1,
tensor.maximum(0, (v - 1 - downsample) //
stride + 1) + 1)
elif stride >= downsample:
return (v - 1) // stride + 1
else:
nc = numpy.maximum(out_c, 0)
else:
if isinstance(r, theano.Variable):
nr = tensor.switch(tensor.ge(st[0], ds[0]),
(r - 1) // st[0] + 1,
tensor.maximum(0, (r - 1 - ds[0]) //
st[0] + 1) + 1)
elif st[0] >= ds[0]:
nr = (r - 1) // st[0] + 1
else:
nr = max(0, (r - 1 - ds[0] + st[0]) // st[0]) + 1
if isinstance(c, theano.Variable):
nc = tensor.switch(tensor.ge(st[1], ds[1]),
(c - 1) // st[1] + 1,
tensor.maximum(0, (c - 1 - ds[1]) //
st[1] + 1) + 1)
elif st[1] >= ds[1]:
nc = (c - 1) // st[1] + 1
else:
nc = max(0, (c - 1 - ds[1] + st[1]) // st[1]) + 1
return max(0, (v - 1 - downsample + stride) // stride) + 1
out_shape = [compute_out(patch_shape[i], ds[i], st[i]) for i in xrange(ndim)]
rval = list(imgshape[:-2]) + [nr, nc]
rval = list(imgshape[:-ndim]) + out_shape
return rval
def __init__(self, ignore_border=False, mode='max', openmp=None):
def __init__(self, ignore_border=False, mode='max', ndim=None, openmp=None):
super(Pool, self).__init__(openmp=openmp)
self.ndim = ndim
self.ignore_border = ignore_border
if mode not in ['max', 'average_inc_pad', 'average_exc_pad', 'sum']:
raise ValueError(
......@@ -244,6 +270,7 @@ class Pool(OpenMPOp):
def prepare_node(self, node, storage_map, compute_map):
if len(node.inputs) == 1:
# Old interface
self.ndim = len(node.op.ds)
self.mode = node.op.mode
ws = theano.tensor.constant(node.op.ds)
st = theano.tensor.constant(node.op.st)
......@@ -270,17 +297,22 @@ class Pool(OpenMPOp):
storage_map[pad] = [None]
compute_map[pad] = [False]
def make_node(self, x, ws, stride=None, pad=(0, 0)):
def make_node(self, x, ws, stride=None, pad=None):
# TODO: consider restricting the dtype?
x = tensor.as_tensor_variable(x)
nd = self.ndim
if nd is None:
nd = x.type.ndim - 2
if stride is None:
stride = ws
if isinstance(pad, (tuple, list)):
if tuple(pad) != (0, 0) and not self.ignore_border:
if pad is None:
pad = (0,) * nd
elif isinstance(pad, (tuple, list)):
if max(pad) != 0 and not self.ignore_border:
raise NotImplementedError(
'padding works only with ignore_border=True')
if isinstance(ws, (tuple, list)):
if pad[0] >= ws[0] or pad[1] >= ws[1]:
if any(pad[i] >= ws[i] for i in range(nd)):
raise NotImplementedError(
'padding_h and padding_w must be smaller than strides')
ws = tensor.as_tensor_variable(ws)
......@@ -289,7 +321,7 @@ class Pool(OpenMPOp):
assert ws.ndim == 1
assert stride.ndim == 1
assert pad.ndim == 1
if x.type.ndim != 4:
if x.type.ndim < nd:
raise TypeError()
if not ws.dtype.startswith('int'):
raise TypeError('Pool downsample parameters must be ints.')
......@@ -298,42 +330,36 @@ class Pool(OpenMPOp):
if not pad.dtype.startswith('int'):
raise TypeError('Padding parameters must be ints.')
# If the input shape are broadcastable we can have 0 in the output shape
broad = x.broadcastable[:2] + (False, False)
broad = x.broadcastable[:-nd] + (False,) * nd
out = tensor.TensorType(x.dtype, broad)
return gof.Apply(self, [x, ws, stride, pad], [out()])
def perform(self, node, inp, out):
x, ws, stride, pad = inp
z, = out
assert ws.shape == stride.shape == pad.shape == (2,)
if len(x.shape) != 4:
nd = self.ndim
if nd is None:
nd = len(x.shape) - 2
assert ws.shape == stride.shape == pad.shape == (nd,)
if len(x.shape) < nd:
raise NotImplementedError(
'Pool requires 4D input for now')
z_shape = self.out_shape(x.shape, ws, self.ignore_border, stride, pad)
'Pool requires input with {} or more dimensions'.format(nd))
z_shape = self.out_shape(x.shape, ws, self.ignore_border, stride, pad, nd)
if not self.ignore_border:
assert z_shape[2] > 0
assert z_shape[3] > 0
assert all(z > 0 for z in z_shape[-nd:])
if (z[0] is None) or (z[0].shape != z_shape):
z[0] = numpy.empty(z_shape, dtype=x.dtype)
zz = z[0]
# number of pooling output rows
pr = zz.shape[-2]
# number of pooling output cols
pc = zz.shape[-1]
ws0, ws1 = ws
st0, st1 = stride
pad_h = pad[0]
pad_w = pad[1]
img_rows = x.shape[-2] + 2 * pad_h
img_cols = x.shape[-1] + 2 * pad_w
# size of pooling output
pool_out_shp = zz.shape[-nd:]
img_shp = tuple(x.shape[-nd + i] + 2 * pad[i] for i in xrange(nd))
inc_pad = self.mode == 'average_inc_pad'
# pad the image
if (pad_h, pad_w) != (0, 0):
y = numpy.zeros(
(x.shape[0], x.shape[1], img_rows, img_cols),
dtype=x.dtype)
y[:, :, pad_h:(img_rows - pad_h), pad_w:(img_cols - pad_w)] = x
if max(pad) != 0:
y = numpy.zeros(x.shape[:-nd] + img_shp, dtype=x.dtype)
y[(slice(None),) * (len(x.shape) - nd) +
tuple(slice(pad[i], img_shp[i] - pad[i]) for i in xrange(nd))] = x
else:
y = x
func = numpy.max
......@@ -342,28 +368,30 @@ class Pool(OpenMPOp):
elif self.mode != 'max':
func = numpy.average
for n in xrange(x.shape[0]):
for k in xrange(x.shape[1]):
for r in xrange(pr):
row_st = r * st0
row_end = builtins.min(row_st + ws0, img_rows)
if not inc_pad:
row_st = builtins.max(row_st, pad_h)
row_end = builtins.min(row_end, x.shape[-2] + pad_h)
for c in xrange(pc):
col_st = c * st1
col_end = builtins.min(col_st + ws1, img_cols)
if not inc_pad:
col_st = builtins.max(col_st, pad_w)
col_end = builtins.min(col_end,
x.shape[-1] + pad_w)
zz[n, k, r, c] = func(y[
n, k, row_st:row_end, col_st:col_end])
# precompute the region boundaries for each dimension
region_slices = [[] for i in xrange(nd)]
for i in xrange(nd):
for j in xrange(pool_out_shp[i]):
start = j * stride[i]
end = builtins.min(start + ws[i], img_shp[i])
if not inc_pad:
start = builtins.max(start, pad[i])
end = builtins.min(end, img_shp[i] - pad[i])
region_slices[i].append(slice(start, end))
# iterate over non-pooling dimensions
for k in numpy.ndindex(*x.shape[:-nd]):
zzk = zz[k]
yk = y[k]
# iterate over pooling regions
for r in numpy.ndindex(*pool_out_shp):
zzk[r] = func(
yk[[region_slices[i][r[i]] for i in xrange(nd)]])
def infer_shape(self, node, in_shapes):
ws, stride, pad = [node.inputs[1], node.inputs[2], node.inputs[3]]
shp = self.out_shape(in_shapes[0], ws, self.ignore_border, stride,
pad)
pad, self.ndim)
return [shp]
def grad(self, inp, grads):
......@@ -372,10 +400,12 @@ class Pool(OpenMPOp):
disc = [DisconnectedType()() for i in inp[1:]]
if self.mode == 'max':
maxout = self(x, ws, stride, pad)
return [MaxPoolGrad(ignore_border=self.ignore_border)(
return [MaxPoolGrad(ndim=self.ndim,
ignore_border=self.ignore_border)(
x, maxout, gz, ws=ws, stride=stride, pad=pad)] + disc
else:
return [AveragePoolGrad(ignore_border=self.ignore_border,
return [AveragePoolGrad(ndim=self.ndim,
ignore_border=self.ignore_border,
mode=self.mode)(
x, gz, ws=ws, stride=stride, pad=pad)] + disc
......@@ -392,292 +422,386 @@ class Pool(OpenMPOp):
raise theano.gof.utils.MethodNotDefined()
x, ws, stride, pad = inp
z, = out
nd = self.ndim
if nd is None:
nd = node.inputs[0].ndim - 2
total_ndim = node.inputs[0].ndim
non_pool_ndim = total_ndim - nd
fail = sub['fail']
ignore_border = int(self.ignore_border)
if self.openmp:
omp_parallel = '#pragma omp parallel for private(r_st, r_end, c_st, c_end, collector) schedule(static)'
# run in parallel over each pooling block
omp_parallel = '#pragma omp parallel for private(r_st, r_end, r_idx, i_idx, o_idx, collector) schedule(static)'
else:
omp_parallel = ''
ccode = """
int ws0, ws1, st0, st1, pd0, pd1;
int typenum = PyArray_ObjectType((PyObject*)%(x)s, 0);
int z_r, z_c; // shape of the output
int r, c; // shape of the padded_input
if(PyArray_DIM(%(ws)s, 0)!=2)
if(PyArray_NDIM(%(x)s)!=%(total_ndim)s)
{
PyErr_SetString(PyExc_ValueError, "ws must be a vector of size 2");
PyErr_SetString(PyExc_ValueError, "x must be a %(total_ndim)sD ndarray");
%(fail)s;
}
if(PyArray_DIM(%(stride)s, 0)!=2)
if(PyArray_DIM(%(ws)s, 0)!=%(nd)s)
{
PyErr_SetString(PyExc_ValueError, "stride must be a vector of size 2");
PyErr_SetString(PyExc_ValueError, "ws must be a vector of size %(nd)s");
%(fail)s;
}
if(PyArray_DIM(%(pad)s, 0)!=2)
if(PyArray_DIM(%(stride)s, 0)!=%(nd)s)
{
PyErr_SetString(PyExc_ValueError, "pad must be a vector of size 2");
PyErr_SetString(PyExc_ValueError, "stride must be a vector of size %(nd)s");
%(fail)s;
}
// Getting ws, stride and pad
ws0 = *((npy_intp*)PyArray_GETPTR1(%(ws)s, 0));
ws1 = *((npy_intp*)PyArray_GETPTR1(%(ws)s, 1));
st0 = *((npy_intp*)PyArray_GETPTR1(%(stride)s, 0));
st1 = *((npy_intp*)PyArray_GETPTR1(%(stride)s, 1));
pd0 = *((npy_intp*)PyArray_GETPTR1(%(pad)s, 0));
pd1 = *((npy_intp*)PyArray_GETPTR1(%(pad)s, 1));
if(PyArray_NDIM(%(x)s)!=4)
if(PyArray_DIM(%(pad)s, 0)!=%(nd)s)
{
PyErr_SetString(PyExc_ValueError, "x must be a 4d ndarray");
PyErr_SetString(PyExc_ValueError, "pad must be a vector of size %(nd)s");
%(fail)s;
}
int z[%(nd)s]; // shape of the output
int r[%(nd)s]; // shape of the padded_input
int ws[%(nd)s];
int st[%(nd)s];
int pd[%(nd)s];
int nonzero_padding;
nonzero_padding = 0;
for (int i=0; i<%(nd)s; i++)
{
ws[i] = *((npy_intp*)PyArray_GETPTR1(%(ws)s, i));
st[i] = *((npy_intp*)PyArray_GETPTR1(%(stride)s, i));
pd[i] = *((npy_intp*)PyArray_GETPTR1(%(pad)s, i));
r[i] = PyArray_DIMS(%(x)s)[%(non_pool_ndim)s + i] + 2 * pd[i];
if (pd[i]>0)
nonzero_padding = 1;
}
if (!%(ignore_border)s && nonzero_padding)
{
PyErr_SetString(PyExc_ValueError,
"padding must be zero when ignore border is False");
%(fail)s;
}
r = PyArray_DIMS(%(x)s)[2];
c = PyArray_DIMS(%(x)s)[3];
r += pd0 * 2;
c += pd1 * 2;
if (pd0 != 0 && pd1 != 0 && !%(ignore_border)s)
{
PyErr_SetString(PyExc_ValueError,
"padding must be (0,0) when ignore border is False");
%(fail)s;
}
if (%(ignore_border)s)
{
// '/' in C is different from '/' in python
if (r - ws0 < 0)
for (int i=0; i<%(nd)s; i++)
{
z_r = 0;
}
else
{
z_r = (r - ws0) / st0 + 1;
}
if (c - ws1 < 0)
{
z_c = 0;
}
else
{
z_c = (c - ws1) / st1 + 1;
// '/' in C is different from '/' in python
if (r[i] - ws[i] < 0)
{
z[i] = 0;
}
else
{
z[i] = (r[i] - ws[i]) / st[i] + 1;
}
}
}
else
{
// decide how many rows the output has
if (st0 >= ws0)
for (int i=0; i<%(nd)s; i++)
{
z_r = (r - 1) / st0 + 1;
}
else
{
z_r = std::max(0, (r - 1 - ws0 + st0) / st0) + 1;
// decide how many rows/cols the output has
if (st[i] >= ws[i])
{
z[i] = (r[i] - 1) / st[i] + 1;
}
else
{
z[i] = std::max(0, (r[i] - 1 - ws[i] + st[i]) / st[i]) + 1;
}
assert(z[i] > 0);
}
// decide how many columns the output has
if (st1 >= ws1)
}
// memory allocation of z if necessary
int mem_nec;
mem_nec = 0;
if ((!%(z)s) || *PyArray_DIMS(%(z)s)!=%(total_ndim)s)
{
mem_nec = 1;
}
if (!mem_nec)
{
for (int i=0; i<%(non_pool_ndim)s; i++)
{
z_c = (c - 1) / st1 + 1;
if (PyArray_DIMS(%(z)s)[i] != PyArray_DIMS(%(x)s)[i])
{
mem_nec = 1;
break;
}
}
else
}
if (!mem_nec)
{
for (int i=0; i<%(nd)s; i++)
{
z_c = std::max(0, (c - 1 - ws1 + st0) / st1) + 1;
if (PyArray_DIMS(%(z)s)[%(non_pool_ndim)s + i] != z[i])
{
mem_nec = 1;
break;
}
}
assert(z_r > 0);
assert(z_c > 0);
}
// memory allocation of z if necessary
if ((!%(z)s)
|| *PyArray_DIMS(%(z)s)!=4
||(PyArray_DIMS(%(z)s)[0] != PyArray_DIMS(%(x)s)[0])
||(PyArray_DIMS(%(z)s)[1] != PyArray_DIMS(%(x)s)[1])
||(PyArray_DIMS(%(z)s)[2] != z_r)
||(PyArray_DIMS(%(z)s)[3] != z_c)
)
if (mem_nec)
{
if (%(z)s) Py_XDECREF(%(z)s);
npy_intp dims[4] = {0,0,0,0};
dims[0]=PyArray_DIMS(%(x)s)[0];
dims[1]=PyArray_DIMS(%(x)s)[1];
dims[2]=z_r;
dims[3]=z_c;
npy_intp dims[%(total_ndim)s];
for (int i=0; i<%(non_pool_ndim)s; i++)
{
dims[i] = PyArray_DIMS(%(x)s)[i];
}
for (int i=0; i<%(nd)s; i++)
{
dims[%(non_pool_ndim)s + i] = z[i];
}
//TODO: zeros not necessary
%(z)s = (PyArrayObject*) PyArray_ZEROS(4, dims, typenum,0);
%(z)s = (PyArrayObject*) PyArray_ZEROS(%(total_ndim)s, dims, typenum,0);
}
// initialize temp var for the value in a region
dtype_%(x)s collector;
int z_prod;
// do not run if any z[i] is zero
z_prod = 1;
for (int i=0; i<%(nd)s; i++)
{
z_prod *= z[i];
}
// used for indexing a pool region inside the input
dtype_%(x)s collector; // temp var for the value in a region
if (z_r && z_c)
if (z_prod)
{
int r_st, r_end, c_st, c_end;
// will be used to hold start and end index of a region
int r_st[%(nd)s];
int r_end[%(nd)s];
// index for iterating over the pooling regions
int r_idx[%(nd)s];
// placeholder for PyArray indexing (output)
long int o_idx[%(total_ndim)s];
// placeholder for PyArray indexing (input)
long int i_idx[%(total_ndim)s];
// loop over non-pooling dimensions
int non_pooling_prod = 1;
for (int i=0; i<%(non_pool_ndim)s; i++)
{
non_pooling_prod *= PyArray_DIMS(%(x)s)[i];
}
%(omp_parallel)s
for(int t = 0; t < PyArray_DIMS(%(x)s)[0] * PyArray_DIMS(%(x)s)[1]; t++){
int b = t %% PyArray_DIMS(%(x)s)[0];
int k = t / PyArray_DIMS(%(x)s)[0];
for(int i=0; i < z_r; i++){
r_st = i * st0;
r_end = r_st + ws0;
// first loop over non-pooling dimensions
for (int t=0; t<non_pooling_prod; t++)
{
// compute the non-pooling index in each dimension
if (%(non_pool_ndim)s!=0)
{
o_idx[0] = t;
i_idx[0] = t;
for (int i=1; i<%(non_pool_ndim)s; i++)
{
o_idx[i] = o_idx[i - 1] / PyArray_DIMS(%(x)s)[i - 1];
o_idx[i - 1] = o_idx[i - 1] %% PyArray_DIMS(%(x)s)[i - 1];
i_idx[i] = o_idx[i];
i_idx[i - 1] = o_idx[i - 1];
}
}
// then loop over each region in each pooling dimension
"""
for i in xrange(nd):
ccode += """
for (r_idx[%(i)s]=0; r_idx[%(i)s] < z[%(i)s]; r_idx[%(i)s]++) {
r_st[%(i)s] = r_idx[%(i)s] * st[%(i)s];
r_end[%(i)s] = r_st[%(i)s] + ws[%(i)s];
// skip the padding
r_st = r_st < pd0 ? pd0 : r_st;
r_end = r_end > (r - pd0) ? r - pd0 : r_end;
r_st[%(i)s] = r_st[%(i)s] < pd[%(i)s] ? pd[%(i)s] : r_st[%(i)s];
r_end[%(i)s] = r_end[%(i)s] > (r[%(i)s] - pd[%(i)s]) ? r[%(i)s] - pd[%(i)s] : r_end[%(i)s];
// from padded_img space to img space
r_st -= pd0;
r_end -= pd0;
r_st[%(i)s] -= pd[%(i)s];
r_end[%(i)s] -= pd[%(i)s];
// handle the case where no padding, ignore border is True
if (%(ignore_border)s)
{
r_end = r_end > r ? r : r_end;
r_end[%(i)s] = r_end[%(i)s] > r[%(i)s] ? r[%(i)s] : r_end[%(i)s];
}
for(int j=0; j<z_c; j++){
c_st = j * st1;
c_end = c_st + ws1;
// skip the padding
c_st = c_st < pd1 ? pd1 : c_st;
c_end = c_end > (c - pd1) ? c - pd1 : c_end;
dtype_%(z)s * z = (
(dtype_%(z)s*)(PyArray_GETPTR4(%(z)s, b, k, i, j)));
// change coordinates from padding_img space into img space
c_st -= pd1;
c_end -= pd1;
// handle the case where no padding, ignore border is True
if (%(ignore_border)s)
{
c_end = c_end > c ? c : c_end;
}
// use the index to find the correct position in the output
o_idx[%(non_pool_ndim)s + %(i)s] = r_idx[%(i)s];
""" % dict(i=i, ignore_border=ignore_border, non_pool_ndim=non_pool_ndim)
ccode += """
// get a pointer to the correct position in the output
dtype_%(z)s * z;
if (%(total_ndim)s == 4)
z = ((dtype_%(z)s*)(PyArray_GETPTR4(%(z)s, o_idx[0], o_idx[1], o_idx[2], o_idx[3])));
else
z = ((dtype_%(z)s*)(PyArray_GetPtr(%(z)s, o_idx)));
"""
if self.mode == 'max':
for i in xrange(nd):
ccode += """
// set the first index of dimension %(i)s
i_idx[%(non_pool_ndim)s + %(i)s] = r_st[%(i)s];
""" % dict(i=i, non_pool_ndim=non_pool_ndim)
ccode += """
// use the first element as the initial value of collector
collector = ((dtype_%(x)s*)(PyArray_GETPTR4(%(x)s,b,k,r_st,c_st)))[0];
// go through the pooled region in the unpadded input
for(int m=r_st; m<r_end; m++)
{
for(int n=c_st; n<c_end; n++)
{
dtype_%(x)s a = ((dtype_%(x)s*)(PyArray_GETPTR4(%(x)s,b,k,m,n)))[0];
collector = (a > collector) ? a : collector;
}
}
z[0] = collector;
// use the first element as the initial value of collector
if (%(total_ndim)s == 4)
collector = ((dtype_%(x)s*)(PyArray_GETPTR4(%(x)s,i_idx[0],i_idx[1],i_idx[2],i_idx[3])))[0];
else
collector = ((dtype_%(x)s*)(PyArray_GetPtr(%(x)s,i_idx)))[0];
"""
for i in xrange(nd):
ccode += """
// go through the pooled region in the unpadded input
for(int m%(i)s=r_st[%(i)s]; m%(i)s<r_end[%(i)s]; m%(i)s++)
{
i_idx[%(non_pool_ndim)s + %(i)s] = m%(i)s;
""" % dict(i=i, non_pool_ndim=non_pool_ndim)
ccode += """
// update maximum
dtype_%(x)s a;
if (%(total_ndim)s == 4)
a = ((dtype_%(x)s*)(PyArray_GETPTR4(%(x)s,i_idx[0],i_idx[1],i_idx[2],i_idx[3])))[0];
else
a = ((dtype_%(x)s*)(PyArray_GetPtr(%(x)s,i_idx)))[0];
collector = (a > collector) ? a : collector;
"""
for i in xrange(nd):
ccode += """
} // for loop over region
"""
ccode += """
z[0] = collector;
"""
elif self.mode in ('sum', 'average_exc_pad', 'average_inc_pad'):
ccode += """
// initialize the sum at zero
collector = ((dtype_%(x)s)(0));
// go through the pooled region in the unpadded input
for(int m=r_st; m<r_end; m++)
{
for(int n=c_st; n<c_end; n++)
{
dtype_%(x)s a = ((dtype_%(x)s*)(PyArray_GETPTR4(%(x)s,b,k,m,n)))[0];
collector += a;
}
}
// initialize the sum at zero
collector = ((dtype_%(x)s)(0));
"""
for i in xrange(nd):
ccode += """
// go through the pooled region in the unpadded input
for(int m%(i)s=r_st[%(i)s]; m%(i)s<r_end[%(i)s]; m%(i)s++)
{
i_idx[%(non_pool_ndim)s + %(i)s] = m%(i)s;
""" % dict(i=i, non_pool_ndim=non_pool_ndim)
ccode += """
// update sum
dtype_%(x)s a;
if (%(total_ndim)s == 4)
a = ((dtype_%(x)s*)(PyArray_GETPTR4(%(x)s,i_idx[0],i_idx[1],i_idx[2],i_idx[3])))[0];
else
a = ((dtype_%(x)s*)(PyArray_GetPtr(%(x)s,i_idx)))[0];
collector += a;
"""
for i in xrange(nd):
ccode += """
} // for loop over region
"""
if self.mode == "sum":
ccode += """
z[0] = collector;
z[0] = collector;
"""
elif self.mode == 'average_inc_pad' and self.ignore_border:
# region size = product over all pooling dimensions
region_size = ' * '.join('ws[%d]' % i for i in xrange(nd))
ccode += """
z[0] = collector / (ws0 * ws1);
"""
z[0] = collector / (%(region_size)s);
""" % dict(region_size=region_size)
else:
# region size = number elements of in this region
region_size = ' * '.join('(r_end[%d]-r_st[%d])' % (i, i) for i in xrange(nd))
ccode += """
z[0] = collector / ((r_end-r_st)*(c_end-c_st));
"""
z[0] = collector / (%(region_size)s);
""" % dict(region_size=region_size)
for i in xrange(nd):
ccode += """
} // loop over pooling dimension
"""
ccode += """
}
}
}
}
} // for loop over non-pooling dimensions
} // if z_prod
"""
return ccode % locals()
def c_code_cache_version(self):
    """Return the cache version key for the generated C code.

    Bumped (…, 7, …) because the generated C code changed when the op
    gained N-dimensional pooling support; stale cached modules must be
    recompiled.  ``self.openmp`` is part of the key because it changes
    the emitted ``#pragma omp`` directives.
    """
    # Merge residue left two return statements here; only the bumped
    # version is kept (the old (0, 6, 8, 6, ...) line was dead code).
    return (0, 6, 8, 7, self.openmp)
class PoolGrad(OpenMPOp):
# Op properties used for equality/hashing; 'ndim' is included because two
# PoolGrad instances with different pooling dimensionality are different ops.
# (Merge residue duplicated this assignment; only the ndim-aware one is kept.)
__props__ = ('ndim', 'ignore_border', 'mode')
@staticmethod
def out_shape(imgshape, ds, ignore_border=False, st=None, padding=None, ndim=None):
    """Return the shape of the output from this op, for input of given
    shape and flags.

    Parameters
    ----------
    imgshape : tuple of integers or scalar Theano variables
        the shape of a tensor of images. The last N elements are
        interpreted as the downsampling dimensions.
    ds : tuple of N ints
        downsample factor over rows and columns this parameter
        indicates the size of the pooling region
    ignore_border : bool
        If ds doesn't divide imgshape, do we include an extra row/col/slice
        of partial downsampling (False) or ignore it (True).
    st : list or tuple of N ints or None
        Stride size, which is the number of shifts over rows/cols/slices to get the
        next pool region. If st is None, it is considered equal to ds
        (no overlap on pooling regions).
    padding : tuple of N ints or None
        For each downsampling dimension, this specifies the number of zeros to
        add as padding on both sides. For 2D and (pad_h, pad_w), pad_h specifies the
        size of the top and bottom margins, pad_w specifies the size of the left and
        right margins. No padding is added if padding is None.
    ndim : int
        The number of pooling dimensions N.
        If this number is not specified, the default is set to the
        (input.ndim - 2), assuming that the first two dimensions of the input
        are non-pooling dimensions.

    Returns
    -------
    list
        the shape of the output from this op, for input of given
        shape. This will have the same length as imgshape, but
        with last N elements reduced as per the downsampling &
        ignore_border flags.
    """
    # NOTE: merge residue had interleaved the old 2D-only implementation
    # here; this is the N-dimensional version, which subsumes it.
    if ndim is None:
        ndim = len(imgshape) - 2
    if len(imgshape) < ndim:
        raise TypeError('imgshape must have at least {} dimensions'.format(ndim))
    if st is None:
        st = ds
    if padding is None:
        padding = (0,) * ndim
    # shape of each pooled dimension after padding on both sides
    patch_shape = tuple(tensor.extract_constant(imgshape[-ndim + i]) + padding[i] * 2
                        for i in xrange(ndim))

    def compute_out(v, downsample, stride):
        # output length of one pooled dimension; works both for
        # symbolic (Theano variable) and concrete integer sizes
        if ignore_border:
            out = (v - downsample) // stride + 1
            if isinstance(out, theano.Variable):
                return tensor.maximum(out, 0)
            else:
                return numpy.maximum(out, 0)
        else:
            if isinstance(v, theano.Variable):
                return tensor.switch(tensor.ge(stride, downsample),
                                     (v - 1) // stride + 1,
                                     tensor.maximum(0, (v - 1 - downsample) //
                                                    stride + 1) + 1)
            elif stride >= downsample:
                return (v - 1) // stride + 1
            else:
                return max(0, (v - 1 - downsample) // stride + 1) + 1

    out_shape = [compute_out(patch_shape[i], ds[i], st[i]) for i in xrange(ndim)]

    rval = list(imgshape[:-ndim]) + out_shape
    return rval
def __init__(self, ignore_border, mode='max', openmp=None):
def __init__(self, ignore_border, mode='max', ndim=None, openmp=None):
self.ndim = ndim
self.ignore_border = ignore_border
if mode not in ['max', 'sum', 'average_inc_pad', 'average_exc_pad']:
raise ValueError(
......@@ -689,6 +813,7 @@ class PoolGrad(OpenMPOp):
def prepare_node(self, node, storage_map, compute_map):
if len(node.inputs) < 5: # 5 for AveragePoolGrad, 6 for MaxPoolGrad
# Old interface
self.ndim = len(node.op.ds)
self.mode = node.op.mode
ws = theano.tensor.constant(node.op.ds)
st = theano.tensor.constant(node.op.st)
......@@ -720,26 +845,32 @@ class PoolGrad(OpenMPOp):
class MaxPoolGrad(PoolGrad):
def __init__(self, ignore_border, ndim=None, openmp=None):
    """Gradient of max-pooling.

    Parameters mirror PoolGrad; mode is fixed to 'max'.  ``ndim`` is the
    number of pooling dimensions (None means input.ndim - 2).
    (Merge residue duplicated the old 2D-only __init__; only the
    ndim-aware signature is kept.)
    """
    PoolGrad.__init__(self, ignore_border, mode='max', ndim=ndim, openmp=openmp)
def make_node(self, x, maxout, gz, ws, stride=None, pad=(0, 0)):
def make_node(self, x, maxout, gz, ws, stride=None, pad=None):
# make_node should only be called by the grad function of
# Pool, so these asserts should not fail.
x = tensor.as_tensor_variable(x)
maxout = tensor.as_tensor_variable(maxout)
gz = tensor.as_tensor_variable(gz)
nd = self.ndim
if nd is None:
nd = x.ndim - 2
if stride is None:
stride = ws
if pad is None:
pad = (0,) * nd
ws = tensor.as_tensor_variable(ws)
stride = tensor.as_tensor_variable(stride)
pad = tensor.as_tensor_variable(pad)
assert isinstance(x, Variable) and x.ndim == 4
assert isinstance(maxout, Variable) and maxout.ndim == 4
assert isinstance(gz, Variable) and gz.ndim == 4
assert isinstance(x, Variable) and x.ndim >= nd
assert isinstance(maxout, Variable) and maxout.ndim >= nd
assert isinstance(gz, Variable) and gz.ndim >= nd
assert isinstance(ws, Variable) and ws.ndim == 1
assert isinstance(stride, Variable) and stride.ndim == 1
assert isinstance(pad, Variable) and pad.ndim == 1
assert x.ndim == maxout.ndim == gz.ndim >= nd
if not ws.dtype.startswith('int'):
raise TypeError('Pool downsample parameters must be ints.')
if not stride.dtype.startswith('int'):
......@@ -752,41 +883,51 @@ class MaxPoolGrad(PoolGrad):
assert self.mode == 'max'
x, maxout, gz, ws, stride, pad = inp
gx_stg, = out
assert ws.shape == stride.shape == pad.shape == (2,)
# number of pooling output rows
pr = maxout.shape[-2]
# number of pooling output cols
pc = maxout.shape[-1]
ws0, ws1 = ws
st0, st1 = stride
pad_h = pad[0]
pad_w = pad[1]
img_rows = x.shape[-2] + 2 * pad_h
img_cols = x.shape[-1] + 2 * pad_w
nd = self.ndim
if nd is None:
nd = len(x.shape) - 2
assert ws.shape == stride.shape == pad.shape == (nd,)
if len(x.shape) < nd:
raise NotImplementedError(
'MaxPoolGrad requires input with {} or more dimensions'.format(nd))
pool_out_shp = maxout.shape[-nd:]
img_shp = tuple(x.shape[-nd + i] + 2 * pad[i] for i in xrange(nd))
# pad the image
if (pad_h, pad_w) != (0, 0):
y = numpy.zeros(
(x.shape[0], x.shape[1], img_rows, img_cols),
dtype=x.dtype)
y[:, :, pad_h:(img_rows - pad_h), pad_w:(img_cols - pad_w)] = x
if max(pad) != 0:
y = numpy.zeros(x.shape[:-nd] + img_shp, dtype=x.dtype)
y[(slice(None),) * (len(x.shape) - nd) +
tuple(slice(pad[i], img_shp[i] - pad[i]) for i in xrange(nd))] = x
else:
y = x
gx = numpy.zeros_like(y)
for n in xrange(x.shape[0]):
for k in xrange(x.shape[1]):
for r in xrange(pr):
row_st = builtins.max(r * st0, pad_h)
row_end = builtins.min(row_st + ws0, img_rows)
for c in xrange(pc):
col_st = builtins.max(c * st1, pad_w)
col_end = builtins.min(col_st + ws1, img_cols)
for row_ind in xrange(row_st, row_end):
for col_ind in xrange(col_st, col_end):
if (maxout[n, k, r, c] == y[n, k, row_ind, col_ind]):
gx[n, k, row_ind, col_ind] += gz[n, k, r, c]
# precompute the region boundaries for each dimension
region_ranges = [[] for i in xrange(nd)]
for i in xrange(nd):
for j in xrange(pool_out_shp[i]):
start = builtins.max(j * stride[i], pad[i])
end = builtins.min(start + ws[i], img_shp[i])
region_ranges[i].append(xrange(start, end))
# iterate over non-pooling dimensions
for k in numpy.ndindex(*x.shape[:-nd]):
gxk = gx[k]
gzk = gz[k]
yk = y[k]
maxoutk = maxout[k]
# iterate over pooling regions
for r in numpy.ndindex(*pool_out_shp):
maxout_value = maxoutk[r]
# iterate inside region
for c in itertools.product(*[region_ranges[i][r[i]]
for i in xrange(nd)]):
if maxout_value == yk[c]:
gxk[c] += gzk[r]
# unpad the image
gx = gx[:, :, pad_h:(img_rows - pad_h), pad_w:(img_cols - pad_w)]
gx = gx[(slice(None),) * (len(x.shape) - nd) +
tuple(slice(pad[i], img_shp[i] - pad[i]) for i in xrange(nd))]
gx_stg[0] = gx
def grad(self, inp, grads):
......@@ -794,7 +935,8 @@ class MaxPoolGrad(PoolGrad):
ggx, = grads
return ([theano.tensor.zeros_like(x),
theano.tensor.zeros_like(maxout),
DownsampleFactorMaxGradGrad(ignore_border=self.ignore_border)(
DownsampleFactorMaxGradGrad(ndim=self.ndim,
ignore_border=self.ignore_border)(
x, maxout, ggx, ws, stride, pad)] +
[DisconnectedType()() for i in inp[3:]])
......@@ -805,161 +947,253 @@ class MaxPoolGrad(PoolGrad):
assert self.mode == 'max'
x, z, gz, ws, stride, pad = inp
gx, = out
nd = self.ndim
if nd is None:
nd = node.inputs[0].ndim - 2
total_ndim = node.inputs[0].ndim
non_pool_ndim = total_ndim - nd
fail = sub['fail']
ignore_border = int(self.ignore_border)
if self.openmp:
omp_parallel = '#pragma omp parallel for private(r_st, r_end, c_st, c_end, maximum) schedule(static)'
# run in parallel over each pooling block
omp_parallel = '#pragma omp parallel for private(r_st, r_end, r_idx, i_idx, o_idx, maximum) schedule(static)'
else:
omp_parallel = ''
return """
ccode = """
// sanity checks
int x_typenum = PyArray_ObjectType((PyObject*)%(x)s, 0);
int z_typenum = PyArray_ObjectType((PyObject*)%(z)s, 0);
int gz_typenum = PyArray_ObjectType((PyObject*)%(gz)s, 0);
int ws0, ws1, st0, st1, pd0, pd1;
int z_r, z_c;
int r, c; // shape of the padded_input
if ((x_typenum != z_typenum) || (x_typenum != gz_typenum))
{
PyErr_SetString(PyExc_ValueError, "input types must all match");
%(fail)s;
}
if(PyArray_NDIM(%(x)s)!=4)
if(PyArray_NDIM(%(x)s)!=%(total_ndim)s)
{
PyErr_SetString(PyExc_ValueError, "x must be a 4d ndarray");
PyErr_SetString(PyExc_ValueError, "x must be a %(total_ndim)sD ndarray");
%(fail)s;
}
if(PyArray_NDIM(%(z)s)!=4)
if(PyArray_NDIM(%(z)s)!=%(total_ndim)s)
{
PyErr_SetString(PyExc_ValueError, "z must be a 4d ndarray");
PyErr_SetString(PyExc_ValueError, "z must be a %(total_ndim)sD ndarray");
%(fail)s;
}
if(PyArray_NDIM(%(gz)s)!=4)
if(PyArray_NDIM(%(gz)s)!=%(total_ndim)s)
{
PyErr_SetString(PyExc_ValueError, "gz must be a 4d ndarray");
PyErr_SetString(PyExc_ValueError, "gz must be a %(total_ndim)sD ndarray");
%(fail)s;
}
if(PyArray_DIM(%(ws)s, 0)!=2)
if(PyArray_DIM(%(ws)s, 0)!=%(nd)s)
{
PyErr_SetString(PyExc_ValueError, "ws must be a vector of size 2");
PyErr_SetString(PyExc_ValueError, "ws must be a vector of size %(nd)s");
%(fail)s;
}
if(PyArray_DIM(%(stride)s, 0)!=2)
if(PyArray_DIM(%(stride)s, 0)!=%(nd)s)
{
PyErr_SetString(PyExc_ValueError, "stride must be a vector of size 2");
PyErr_SetString(PyExc_ValueError, "stride must be a vector of size %(nd)s");
%(fail)s;
}
if(PyArray_DIM(%(pad)s, 0)!=2)
if(PyArray_DIM(%(pad)s, 0)!=%(nd)s)
{
PyErr_SetString(PyExc_ValueError, "pad must be a vector of size 2");
PyErr_SetString(PyExc_ValueError, "pad must be a vector of size %(nd)s");
%(fail)s;
}
// Getting ws, stride and pad
ws0 = *((npy_intp*)PyArray_GETPTR1(%(ws)s, 0));
ws1 = *((npy_intp*)PyArray_GETPTR1(%(ws)s, 1));
st0 = *((npy_intp*)PyArray_GETPTR1(%(stride)s, 0));
st1 = *((npy_intp*)PyArray_GETPTR1(%(stride)s, 1));
pd0 = *((npy_intp*)PyArray_GETPTR1(%(pad)s, 0));
pd1 = *((npy_intp*)PyArray_GETPTR1(%(pad)s, 1));
z_r = PyArray_DIMS(%(z)s)[2];
z_c = PyArray_DIMS(%(z)s)[3];
r = PyArray_DIMS(%(x)s)[2];
c = PyArray_DIMS(%(x)s)[3];
r += pd0 * 2;
c += pd1 * 2;
// allocating memory for gx
if ((!%(gx)s)
|| !PyArray_ISCONTIGUOUS(%(gx)s)
|| *PyArray_DIMS(%(gx)s)!=4
||(PyArray_DIMS(%(gx)s)[0] != PyArray_DIMS(%(x)s)[0])
||(PyArray_DIMS(%(gx)s)[1] != PyArray_DIMS(%(x)s)[1])
||(PyArray_DIMS(%(gx)s)[2] != PyArray_DIMS(%(x)s)[2])
||(PyArray_DIMS(%(gx)s)[3] != PyArray_DIMS(%(x)s)[3])
)
int z[%(nd)s]; // shape of the output
int r[%(nd)s]; // shape of the padded_input
int ws[%(nd)s];
int st[%(nd)s];
int pd[%(nd)s];
int nonzero_padding;
nonzero_padding = 0;
for (int i=0; i<%(nd)s; i++)
{
ws[i] = *((npy_intp*)PyArray_GETPTR1(%(ws)s, i));
st[i] = *((npy_intp*)PyArray_GETPTR1(%(stride)s, i));
pd[i] = *((npy_intp*)PyArray_GETPTR1(%(pad)s, i));
z[i] = PyArray_DIMS(%(z)s)[%(non_pool_ndim)s + i];
r[i] = PyArray_DIMS(%(x)s)[%(non_pool_ndim)s + i] + 2 * pd[i];
if (pd[i]>0)
nonzero_padding = 1;
}
// allocating memory for output, if necessary
int mem_nec;
mem_nec = 0;
if ((!%(gx)s) || !PyArray_ISCONTIGUOUS(%(gx)s)
|| *PyArray_DIMS(%(gx)s)!=%(total_ndim)s)
{
mem_nec = 1;
}
if (!mem_nec)
{
for (int i=0; i<%(total_ndim)s; i++)
{
if (PyArray_DIMS(%(gx)s)[i] != PyArray_DIMS(%(x)s)[i])
{
mem_nec = 1;
break;
}
}
}
if (mem_nec)
{
Py_XDECREF(%(gx)s);
%(gx)s = (PyArrayObject*) PyArray_ZEROS(4, PyArray_DIMS(%(x)s), x_typenum,0);
%(gx)s = (PyArrayObject*) PyArray_ZEROS(%(total_ndim)s, PyArray_DIMS(%(x)s), x_typenum,0);
}
else {
PyArray_FILLWBYTE(%(gx)s, 0);
}
dtype_%(z)s maximum; // temp var for maximum value in a region
if (z_r && z_c)
int z_prod;
// do not run if any z[i] is zero
z_prod = 1;
for (int i=0; i<%(nd)s; i++)
{
int r_st, r_end, c_st, c_end;
z_prod *= z[i];
}
if (z_prod)
{
// will be used to hold start and end index of a region
int r_st[%(nd)s];
int r_end[%(nd)s];
// index for iterating over the pooling regions
int r_idx[%(nd)s];
// placeholder for PyArray indexing (output)
long int o_idx[%(total_ndim)s];
// placeholder for PyArray indexing (input)
long int i_idx[%(total_ndim)s];
// loop over non-pooling dimensions
int non_pooling_prod = 1;
for (int i=0; i<%(non_pool_ndim)s; i++)
{
non_pooling_prod *= PyArray_DIMS(%(x)s)[i];
}
%(omp_parallel)s
for(int t = 0; t < PyArray_DIMS(%(x)s)[0] * PyArray_DIMS(%(x)s)[1]; t++){
int b = t %% PyArray_DIMS(%(x)s)[0];
int k = t / PyArray_DIMS(%(x)s)[0];
for(int i=0; i < z_r; i++){
r_st = i * st0;
r_end = r_st + ws0;
// first loop over non-pooling dimensions
for (int t=0; t<non_pooling_prod; t++)
{
// compute the non-pooling index in each dimension
if (%(non_pool_ndim)s!=0)
{
o_idx[0] = t;
i_idx[0] = t;
for (int i=1; i<%(non_pool_ndim)s; i++)
{
o_idx[i] = o_idx[i - 1] / PyArray_DIMS(%(x)s)[i - 1];
o_idx[i - 1] =o_idx[i - 1] %% PyArray_DIMS(%(x)s)[i - 1];
i_idx[i] = o_idx[i];
i_idx[i - 1] = o_idx[i - 1];
}
}
// then loop over each region in each pooling dimension
"""
for i in xrange(nd):
ccode += """
for (r_idx[%(i)s]=0; r_idx[%(i)s] < z[%(i)s]; r_idx[%(i)s]++) {
r_st[%(i)s] = r_idx[%(i)s] * st[%(i)s];
r_end[%(i)s] = r_st[%(i)s] + ws[%(i)s];
// skip the padding
r_st = r_st < pd0 ? pd0 : r_st;
r_end = r_end > (r - pd0) ? r - pd0 : r_end;
r_st[%(i)s] = r_st[%(i)s] < pd[%(i)s] ? pd[%(i)s] : r_st[%(i)s];
r_end[%(i)s] = r_end[%(i)s] > (r[%(i)s] - pd[%(i)s]) ? r[%(i)s] - pd[%(i)s] : r_end[%(i)s];
// from padded_img space to img space
r_st -= pd0;
r_end -= pd0;
for(int j=0; j<z_c; j++){
c_st = j * st1;
c_end = c_st + ws1;
// skip the padding
c_st = c_st < pd1 ? pd1 : c_st;
c_end = c_end > (c - pd1) ? c - pd1 : c_end;
// change coordinates from padding_img space into img space
c_st -= pd1;
c_end -= pd1;
r_st[%(i)s] -= pd[%(i)s];
r_end[%(i)s] -= pd[%(i)s];
// use the index to find the correct position in the output
o_idx[%(non_pool_ndim)s + %(i)s] = r_idx[%(i)s];
""" % dict(i=i, non_pool_ndim=non_pool_ndim)
ccode += """
dtype_%(gz)s * gz;
if (%(total_ndim)s == 4)
{
// the maximum value
maximum = ((dtype_%(z)s*)(PyArray_GETPTR4(%(z)s,o_idx[0],o_idx[1],o_idx[2],o_idx[3])))[0];
// the gradient corresponding to this maximum value in z
gz = ((dtype_%(gz)s*)(PyArray_GETPTR4(%(gz)s, o_idx[0],o_idx[1],o_idx[2],o_idx[3])));
}
else
{
// the maximum value
maximum = ((dtype_%(z)s*)(PyArray_GETPTR4(%(z)s,b,k,i,j)))[0];
maximum = ((dtype_%(z)s*)(PyArray_GetPtr(%(z)s,o_idx)))[0];
// the gradient corresponding to this maximum value in z
dtype_%(gz)s * gz = (
(dtype_%(gz)s*)(PyArray_GETPTR4(%(gz)s, b, k, i, j)));
// go through the pooled region in the unpadded input
for(int m=r_st; m<r_end; m++)
gz = ((dtype_%(gz)s*)(PyArray_GetPtr(%(gz)s, o_idx)));
}
"""
for i in xrange(nd):
ccode += """
// go through the pooled region in the unpadded input
for(int m%(i)s=r_st[%(i)s]; m%(i)s<r_end[%(i)s]; m%(i)s++)
{
i_idx[%(non_pool_ndim)s + %(i)s] = m%(i)s;
""" % dict(i=i, non_pool_ndim=non_pool_ndim)
ccode += """
dtype_%(x)s a;
dtype_%(gx)s * gx;
if (%(total_ndim)s == 4)
{
for(int n=c_st; n<c_end; n++)
{
dtype_%(x)s a = ((dtype_%(x)s*)(PyArray_GETPTR4(%(x)s,b,k,m,n)))[0];
dtype_%(gx)s * gx = (
(dtype_%(gx)s*)(PyArray_GETPTR4(%(gx)s, b, k, m, n)));
if (a == maximum){
gx[0] = gx[0] + gz[0];
}
}
a = ((dtype_%(x)s*)(PyArray_GETPTR4(%(x)s,i_idx[0],i_idx[1],i_idx[2],i_idx[3])))[0];
gx = ((dtype_%(gx)s*)(PyArray_GETPTR4(%(gx)s, i_idx[0],i_idx[1],i_idx[2],i_idx[3])));
}
}
}
}
}
""" % locals()
else
{
a = ((dtype_%(x)s*)(PyArray_GetPtr(%(x)s,i_idx)))[0];
gx = ((dtype_%(gx)s*)(PyArray_GetPtr(%(gx)s, i_idx)));
}
if (a == maximum){
gx[0] = gx[0] + gz[0];
}
"""
for i in xrange(nd):
ccode += """
} // for loop over region
"""
for i in xrange(nd):
ccode += """
} // loop over pooling dimension
"""
ccode += """
} // for loop over non-pooling dimensions
} // if z_prod
"""
return ccode % locals()
def c_code_cache_version(self):
    """Return the cache version key for MaxPoolGrad's generated C code.

    Bumped from 9 to 10 for the N-dimensional rewrite of the C code;
    ``self.openmp`` participates because it changes the emitted pragmas.
    """
    # Merge residue left both the old and new return; keep the bump.
    return (0, 10, self.openmp)
class AveragePoolGrad(PoolGrad):
def __init__(self, ignore_border, mode='average_inc_pad', ndim=None):
    """Gradient of sum/average pooling.

    mode must be one of 'sum', 'average_inc_pad', 'average_exc_pad';
    ``ndim`` is the number of pooling dimensions (None: input.ndim - 2).
    (Merge residue duplicated the old 2D-only def/super lines; only the
    ndim-aware version is kept.)
    """
    assert mode in ['sum', 'average_inc_pad', 'average_exc_pad']
    PoolGrad.__init__(self, ignore_border, mode, ndim)
# There is an extra dummy parameter to match the parameter count
# of MaxPoolGrad. They have to keep the same interface because of
# the DownsampleFactorMaxGrad trick to keep old scripts working
# (see downsample.py for details on this).
def make_node(self, x, gz, ws, stride=None, pad=(0, 0), dummy=None):
def make_node(self, x, gz, ws, stride=None, pad=None, dummy=None):
# make_node should only be called by the grad function of
# Pool, so these asserts should not fail.
x = tensor.as_tensor_variable(x)
gz = tensor.as_tensor_variable(gz)
nd = self.ndim
if nd is None:
nd = x.ndim - 2
if stride is None:
stride = ws
if pad is None:
pad = (0,) * nd
ws = tensor.as_tensor_variable(ws)
stride = tensor.as_tensor_variable(stride)
pad = tensor.as_tensor_variable(pad)
assert isinstance(x, Variable) and x.ndim == 4
assert isinstance(gz, Variable) and gz.ndim == 4
assert isinstance(x, Variable) and x.ndim >= nd
assert isinstance(gz, Variable) and gz.ndim >= nd
assert isinstance(ws, Variable) and ws.ndim == 1
assert isinstance(stride, Variable) and stride.ndim == 1
assert x.ndim == gz.ndim >= nd
assert isinstance(pad, Variable) and pad.ndim == 1
if not ws.dtype.startswith('int'):
raise TypeError('Pool downsample parameters must be ints.')
......@@ -972,96 +1206,333 @@ class AveragePoolGrad(PoolGrad):
def perform(self, node, inp, out):
    """Python implementation of the sum/average pooling gradient.

    inp = (x, gz, ws, stride, pad); writes the gradient w.r.t. x into
    out[0].  Supports N pooling dimensions (self.ndim, defaulting to
    x.ndim - 2).  The old interleaved 2D implementation from the merge
    residue has been removed; this is the N-dimensional version.
    """
    x, gz, ws, stride, pad = inp
    gx_stg, = out
    nd = self.ndim
    if nd is None:
        nd = len(x.shape) - 2
    assert ws.shape == stride.shape == pad.shape == (nd,)
    if len(x.shape) < nd:
        raise NotImplementedError(
            'AveragePoolGrad requires input with {} or more dimensions'.format(nd))
    if self.mode == 'average_exc_pad' and max(pad) != 0:
        raise NotImplementedError()
    # tuple() so the comparison with an ndarray .shape (a tuple) below can
    # ever be equal; a list would always compare unequal and force a realloc
    z_shape = tuple(self.out_shape(x.shape, ws, self.ignore_border, stride, pad, nd))
    if (gx_stg[0] is None) or (gx_stg[0].shape != z_shape):
        gx_stg[0] = numpy.empty(z_shape, dtype=x.dtype)
    zz = gx_stg[0]
    # size of pooling output
    pool_out_shp = zz.shape[-nd:]
    img_shp = tuple(x.shape[-nd + i] + 2 * pad[i] for i in xrange(nd))
    inc_pad = self.mode == 'average_inc_pad'
    sum_mode = self.mode == 'sum'

    # initialize the padded output
    gx = numpy.zeros((x.shape[:-nd] + img_shp), dtype=x.dtype)

    # precompute the region boundaries and sizes for each dimension
    region_slices = [[] for i in xrange(nd)]
    region_sizes = [[] for i in xrange(nd)]
    for i in xrange(nd):
        for j in xrange(pool_out_shp[i]):
            if sum_mode or inc_pad:
                start = j * stride[i]
            else:
                # average_exc_pad: regions never extend into the padding
                start = builtins.max(j * stride[i], pad[i])
            end = builtins.min(start + ws[i], img_shp[i])
            region_slices[i].append(slice(start, end))
            region_sizes[i].append(end - start)

    # iterate over non-pooling dimensions
    region_slice = [None] * nd
    for k in numpy.ndindex(*x.shape[:-nd]):
        gzk = gz[k]
        gxk = gx[k]
        # iterate over pooling regions
        for r in numpy.ndindex(*pool_out_shp):
            region_size = 1
            for i in xrange(nd):
                region_slice[i] = region_slices[i][r[i]]
                region_size *= region_sizes[i][r[i]]
            if sum_mode:
                val = gzk[r]
            else:
                # divide by region size
                val = gzk[r] / region_size
            # tuple-index: indexing an ndarray with a *list* of slices is
            # deprecated in numpy (and an error in recent versions)
            gxk[tuple(region_slice)] += val

    # unpad the image
    gx = gx[(slice(None),) * (len(x.shape) - nd) +
            tuple(slice(pad[i], img_shp[i] - pad[i]) for i in xrange(nd))]
    gx_stg[0] = gx
def grad(self, inp, grads):
    """Gradient of AveragePoolGrad: pooling the incoming gradient again.

    Only x (and the gradient itself) carry gradients; ws/stride/pad are
    disconnected.  (Merge residue interleaved the old Pool(...) call with
    the new ndim-aware one; only the latter is kept.)
    """
    x, gz, ws, stride, pad = inp
    ggx, = grads
    return ([theano.tensor.zeros_like(x),
             Pool(ignore_border=self.ignore_border,
                  ndim=self.ndim, mode=self.mode)(ggx, ws, stride, pad)] +
            [DisconnectedType()() for i in inp[2:]])
def connection_pattern(self, node):
    """Gradient flows through x and gz only; ws, stride and pad are
    disconnected inputs."""
    connected = [[1], [1]]
    disconnected = [[0] for _ in range(3)]
    return connected + disconnected
def c_code(self, node, name, inp, out, sub):
    """Generate C code for the sum/average pooling gradient.

    Builds one C source string by concatenating fragments: a fixed
    prologue (sanity checks, shape/stride/pad extraction, output
    allocation), then one nested loop per pooling dimension (emitted
    ``nd`` times), the per-region gradient contribution, the inner
    accumulation loops, and matching closing braces.  Most ``%(...)s``
    placeholders are resolved only by the final ``ccode % locals()``.
    """
    x, gz, ws, stride, pad = inp
    gx, = out
    # nd = number of pooling dimensions; default: all but the first two
    nd = self.ndim
    if nd is None:
        nd = node.inputs[0].ndim - 2
    total_ndim = node.inputs[0].ndim
    non_pool_ndim = total_ndim - nd
    fail = sub['fail']
    # C-level booleans (0/1) selecting the averaging variant
    inc_pad = int(self.mode == 'average_inc_pad')
    sum_mode = int(self.mode == 'sum')
    if self.openmp:
        # run in parallel over each pooling block
        omp_parallel = '#pragma omp parallel for private(r_st, r_end, r_pad_width, r_idx, i_idx, o_idx) schedule(static)'
    else:
        omp_parallel = ''
    # Fixed prologue.  NOTE(review): '*PyArray_DIMS(...) != total_ndim'
    # below compares the first *dimension* to the rank — looks like it was
    # meant to be PyArray_NDIM; confirm before touching the C code.
    ccode = """
        // sanity checks
        int x_typenum = PyArray_ObjectType((PyObject*)%(x)s, 0);
        int gz_typenum = PyArray_ObjectType((PyObject*)%(gz)s, 0);
        if (x_typenum != gz_typenum)
        {
            PyErr_SetString(PyExc_ValueError, "input types must all match");
            %(fail)s;
        }
        if(PyArray_NDIM(%(x)s)!=%(total_ndim)s)
        {
            PyErr_SetString(PyExc_ValueError, "x must be a %(total_ndim)sD ndarray");
            %(fail)s;
        }
        if(PyArray_NDIM(%(gz)s)!=%(total_ndim)s)
        {
            PyErr_SetString(PyExc_ValueError, "gz must be a %(total_ndim)sD ndarray");
            %(fail)s;
        }
        if(PyArray_DIM(%(ws)s, 0)!=%(nd)s)
        {
            PyErr_SetString(PyExc_ValueError, "ws must be a vector of size %(nd)s");
            %(fail)s;
        }
        if(PyArray_DIM(%(stride)s, 0)!=%(nd)s)
        {
            PyErr_SetString(PyExc_ValueError, "stride must be a vector of size %(nd)s");
            %(fail)s;
        }
        if(PyArray_DIM(%(pad)s, 0)!=%(nd)s)
        {
            PyErr_SetString(PyExc_ValueError, "pad must be a vector of size %(nd)s");
            %(fail)s;
        }
        int z[%(nd)s]; // shape of the output
        int r[%(nd)s]; // shape of the padded_input
        int ws[%(nd)s];
        int st[%(nd)s];
        int pd[%(nd)s];
        int nonzero_padding;
        nonzero_padding = 0;
        for (int i=0; i<%(nd)s; i++)
        {
            ws[i] = *((npy_intp*)PyArray_GETPTR1(%(ws)s, i));
            st[i] = *((npy_intp*)PyArray_GETPTR1(%(stride)s, i));
            pd[i] = *((npy_intp*)PyArray_GETPTR1(%(pad)s, i));
            z[i] = PyArray_DIMS(%(gz)s)[%(non_pool_ndim)s + i];
            r[i] = PyArray_DIMS(%(x)s)[%(non_pool_ndim)s + i] + 2 * pd[i];
            if (pd[i]>0)
                nonzero_padding = 1;
        }
        if (!%(inc_pad)s && !%(sum_mode)s && nonzero_padding)
        {
            PyErr_SetString(PyExc_ValueError,
              "padding must be zero for average_exc_pad");
            %(fail)s;
        }
        // allocating memory for output, if necessary
        int mem_nec;
        mem_nec = 0;
        if ((!%(gx)s) || !PyArray_ISCONTIGUOUS(%(gx)s)
            || *PyArray_DIMS(%(gx)s)!=%(total_ndim)s)
        {
            mem_nec = 1;
        }
        if (!mem_nec)
        {
            for (int i=0; i<%(total_ndim)s; i++)
            {
                if (PyArray_DIMS(%(gx)s)[i] != PyArray_DIMS(%(x)s)[i])
                {
                    mem_nec = 1;
                    break;
                }
            }
        }
        if (mem_nec)
        {
          Py_XDECREF(%(gx)s);
          %(gx)s = (PyArrayObject*) PyArray_ZEROS(%(total_ndim)s, PyArray_DIMS(%(x)s), x_typenum,0);
        }
        else {
          PyArray_FILLWBYTE(%(gx)s, 0);
        }
        int z_prod;
        // do not run if any z[i] is zero
        z_prod = 1;
        for (int i=0; i<%(nd)s; i++)
        {
            z_prod *= z[i];
        }
        if (z_prod)
        {
            // will be used to hold start and end index of a region
            int r_st[%(nd)s];
            int r_end[%(nd)s];
            // padded region size
            int r_pad_width[%(nd)s];
            // index for iterating over the pooling regions
            int r_idx[%(nd)s];
            // placeholder for PyArray indexing (output)
            long int o_idx[%(total_ndim)s];
            // placeholder for PyArray indexing (input)
            long int i_idx[%(total_ndim)s];
            // loop over non-pooling dimensions
            int non_pooling_prod = 1;
            for (int i=0; i<%(non_pool_ndim)s; i++)
            {
                non_pooling_prod *= PyArray_DIMS(%(x)s)[i];
            }
            %(omp_parallel)s
            // first loop over non-pooling dimensions
            for (int t=0; t<non_pooling_prod; t++)
            {
                // compute the non-pooling index in each dimension
                if (%(non_pool_ndim)s!=0)
                {
                    o_idx[0] = t;
                    i_idx[0] = t;
                    for (int i=1; i<%(non_pool_ndim)s; i++)
                    {
                        o_idx[i] = o_idx[i - 1] / PyArray_DIMS(%(x)s)[i - 1];
                        o_idx[i - 1] =o_idx[i - 1] %% PyArray_DIMS(%(x)s)[i - 1];
                        i_idx[i] = o_idx[i];
                        i_idx[i - 1] = o_idx[i - 1];
                    }
                }
                // then loop over each region in each pooling dimension
    """
    # one opening loop per pooling dimension; this fragment is interpolated
    # immediately with its own dict (i, sum_mode, inc_pad, non_pool_ndim)
    for i in xrange(nd):
        ccode += """
                for (r_idx[%(i)s]=0; r_idx[%(i)s] < z[%(i)s]; r_idx[%(i)s]++) {
                  r_st[%(i)s] = r_idx[%(i)s] * st[%(i)s];
                  if (!%(sum_mode)s && !%(inc_pad)s && r_st[%(i)s] < pd[%(i)s])
                  {
                    r_st[%(i)s] = pd[%(i)s];
                  }
                  r_end[%(i)s] = r_st[%(i)s] + ws[%(i)s];
                  r_end[%(i)s] = r_end[%(i)s] > r[%(i)s] ? r[%(i)s] : r_end[%(i)s];
                  r_pad_width[%(i)s] = r_end[%(i)s] - r_st[%(i)s];
                  // from padded_img space to img space
                  r_st[%(i)s] = r_st[%(i)s] - pd[%(i)s] > 0 ? r_st[%(i)s] - pd[%(i)s] : 0;
                  r_end[%(i)s] = r_end[%(i)s] > r[%(i)s] - pd[%(i)s] ? r[%(i)s] - 2 * pd[%(i)s] : r_end[%(i)s] - pd[%(i)s];
                  // use the index to find the correct position in the output
                  o_idx[%(non_pool_ndim)s + %(i)s] = r_idx[%(i)s];
        """ % dict(i=i, sum_mode=sum_mode, inc_pad=inc_pad, non_pool_ndim=non_pool_ndim)
    # per-region contribution; %(region_size)s here is resolved only by
    # the final `ccode % locals()` (region_size is assigned just below)
    ccode += """
                  dtype_%(gz)s * gz;
                  dtype_%(gz)s val;
                  if (%(total_ndim)s == 4)
                  {
                    // the gradient for this region
                    gz = ((dtype_%(gz)s*)(PyArray_GETPTR4(%(gz)s, o_idx[0],o_idx[1],o_idx[2],o_idx[3])));
                  }
                  else
                  {
                    // the gradient for this region
                    gz = ((dtype_%(gz)s*)(PyArray_GetPtr(%(gz)s, o_idx)));
                  }
                  // compute the contribution
                  if (%(sum_mode)s)
                  {
                    val = gz[0];
                  }
                  else
                  {
                    val = gz[0] / (%(region_size)s);
                  }
    """
    region_size = ' * '.join('r_pad_width[%d]' % i for i in xrange(nd))
    # inner loops walking the pooled region in the unpadded input
    for i in xrange(nd):
        ccode += """
                  // go through the pooled region in the unpadded input
                  for(int m%(i)s=r_st[%(i)s]; m%(i)s<r_end[%(i)s]; m%(i)s++)
                  {
                    i_idx[%(non_pool_ndim)s + %(i)s] = m%(i)s;
        """ % dict(i=i, non_pool_ndim=non_pool_ndim)
    ccode += """
                    dtype_%(gx)s * gx;
                    if (%(total_ndim)s == 4)
                    {
                      gx = ((dtype_%(gx)s*)(PyArray_GETPTR4(%(gx)s, i_idx[0],i_idx[1],i_idx[2],i_idx[3])));
                    }
                    else
                    {
                      gx = ((dtype_%(gx)s*)(PyArray_GetPtr(%(gx)s, i_idx)));
                    }
                    gx[0] = gx[0] + val;
    """
    # closing braces for the inner region loops
    for i in xrange(nd):
        ccode += """
                  } // for loop over region
        """
    # closing braces for the outer per-dimension loops
    for i in xrange(nd):
        ccode += """
                } // loop over pooling dimension
        """
    ccode += """
            } // for loop over non-pooling dimensions
        } // if z_prod
    """
    # final interpolation fills in x/gz/gx names, dims, region_size, etc.
    return ccode % locals()
def c_code_cache_version(self):
    """Version key for the generated C code cache; includes the openmp
    flag since it changes the emitted code."""
    version = (0, 3, self.openmp)
    return version
class DownsampleFactorMaxGradGrad(OpenMPOp):
# Op properties for equality/hashing; 'ndim' added so ops with different
# pooling dimensionality compare unequal.  (Merge residue duplicated this
# assignment; only the ndim-aware one is kept.)
__props__ = ('ndim', 'ignore_border', 'mode')
def __init__(self, ignore_border, mode='max', ndim=None, openmp=None):
    """Second-order gradient helper for max pooling.

    Only mode='max' is supported (asserted below).  ``ndim`` is the
    number of pooling dimensions (None means input.ndim - 2).
    (Merge residue duplicated the old 2D-only def line; only the
    ndim-aware signature is kept.)
    """
    self.ndim = ndim
    self.ignore_border = ignore_border
    self.mode = mode
    super(DownsampleFactorMaxGradGrad, self).__init__(openmp=openmp)
    assert self.mode == 'max'
def make_node(self, x, maxout, gz, ws, stride=None, pad=(0, 0)):
def make_node(self, x, maxout, gz, ws, stride=None, pad=None):
# make_node should only be called by the grad function of
# MaxPoolGrad, so these asserts should not fail.
x = tensor.as_tensor_variable(x)
maxout = tensor.as_tensor_variable(maxout)
gz = tensor.as_tensor_variable(gz)
assert x.ndim == 4
assert maxout.ndim == 4
assert gz.ndim == 4
nd = self.ndim
if nd is None:
nd = x.ndim - 2
if stride is None:
stride = ws
if isinstance(pad, (tuple, list)):
if tuple(pad) != (0, 0) and not self.ignore_border:
if pad is None:
pad = (0,) * nd
elif isinstance(pad, (tuple, list)):
if max(pad) != 0 and not self.ignore_border:
raise NotImplementedError(
'padding works only with ignore_border=True')
if isinstance(ws, (tuple, list)):
if pad[0] >= ws[0] or pad[1] >= ws[1]:
if any(pad[i] >= ws[i] for i in range(nd)):
raise NotImplementedError(
'padding_h and padding_w must be smaller than strides')
ws = tensor.as_tensor_variable(ws)
......@@ -1070,6 +1541,7 @@ class DownsampleFactorMaxGradGrad(OpenMPOp):
assert ws.ndim == 1
assert stride.ndim == 1
assert pad.ndim == 1
assert x.ndim == maxout.ndim == gz.ndim >= nd
if not ws.dtype.startswith('int'):
raise TypeError('Pool downsample parameters must be ints.')
if not stride.dtype.startswith('int'):
......@@ -1081,49 +1553,56 @@ class DownsampleFactorMaxGradGrad(OpenMPOp):
def perform(self, node, inp, out):
# NOTE(review): this whole method is a rendered diff -- the pre-change
# 2-D implementation and its N-D replacement are interleaved line by
# line and the original indentation has been stripped.  Flagging rather
# than rewriting; the N-D lines appear to be the intended final version.
x, maxout, ggx, ws, stride, pad = inp
z, = out
# NOTE(review): old 2-D assert (next line) and its N-D replacement below
assert ws.shape == stride.shape == pad.shape == (2,)
if len(x.shape) != 4:
nd = self.ndim
if nd is None:
# default: pool over all but the two leading (batch, channel) axes
nd = len(x.shape) - 2
assert ws.shape == stride.shape == pad.shape == (nd,)
if len(x.shape) < nd:
raise NotImplementedError(
'DownsampleFactorMaxGradGrad requires 4D input for now')
'DownsampleFactorMaxGradGrad requires input '
'with {} or more dimensions'.format(nd))
if (z[0] is None) or (z[0].shape != maxout.shape):
z[0] = numpy.zeros(maxout.shape, dtype=x.dtype)
ggz = z[0] # grad wrt maxout_grad has the same shape as maxout
# NOTE(review): old 2-D shape bookkeeping (pr/pc, ws0/ws1, ...) followed
# by its N-D equivalents (pool_out_shp, img_shp)
# number of pooling output rows
pr = ggz.shape[-2]
# number of pooling output cols
pc = ggz.shape[-1]
ws0, ws1 = ws
st0, st1 = stride
pd0, pd1 = pad
img_rows = x.shape[-2] + 2 * pd0
img_cols = x.shape[-1] + 2 * pd1
# size of pooling output
pool_out_shp = ggz.shape[-nd:]
img_shp = tuple(x.shape[-nd + i] + 2 * pad[i] for i in xrange(nd))
# pad the image and its gradients
# NOTE(review): old condition `pd0 != 0 and pd1 != 0` vs. new
# `max(pad) > 0` -- the new test also pads when only one axis is padded
if pd0 != 0 and pd1 != 0:
y_padded = numpy.zeros(
(x.shape[0], x.shape[1], img_rows, img_cols),
dtype=x.dtype) + x.min() - 1
y_padded[:, :, pd0:(img_rows - pd0), pd1:(img_cols - pd1)] = x
ggx_padded = numpy.zeros(
(x.shape[0], x.shape[1], img_rows, img_cols),
dtype=x.dtype)
ggx_padded[:, :, pd0:(img_rows - pd0), pd1:(img_cols - pd1)] = ggx
if max(pad) > 0:
y_padded = numpy.zeros(x.shape[:-nd] + img_shp, dtype=x.dtype)
y_padded[(slice(None),) * (len(x.shape) - nd) +
tuple(slice(pad[i], img_shp[i] - pad[i]) for i in xrange(nd))] = x
ggx_padded = numpy.zeros(x.shape[:-nd] + img_shp, dtype=x.dtype)
ggx_padded[(slice(None),) * (len(x.shape) - nd) +
tuple(slice(pad[i], img_shp[i] - pad[i]) for i in xrange(nd))] = ggx
else:
y_padded = x
ggx_padded = ggx
# NOTE(review): old explicit 4-D loop nest (next block) followed by the
# N-D replacement driven by numpy.ndindex / itertools.product
for n in xrange(x.shape[0]):
for k in xrange(x.shape[1]):
for r in xrange(pr):
row_st = r * st0
row_end = builtins.min(row_st + ws0, img_rows)
for c in xrange(pc):
col_st = c * st1
col_end = builtins.min(col_st + ws1, img_cols)
for row_ind in xrange(row_st, row_end):
for col_ind in xrange(col_st, col_end):
if (maxout[n, k, r, c] == y_padded[n, k, row_ind, col_ind]):
ggz[n, k, r, c] = ggx_padded[n, k, row_ind, col_ind]
# precompute the region boundaries for each dimension
region_ranges = [[] for i in xrange(nd)]
for i in xrange(nd):
for j in xrange(pool_out_shp[i]):
start = j * stride[i]
end = builtins.min(start + ws[i], img_shp[i])
region_ranges[i].append(xrange(start, end))
# iterate over non-pooling dimensions
for k in numpy.ndindex(*x.shape[:-nd]):
ggxk = ggx_padded[k]
ggzk = ggz[k]
yk = y_padded[k]
maxoutk = maxout[k]
# iterate over pooling regions
for r in numpy.ndindex(*pool_out_shp):
# iterate inside region
maxout_value = maxoutk[r]
for c in itertools.product(*[region_ranges[i][r[i]]
for i in xrange(nd)]):
if maxout_value == yk[c]:
ggzk[r] += ggxk[c]
def infer_shape(self, node, in_shapes):
# The grad-of-grad output has the shape of `maxout` (the second input).
return [in_shapes[1]]
......@@ -1133,8 +1612,9 @@ class DownsampleFactorMaxGradGrad(OpenMPOp):
gz, = grads
return [theano.tensor.zeros_like(x),
theano.tensor.zeros_like(maxout),
MaxPoolGrad(ignore_border=self.ignore_border)(x, maxout, gz,
ws, stride, pad),
MaxPoolGrad(ignore_border=self.ignore_border,
ndim=self.ndim)(x, maxout, gz,
ws, stride, pad),
DisconnectedType()(),
DisconnectedType()(),
DisconnectedType()()]
......@@ -1147,106 +1627,182 @@ class DownsampleFactorMaxGradGrad(OpenMPOp):
raise theano.gof.utils.MethodNotDefined()
x, maxout, ggx, ws, stride, pad = inp
z, = out # the grad of grad
nd = self.ndim
if nd is None:
nd = node.inputs[0].ndim - 2
total_ndim = node.inputs[0].ndim
non_pool_ndim = total_ndim - nd
fail = sub['fail']
ignore_border = int(self.ignore_border)
if self.openmp:
omp_parallel = '#pragma omp parallel for private(r_st, r_end, c_st, c_end, maximum) schedule(static)'
# run in parallel over each pooling block
omp_parallel = '#pragma omp parallel for private(r_st, r_end, r_idx, i_idx, o_idx, maximum) schedule(static)'
else:
omp_parallel = ''
return """
int ws0, ws1, st0, st1, pd0, pd1;
ccode = """
int z_typenum = PyArray_ObjectType((PyObject*)%(maxout)s, 0);
int z_r, z_c;
int r, c; // shape of the padded_input
if(PyArray_DIM(%(ws)s, 0)!=2)
int z[%(nd)s]; // shape of the output
int r[%(nd)s]; // shape of the padded_input
int ws[%(nd)s];
int st[%(nd)s];
int pd[%(nd)s];
if(PyArray_DIM(%(ws)s, 0)!=%(nd)s)
{
PyErr_SetString(PyExc_ValueError, "ws must be a vector of size 2");
PyErr_SetString(PyExc_ValueError, "ws must be a vector of size %(nd)s");
%(fail)s;
}
if(PyArray_DIM(%(stride)s, 0)!=2)
if(PyArray_DIM(%(stride)s, 0)!=%(nd)s)
{
PyErr_SetString(PyExc_ValueError, "stride must be a vector of size 2");
PyErr_SetString(PyExc_ValueError, "stride must be a vector of size %(nd)s");
%(fail)s;
}
if(PyArray_DIM(%(pad)s, 0)!=2)
if(PyArray_DIM(%(pad)s, 0)!=%(nd)s)
{
PyErr_SetString(PyExc_ValueError, "pad must be a vector of size 2");
PyErr_SetString(PyExc_ValueError, "pad must be a vector of size %(nd)s");
%(fail)s;
}
// Getting ws, stride and pad
ws0 = *((npy_intp*)PyArray_GETPTR1(%(ws)s, 0));
ws1 = *((npy_intp*)PyArray_GETPTR1(%(ws)s, 1));
st0 = *((npy_intp*)PyArray_GETPTR1(%(stride)s, 0));
st1 = *((npy_intp*)PyArray_GETPTR1(%(stride)s, 1));
pd0 = *((npy_intp*)PyArray_GETPTR1(%(pad)s, 0));
pd1 = *((npy_intp*)PyArray_GETPTR1(%(pad)s, 1));
z_r = PyArray_DIMS(%(maxout)s)[2];
z_c = PyArray_DIMS(%(maxout)s)[3];
r = PyArray_DIMS(%(x)s)[2];
c = PyArray_DIMS(%(x)s)[3];
r += pd0 * 2;
c += pd1 * 2;
// allocating memory for output
if ((!%(z)s)
|| !PyArray_ISCONTIGUOUS(%(z)s)
|| *PyArray_DIMS(%(z)s)!=4
||(PyArray_DIMS(%(z)s)[0] != PyArray_DIMS(%(maxout)s)[0])
||(PyArray_DIMS(%(z)s)[1] != PyArray_DIMS(%(maxout)s)[1])
||(PyArray_DIMS(%(z)s)[2] != PyArray_DIMS(%(maxout)s)[2])
||(PyArray_DIMS(%(z)s)[3] != PyArray_DIMS(%(maxout)s)[3])
)
for (int i=0; i<%(nd)s; i++)
{
ws[i] = *((npy_intp*)PyArray_GETPTR1(%(ws)s, i));
st[i] = *((npy_intp*)PyArray_GETPTR1(%(stride)s, i));
pd[i] = *((npy_intp*)PyArray_GETPTR1(%(pad)s, i));
z[i] = PyArray_DIMS(%(maxout)s)[%(non_pool_ndim)s + i];
r[i] = PyArray_DIMS(%(x)s)[%(non_pool_ndim)s + i] + 2 * pd[i];
}
// allocating memory for output, if necessary
int mem_nec;
mem_nec = 0;
if ((!%(z)s) || !PyArray_ISCONTIGUOUS(%(z)s)
|| *PyArray_DIMS(%(z)s)!=%(total_ndim)s)
{
mem_nec = 1;
}
if (!mem_nec)
{
for (int i=0; i<%(total_ndim)s; i++)
{
if (PyArray_DIMS(%(z)s)[i] != PyArray_DIMS(%(maxout)s)[i])
{
mem_nec = 1;
break;
}
}
}
if (mem_nec)
{
Py_XDECREF(%(z)s);
%(z)s = (PyArrayObject*) PyArray_ZEROS(4, PyArray_DIMS(%(maxout)s), z_typenum,0);
%(z)s = (PyArrayObject*) PyArray_ZEROS(%(total_ndim)s, PyArray_DIMS(%(maxout)s), z_typenum,0);
}
else {
PyArray_FILLWBYTE(%(z)s, 0);
}
dtype_%(maxout)s maximum; // temp var for maximum value in a region
int r_st, r_end, c_st, c_end;
// will be used to hold start and end index of a region
int r_st[%(nd)s];
int r_end[%(nd)s];
// index for iterating over the pooling regions
int r_idx[%(nd)s];
// placeholder for PyArray indexing (output)
long int o_idx[%(total_ndim)s];
// placeholder for PyArray indexing (input)
long int i_idx[%(total_ndim)s];
// loop over non-pooling dimensions
int non_pooling_prod;
non_pooling_prod = 1;
for (int i=0; i<%(non_pool_ndim)s; i++)
{
non_pooling_prod *= PyArray_DIMS(%(x)s)[i];
}
%(omp_parallel)s
for(int t = 0; t < PyArray_DIMS(%(x)s)[0] * PyArray_DIMS(%(x)s)[1]; t++){
int b = t %% PyArray_DIMS(%(x)s)[0];
int k = t / PyArray_DIMS(%(x)s)[0];
for(int i=0; i < z_r; i++){
r_st = i * st0;
r_end = r_st + ws0;
// first loop over non-pooling dimensions
for (int t=0; t<non_pooling_prod; t++)
{
// compute the non-pooling index in each dimension
if (%(non_pool_ndim)s!=0)
{
o_idx[0] = t;
i_idx[0] = t;
for (int i=1; i<%(non_pool_ndim)s; i++)
{
o_idx[i] = o_idx[i - 1] / PyArray_DIMS(%(x)s)[i - 1];
o_idx[i - 1] = o_idx[i - 1] %% PyArray_DIMS(%(x)s)[i - 1];
i_idx[i] = o_idx[i];
i_idx[i - 1] = o_idx[i - 1];
}
}
// then loop over each region in each pooling dimension
"""
for i in xrange(nd):
ccode += """
for (r_idx[%(i)s]=0; r_idx[%(i)s] < z[%(i)s]; r_idx[%(i)s]++) {
r_st[%(i)s] = r_idx[%(i)s] * st[%(i)s];
r_end[%(i)s] = r_st[%(i)s] + ws[%(i)s];
// skip the padding
r_st = r_st < pd0 ? pd0 : r_st;
r_end = r_end > (r - pd0) ? r - pd0 : r_end;
r_st[%(i)s] = r_st[%(i)s] < pd[%(i)s] ? pd[%(i)s] : r_st[%(i)s];
r_end[%(i)s] = r_end[%(i)s] > (r[%(i)s] - pd[%(i)s]) ? r[%(i)s] - pd[%(i)s] : r_end[%(i)s];
// from padded_img space to img space
r_st -= pd0;
r_end -= pd0;
for(int j=0; j<z_c; j++){
c_st = j * st1;
c_end = c_st + ws1;
// skip the padding
c_st = c_st < pd1 ? pd1 : c_st;
c_end = c_end > (c - pd1) ? c - pd1 : c_end;
// from padding_img space into img space
c_st -= pd1;
c_end -= pd1;
r_st[%(i)s] -= pd[%(i)s];
r_end[%(i)s] -= pd[%(i)s];
// use the index to find the correct position in the output
o_idx[%(non_pool_ndim)s + %(i)s] = r_idx[%(i)s];
""" % dict(i=i, non_pool_ndim=non_pool_ndim)
ccode += """
dtype_%(z)s * z;
if (%(total_ndim)s == 4)
{
// the maximum value
maximum = ((dtype_%(maxout)s*)(PyArray_GETPTR4(%(maxout)s,o_idx[0],o_idx[1],o_idx[2],o_idx[3])))[0];
// z at this position
z = ((dtype_%(z)s*)(PyArray_GETPTR4(%(z)s,o_idx[0],o_idx[1],o_idx[2],o_idx[3])));
}
else
{
// the maximum value
maximum = ((dtype_%(maxout)s*)(PyArray_GETPTR4(%(maxout)s,b,k,i,j)))[0];
maximum = ((dtype_%(maxout)s*)(PyArray_GetPtr(%(maxout)s,o_idx)))[0];
// z at this position
dtype_%(z)s * z = ((dtype_%(z)s*)(PyArray_GETPTR4(%(z)s, b, k, i, j)));
// go through the pooled region in the unpadded input
for(int m=r_st; m<r_end; m++)
z = ((dtype_%(z)s*)(PyArray_GetPtr(%(z)s,o_idx)));
}
"""
for i in xrange(nd):
ccode += """
// go through the pooled region in the unpadded input
for(int m%(i)s=r_st[%(i)s]; m%(i)s<r_end[%(i)s]; m%(i)s++)
{
i_idx[%(non_pool_ndim)s + %(i)s] = m%(i)s;
""" % dict(i=i, non_pool_ndim=non_pool_ndim)
ccode += """
dtype_%(x)s a;
dtype_%(ggx)s * ggx;
if (%(total_ndim)s == 4)
{
for(int n=c_st; n<c_end; n++)
{
dtype_%(x)s a = ((dtype_%(x)s*)(PyArray_GETPTR4(%(x)s,b,k,m,n)))[0];
dtype_%(ggx)s * ggx = (
(dtype_%(ggx)s*)(PyArray_GETPTR4(%(ggx)s, b, k, m, n)));
if (a == maximum){
z[0] += ggx[0];
}
}
a = ((dtype_%(x)s*)(PyArray_GETPTR4(%(x)s,i_idx[0],i_idx[1],i_idx[2],i_idx[3])))[0];
ggx = ((dtype_%(ggx)s*)(PyArray_GETPTR4(%(ggx)s,i_idx[0],i_idx[1],i_idx[2],i_idx[3])));
}
}
}
}
""" % locals()
else
{
a = ((dtype_%(x)s*)(PyArray_GetPtr(%(x)s,i_idx)))[0];
ggx = ((dtype_%(ggx)s*)(PyArray_GetPtr(%(ggx)s,i_idx)));
}
if (a == maximum){
z[0] += ggx[0];
}
"""
for i in xrange(nd):
ccode += """
} // for loop over region
"""
for i in xrange(nd):
ccode += """
} // loop over pooling dimension
"""
ccode += """
} // for loop over non-pooling dimensions
"""
return ccode % locals()
def c_code_cache_version(self):
# NOTE(review): rendered diff -- both the pre-change `(0, 3, ...)` and
# post-change `(0, 4, ...)` return lines are present; only the first
# would execute.  The post-change line bumps the cache key for the new
# N-D C implementation.
return (0, 3, self.openmp)
return (0, 4, self.openmp)
from __future__ import absolute_import, print_function, division
from nose.plugins.skip import SkipTest
from nose_parameterized import parameterized
from itertools import product
import os
import unittest
......@@ -14,7 +15,7 @@ import numpy
import theano
import theano.tensor as tensor
from theano.tests import unittest_tools as utt
from theano.tensor.signal.pool import (Pool, pool_2d,
from theano.tensor.signal.pool import (Pool, pool_2d, pool_3d,
MaxPoolGrad, AveragePoolGrad,
max_pool_2d_same_size,
DownsampleFactorMaxGradGrad)
......@@ -57,6 +58,36 @@ class TestDownsampleFactorMax(utt.InferShapeTester):
output_val[k][i, j] = func(patch)
return output_val
@staticmethod
def numpy_max_pool_nd(input, ds, ignore_border=False, mode='max'):
    """Reference N-dimensional pooling implemented in pure numpy.

    Pools over the last ``len(ds)`` axes of `input`.

    Parameters
    ----------
    input : numpy.ndarray
        Array with at least ``len(ds)`` dimensions.
    ds : tuple of int
        Pooling window size, one entry per pooled (trailing) axis.
    ignore_border : bool
        When False, each axis whose size is not a multiple of the window
        gets one extra (partial) output element.
    mode : str
        'max', 'sum', or any other value for average pooling.

    Returns
    -------
    numpy.ndarray
        The pooled output.

    Raises
    ------
    NotImplementedError
        If `input` has fewer than ``len(ds)`` dimensions.
    """
    nd = len(ds)
    if len(input.shape) < nd:
        # Report the required rank; the previous message interpolated the
        # ``ds`` tuple itself where a dimension count was intended.
        raise NotImplementedError('input should have at least %d dims,'
                                  ' shape is %s'
                                  % (nd, str(input.shape)))
    # one extra output element per axis with a partial border window
    si = [0] * nd
    if not ignore_border:
        for i in range(nd):
            if input.shape[-nd + i] % ds[i]:
                si[i] += 1
    out_shp = list(input.shape[:-nd])
    for i in range(nd):
        out_shp.append(input.shape[-nd + i] // ds[i] + si[i])
    output_val = numpy.zeros(out_shp)
    # reduction applied to each pooling region
    func = numpy.max
    if mode == 'sum':
        func = numpy.sum
    elif mode != 'max':
        func = numpy.average
    for nonpool_idx in numpy.ndindex(*input.shape[:-nd]):
        for pool_idx in numpy.ndindex(*output_val.shape[-nd:]):
            patch = input[nonpool_idx][
                tuple(slice(pool_idx[i] * ds[i], (pool_idx[i] + 1) * ds[i])
                      for i in range(nd))]
            output_val[nonpool_idx][pool_idx] = func(patch)
    return output_val
@staticmethod
def numpy_max_pool_2d_stride_padding(
x, ds, ignore_border=True, st=None, padding=(0, 0), mode='max'):
......@@ -111,6 +142,60 @@ class TestDownsampleFactorMax(utt.InferShapeTester):
output_val[k][i, j] = func(patch)
return output_val
@staticmethod
def numpy_max_pool_nd_stride_padding(
        input, ds, ignore_border=True, st=None, padding=None, mode='max'):
    """Reference N-dimensional strided, padded pooling in pure numpy.

    Zero-pads each of the last ``len(ds)`` axes of `input` by
    ``padding[i]`` on both sides, then pools with window `ds` and
    stride `st`.  Only ``ignore_border=True`` is supported.

    Parameters
    ----------
    input : numpy.ndarray
        Array whose trailing ``len(ds)`` axes are pooled.
    ds : tuple of int
        Pooling window size; must satisfy ``ds[i] > padding[i]``.
    ignore_border : bool
        Must be True.
    st : tuple of int
        Pooling stride, same length as `ds`.
    padding : tuple of int
        Per-axis zero padding (default: no padding).
    mode : str
        'max', 'sum', 'average_inc_pad' (regions include the padding) or
        any other value for padding-excluding average.

    Returns
    -------
    numpy.ndarray
        The pooled output.
    """
    assert ignore_border
    nd = len(ds)
    if padding is None:
        padding = (0,) * nd
    if st is None:
        # NOTE(review): a default stride of 0 would divide by zero in the
        # output-shape computation below; callers apparently always pass
        # ``st`` explicitly -- confirm intent.
        st = (0,) * nd
    assert len(padding) == len(ds) == len(st)
    assert all(ds[i] > padding[i] for i in range(nd))

    def pad_img(x):
        # zero-pad the pooled axes, placing x in the center
        y = numpy.zeros(
            x.shape[0:-nd] +
            tuple(x.shape[-nd + i] + padding[i] * 2 for i in range(nd)),
            dtype=x.dtype)
        block = ((slice(None),) * (len(x.shape) - nd) +
                 tuple(slice(padding[i], x.shape[-nd + i] + padding[i])
                       for i in range(nd)))
        y[block] = x
        return y

    pad_img_shp = list(input.shape[:-nd])
    out_shp = list(input.shape[:-nd])
    for i in range(nd):
        padded_size = input.shape[-nd + i] + 2 * padding[i]
        pad_img_shp.append(padded_size)
        out_shp.append((padded_size - ds[i]) // st[i] + 1)
    output_val = numpy.zeros(out_shp)
    padded_input = pad_img(input)
    # reduction applied to each pooling region
    func = numpy.max
    if mode == 'sum':
        func = numpy.sum
    elif mode != 'max':
        func = numpy.average
    inc_pad = mode == 'average_inc_pad'
    for l in numpy.ndindex(*input.shape[:-nd]):
        for r in numpy.ndindex(*output_val.shape[-nd:]):
            region = []
            for i in range(nd):
                r_st = r[i] * st[i]
                r_end = builtins.min(r_st + ds[i], pad_img_shp[-nd + i])
                if not inc_pad:
                    # clip the region so it excludes the padding
                    r_st = builtins.max(r_st, padding[i])
                    r_end = builtins.min(r_end,
                                         input.shape[-nd + i] + padding[i])
                region.append(slice(r_st, r_end))
            # index with a tuple: modern numpy rejects a *list* of slices
            # as a multi-axis index
            patch = padded_input[l][tuple(region)]
            output_val[l][r] = func(patch)
    return output_val
@staticmethod
def numpy_max_pool_2d_stride(input, ds, ignore_border=False, st=None,
mode='max'):
......@@ -174,84 +259,167 @@ class TestDownsampleFactorMax(utt.InferShapeTester):
output_val[k][i, j] = func(patch)
return output_val
@staticmethod
def numpy_max_pool_nd_stride(input, ds, ignore_border=False, st=None,
                             mode='max'):
    """Reference N-dimensional strided pooling in pure numpy.

    Pools over the last ``len(ds)`` axes of `input` using stride `st`
    for the pooling regions; when `st` is not given the stride equals
    the window size `ds`.

    Parameters
    ----------
    input : numpy.ndarray
        Array whose trailing ``len(ds)`` axes are pooled.
    ds : tuple of int
        Pooling window size.
    ignore_border : bool
        When False, a trailing partial window adds one output element
        per axis.
    st : tuple of int, optional
        Pooling stride (default: `ds`).
    mode : str
        'max', 'sum', or any other value for average pooling.

    Returns
    -------
    numpy.ndarray
        The pooled output.
    """
    nd = len(ds)
    if st is None:
        st = ds
    assert len(st) == len(ds)

    out_shp = list(input.shape[:-nd])
    for i in range(nd):
        out = 0
        if input.shape[-nd + i] - ds[i] >= 0:
            out = (input.shape[-nd + i] - ds[i]) // st[i] + 1
        if not ignore_border:
            # account for a final partial window past the last full one
            if out > 0:
                if input.shape[-nd + i] - ((out - 1) * st[i] + ds[i]) > 0:
                    if input.shape[-nd + i] - out * st[i] > 0:
                        out += 1
            else:
                if input.shape[-nd + i] > 0:
                    out += 1
        out_shp.append(out)

    # reduction applied to each pooling region
    func = numpy.max
    if mode == 'sum':
        func = numpy.sum
    elif mode != 'max':
        func = numpy.average

    output_val = numpy.zeros(out_shp)
    for l in numpy.ndindex(*input.shape[:-nd]):
        for r in numpy.ndindex(*output_val.shape[-nd:]):
            region = []
            for i in range(nd):
                r_st = r[i] * st[i]
                r_end = builtins.min(r_st + ds[i], input.shape[-nd + i])
                region.append(slice(r_st, r_end))
            # index with a tuple: modern numpy rejects a *list* of slices
            # as a multi-axis index
            patch = input[l][tuple(region)]
            output_val[l][r] = func(patch)
    return output_val
def test_DownsampleFactorMax(self):
rng = numpy.random.RandomState(utt.fetch_seed())
# generate random images
maxpoolshps = ((1, 1), (2, 2), (3, 3), (2, 3))
imval = rng.rand(4, 2, 16, 16)
images = tensor.dtensor4()
for maxpoolshp, ignore_border, mode in product(maxpoolshps,
[True, False],
['max',
'sum',
'average_inc_pad',
'average_exc_pad']):
# print 'maxpoolshp =', maxpoolshp
# print 'ignore_border =', ignore_border
# Pure Numpy computation
numpy_output_val = self.numpy_max_pool_2d(imval, maxpoolshp,
ignore_border,
mode=mode)
# maxpool, input size
examples = (
((2,), (16,)),
((2,), (4, 16,)),
((2,), (4, 2, 16,)),
((1, 1), (4, 2, 16, 16)),
((2, 2), (4, 2, 16, 16)),
((3, 3), (4, 2, 16, 16)),
((3, 2), (4, 2, 16, 16)),
((3, 2, 2), (3, 2, 16, 16, 16)),
((2, 3, 2), (3, 2, 16, 16, 16)),
((2, 2, 3), (3, 2, 16, 16, 16)),
((2, 2, 3, 2), (3, 2, 6, 6, 6, 5)),
)
for example, ignore_border, mode in product(examples,
[True, False],
['max',
'sum',
'average_inc_pad',
'average_exc_pad']):
(maxpoolshp, inputsize) = example
imval = rng.rand(*inputsize)
images = theano.shared(imval)
# Pure Numpy computation
numpy_output_val = self.numpy_max_pool_nd(imval, maxpoolshp,
ignore_border,
mode=mode)
# The pool_2d or pool_3d helper methods
if len(maxpoolshp) == 2:
output = pool_2d(images, maxpoolshp, ignore_border,
mode=mode)
f = function([images, ], [output, ])
output_val = f(imval)
f = function([], [output, ])
output_val = f()
utt.assert_allclose(output_val, numpy_output_val)
# Pool op
maxpool_op = Pool(ignore_border=ignore_border,
mode=mode)(images, maxpoolshp)
output_shape = Pool.out_shape(imval.shape, maxpoolshp,
ignore_border=ignore_border)
utt.assert_allclose(numpy.asarray(output_shape), numpy_output_val.shape)
f = function([images], maxpool_op)
output_val = f(imval)
elif len(maxpoolshp) == 3:
output = pool_3d(images, maxpoolshp, ignore_border,
mode=mode)
f = function([], [output, ])
output_val = f()
utt.assert_allclose(output_val, numpy_output_val)
# Pool op
maxpool_op = Pool(ndim=len(maxpoolshp),
ignore_border=ignore_border,
mode=mode)(images, maxpoolshp)
output_shape = Pool.out_shape(imval.shape, maxpoolshp,
ndim=len(maxpoolshp),
ignore_border=ignore_border)
utt.assert_allclose(numpy.asarray(output_shape), numpy_output_val.shape)
f = function([], maxpool_op)
output_val = f()
utt.assert_allclose(output_val, numpy_output_val)
def test_DownsampleFactorMaxStride(self):
rng = numpy.random.RandomState(utt.fetch_seed())
maxpoolshps = ((1, 1), (3, 3), (5, 3), (16, 16))
stridesizes = ((1, 1), (3, 3), (5, 7),)
# generate random images
imval = rng.rand(4, 10, 16, 16)
# The same for each mode
outputshps = (
(4, 10, 16, 16), (4, 10, 6, 6), (4, 10, 4, 3),
(4, 10, 16, 16), (4, 10, 6, 6), (4, 10, 4, 3),
(4, 10, 14, 14), (4, 10, 5, 5), (4, 10, 3, 2),
(4, 10, 14, 14), (4, 10, 6, 6), (4, 10, 4, 3),
(4, 10, 12, 14), (4, 10, 4, 5), (4, 10, 3, 2),
(4, 10, 12, 14), (4, 10, 5, 6), (4, 10, 4, 3),
(4, 10, 1, 1), (4, 10, 1, 1), (4, 10, 1, 1),
(4, 10, 1, 1), (4, 10, 1, 1), (4, 10, 1, 1),)
images = tensor.dtensor4()
indx = 0
for mode, maxpoolshp, ignore_border in product(['max',
'sum',
'average_inc_pad',
'average_exc_pad'],
maxpoolshps,
[True, False]):
for stride in stridesizes:
outputshp = outputshps[indx % len(outputshps)]
indx += 1
# Pool op
numpy_output_val = \
self.numpy_max_pool_2d_stride(imval, maxpoolshp,
ignore_border, stride,
mode)
assert numpy_output_val.shape == outputshp, (
"outshape is %s, calculated shape is %s"
% (outputshp, numpy_output_val.shape))
maxpool_op = \
Pool(ignore_border=ignore_border, mode=mode)(
images, maxpoolshp, stride)
f = function([images], maxpool_op)
output_val = f(imval)
utt.assert_allclose(output_val, numpy_output_val)
# maxpool, stride, ignore_border, input, output sizes
examples = (
((1, 1), (1, 1), True, (4, 10, 16, 16), (4, 10, 16, 16)),
((1, 1), (3, 3), True, (4, 10, 16, 16), (4, 10, 6, 6)),
((1, 1), (5, 7), True, (4, 10, 16, 16), (4, 10, 4, 3)),
((1, 1), (1, 1), False, (4, 10, 16, 16), (4, 10, 16, 16)),
((1, 1), (3, 3), False, (4, 10, 16, 16), (4, 10, 6, 6)),
((1, 1), (5, 7), False, (4, 10, 16, 16), (4, 10, 4, 3)),
((3, 3), (1, 1), True, (4, 10, 16, 16), (4, 10, 14, 14)),
((3, 3), (3, 3), True, (4, 10, 16, 16), (4, 10, 5, 5)),
((3, 3), (5, 7), True, (4, 10, 16, 16), (4, 10, 3, 2)),
((3, 3), (1, 1), False, (4, 10, 16, 16), (4, 10, 14, 14)),
((3, 3), (3, 3), False, (4, 10, 16, 16), (4, 10, 6, 6)),
((3, 3), (5, 7), False, (4, 10, 16, 16), (4, 10, 4, 3)),
((5, 3), (1, 1), True, (4, 10, 16, 16), (4, 10, 12, 14)),
((5, 3), (3, 3), True, (4, 10, 16, 16), (4, 10, 4, 5)),
((5, 3), (5, 7), True, (4, 10, 16, 16), (4, 10, 3, 2)),
((5, 3), (1, 1), False, (4, 10, 16, 16), (4, 10, 12, 14)),
((5, 3), (3, 3), False, (4, 10, 16, 16), (4, 10, 5, 6)),
((5, 3), (5, 7), False, (4, 10, 16, 16), (4, 10, 4, 3)),
((16, 16), (1, 1), True, (4, 10, 16, 16), (4, 10, 1, 1)),
((16, 16), (3, 3), True, (4, 10, 16, 16), (4, 10, 1, 1)),
((16, 16), (5, 7), True, (4, 10, 16, 16), (4, 10, 1, 1)),
((16, 16), (1, 1), False, (4, 10, 16, 16), (4, 10, 1, 1)),
((16, 16), (3, 3), False, (4, 10, 16, 16), (4, 10, 1, 1)),
((16, 16), (5, 7), False, (4, 10, 16, 16), (4, 10, 1, 1)),
((3,), (5,), True, (16,), (3,)),
((3,), (5,), True, (2, 16,), (2, 3,)),
((5,), (3,), True, (2, 3, 16,), (2, 3, 4,)),
((5, 1, 3), (3, 3, 3), True, (2, 16, 16, 16), (2, 4, 6, 5)),
((5, 1, 3), (3, 3, 3), True, (4, 2, 16, 16, 16), (4, 2, 4, 6, 5)),
)
for example, mode in product(examples, ['max',
'sum',
'average_inc_pad',
'average_exc_pad']):
(maxpoolshp, stride, ignore_border, inputshp, outputshp) = example
# generate random images
imval = rng.rand(*inputshp)
images = theano.shared(imval)
# Pool op
numpy_output_val = \
self.numpy_max_pool_nd_stride(imval, maxpoolshp,
ignore_border, stride,
mode)
assert numpy_output_val.shape == outputshp, (
"outshape is %s, calculated shape is %s"
% (outputshp, numpy_output_val.shape))
maxpool_op = \
Pool(ndim=len(maxpoolshp),
ignore_border=ignore_border,
mode=mode)(images, maxpoolshp, stride)
f = function([], maxpool_op)
output_val = f()
utt.assert_allclose(output_val, numpy_output_val)
def test_DownsampleFactorMaxStrideExtra(self):
rng = numpy.random.RandomState(utt.fetch_seed())
......@@ -287,7 +455,8 @@ class TestDownsampleFactorMax(utt.InferShapeTester):
"outshape is %s, calculated shape is %s"
% (outputshp, numpy_output_val.shape))
maxpool_op = \
Pool(ignore_border=ignore_border, mode=mode)(
Pool(ignore_border=ignore_border,
ndim=len(maxpoolshp), mode=mode)(
images, maxpoolshp, stride)
f = function([images], maxpool_op)
output_val = f(imval)
......@@ -296,319 +465,351 @@ class TestDownsampleFactorMax(utt.InferShapeTester):
def test_DownsampleFactorMaxPaddingStride(self):
ignore_border = True # padding does not support ignore_border=False
rng = numpy.random.RandomState(utt.fetch_seed())
maxpoolsizes = [(3, 3), (4, 4), (3, 4), (4, 3), (2, 2)]
stridesizes = [(2, 2), (2, 2), (1, 1), (1, 2), (2, 2)]
paddingsizes = [(2, 2), (1, 2), (2, 1), (0, 0), (1, 1)]
imgsizes = [(5, 5), (5, 5), (5, 6), (6, 5), (5, 5)]
m = 4 # minibatch
c = 2 # channel size
images = tensor.dtensor4()
for indx, mode in product(numpy.arange(len(maxpoolsizes)),
['max', 'sum', 'average_inc_pad',
'average_exc_pad']):
imgsize = imgsizes[indx]
imval = rng.rand(m, c, imgsize[0], imgsize[1]) - 0.5
stridesize = stridesizes[indx]
maxpoolsize = maxpoolsizes[indx]
paddingsize = paddingsizes[indx]
numpy_output_val = self.numpy_max_pool_2d_stride_padding(
imval, maxpoolsize, ignore_border,
# maxpool, stride, padding, input sizes
examples = (
((3,), (2,), (2,), (5,)),
((3,), (2,), (2,), (4, 5)),
((3,), (2,), (2,), (4, 2, 5)),
((3,), (2,), (2,), (4, 2, 5, 5)),
((3, 3), (2, 2), (2, 2), (4, 2, 5, 5)),
((4, 4), (2, 2), (1, 2), (4, 2, 5, 5)),
((3, 4), (1, 1), (2, 1), (4, 2, 5, 6)),
((4, 3), (1, 2), (0, 0), (4, 2, 6, 5)),
((2, 2), (2, 2), (1, 1), (4, 2, 5, 5)),
((4, 3, 2), (1, 2, 2), (0, 2, 1), (4, 6, 6, 5)),
((4, 3, 2), (1, 2, 2), (0, 2, 1), (4, 2, 6, 5, 5)),
)
for example, mode in product(examples,
['max', 'sum', 'average_inc_pad',
'average_exc_pad']):
(maxpoolshp, stridesize, paddingsize, inputsize) = example
imval = rng.rand(*inputsize) - 0.5
images = theano.shared(imval)
numpy_output_val = self.numpy_max_pool_nd_stride_padding(
imval, maxpoolshp, ignore_border,
stridesize, paddingsize, mode)
maxpool_op = Pool(ignore_border=ignore_border, mode=mode)(
images, maxpoolsize, stridesize, paddingsize)
f = function([images], maxpool_op)
output_val = f(imval)
maxpool_op = Pool(
ndim=len(maxpoolshp),
ignore_border=ignore_border,
mode=mode
)(images, maxpoolshp, stridesize, paddingsize)
f = function([], maxpool_op)
output_val = f()
utt.assert_allclose(output_val, numpy_output_val)
def test_DownsampleFactorMaxPaddingStride_grad(self):
# NOTE(review): rendered diff -- the pre-change fixture tuples / loop and
# the post-change `examples`-driven loop are interleaved; indentation has
# been stripped.  Flagged rather than rewritten.
rng = numpy.random.RandomState(utt.fetch_seed())
# NOTE(review): old parallel fixture tuples (replaced by `examples` below)
imgsizes = ((10, 10), (10, 5), (5, 5))
maxpoolsizes = ((5, 3), (3, 5), (3, 3))
stridesizes = ((3, 2), (2, 3), (3, 3))
paddingsizes = ((2, 2), (2, 1), (2, 2))
# maxpool, stride, padding, input sizes
examples = (
((10,), (5,), (3,), (2,)),
((10,), (5,), (3,), (2, 2)),
((10,), (5,), (3,), (1, 1, 2)),
((10, 10), (5, 3), (3, 2), (1, 1, 2, 2)),
((10, 5), (3, 5), (2, 3), (1, 1, 2, 1)),
((5, 5), (3, 3), (3, 3), (1, 1, 2, 2)),
((5, 5, 5), (3, 3, 3), (3, 3, 3), (1, 1, 2, 2, 2)),
)
# average_inc_pad and average_exc_pad do not
# support grad with padding
for mode in ['max', 'sum']:
# NOTE(review): old index-based loop body (replaced by the `examples`
# unpacking below)
for i in range(len(imgsizes)):
imgsize = imgsizes[i]
imval = rng.rand(1, 1, imgsize[0], imgsize[1]) * 10.0
maxpoolsize = maxpoolsizes[i]
stridesize = stridesizes[i]
paddingsize = paddingsizes[i]
for example in examples:
(maxpoolshp, stridesize, paddingsize, inputsize) = example
imval = rng.rand(*inputsize) * 10.0
def mp(input):
# NOTE(review): old 2-line Pool call followed by the new ndim-aware call
return Pool(ignore_border=True, mode=mode)(
input, maxpoolsize, stridesize, paddingsize)
return Pool(
ndim=len(maxpoolshp),
ignore_border=True,
mode=mode,
)(input, maxpoolshp, stridesize, paddingsize)
utt.verify_grad(mp, [imval], rng=rng)
def test_DownsampleFactorMax_grad(self):
rng = numpy.random.RandomState(utt.fetch_seed())
maxpoolshps = ((1, 1), (3, 2), (2, 3))
imval = rng.rand(2, 3, 3, 4) * 10.0
# more variance means numeric gradient will be more accurate
for maxpoolshp, ignore_border, mode in product(maxpoolshps,
[True, False],
['max',
'sum',
'average_inc_pad',
'average_exc_pad']):
# maxpool, input sizes
examples = (
((2,), (3,)),
((2,), (2, 3)),
((2,), (2, 3, 3)),
((1, 1), (2, 3, 3, 4)),
((3, 2), (2, 3, 3, 4)),
((2, 3), (2, 3, 3, 4)),
((1, 1, 1), (2, 3, 3)),
((3, 2, 2), (2, 3, 3, 4)),
((2, 3, 2), (2, 3, 3, 4, 4)),
((2, 2, 3), (2, 3, 3, 4, 4)),
)
for example, ignore_border, mode in product(examples,
[True, False],
['max',
'sum',
'average_inc_pad',
'average_exc_pad']):
(maxpoolshp, inputsize) = example
imval = rng.rand(*inputsize) * 10.0
# more variance means numeric gradient will be more accurate
def mp(input):
return Pool(ignore_border=ignore_border, mode=mode)(
input, maxpoolshp)
return Pool(ndim=len(maxpoolshp),
ignore_border=ignore_border,
mode=mode)(input, maxpoolshp)
utt.verify_grad(mp, [imval], rng=rng)
def test_DownsampleFactorMax_grad_st(self):
# pool, stride, input sizes
pool_grad_st_examples = (
((1,), (1,), (16,)),
((1,), (3,), (1, 16)),
((1,), (5,), (1, 2, 16)),
((2,), (1,), (16,)),
((2,), (3,), (1, 16)),
((2,), (5,), (1, 2, 16)),
((1, 1), (1, 1), (1, 2, 16, 16)),
((1, 1), (3, 3), (1, 2, 16, 16)),
((1, 1), (5, 7), (1, 2, 16, 16)),
((3, 3), (1, 1), (1, 2, 16, 16)),
((3, 3), (3, 3), (1, 2, 16, 16)),
((3, 3), (5, 7), (1, 2, 16, 16)),
((5, 3), (1, 1), (1, 2, 16, 16)),
((5, 3), (3, 3), (1, 2, 16, 16)),
((5, 3), (5, 7), (1, 2, 16, 16)),
((5, 1, 2), (1, 1, 1), (16, 3, 16)),
((5, 1, 2), (3, 1, 2), (1, 16, 3, 16)),
((5, 1, 2), (5, 1, 4), (1, 2, 16, 3, 16)),
((5, 3), (3, 2), (1, 2, 16, 16)),
((5, 3), (7, 5), (1, 2, 16, 16)),
((5, 3), (10, 6), (1, 2, 16, 16)),
((5, 5), (1, 1), (1, 2, 8, 5)),
((3, 2), (2, 3), (1, 2, 8, 5)),
((7, 7), (10, 10), (1, 2, 8, 5)),
((9, 9), (1, 1), (1, 2, 8, 5)),
)
@parameterized.expand(product(pool_grad_st_examples,
[True, False],
['max',
'sum',
'average_inc_pad',
'average_exc_pad']),
testcase_func_name=utt.custom_name_func)
def test_DownsampleFactorMax_grad_st(self, example, ignore_border, mode):
# checks the gradient for the case that stride is used
rng = numpy.random.RandomState(utt.fetch_seed())
maxpoolshps = ((1, 1), (3, 3), (5, 3))
stridesizes = ((1, 1), (3, 3), (5, 7))
imval = rng.rand(1, 2, 16, 16)
for maxpoolshp, ignore_border, mode, stride in product(maxpoolshps,
[True, False],
['max',
'sum',
'average_inc_pad',
'average_exc_pad'],
stridesizes):
def mp(input):
return Pool(ignore_border=ignore_border, mode=mode)(
input, maxpoolshp, stride)
utt.verify_grad(mp, [imval], rng=rng)
def test_DownsampleFactorMax_grad_st_extra(self):
# checks the gradient for the case
# that stride is used for extra examples
rng = numpy.random.RandomState(utt.fetch_seed())
maxpoolshps = ((5, 3), (5, 3), (5, 3), (5, 5), (3, 2), (7, 7), (9, 9))
stridesizes = ((3, 2), (7, 5), (10, 6), (1, 1),
(2, 3), (10, 10), (1, 1))
imvsizs = ((16, 16), (16, 16), (16, 16), (8, 5),
(8, 5), (8, 5), (8, 5))
(maxpoolshp, stridesize, inputsize) = example
imval = rng.rand(*inputsize)
for mode in ['max', 'sum', 'average_inc_pad', 'average_exc_pad']:
for indx in numpy.arange(len(maxpoolshps)):
imvsize = imvsizs[indx]
imval = rng.rand(1, 2, imvsize[0], imvsize[1])
stride = stridesizes[indx]
maxpoolshp = maxpoolshps[indx]
for ignore_border in [True, False]:
def mp(input):
return Pool(ignore_border=ignore_border, mode=mode)(
input, maxpoolshp, stride)
utt.verify_grad(mp, [imval], rng=rng)
def mp(input):
return Pool(ndim=len(maxpoolshp),
ignore_border=ignore_border,
mode=mode)(input, maxpoolshp, stridesize)
utt.verify_grad(mp, [imval], rng=rng)
def test_DownsampleFactorMaxGrad_grad(self):
rng = numpy.random.RandomState(utt.fetch_seed())
maxpoolshps = ((1, 1), (3, 2), (2, 3))
imval = rng.rand(2, 3, 3, 4) * 10.0
# more variance means numeric gradient will be more accurate
for maxpoolshp in maxpoolshps:
# maxpool, input sizes
examples = (
((2,), (2,)),
((2,), (2, 3)),
((1, 1), (2, 3, 3, 4)),
((3, 2), (2, 3, 3, 4)),
((2, 3), (2, 3, 3, 4)),
((1, 1, 1), (2, 3, 3, 4)),
((3, 2, 2), (2, 3, 3, 4)),
((2, 3, 2), (2, 3, 3, 4)),
((2, 2, 3), (2, 3, 3, 4)),
((2, 2, 3), (2, 1, 3, 3, 4)),
)
for (maxpoolshp, inputsize) in examples:
imval = rng.rand(*inputsize) * 10.0
# more variance means numeric gradient will be more accurate
for ignore_border in [True, False]:
# print 'maxpoolshp =', maxpoolshp
# print 'ignore_border =', ignore_border
# The shape of the gradient will be the shape of the output
grad_shape = Pool.out_shape(
imval.shape, maxpoolshp, ignore_border=ignore_border)
imval.shape, maxpoolshp, ndim=len(maxpoolshp), ignore_border=ignore_border)
grad_val = rng.rand(*grad_shape) * 10.0
def mp(input, grad):
out = Pool(ignore_border=ignore_border)(input, maxpoolshp)
grad_op = MaxPoolGrad(ignore_border=ignore_border)
out = Pool(
ndim=len(maxpoolshp),
ignore_border=ignore_border)(input, maxpoolshp)
grad_op = MaxPoolGrad(
ndim=len(maxpoolshp),
ignore_border=ignore_border)
return grad_op(input, out, grad, maxpoolshp)
utt.verify_grad(mp, [imval, grad_val], rng=rng)
def test_AveragePoolGrad_grad(self):
rng = numpy.random.RandomState(utt.fetch_seed())
avgpoolshps = ((1, 1), (3, 2), (2, 3))
imval = rng.rand(2, 3, 3, 4) * 10.0
# more variance means numeric gradient will be more accurate
for avgpoolshp in avgpoolshps:
# avgpool, input sizes
examples = (
((2,), (2,)),
((2,), (2, 3)),
((1, 1), (2, 3, 3, 4)),
((3, 2), (2, 3, 3, 4)),
((2, 3), (2, 3, 3, 4)),
((1, 1, 1), (2, 3, 3, 4)),
((3, 2, 2), (2, 3, 3, 4)),
((2, 3, 2), (2, 3, 3, 4)),
((2, 2, 3), (2, 3, 3, 4)),
((2, 2, 3), (2, 1, 3, 3, 4)),
)
for (avgpoolshp, inputsize) in examples:
imval = rng.rand(*inputsize) * 10.0
# more variance means numeric gradient will be more accurate
for ignore_border in [True, False]:
for mode in ['sum', 'average_inc_pad', 'average_exc_pad']:
# print 'maxpoolshp =', maxpoolshp
# print 'ignore_border =', ignore_border
# The shape of the gradient will be the shape of the output
grad_shape = Pool.out_shape(
imval.shape, avgpoolshp, ignore_border=ignore_border)
imval.shape, avgpoolshp, ndim=len(avgpoolshp),
ignore_border=ignore_border)
grad_val = rng.rand(*grad_shape) * 10.0
def mp(input, grad):
grad_op = AveragePoolGrad(ignore_border=ignore_border,
mode=mode)
grad_op = AveragePoolGrad(
ndim=len(avgpoolshp),
ignore_border=ignore_border, mode=mode)
return grad_op(input, grad, avgpoolshp)
utt.verify_grad(mp, [imval, grad_val], rng=rng)
def test_DownsampleFactorMaxGrad_grad_st(self):
@parameterized.expand(product(pool_grad_st_examples,
[True, False]),
testcase_func_name=utt.custom_name_func)
def test_DownsampleFactorMaxGrad_grad_st(self, example, ignore_border):
# checks the gradient of the gradient for
# the case that stride is used
rng = numpy.random.RandomState(utt.fetch_seed())
maxpoolshps = ((1, 1), (3, 3), (5, 3))
stridesizes = ((1, 1), (3, 3), (5, 7))
imval = rng.rand(1, 2, 16, 16)
(maxpoolshp, stride, inputsize) = example
imval = rng.rand(*inputsize)
grad_shape = Pool.out_shape(
imval.shape, maxpoolshp, ndim=len(maxpoolshp),
ignore_border=ignore_border, st=stride)
for maxpoolshp in maxpoolshps:
for ignore_border in [True, False]:
for stride in stridesizes:
grad_shape = Pool.out_shape(
imval.shape, maxpoolshp,
ignore_border=ignore_border, st=stride)
grad_val = rng.rand(*grad_shape)
# skip the grad verification when the output is empty
if numpy.prod(grad_shape) != 0:
grad_val = rng.rand(*grad_shape)
def mp(input, grad):
out = Pool(ignore_border=ignore_border)(
input, maxpoolshp, stride)
grad_op = MaxPoolGrad(ignore_border=ignore_border)
return grad_op(input, out, grad, maxpoolshp, stride)
def mp(input, grad):
out = Pool(
ndim=len(maxpoolshp),
ignore_border=ignore_border)(input, maxpoolshp, stride)
grad_op = MaxPoolGrad(
ndim=len(maxpoolshp),
ignore_border=ignore_border)
return grad_op(input, out, grad, maxpoolshp, stride)
utt.verify_grad(mp, [imval, grad_val], rng=rng)
utt.verify_grad(mp, [imval, grad_val], rng=rng)
def test_AveragePoolGrad_grad_st(self):
@parameterized.expand(product(pool_grad_st_examples,
[True, False],
['sum',
'average_inc_pad',
'average_exc_pad']),
testcase_func_name=utt.custom_name_func)
def test_AveragePoolGrad_grad_st(self, example, ignore_border, mode):
# checks the gradient of the gradient for
# the case that stride is used
rng = numpy.random.RandomState(utt.fetch_seed())
avgpoolshps = ((1, 1), (3, 3), (5, 3))
stridesizes = ((1, 1), (3, 3), (5, 7))
imval = rng.rand(1, 2, 16, 16)
for avgpoolshp in avgpoolshps:
for ignore_border in [True, False]:
for mode in ['sum', 'average_inc_pad', 'average_exc_pad']:
for stride in stridesizes:
grad_shape = Pool.out_shape(
imval.shape, avgpoolshp,
ignore_border=ignore_border, st=stride)
grad_val = rng.rand(*grad_shape)
def mp(input, grad):
grad_op = AveragePoolGrad(
ignore_border=ignore_border, mode=mode)
return grad_op(input, grad, avgpoolshp, stride)
utt.verify_grad(mp, [imval, grad_val], rng=rng)
def test_DownsampleFactorMaxGrad_grad_st_extra(self):
# checks the gradient of the gradient for the case that
# stride is used for extra examples
rng = numpy.random.RandomState(utt.fetch_seed())
maxpoolshps = ((5, 3), (5, 3), (5, 3), (5, 5), (3, 2), (7, 7), (9, 9))
stridesizes = ((3, 2), (7, 5), (10, 6), (1, 1),
(2, 3), (10, 10), (1, 1))
imvsizs = ((16, 16), (16, 16), (16, 16), (8, 5),
(8, 5), (8, 5), (8, 5))
for indx in numpy.arange(len(maxpoolshps)):
imvsize = imvsizs[indx]
imval = rng.rand(1, 2, imvsize[0], imvsize[1])
stride = stridesizes[indx]
maxpoolshp = maxpoolshps[indx]
for ignore_border in [True, False]:
grad_shape = Pool.out_shape(
imval.shape, maxpoolshp,
ignore_border=ignore_border, st=stride)
grad_val = rng.rand(*grad_shape)
def mp(input, grad):
out = Pool(ignore_border=ignore_border)(input, maxpoolshp,
stride)
grad_op = MaxPoolGrad(ignore_border=ignore_border)
return grad_op(input, out, grad, maxpoolshp, stride)
# skip the grad verification when the output is empty
if numpy.prod(grad_shape) == 0:
continue
utt.verify_grad(mp, [imval, grad_val], rng=rng)
def test_AveragePoolGrad_grad_st_extra(self):
# checks the gradient of the gradient for the case that
# stride is used for extra examples
rng = numpy.random.RandomState(utt.fetch_seed())
avgpoolshps = ((5, 3), (5, 3), (5, 3), (5, 5), (3, 2), (7, 7), (9, 9))
stridesizes = ((3, 2), (7, 5), (10, 6), (1, 1),
(2, 3), (10, 10), (1, 1))
imvsizs = ((16, 16), (16, 16), (16, 16), (8, 5),
(8, 5), (8, 5), (8, 5))
(avgpoolshp, stride, inputsize) = example
imval = rng.rand(*inputsize)
grad_shape = Pool.out_shape(
imval.shape, avgpoolshp,
ndim=len(avgpoolshp),
ignore_border=ignore_border, st=stride)
for indx in numpy.arange(len(avgpoolshps)):
imvsize = imvsizs[indx]
imval = rng.rand(1, 2, imvsize[0], imvsize[1])
stride = stridesizes[indx]
avgpoolshp = avgpoolshps[indx]
for ignore_border in [True, False]:
for mode in ['sum', 'average_inc_pad', 'average_exc_pad']:
grad_shape = Pool.out_shape(
imval.shape, avgpoolshp,
ignore_border=ignore_border, st=stride)
grad_val = rng.rand(*grad_shape)
# skip the grad verification when the output is empty
if numpy.prod(grad_shape) != 0:
grad_val = rng.rand(*grad_shape)
def mp(input, grad):
grad_op = AveragePoolGrad(ignore_border=ignore_border,
mode=mode)
return grad_op(input, grad, avgpoolshp, stride)
def mp(input, grad):
grad_op = AveragePoolGrad(
ndim=len(avgpoolshp),
ignore_border=ignore_border,
mode=mode)
return grad_op(input, grad, avgpoolshp, stride)
# skip the grad verification when the output is empty
if numpy.prod(grad_shape) == 0:
continue
utt.verify_grad(mp, [imval, grad_val], rng=rng)
utt.verify_grad(mp, [imval, grad_val], rng=rng)
def test_DownsampleFactorMaxPaddingStride_grad_grad(self):
rng = numpy.random.RandomState(utt.fetch_seed())
imgsizes = ((10, 10), (10, 5), (5, 5))
maxpoolsizes = ((5, 3), (3, 5), (3, 3))
stridesizes = ((3, 2), (2, 3), (3, 3))
paddingsizes = ((2, 2), (2, 1), (2, 2))
for i in range(len(imgsizes)):
imgsize = imgsizes[i]
imval = rng.rand(1, 1, imgsize[0], imgsize[1]) * 10.0
maxpoolsize = maxpoolsizes[i]
stridesize = stridesizes[i]
paddingsize = paddingsizes[i]
# maxpool, stride, padding, input sizes
examples = (
((3,), (2,), (2,), (10,)),
((3,), (2,), (2,), (2, 10,)),
((3,), (2,), (2,), (2, 1, 10,)),
((5, 3), (3, 2), (2, 2), (1, 1, 10, 10)),
((5, 3), (3, 2), (2, 2), (1, 1, 10, 10)),
((3, 5), (2, 3), (2, 1), (1, 1, 10, 5)),
((3, 3), (3, 3), (2, 2), (1, 1, 5, 5)),
((5, 3, 3), (3, 2, 2), (2, 2, 2), (1, 1, 10, 5, 5)),
((3, 5, 3), (2, 3, 2), (2, 1, 2), (1, 1, 5, 10, 5)),
((3, 3, 5), (2, 2, 3), (2, 2, 1), (1, 1, 5, 5, 10)),
)
for (maxpoolshp, stridesize, paddingsize, inputsize) in examples:
imval = rng.rand(*inputsize) * 10.0
grad_shape = Pool.out_shape(imval.shape,
maxpoolsize, st=stridesize,
maxpoolshp,
ndim=len(maxpoolshp),
st=stridesize,
ignore_border=True,
padding=paddingsize)
grad_val = rng.rand(*grad_shape) * 10.0
def mp(input, grad):
out = Pool(ignore_border=True)(input, maxpoolsize, stridesize,
paddingsize)
grad_op = MaxPoolGrad(ignore_border=True)
return grad_op(input, out, grad, maxpoolsize, stridesize,
paddingsize)
out = Pool(
ndim=len(maxpoolshp),
ignore_border=True,
)(input, maxpoolshp, stridesize, paddingsize)
grad_op = MaxPoolGrad(ndim=len(maxpoolshp),
ignore_border=True)
return grad_op(input, out, grad, maxpoolshp, stridesize, paddingsize)
utt.verify_grad(mp, [imval, grad_val], rng=rng)
def test_AveragePoolPaddingStride_grad_grad(self):
rng = numpy.random.RandomState(utt.fetch_seed())
imgsizes = ((10, 10), (10, 5), (5, 5))
avgpoolsizes = ((5, 3), (3, 5), (3, 3))
stridesizes = ((3, 2), (2, 3), (3, 3))
paddingsizes = ((2, 2), (2, 1), (2, 2))
for i in range(len(imgsizes)):
imgsize = imgsizes[i]
imval = rng.rand(1, 1, imgsize[0], imgsize[1]) * 10.0
avgpoolsize = avgpoolsizes[i]
stridesize = stridesizes[i]
paddingsize = paddingsizes[i]
# avgpool, stride, padding, input sizes
examples = (
((3,), (2,), (2,), (10,)),
((3,), (2,), (2,), (2, 10,)),
((3,), (2,), (2,), (2, 1, 10,)),
((5, 3), (3, 2), (2, 2), (1, 1, 10, 10)),
((5, 3), (3, 2), (2, 2), (1, 1, 10, 10)),
((3, 5), (2, 3), (2, 1), (1, 1, 10, 5)),
((3, 3), (3, 3), (2, 2), (1, 1, 5, 5)),
((5, 3, 3), (3, 2, 2), (2, 2, 2), (1, 1, 10, 5, 5)),
((3, 5, 3), (2, 3, 2), (2, 1, 2), (1, 1, 5, 10, 5)),
((3, 3, 5), (2, 2, 3), (2, 2, 1), (1, 1, 5, 5, 10)),
)
for (avgpoolshp, stridesize, paddingsize, inputsize) in examples:
imval = rng.rand(*inputsize) * 10.0
# 'average_exc_pad' with non-zero padding is not implemented
for mode in ['sum', 'average_inc_pad']:
grad_shape = Pool.out_shape(imval.shape,
avgpoolsize, st=stridesize,
ignore_border=True, padding=paddingsize)
avgpoolshp,
ndim=len(avgpoolshp),
st=stridesize,
ignore_border=True,
padding=paddingsize)
grad_val = rng.rand(*grad_shape) * 10.0
def mp(input, grad):
grad_op = AveragePoolGrad(ignore_border=True, mode=mode)
return grad_op(input, grad, avgpoolsize, stridesize, paddingsize)
grad_op = AveragePoolGrad(ndim=len(avgpoolshp),
ignore_border=True,
mode=mode)
return grad_op(input, grad, avgpoolshp, stridesize, paddingsize)
utt.verify_grad(mp, [imval, grad_val], rng=rng)
def test_DownsampleFactorMax_hessian(self):
......@@ -630,26 +831,31 @@ class TestDownsampleFactorMax(utt.InferShapeTester):
def test_DownsampleFactorMaxGradGrad_grad(self):
rng = numpy.random.RandomState(utt.fetch_seed())
imgsizes = ((10, 10), (10, 5), (5, 5))
maxpoolsizes = ((5, 3), (3, 5), (3, 3))
stridesizes = ((3, 2), (2, 3), (3, 3))
paddingsizes = ((2, 2), (2, 1), (2, 2))
for i in range(len(imgsizes)):
imgsize = imgsizes[i]
imval1 = rng.rand(1, 1, imgsize[0], imgsize[1]) * 10.0
imval2 = rng.rand(1, 1, imgsize[0], imgsize[1]) * 10.0
maxpoolsize = maxpoolsizes[i]
stridesize = stridesizes[i]
paddingsize = paddingsizes[i]
# maxpool, stride, padding, input sizes
examples = (
((3,), (2,), (2,), (10,)),
((3,), (2,), (2,), (2, 10,)),
((3,), (2,), (2,), (2, 1, 10,)),
((5, 3), (3, 2), (2, 2), (1, 1, 10, 10)),
((5, 3), (3, 2), (2, 2), (1, 1, 10, 10)),
((3, 5), (2, 3), (2, 1), (1, 1, 10, 5)),
((3, 3), (3, 3), (2, 2), (1, 1, 5, 5)),
((5, 3, 3), (3, 2, 2), (2, 2, 2), (1, 1, 10, 5, 5)),
((3, 5, 3), (2, 3, 2), (2, 1, 2), (1, 1, 5, 10, 5)),
((3, 3, 5), (2, 2, 3), (2, 2, 1), (1, 1, 5, 5, 10)),
)
for (maxpoolshp, stridesize, paddingsize, inputsize) in examples:
imval1 = rng.rand(*inputsize) * 10.0
imval2 = rng.rand(*inputsize) * 10.0
def mp(input1, input2):
op1 = Pool(ignore_border=True)
pooled_out = op1(input1, maxpoolsize, stride=stridesize,
pad=paddingsize)
op2 = DownsampleFactorMaxGradGrad(ignore_border=True)
out = op2(input1, pooled_out, input2, ws=maxpoolsize,
stride=stridesize, pad=paddingsize)
op1 = Pool(ndim=len(maxpoolshp), ignore_border=True)
pooled_out = op1(input1, maxpoolshp, stridesize, paddingsize)
op2 = DownsampleFactorMaxGradGrad(
ndim=len(maxpoolshp),
ignore_border=True)
out = op2(input1, pooled_out, input2, maxpoolshp, stridesize, paddingsize)
return out
utt.verify_grad(mp, [imval1, imval2], rng=rng)
......@@ -679,6 +885,32 @@ class TestDownsampleFactorMax(utt.InferShapeTester):
mode=mode)
utt.verify_grad(mp, [imval], rng=rng)
def test_max_pool_3d_3D(self):
rng = numpy.random.RandomState(utt.fetch_seed())
maxpoolshps = ((1, 1, 1), (3, 2, 1))
imval = rng.rand(4, 5, 6)
images = tensor.dtensor3()
for maxpoolshp, ignore_border, mode in product(maxpoolshps,
[True, False],
['max', 'sum',
'average_inc_pad',
'average_exc_pad']):
# print 'maxpoolshp =', maxpoolshp
# print 'ignore_border =', ignore_border
numpy_output_val = self.numpy_max_pool_nd(imval, maxpoolshp,
ignore_border,
mode=mode)
output = pool_3d(images, maxpoolshp, ignore_border,
mode=mode)
output_val = function([images], output)(imval)
utt.assert_allclose(output_val, numpy_output_val)
def mp(input):
return pool_3d(input, maxpoolshp, ignore_border,
mode=mode)
utt.verify_grad(mp, [imval], rng=rng)
def test_max_pool_2d_2D_same_size(self):
rng = numpy.random.RandomState(utt.fetch_seed())
test_input_array = numpy.array([[[
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论