Skip to content
项目
群组
代码片段
帮助
当前项目
正在载入...
登录 / 注册
切换导航面板
P
pytensor
项目
项目
详情
活动
周期分析
仓库
仓库
文件
提交
分支
标签
贡献者
图表
比较
统计图
议题
0
议题
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
CI / CD
CI / CD
流水线
作业
日程
统计图
Wiki
Wiki
代码片段
代码片段
成员
成员
折叠边栏
关闭边栏
活动
图像
聊天
创建新问题
作业
提交
问题看板
Open sidebar
testgroup
pytensor
Commits
a16e91f7
提交
a16e91f7
authored
9月 15, 2016
作者:
Gijs van Tulder
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
theano.tensor.signal.Pool with 3D support.
上级
172e699c
隐藏空白字符变更
内嵌
并排
正在显示
10 个修改的文件
包含
2409 行增加
和
1060 行删除
+2409
-1060
dnn.py
theano/gpuarray/dnn.py
+65
-24
opt_util.py
theano/gpuarray/opt_util.py
+47
-2
test_dnn.py
theano/gpuarray/tests/test_dnn.py
+283
-5
dnn.py
theano/sandbox/cuda/dnn.py
+74
-34
opt.py
theano/sandbox/cuda/opt.py
+59
-30
opt_util.py
theano/sandbox/cuda/opt_util.py
+47
-2
test_blas.py
theano/sandbox/cuda/tests/test_blas.py
+7
-5
test_dnn.py
theano/sandbox/cuda/tests/test_dnn.py
+102
-21
pool.py
theano/tensor/signal/pool.py
+1176
-620
test_pool.py
theano/tensor/signal/tests/test_pool.py
+549
-317
没有找到文件。
theano/gpuarray/dnn.py
浏览文件 @
a16e91f7
...
...
@@ -35,7 +35,7 @@ from .nnet import GpuSoftmax
from
.opt
import
(
gpu_seqopt
,
register_opt
,
op_lifter
,
register_opt2
)
from
.opt_util
import
alpha_merge
,
output_merge
,
inplace_allocempty
from
.opt_util
import
alpha_merge
,
output_merge
,
inplace_allocempty
,
pad_dims
,
unpad_dims
from
theano.configdefaults
import
SUPPORTED_DNN_CONV_ALGO_BWD_FILTER
...
...
@@ -1253,7 +1253,7 @@ class GpuDnnPoolGrad(DnnBase):
return
[
shape
[
0
]]
def
dnn_pool
(
img
,
ws
,
stride
=
(
1
,
1
),
mode
=
'max'
,
pad
=
(
0
,
0
)
):
def
dnn_pool
(
img
,
ws
,
stride
=
None
,
mode
=
'max'
,
pad
=
None
):
"""
GPU pooling using cuDNN from NVIDIA.
...
...
@@ -1267,13 +1267,13 @@ def dnn_pool(img, ws, stride=(1, 1), mode='max', pad=(0, 0)):
img
Images to do the pooling over.
ws : tuple
Subsampling window size.
Subsampling window size.
Should have 2 or 3 elements.
stride : tuple
Subsampling stride (default: (1, 1)).
Subsampling stride (default: (1, 1)
or (1, 1, 1)
).
mode : {'max', 'average_inc_pad', 'average_exc_pad', 'sum'}
pad : tuple
(padX, padY) or (padX, padY, padZ)
default: (0, 0)
default: (0, 0)
or (0, 0, 0)
.. warning:: The cuDNN library only works with GPU that have a compute
capability of 3.0 or higer. This means that older GPU will not
...
...
@@ -1285,6 +1285,10 @@ def dnn_pool(img, ws, stride=(1, 1), mode='max', pad=(0, 0)):
"""
img
=
gpu_contiguous
(
img
)
if
stride
is
None
:
stride
=
(
1
,)
*
len
(
ws
)
if
pad
is
None
:
pad
=
(
0
,)
*
len
(
ws
)
if
mode
==
"sum"
:
ret
=
GpuDnnPool
(
mode
=
"average_inc_pad"
)(
img
,
ws
,
stride
,
pad
)
context_name
=
ret
.
type
.
context_name
...
...
@@ -1868,9 +1872,18 @@ def local_gpua_pool_dnn_alternative(op, ctx_name, inputs, outputs):
if
not
op
.
ignore_border
:
return
img
,
ws
,
stride
,
pad
=
inputs
img
=
as_gpuarray_variable
(
img
,
ctx_name
)
nd
=
op
.
ndim
if
op
.
ndim
else
(
img
.
ndim
-
2
)
if
nd
not
in
(
2
,
3
):
return
img
=
gpu_contiguous
(
as_gpuarray_variable
(
img
,
ctx_name
))
mode
=
op
.
mode
return
dnn_pool
(
gpu_contiguous
(
img
),
ws
,
stride
=
stride
,
pad
=
pad
,
mode
=
mode
)
if
img
.
ndim
==
nd
+
2
:
return
dnn_pool
(
img
,
ws
,
stride
=
stride
,
pad
=
pad
,
mode
=
mode
)
else
:
# reshape to 4D or 5D with 2 non-pooling dimensions
img_padded
=
pad_dims
(
img
,
2
,
nd
)
ret_padded
=
dnn_pool
(
img_padded
,
ws
,
stride
=
stride
,
pad
=
pad
,
mode
=
mode
)
return
unpad_dims
(
ret_padded
,
img
,
2
,
nd
)
@register_opt
(
'cudnn'
,
'fast_compile'
)
...
...
@@ -1882,17 +1895,33 @@ def local_gpua_pool_dnn_grad_stride(op, ctx_name, inputs, outputs):
if
not
op
.
ignore_border
:
return
inp
,
out
,
out_grad
,
ws
,
stride
,
pad
=
inputs
inp
=
as_gpuarray_variable
(
inp
,
ctx_name
)
out
=
as_gpuarray_variable
(
out
,
ctx_name
)
out_grad
=
as_gpuarray_variable
(
out_grad
,
ctx_name
)
nd
=
op
.
ndim
if
op
.
ndim
else
(
inp
.
ndim
-
2
)
if
nd
not
in
(
2
,
3
):
return
inp
=
gpu_contiguous
(
as_gpuarray_variable
(
inp
,
ctx_name
))
out
=
gpu_contiguous
(
as_gpuarray_variable
(
out
,
ctx_name
))
out_grad
=
gpu_contiguous
(
as_gpuarray_variable
(
out_grad
,
ctx_name
))
mode
=
op
.
mode
return
GpuDnnPoolGrad
(
mode
=
mode
)(
gpu_contiguous
(
inp
),
gpu_contiguous
(
out
),
gpu_contiguous
(
out_grad
),
ws
,
stride
,
pad
)
if
inp
.
ndim
==
nd
+
2
:
return
GpuDnnPoolGrad
(
mode
=
mode
)(
inp
,
out
,
out_grad
,
ws
,
stride
,
pad
)
else
:
# reshape to 4D or 5D with 2 non-pooling dimensions
inp_padded
=
pad_dims
(
inp
,
2
,
nd
)
out_padded
=
pad_dims
(
out
,
2
,
nd
)
out_grad_padded
=
pad_dims
(
out_grad
,
2
,
nd
)
ret_padded
=
GpuDnnPoolGrad
(
mode
=
mode
)(
inp_padded
,
out_padded
,
out_grad_padded
,
ws
,
stride
,
pad
)
return
unpad_dims
(
ret_padded
,
inp
,
2
,
nd
)
@register_opt
(
'cudnn'
,
'fast_compile'
)
...
...
@@ -1904,16 +1933,28 @@ def local_gpua_avg_pool_dnn_grad_stride(op, ctx_name, inputs, outputs):
if
not
op
.
ignore_border
:
return
inp
,
out_grad
,
ws
,
stride
,
pad
=
inputs
inp
=
as_gpuarray_variable
(
inp
,
ctx_name
)
out_grad
=
as_gpuarray_variable
(
out_grad
,
ctx_name
)
nd
=
op
.
ndim
if
op
.
ndim
else
(
inp
.
ndim
-
2
)
if
nd
not
in
(
2
,
3
):
return
inp
=
gpu_contiguous
(
as_gpuarray_variable
(
inp
,
ctx_name
))
out_grad
=
gpu_contiguous
(
as_gpuarray_variable
(
out_grad
,
ctx_name
))
mode
=
op
.
mode
cg
=
gpu_contiguous
(
out_grad
)
# We reuse cg because cuDNN does not use the value of the `out`
# argument but still checks its shape for average pooling. This
# has been observed in v2 and v3 as far as I know.
return
GpuDnnPoolGrad
(
mode
=
mode
)(
gpu_contiguous
(
inp
),
cg
,
cg
,
ws
,
stride
,
pad
)
if
inp
.
ndim
==
nd
+
2
:
# We reuse out_grad because cuDNN does not use the value of the `out`
# argument but still checks its shape for average pooling. This
# has been observed in v2 and v3 as far as I know.
return
GpuDnnPoolGrad
(
mode
=
mode
)(
inp
,
out_grad
,
out_grad
,
ws
,
stride
,
pad
)
else
:
inp_padded
=
pad_dims
(
inp
,
2
,
nd
)
out_grad_padded
=
pad_dims
(
out_grad
,
2
,
nd
)
ret_padded
=
GpuDnnPoolGrad
(
mode
=
mode
)(
inp_padded
,
out_grad_padded
,
out_grad_padded
,
ws
,
stride
,
pad
)
return
unpad_dims
(
ret_padded
,
inp
,
2
,
nd
)
@register_opt
(
'cudnn'
,
'fast_compile'
)
...
...
theano/gpuarray/opt_util.py
浏览文件 @
a16e91f7
...
...
@@ -3,12 +3,12 @@ from functools import wraps
import
numpy
from
theano
import
scalar
as
scal
,
Constant
from
theano
import
tensor
,
scalar
as
scal
,
Constant
from
theano.gof
import
local_optimizer
from
theano.tensor
import
(
DimShuffle
,
get_scalar_constant_value
,
NotScalarConstantError
)
from
.basic_ops
import
GpuFromHost
,
HostFromGpu
,
GpuAllocEmpty
,
gpu_alloc_empty
from
.basic_ops
import
GpuFromHost
,
HostFromGpu
,
GpuAllocEmpty
,
GpuReshape
,
gpu_alloc_empty
from
.elemwise
import
GpuDimShuffle
,
GpuElemwise
_one
=
scal
.
constant
(
numpy
.
asarray
(
1.0
,
dtype
=
'float32'
))
...
...
@@ -329,3 +329,48 @@ def inplace_allocempty(op, idx):
return
maker
(
node
,
inputs
)
return
opt
return
wrapper
def
pad_dims
(
input
,
leftdims
,
rightdims
):
"""Reshapes the input to a (leftdims + rightdims) tensor
This helper function is used to convert pooling inputs with arbitrary
non-pooling dimensions to the correct number of dimensions for the
GPU pooling ops.
This reduces or expands the number of dimensions of the input to
exactly `leftdims`, by adding extra dimensions on the left or by
combining some existing dimensions on the left of the input.
"""
assert
input
.
ndim
>=
rightdims
if
input
.
ndim
==
(
leftdims
+
rightdims
):
return
input
# extract image dimensions
img_shape
=
input
.
shape
[
-
rightdims
:]
# count the number of "leading" dimensions, store as dmatrix
batch_size
=
tensor
.
prod
(
input
.
shape
[:
-
rightdims
])
batch_size
=
tensor
.
shape_padright
(
batch_size
,
1
)
# store in the required shape, for example as a 4D tensor
# with shape: (batch_size,1,height,width)
new_shape
=
tensor
.
cast
(
tensor
.
join
(
0
,
batch_size
,
tensor
.
as_tensor
([
1
]
*
(
leftdims
-
1
)),
img_shape
),
'int64'
)
input_ND
=
GpuReshape
(
leftdims
+
rightdims
)(
input
,
new_shape
)
return
input_ND
def
unpad_dims
(
output
,
input
,
leftdims
,
rightdims
):
"""Reshapes the output after pad_dims.
This reverts the padding by `pad_dims`.
"""
if
output
.
ndim
==
input
.
ndim
:
return
output
# restore the output to the original shape
outshp
=
tensor
.
join
(
0
,
input
.
shape
[:
-
rightdims
],
output
.
shape
[
-
rightdims
:])
return
GpuReshape
(
input
.
ndim
)(
output
,
outshp
)
theano/gpuarray/tests/test_dnn.py
浏览文件 @
a16e91f7
...
...
@@ -11,11 +11,12 @@ from six import StringIO
import
theano.tensor
as
T
import
theano.tests.unittest_tools
as
utt
from
theano.sandbox.neighbours
import
images2neibs
from
theano.tensor.signal.pool
import
pool_2d
from
theano.tensor.signal.pool
import
MaxPoolGrad
,
AveragePoolGrad
from
theano.tensor.signal.pool
import
pool_2d
,
pool_3d
from
theano.tensor.signal.pool
import
Pool
,
MaxPoolGrad
,
AveragePoolGrad
from
..
import
dnn
from
..basic_ops
import
GpuAllocEmpty
from
..type
import
gpuarray_shared_constructor
from
.config
import
mode_with_gpu
,
mode_without_gpu
,
test_ctx_name
from
.
import
test_nnet
...
...
@@ -166,6 +167,27 @@ def pool_2d_i2n(input, ds=(2, 2), strides=None,
return
pooled_output
def
pool3d2d
(
input
,
ds
=
(
2
,
2
,
2
),
strides
=
None
,
pad
=
(
0
,
0
,
0
),
pool_function
=
T
.
max
,
mode
=
'ignore_borders'
):
if
strides
is
None
:
strides
=
ds
shape
=
input
.
shape
# reshape to B, C*0, 1, 2 and do the pooling on 1, 2
first
=
input
.
reshape
((
shape
[
0
],
shape
[
1
]
*
shape
[
2
],
shape
[
3
],
shape
[
4
]))
pooled1
=
pool_2d_i2n
(
first
,
ds
=
ds
[
1
:],
strides
=
strides
[
1
:],
pad
=
pad
[
1
:],
pool_function
=
pool_function
,
mode
=
mode
)
shp1
=
pooled1
.
shape
# reshape to B, C, 0, 1*2 and do the pooling on 0
second
=
pooled1
.
reshape
((
shape
[
0
],
shape
[
1
],
shape
[
2
],
shp1
[
2
]
*
shp1
[
3
]))
pooled2
=
pool_2d_i2n
(
second
,
ds
=
(
ds
[
0
],
1
),
strides
=
(
strides
[
0
],
1
),
pad
=
(
pad
[
0
],
0
),
pool_function
=
pool_function
,
mode
=
mode
)
shp2
=
pooled2
.
shape
return
pooled2
.
reshape
((
shape
[
0
],
shape
[
1
],
shp2
[
2
],
shp1
[
2
],
shp1
[
3
]))
def
test_pooling
():
if
not
dnn
.
dnn_available
(
test_ctx_name
):
raise
SkipTest
(
dnn
.
dnn_available
.
msg
)
...
...
@@ -178,7 +200,7 @@ def test_pooling():
x
=
T
.
ftensor4
()
for
mode
,
pad
in
product
(
modes
,
((
0
,
0
),
(
1
,
0
),
(
1
,
0
),
(
2
,
3
),
(
3
,
2
))):
((
0
,
0
),
(
1
,
0
),
(
0
,
1
),
(
2
,
3
),
(
3
,
2
))):
if
mode
==
'max'
:
func
=
T
.
max
else
:
...
...
@@ -278,6 +300,119 @@ def test_pooling():
utt
.
assert_allclose
(
c_out
,
g_out
)
def
test_pooling_3d
():
if
not
dnn
.
dnn_available
(
test_ctx_name
):
raise
SkipTest
(
dnn
.
dnn_available
.
msg
)
# 'average_exc_pad' is disabled for versions < 4004
if
dnn
.
version
(
raises
=
False
)
<
4004
:
modes
=
(
'max'
,
'average_inc_pad'
)
else
:
modes
=
(
'max'
,
'average_inc_pad'
,
'average_exc_pad'
)
x
=
T
.
ftensor5
()
for
mode
,
pad
in
product
(
modes
,
((
0
,
0
,
0
),
(
1
,
0
,
0
),
(
0
,
1
,
0
),
(
0
,
0
,
1
),
(
3
,
2
,
2
),
(
2
,
3
,
2
),
(
2
,
2
,
3
))):
if
mode
==
'max'
:
func
=
T
.
max
else
:
func
=
T
.
mean
if
pad
!=
(
0
,
0
,
0
)
and
func
is
T
.
mean
:
continue
for
ws
in
(
4
,
2
,
5
):
for
stride
in
(
2
,
3
):
if
stride
>
ws
:
continue
if
pad
[
0
]
>
stride
or
pad
[
1
]
>
stride
or
pad
[
2
]
>
stride
:
# Not implemented
continue
# We will check that the opt introduced it.
out1
=
pool_3d
(
x
,
(
ws
,
ws
,
ws
),
st
=
(
stride
,
stride
,
stride
),
ignore_border
=
True
,
padding
=
pad
,
mode
=
mode
)
out2
=
pool3d2d
(
x
,
ds
=
(
ws
,
ws
,
ws
),
strides
=
(
stride
,
stride
,
stride
),
pad
=
pad
,
pool_function
=
func
)
mode_without_gpu2
=
mode_without_gpu
.
including
()
mode_without_gpu2
.
check_isfinite
=
False
f1
=
theano
.
function
([
x
],
out1
,
mode
=
mode_with_gpu
)
assert
any
([
isinstance
(
node
.
op
,
dnn
.
GpuDnnPool
)
for
node
in
f1
.
maker
.
fgraph
.
apply_nodes
])
f2
=
theano
.
function
([
x
],
out2
,
mode
=
mode_without_gpu2
)
assert
not
any
([
isinstance
(
node
.
op
,
dnn
.
GpuDnnPool
)
for
node
in
f2
.
maker
.
fgraph
.
apply_nodes
])
for
shp
in
[(
1
,
10
,
30
,
30
,
30
),
(
1
,
3
,
29
,
29
,
29
),
(
3
,
1
,
47
,
97
,
47
),
]:
data
=
numpy
.
random
.
normal
(
0
,
1
,
shp
)
.
astype
(
"float32"
)
a
=
f1
(
data
)
b
=
f2
(
data
)
utt
.
assert_allclose
(
a
,
b
)
# Test the grad
for
shp
in
[(
1
,
1
,
2
,
2
,
2
),
(
1
,
1
,
3
,
3
,
3
)]:
data
=
numpy
.
random
.
normal
(
0
,
1
,
shp
)
.
astype
(
"float32"
)
*
10
ws
=
2
stride
=
2
if
pad
[
0
]
>
stride
or
pad
[
1
]
>
stride
or
pad
[
2
]
>
stride
:
# Not implemented
continue
# This test the CPU grad + opt + GPU implemtentation
def
fn
(
x
):
return
pool_3d
(
x
,
(
ws
,
ws
,
ws
),
ignore_border
=
True
,
padding
=
pad
,
mode
=
mode
)
utt
.
verify_grad
(
fn
,
[
data
],
cast_to_output_type
=
False
,
mode
=
mode_with_gpu
)
# Confirm that the opt would have inserted it.
fg
=
theano
.
function
([
x
],
theano
.
grad
(
fn
(
x
)
.
sum
(),
x
),
mode
=
mode_with_gpu
)
assert
any
([
isinstance
(
node
.
op
,
dnn
.
GpuDnnPoolGrad
)
for
node
in
fg
.
maker
.
fgraph
.
toposort
()])
# Test the GPU grad + GPU implementation
def
fn
(
x
):
dnn_op
=
dnn
.
dnn_pool
(
x
,
ws
=
(
ws
,
ws
,
ws
),
stride
=
(
stride
,
stride
,
stride
),
pad
=
pad
,
mode
=
mode
)
return
dnn_op
utt
.
verify_grad
(
fn
,
[
data
],
cast_to_output_type
=
False
,
mode
=
mode_with_gpu
)
# Confirm that we get the good op.
fg
=
theano
.
function
([
x
],
theano
.
grad
(
fn
(
x
)
.
sum
(),
x
),
mode
=
mode_with_gpu
)
assert
any
([
isinstance
(
node
.
op
,
dnn
.
GpuDnnPoolGrad
)
for
node
in
fg
.
maker
.
fgraph
.
toposort
()])
g_out
=
fg
(
data
)
# Compare against the CPU result
out
=
pool_3d
(
x
,
(
ws
,
ws
,
ws
),
padding
=
pad
,
ignore_border
=
True
,
mode
=
mode
)
fc
=
theano
.
function
([
x
],
theano
.
grad
(
out
.
sum
(),
x
),
mode
=
mode_without_gpu
)
if
mode
==
'max'
:
assert
any
([
isinstance
(
node
.
op
,
MaxPoolGrad
)
for
node
in
fc
.
maker
.
fgraph
.
toposort
()])
else
:
assert
any
([
isinstance
(
node
.
op
,
AveragePoolGrad
)
for
node
in
fc
.
maker
.
fgraph
.
toposort
()])
c_out
=
fc
(
data
)
utt
.
assert_allclose
(
c_out
,
g_out
)
def
test_pooling_with_tensor_vars
():
if
not
dnn
.
dnn_available
(
test_ctx_name
):
raise
SkipTest
(
dnn
.
dnn_available
.
msg
)
...
...
@@ -331,6 +466,7 @@ def test_pooling_opt():
if
not
dnn
.
dnn_available
(
test_ctx_name
):
raise
SkipTest
(
dnn
.
dnn_available
.
msg
)
# 2D pooling
x
=
T
.
fmatrix
()
f
=
theano
.
function
(
...
...
@@ -344,6 +480,7 @@ def test_pooling_opt():
f
(
numpy
.
zeros
((
10
,
10
),
dtype
=
'float32'
))
# gradient of 2D pooling
f
=
theano
.
function
(
[
x
],
T
.
grad
(
pool_2d
(
x
,
ds
=
(
2
,
2
),
mode
=
'average_inc_pad'
,
...
...
@@ -362,11 +499,86 @@ def test_pooling_opt():
pool_2d
(
x
,
ds
=
(
2
,
3
),
mode
=
'sum'
,
ignore_border
=
True
),
mode
=
mode_with_gpu
)
data
=
numpy
.
random
.
rand
(
10
,
10
)
.
astype
(
'float32'
)
f
(
data
)
# 3D pooling
x
=
T
.
ftensor3
()
f
=
theano
.
function
(
[
x
],
pool_3d
(
x
,
ds
=
(
2
,
2
,
2
),
mode
=
'average_inc_pad'
,
ignore_border
=
True
),
mode
=
mode_with_gpu
)
assert
any
([
isinstance
(
n
.
op
,
dnn
.
GpuDnnPool
)
for
n
in
f
.
maker
.
fgraph
.
toposort
()])
data
=
numpy
.
random
.
rand
(
10
,
10
)
.
astype
(
'float32'
)
f
(
data
)
f
(
numpy
.
zeros
((
10
,
10
,
10
),
dtype
=
'float32'
))
# gradient of 3D pooling
f
=
theano
.
function
(
[
x
],
T
.
grad
(
pool_3d
(
x
,
ds
=
(
2
,
2
,
2
),
mode
=
'average_inc_pad'
,
ignore_border
=
True
)
.
sum
(),
x
),
mode
=
mode_with_gpu
.
including
(
"cudnn"
))
assert
any
([
isinstance
(
n
.
op
,
dnn
.
GpuDnnPoolGrad
)
for
n
in
f
.
maker
.
fgraph
.
toposort
()])
f
(
numpy
.
zeros
((
10
,
10
,
10
),
dtype
=
'float32'
))
def
test_pooling_opt_arbitrary_dimensions
():
# test if input with an arbitrary number of non-pooling dimensions
# is correctly reshaped to run on the GPU
if
not
dnn
.
dnn_available
(
test_ctx_name
):
raise
SkipTest
(
dnn
.
dnn_available
.
msg
)
# 'average_exc_pad' is disabled for versions < 4004
if
dnn
.
version
(
raises
=
False
)
<
4004
:
modes
=
(
'max'
,
'average_inc_pad'
)
else
:
modes
=
(
'max'
,
'average_inc_pad'
,
'average_exc_pad'
)
for
n_non_pool_dims
in
(
0
,
1
,
2
,
3
):
for
ws
in
((
2
,
2
),
(
3
,
3
,
3
)):
# create input shape: non-pooling dimensions
# followed by 2 or 3 pooling dimensions
shp
=
(
2
,)
*
n_non_pool_dims
+
(
5
,)
*
len
(
ws
)
data
=
numpy
.
random
.
normal
(
0
,
1
,
shp
)
.
astype
(
'float32'
)
input
=
gpuarray_shared_constructor
(
data
)
for
mode
in
modes
:
out_pool
=
Pool
(
ndim
=
len
(
ws
),
mode
=
mode
,
ignore_border
=
True
)(
input
,
ws
)
out_pool_grad
=
T
.
grad
(
T
.
sum
(
out_pool
),
wrt
=
input
)
out
=
[
out_pool
,
out_pool_grad
]
# run on GPU
fg
=
theano
.
function
([],
out
,
mode
=
mode_with_gpu
)
assert
any
([
isinstance
(
node
.
op
,
dnn
.
GpuDnnPool
)
for
node
in
fg
.
maker
.
fgraph
.
toposort
()])
assert
any
([
isinstance
(
node
.
op
,
dnn
.
GpuDnnPoolGrad
)
for
node
in
fg
.
maker
.
fgraph
.
toposort
()])
res_gpu
=
fg
()
# run on CPU
fc
=
theano
.
function
([],
out
,
mode
=
mode_without_gpu
)
assert
any
([
isinstance
(
node
.
op
,
Pool
)
for
node
in
fc
.
maker
.
fgraph
.
toposort
()])
if
mode
==
'max'
:
assert
any
([
isinstance
(
node
.
op
,
MaxPoolGrad
)
for
node
in
fc
.
maker
.
fgraph
.
toposort
()])
else
:
assert
any
([
isinstance
(
node
.
op
,
AveragePoolGrad
)
for
node
in
fc
.
maker
.
fgraph
.
toposort
()])
res_cpu
=
fg
()
# check for similarity
utt
.
assert_allclose
(
res_gpu
[
0
],
res_cpu
[
0
])
utt
.
assert_allclose
(
res_gpu
[
1
],
res_cpu
[
1
])
def
test_dnn_tag
():
...
...
@@ -625,6 +837,33 @@ class TestDnnInferShapes(utt.InferShapeTester):
dnn
.
GpuDnnPool
)
def
test_pool_3d
(
self
):
if
not
dnn
.
dnn_available
(
test_ctx_name
):
raise
SkipTest
(
dnn
.
dnn_available
.
msg
)
img
=
T
.
ftensor5
(
'img'
)
img_val
=
numpy
.
asarray
(
numpy
.
random
.
rand
(
2
,
3
,
4
,
5
,
6
),
dtype
=
'float32'
)
# 'average_exc_pad' is disabled for versions < 4004
if
dnn
.
version
(
raises
=
False
)
<
4004
:
modes
=
[
'max'
,
'average_inc_pad'
]
else
:
modes
=
[
'max'
,
'average_inc_pad'
,
'average_exc_pad'
]
for
params
in
product
(
[(
1
,
1
,
1
),
(
2
,
2
,
2
),
(
3
,
3
,
3
)],
[(
1
,
1
,
1
),
(
2
,
2
,
2
),
(
3
,
3
,
3
)],
modes
):
self
.
_compile_and_check
(
[
img
],
[
dnn
.
GpuDnnPool
(
mode
=
params
[
2
])(
img
,
params
[
0
],
params
[
1
],
(
0
,
0
,
0
))],
[
img_val
],
dnn
.
GpuDnnPool
)
def
test_pool_grad
(
self
):
if
not
dnn
.
dnn_available
(
test_ctx_name
):
raise
SkipTest
(
dnn
.
dnn_available
.
msg
)
...
...
@@ -664,6 +903,45 @@ class TestDnnInferShapes(utt.InferShapeTester):
dnn
.
GpuDnnPoolGrad
)
def
test_pool_3d_grad
(
self
):
if
not
dnn
.
dnn_available
(
test_ctx_name
):
raise
SkipTest
(
dnn
.
dnn_available
.
msg
)
img
=
T
.
ftensor5
(
'img'
)
img_grad
=
T
.
ftensor5
(
'img_grad'
)
out
=
T
.
ftensor5
(
'out'
)
img_val
=
numpy
.
asarray
(
numpy
.
random
.
rand
(
2
,
3
,
4
,
5
,
6
),
dtype
=
'float32'
)
img_grad_val
=
numpy
.
asarray
(
numpy
.
random
.
rand
(
2
,
3
,
4
,
5
,
6
),
dtype
=
'float32'
)
out_val
=
numpy
.
asarray
(
numpy
.
random
.
rand
(
2
,
3
,
4
,
5
,
6
),
dtype
=
'float32'
)
for
params
in
product
(
[(
1
,
1
,
1
),
(
2
,
2
,
2
),
(
3
,
3
,
3
)],
[(
1
,
1
,
1
),
(
2
,
2
,
2
),
(
3
,
3
,
3
)],
[
'max'
,
'average_inc_pad'
]
):
pool_grad
=
dnn
.
GpuDnnPoolGrad
(
mode
=
params
[
2
])(
img
,
out
,
img_grad
,
params
[
0
],
params
[
1
],
(
0
,
0
,
0
)
)
self
.
_compile_and_check
(
[
img
,
img_grad
,
out
],
[
pool_grad
],
[
img_val
,
img_grad_val
,
out_val
],
dnn
.
GpuDnnPoolGrad
)
# this has been a problem in the past
def
test_dnn_conv_border_mode
():
...
...
theano/sandbox/cuda/dnn.py
浏览文件 @
a16e91f7
...
...
@@ -30,7 +30,8 @@ from theano.sandbox.cuda.basic_ops import (as_cuda_ndarray_variable,
from
theano.sandbox.cuda.blas
import
(
GpuConv
,
GpuDownsampleFactorMax
,
GpuDownsampleFactorMaxGrad
)
from
theano.sandbox.cuda.nnet
import
GpuSoftmax
from
theano.sandbox.cuda.opt_util
import
alpha_merge
,
output_merge
from
theano.sandbox.cuda.opt_util
import
(
alpha_merge
,
output_merge
,
pad_dims
,
unpad_dims
)
from
theano.sandbox.cuda
import
gpu_seqopt
,
register_opt
from
theano.sandbox.cuda.nvcc_compiler
import
NVCC_compiler
...
...
@@ -1391,20 +1392,23 @@ class GpuDnnPoolDesc(GpuOp):
def
do_constant_folding
(
self
,
node
):
return
False
def
__init__
(
self
,
ws
=
(
1
,
1
),
stride
=
(
1
,
1
),
mode
=
'max'
,
pad
=
(
0
,
0
)
):
def
__init__
(
self
,
ws
=
(
1
,
1
),
stride
=
None
,
mode
=
'max'
,
pad
=
None
):
if
mode
==
'average'
:
mode
=
'average_inc_pad'
assert
mode
in
(
'max'
,
'average_inc_pad'
,
'average_exc_pad'
)
self
.
mode
=
mode
if
stride
is
None
:
stride
=
(
1
,)
*
len
(
ws
)
if
pad
is
None
:
pad
=
(
0
,)
*
len
(
ws
)
assert
len
(
ws
)
==
len
(
stride
)
and
len
(
stride
)
==
len
(
pad
)
assert
len
(
ws
)
in
(
2
,
3
)
self
.
ws
=
ws
self
.
stride
=
stride
self
.
pad
=
pad
if
(
pad
[
0
]
!=
0
or
pad
[
1
]
!=
0
)
and
version
()
==
-
1
:
raise
RuntimeError
(
"cuDNN pooling with padding requires cuDNN v2"
)
if
self
.
get_ndim
()
==
3
and
version
()
<
(
3000
,
3000
):
raise
RuntimeError
(
"cuDNN 3d pooling requires cuDNN v3"
)
if
(
mode
==
'average_exc_pad'
and
max
(
pad
)
>
0
and
...
...
@@ -1418,12 +1422,9 @@ class GpuDnnPoolDesc(GpuOp):
def
__setstate__
(
self
,
d
):
self
.
__dict__
.
update
(
d
)
if
not
hasattr
(
self
,
'pad'
):
self
.
pad
=
(
0
,
0
)
self
.
pad
=
(
0
,
)
*
self
.
get_ndim
(
)
def
make_node
(
self
):
if
self
.
pad
!=
(
0
,
0
)
and
version
()
==
-
1
:
raise
RuntimeError
(
"cuDNN pooling with padding requires cuDNN v2"
)
node
=
Apply
(
self
,
[],
[
CDataType
(
"cudnnPoolingDescriptor_t"
,
freefunc
=
"cudnnDestroyPoolingDescriptor"
)()])
...
...
@@ -1444,8 +1445,6 @@ class GpuDnnPoolDesc(GpuOp):
mode_flag
=
'CUDNN_POOLING_AVERAGE_COUNT_INCLUDE_PADDING'
elif
self
.
mode
==
"average_exc_pad"
:
mode_flag
=
'CUDNN_POOLING_AVERAGE_COUNT_EXCLUDE_PADDING'
if
version
()
==
-
1
:
raise
Exception
(
"cudnn v1 do not support average_exc_pad"
)
else
:
raise
NotImplementedError
(
"Unsupported pooling model."
)
...
...
@@ -1616,8 +1615,6 @@ if (pool%(name)s != NULL) { cudnnDestroyPoolingDescriptor(pool%(name)s); }
mode_flag
=
'CUDNN_POOLING_AVERAGE_COUNT_INCLUDE_PADDING'
elif
self
.
mode
==
"average_exc_pad"
:
mode_flag
=
'CUDNN_POOLING_AVERAGE_COUNT_EXCLUDE_PADDING'
if
version
()
==
-
1
:
raise
Exception
(
"cudnn v1 do not support average_exc_pad"
)
else
:
raise
NotImplementedError
(
"Unsupported pooling model."
)
...
...
@@ -1872,8 +1869,6 @@ if (pool%(name)s != NULL) { cudnnDestroyPoolingDescriptor(pool%(name)s); }
mode_flag
=
'CUDNN_POOLING_AVERAGE_COUNT_INCLUDE_PADDING'
elif
self
.
mode
==
"average_exc_pad"
:
mode_flag
=
'CUDNN_POOLING_AVERAGE_COUNT_EXCLUDE_PADDING'
if
version
()
==
-
1
:
raise
Exception
(
"cudnn v1 do not support average_exc_pad"
)
else
:
raise
NotImplementedError
(
"Unsupported pooling model."
)
...
...
@@ -1976,28 +1971,33 @@ if (err%(name)s != CUDNN_STATUS_SUCCESS) {
return
[
shape
[
0
]]
def
dnn_pool
(
img
,
ws
,
stride
=
(
1
,
1
),
mode
=
'max'
,
pad
=
(
0
,
0
)
):
def
dnn_pool
(
img
,
ws
,
stride
=
None
,
mode
=
'max'
,
pad
=
None
):
"""
GPU pooling using cuDNN from NVIDIA.
The memory layout to use is 'bc01', that is 'batch', 'channel',
'first dim', 'second dim' in that order.
For 2D pooling, the memory layout to use is 'bc01', that is 'batch',
'channel', 'first dim', 'second dim' in that order.
For 3D pooling, the memory layout to use is 'bc012', that is 'batch',
'channel', 'first dim', 'second dim', 'third dim'.
Parameters
----------
img
Images to do the pooling over.
ws
Subsampling window size.
Subsampling window size.
Should have 2 or 3 elements.
stride
Subsampling stride (default: (1, 1)).
mode : {'max', 'average_inc_pad', 'average_exc_pad, 'sum'}
pad
:
(pad_h, pad_w) padding information
.
Subsampling stride (default: (1, 1)
or (1, 1, 1)
).
mode : {'max', 'average_inc_pad', 'average_exc_pad
'
, 'sum'}
pad
Padding: (pad_h, pad_w) for 2D or (pad_h, pad_w, pad_d) for 3D
.
pad_h is the number of zero-valued pixels added to each of the top and
bottom borders.
pad_w is the number of zero-valued pixels added to each of the left
and right borders.
pad_d is the number of zero-valued pixels added to each of the front
and back borders (3D pooling only).
.. warning:: The cuDNN library only works with GPU that have a compute
capability of 3.0 or higer. This means that older GPU will not
...
...
@@ -2009,6 +2009,10 @@ def dnn_pool(img, ws, stride=(1, 1), mode='max', pad=(0, 0)):
"""
img
=
gpu_contiguous
(
img
)
if
stride
is
None
:
stride
=
(
1
,)
*
len
(
ws
)
if
pad
is
None
:
pad
=
(
0
,)
*
len
(
ws
)
if
mode
==
"sum"
:
ret
=
GpuDnnPool
(
mode
=
"average_inc_pad"
)(
img
,
ws
,
stride
,
pad
)
window_elem
=
theano
.
tensor
.
prod
(
ws
)
.
astype
(
ret
.
dtype
)
...
...
@@ -2972,10 +2976,21 @@ if True:
if
not
node
.
op
.
ignore_border
:
return
img
,
ws
,
stride
,
pad
=
node
.
inputs
nd
=
node
.
op
.
ndim
if
node
.
op
.
ndim
else
(
img
.
ndim
-
2
)
mode
=
node
.
op
.
mode
if
nd
not
in
(
2
,
3
):
return
if
(
img
.
owner
and
isinstance
(
img
.
owner
.
op
,
HostFromGpu
)):
ret
=
dnn_pool
(
gpu_contiguous
(
img
.
owner
.
inputs
[
0
]),
ws
,
stride
=
stride
,
pad
=
pad
,
mode
=
mode
)
if
img
.
ndim
==
nd
+
2
:
ret
=
dnn_pool
(
gpu_contiguous
(
img
.
owner
.
inputs
[
0
]),
ws
,
stride
=
stride
,
pad
=
pad
,
mode
=
mode
)
else
:
input
=
gpu_contiguous
(
img
.
owner
.
inputs
[
0
])
# reshape to 4D or 5D with 2 non-pooling dimensions
input_padded
=
pad_dims
(
input
,
2
,
nd
)
ret_padded
=
dnn_pool
(
input_padded
,
ws
,
stride
=
stride
,
pad
=
pad
,
mode
=
mode
)
ret
=
unpad_dims
(
ret_padded
,
input
,
2
,
nd
)
return
[
host_from_gpu
(
ret
)]
@register_opt
(
'cudnn'
)
...
...
@@ -3003,17 +3018,30 @@ if True:
if
not
node
.
op
.
ignore_border
:
return
inp
,
out
,
inp_grad
,
ws
,
stride
,
pad
=
node
.
inputs
nd
=
node
.
op
.
ndim
if
node
.
op
.
ndim
else
(
inp
.
ndim
-
2
)
mode
=
node
.
op
.
mode
if
nd
not
in
(
2
,
3
):
return
if
((
inp
.
owner
and
isinstance
(
inp
.
owner
.
op
,
HostFromGpu
))
or
(
out
.
owner
and
isinstance
(
out
.
owner
.
op
,
HostFromGpu
))
or
(
inp_grad
.
owner
and
isinstance
(
inp_grad
.
owner
.
op
,
HostFromGpu
))):
ret
=
GpuDnnPoolGrad
(
mode
=
mode
)(
gpu_contiguous
(
inp
),
gpu_contiguous
(
out
),
gpu_contiguous
(
inp_grad
),
ws
,
stride
,
pad
)
if
inp
.
ndim
==
nd
+
2
:
ret
=
GpuDnnPoolGrad
(
mode
=
mode
)(
gpu_contiguous
(
inp
),
gpu_contiguous
(
out
),
gpu_contiguous
(
inp_grad
),
ws
,
stride
,
pad
)
else
:
# reshape to 4D or 5D with 2 non-pooling dimensions
inp_padded
=
pad_dims
(
gpu_contiguous
(
inp
),
2
,
nd
)
out_padded
=
pad_dims
(
gpu_contiguous
(
out
),
2
,
nd
)
inp_grad_padded
=
pad_dims
(
gpu_contiguous
(
inp_grad
),
2
,
nd
)
ret_padded
=
GpuDnnPoolGrad
(
mode
=
mode
)(
inp_padded
,
out_padded
,
inp_grad_padded
,
ws
,
stride
,
pad
)
ret
=
unpad_dims
(
ret_padded
,
inp
,
2
,
nd
)
return
[
host_from_gpu
(
ret
)]
@register_opt
(
'cudnn'
)
...
...
@@ -3025,16 +3053,28 @@ if True:
if
not
node
.
op
.
ignore_border
:
return
inp
,
inp_grad
,
ws
,
stride
,
pad
=
node
.
inputs
nd
=
node
.
op
.
ndim
if
node
.
op
.
ndim
else
(
inp
.
ndim
-
2
)
mode
=
node
.
op
.
mode
if
nd
not
in
(
2
,
3
):
return
if
((
inp
.
owner
and
isinstance
(
inp
.
owner
.
op
,
HostFromGpu
))
or
(
inp_grad
.
owner
and
isinstance
(
inp_grad
.
owner
.
op
,
HostFromGpu
))):
contiguous_inp_grad
=
gpu_contiguous
(
inp_grad
)
ret
=
GpuDnnPoolGrad
(
mode
=
mode
)(
gpu_contiguous
(
inp
),
contiguous_inp_grad
,
contiguous_inp_grad
,
ws
,
stride
,
pad
)
if
inp
.
ndim
==
nd
+
2
:
contiguous_inp_grad
=
gpu_contiguous
(
inp_grad
)
ret
=
GpuDnnPoolGrad
(
mode
=
mode
)(
gpu_contiguous
(
inp
),
contiguous_inp_grad
,
contiguous_inp_grad
,
ws
,
stride
,
pad
)
else
:
inp_padded
=
pad_dims
(
gpu_contiguous
(
inp
),
2
,
nd
)
inp_grad_padded
=
pad_dims
(
gpu_contiguous
(
inp_grad
),
2
,
nd
)
ret_padded
=
GpuDnnPoolGrad
(
mode
=
mode
)(
inp_padded
,
inp_grad_padded
,
inp_grad_padded
,
ws
,
stride
,
pad
)
ret
=
unpad_dims
(
ret_padded
,
inp
,
2
,
nd
)
return
[
host_from_gpu
(
ret
)]
@register_opt
(
'cudnn'
)
...
...
theano/sandbox/cuda/opt.py
浏览文件 @
a16e91f7
...
...
@@ -40,6 +40,7 @@ from theano.sandbox.cuda.basic_ops import (
GpuSubtensor
,
GpuAdvancedSubtensor1
,
GpuAdvancedIncSubtensor1
,
GpuAdvancedIncSubtensor1_dev20
,
GpuIncSubtensor
,
gpu_alloc
,
GpuAlloc
,
gpu_shape
,
GpuSplit
,
GpuAllocEmpty
)
from
theano.sandbox.cuda.opt_util
import
pad_dims
,
unpad_dims
from
theano.sandbox.cuda.type
import
CudaNdarrayType
from
theano.sandbox.cuda.blas
import
(
...
...
@@ -1891,15 +1892,12 @@ def local_convtransp3d_gemm(node):
gpu_optimizer
.
register
(
"convtransp3d_gemm"
,
local_convtransp3d_gemm
)
def
_check_constant_args_pool
(
ws
,
stride
,
pad
,
node
):
def
_check_constant_args_pool
(
ndim
,
ws
,
stride
,
pad
,
node
):
"""Check if the args of pool are constants. Warns if not."""
try
:
ws_w
=
tensor
.
get_scalar_constant_value
(
ws
[
0
])
ws_h
=
tensor
.
get_scalar_constant_value
(
ws
[
1
])
stride_w
=
tensor
.
get_scalar_constant_value
(
stride
[
0
])
stride_h
=
tensor
.
get_scalar_constant_value
(
stride
[
1
])
pad_w
=
tensor
.
get_scalar_constant_value
(
pad
[
0
])
pad_h
=
tensor
.
get_scalar_constant_value
(
pad
[
1
])
ws
=
tuple
(
tensor
.
get_scalar_constant_value
(
ws
[
i
])
for
i
in
range
(
ndim
))
stride
=
tuple
(
tensor
.
get_scalar_constant_value
(
stride
[
i
])
for
i
in
range
(
ndim
))
pad
=
tuple
(
tensor
.
get_scalar_constant_value
(
pad
[
i
])
for
i
in
range
(
ndim
))
except
tensor
.
NotScalarConstantError
:
msg
=
(
"Pool with tensor variable for the window size, stride or "
"padding is only supported in the new GPU backend, so this op "
...
...
@@ -1909,65 +1907,96 @@ def _check_constant_args_pool(ws, stride, pad, node):
elif
config
.
assert_no_cpu_op
==
"raise"
:
raise
AssertionError
(
msg
)
return
None
ws
=
(
ws_w
,
ws_h
)
stride
=
(
stride_w
,
stride_h
)
pad
=
(
pad_w
,
pad_h
)
return
ws
,
stride
,
pad
@register_opt
()
@local_optimizer
([
pool
.
Pool
])
def
local_gpu_downsample_factor_max
(
node
):
if
isinstance
(
node
.
op
,
pool
.
Pool
):
assert
node
.
op
.
__props__
==
(
'ignore_border'
,
'mode'
)
if
(
isinstance
(
node
.
op
,
pool
.
Pool
)
):
assert
node
.
op
.
__props__
==
(
'
ndim'
,
'
ignore_border'
,
'mode'
)
x
,
ws
,
stride
,
pad
=
node
.
inputs
ret
=
_check_constant_args_pool
(
ws
,
stride
,
pad
,
node
)
nd
=
node
.
op
.
ndim
if
node
.
op
.
ndim
else
(
x
.
ndim
-
2
)
ret
=
_check_constant_args_pool
(
nd
,
ws
,
stride
,
pad
,
node
)
if
ret
is
None
:
return
ws
,
stride
,
pad
=
ret
if
(
pad
)
!=
(
0
,
0
)
or
node
.
op
.
mode
!=
'max'
or
stride
!=
ws
:
if
(
nd
!=
2
or
max
(
node
.
op
.
padding
)
!=
0
or
node
.
op
.
mode
!=
'max'
or
stride
!=
ws
):
return
if
(
x
.
owner
and
isinstance
(
x
.
owner
.
op
,
HostFromGpu
)):
gpu_ds
=
GpuDownsampleFactorMax
(
ws
,
node
.
op
.
ignore_border
)
return
[
host_from_gpu
(
gpu_ds
(
x
.
owner
.
inputs
[
0
]))]
gpu_ws
=
GpuDownsampleFactorMax
(
ws
,
node
.
op
.
ignore_border
)
if
node
.
inputs
[
0
]
.
ndim
==
4
:
return
[
host_from_gpu
(
gpu_ws
(
x
.
owner
.
inputs
[
0
]))]
else
:
input_4D
=
pad_dims
(
x
.
owner
.
inputs
[
0
],
2
,
2
)
output_4D
=
gpu_ws
(
input_4D
)
output
=
unpad_dims
(
output_4D
,
x
.
owner
.
inputs
[
0
],
2
,
2
)
return
[
host_from_gpu
(
output
)]
@register_opt
()
@local_optimizer
([
pool
.
MaxPoolGrad
])
def
local_gpu_downsample_factor_max_grad
(
node
):
if
isinstance
(
node
.
op
,
pool
.
MaxPoolGrad
):
assert
node
.
op
.
__props__
==
(
'ignore_border'
,
'mode'
)
if
(
isinstance
(
node
.
op
,
pool
.
MaxPoolGrad
)
):
assert
node
.
op
.
__props__
==
(
'
ndim'
,
'
ignore_border'
,
'mode'
)
x
,
z
,
gz
,
ws
,
stride
,
pad
=
node
.
inputs
ret
=
_check_constant_args_pool
(
ws
,
stride
,
pad
,
node
)
nd
=
node
.
op
.
ndim
if
node
.
op
.
ndim
else
(
x
.
ndim
-
2
)
ret
=
_check_constant_args_pool
(
nd
,
ws
,
stride
,
pad
,
node
)
if
ret
is
None
:
return
ws
,
stride
,
pad
=
ret
if
pad
!=
(
0
,
0
)
or
node
.
op
.
mode
!=
'max'
or
stride
!=
ws
:
if
(
nd
!=
2
or
max
(
node
.
op
.
padding
)
!=
0
or
node
.
op
.
mode
!=
'max'
or
stride
!=
ws
):
return
if
(
x
.
owner
and
isinstance
(
x
.
owner
.
op
,
HostFromGpu
)):
gpu_ds_grad
=
GpuDownsampleFactorMaxGrad
(
ws
,
node
.
op
.
ignore_border
)
return
[
host_from_gpu
(
gpu_ds_grad
(
x
.
owner
.
inputs
[
0
],
as_cuda_ndarray_variable
(
z
),
as_cuda_ndarray_variable
(
gz
)))]
gpu_ws_grad
=
GpuDownsampleFactorMaxGrad
(
ws
,
node
.
op
.
ignore_border
)
if
node
.
inputs
[
0
]
.
ndim
==
4
:
return
[
host_from_gpu
(
gpu_ws_grad
(
x
.
owner
.
inputs
[
0
],
as_cuda_ndarray_variable
(
z
),
as_cuda_ndarray_variable
(
gz
)))]
else
:
x_4D
=
pad_dims
(
x
.
owner
.
inputs
[
0
],
2
,
2
)
z_4D
=
pad_dims
(
as_cuda_ndarray_variable
(
z
),
2
,
2
)
gz_4D
=
pad_dims
(
as_cuda_ndarray_variable
(
gz
),
2
,
2
)
output_4D
=
gpu_ws_grad
(
x_4D
,
z_4D
,
gz_4D
)
output
=
unpad_dims
(
output_4D
,
x
.
owner
.
inputs
[
0
],
2
,
2
)
return
[
host_from_gpu
(
output
)]
@register_opt
()
@local_optimizer
([
pool
.
DownsampleFactorMaxGradGrad
])
def
local_gpu_downsample_factor_max_grad_grad
(
node
):
if
isinstance
(
node
.
op
,
pool
.
DownsampleFactorMaxGradGrad
):
assert
node
.
op
.
__props__
==
(
'ignore_border'
,
'mode'
)
assert
node
.
op
.
__props__
==
(
'
ndim'
,
'
ignore_border'
,
'mode'
)
x
,
z
,
gx
,
ws
,
stride
,
pad
=
node
.
inputs
ret
=
_check_constant_args_pool
(
ws
,
stride
,
pad
,
node
)
nd
=
node
.
op
.
ndim
if
node
.
op
.
ndim
else
(
x
.
ndim
-
2
)
ret
=
_check_constant_args_pool
(
nd
,
ws
,
stride
,
pad
,
node
)
if
ret
is
None
:
return
ws
,
stride
,
pad
=
ret
if
pad
!=
(
0
,
0
)
or
node
.
op
.
mode
!=
'max'
or
stride
!=
ws
:
if
(
nd
!=
2
or
max
(
node
.
op
.
padding
)
!=
0
or
node
.
op
.
mode
!=
'max'
or
stride
!=
ws
):
return
if
(
x
.
owner
and
isinstance
(
x
.
owner
.
op
,
HostFromGpu
)):
op
=
GpuDownsampleFactorMaxGradGrad
(
ws
,
node
.
op
.
ignore_border
)
return
[
host_from_gpu
(
op
(
x
.
owner
.
inputs
[
0
],
as_cuda_ndarray_variable
(
z
),
as_cuda_ndarray_variable
(
gx
)))]
if
node
.
inputs
[
0
]
.
ndim
==
4
:
return
[
host_from_gpu
(
op
(
x
.
owner
.
inputs
[
0
],
as_cuda_ndarray_variable
(
z
),
as_cuda_ndarray_variable
(
gx
)))]
else
:
x_4D
=
pad_dims
(
x
.
owner
.
inputs
[
0
],
2
,
2
)
z_4D
=
pad_dims
(
as_cuda_ndarray_variable
(
z
),
2
,
2
)
gx_4D
=
pad_dims
(
as_cuda_ndarray_variable
(
gx
),
2
,
2
)
output_4D
=
op
(
x_4D
,
z_4D
,
gx_4D
)
output
=
unpad_dims
(
output_4D
,
x
.
owner
.
inputs
[
0
],
2
,
2
)
return
[
host_from_gpu
(
output
)]
@register_opt
()
...
...
theano/sandbox/cuda/opt_util.py
浏览文件 @
a16e91f7
...
...
@@ -3,13 +3,13 @@ from functools import wraps
import
numpy
from
theano
import
scalar
as
scal
,
Constant
from
theano
import
tensor
,
scalar
as
scal
,
Constant
from
theano.gof
import
local_optimizer
from
theano.tensor
import
(
DimShuffle
,
get_scalar_constant_value
,
NotScalarConstantError
)
from
theano.sandbox.cuda.basic_ops
import
(
GpuFromHost
,
HostFromGpu
,
host_from_gpu
,
GpuDimShuffle
,
GpuElemwise
)
GpuFromHost
,
HostFromGpu
,
host_from_gpu
,
GpuDimShuffle
,
GpuElemwise
,
GpuReshape
)
_one
=
scal
.
constant
(
numpy
.
asarray
(
1.0
,
dtype
=
'float32'
))
...
...
@@ -126,3 +126,48 @@ def output_merge(cls, alpha_in, beta_in, out_in):
return
maker
(
targ
,
*
inputs
)
return
opt
return
wrapper
def
pad_dims
(
input
,
leftdims
,
rightdims
):
"""Reshapes the input to a (leftdims + rightdims) tensor
This helper function is used to convert pooling inputs with arbitrary
non-pooling dimensions to the correct number of dimensions for the
GPU pooling ops.
This reduces or expands the number of dimensions of the input to
exactly `leftdims`, by adding extra dimensions on the left or by
combining some existing dimensions on the left of the input.
"""
assert
input
.
ndim
>=
rightdims
if
input
.
ndim
==
(
leftdims
+
rightdims
):
return
input
# extract image dimensions
img_shape
=
input
.
shape
[
-
rightdims
:]
# count the number of "leading" dimensions, store as dmatrix
batch_size
=
tensor
.
prod
(
input
.
shape
[:
-
rightdims
])
batch_size
=
tensor
.
shape_padright
(
batch_size
,
1
)
# store in the required shape, for example as a 4D tensor
# with shape: (batch_size,1,height,width)
new_shape
=
tensor
.
cast
(
tensor
.
join
(
0
,
batch_size
,
tensor
.
as_tensor
([
1
]
*
(
leftdims
-
1
)),
img_shape
),
'int64'
)
input_ND
=
GpuReshape
(
leftdims
+
rightdims
)(
input
,
new_shape
)
return
input_ND
def
unpad_dims
(
output
,
input
,
leftdims
,
rightdims
):
"""Reshapes the output after pad_dims.
This reverts the padding by `pad_dims`.
"""
if
output
.
ndim
==
input
.
ndim
:
return
output
# restore the output to the original shape
outshp
=
tensor
.
join
(
0
,
input
.
shape
[:
-
rightdims
],
output
.
shape
[
-
rightdims
:])
return
GpuReshape
(
input
.
ndim
)(
output
,
outshp
)
theano/sandbox/cuda/tests/test_blas.py
浏览文件 @
a16e91f7
...
...
@@ -326,7 +326,9 @@ if 0:
def
test_downsample
():
shps
=
[(
1
,
1
,
1
,
12
),
shps
=
[(
1
,
12
),
(
1
,
1
,
12
),
(
1
,
1
,
1
,
12
),
(
1
,
1
,
2
,
2
),
(
1
,
1
,
1
,
1
),
(
1
,
1
,
4
,
4
),
...
...
@@ -359,17 +361,17 @@ def test_downsample():
for
shp
in
shps
:
for
ds
in
(
2
,
2
),
(
3
,
2
),
(
1
,
1
):
if
ds
[
0
]
>
shp
[
2
]:
if
ds
[
0
]
>
shp
[
-
2
]:
continue
if
ds
[
1
]
>
shp
[
3
]:
if
ds
[
1
]
>
shp
[
-
1
]:
continue
# GpuDownsampleFactorMax doesn't like having more than 512 columns
# in the output tensor.
if
float
(
shp
[
3
])
/
ds
[
1
]
>
512
:
if
float
(
shp
[
-
1
])
/
ds
[
1
]
>
512
:
continue
for
ignore_border
in
(
True
,
False
):
# print 'test_downsample', shp, ds, ignore_border
ds_op
=
Pool
(
ignore_border
=
ignore_border
)
ds_op
=
Pool
(
ndim
=
len
(
ds
),
ignore_border
=
ignore_border
)
a
=
tcn
.
shared_constructor
(
my_rand
(
*
shp
),
'a'
)
f
=
pfunc
([],
ds_op
(
tensor
.
as_tensor_variable
(
a
),
ds
),
...
...
theano/sandbox/cuda/tests/test_dnn.py
浏览文件 @
a16e91f7
...
...
@@ -15,8 +15,8 @@ import theano
import
theano.tensor
as
T
import
theano.tests.unittest_tools
as
utt
from
theano.sandbox.neighbours
import
images2neibs
from
theano.tensor.signal.pool
import
pool_2d
from
theano.tensor.signal.pool
import
MaxPoolGrad
,
AveragePoolGrad
from
theano.tensor.signal.pool
import
pool_2d
,
pool_3d
from
theano.tensor.signal.pool
import
Pool
,
MaxPoolGrad
,
AveragePoolGrad
import
theano.sandbox.cuda.dnn
as
dnn
from
theano.sandbox.cuda.basic_ops
import
GpuAllocEmpty
,
gpu_alloc_empty
from
theano.sandbox.cuda
import
float32_shared_constructor
as
shared
...
...
@@ -170,7 +170,7 @@ def test_dnn_conv_inplace():
def
pool3d2d
(
input
,
ds
=
(
2
,
2
,
2
),
strides
=
None
,
pad
=
(
0
,
0
,
0
),
pool_func
=
T
.
max
,
mode
=
'ignore_borders'
):
pool_func
tion
=
T
.
max
,
mode
=
'ignore_borders'
):
if
strides
is
None
:
strides
=
ds
...
...
@@ -179,13 +179,13 @@ def pool3d2d(input, ds=(2, 2, 2), strides=None, pad=(0, 0, 0),
# resahpe to B, C*0, 1, 2 and do the pooling on 1, 2
first
=
input
.
reshape
((
shape
[
0
],
shape
[
1
]
*
shape
[
2
],
shape
[
3
],
shape
[
4
]))
pooled1
=
pool_2d_i2n
(
first
,
ds
=
ds
[
1
:],
strides
=
strides
[
1
:],
pad
=
pad
[
1
:],
pool_function
=
pool_func
,
mode
=
mode
)
pool_function
=
pool_func
tion
,
mode
=
mode
)
shp1
=
pooled1
.
shape
# reshape to B, C, 0, 1*2 and do the pooling on 0
second
=
pooled1
.
reshape
((
shape
[
0
],
shape
[
1
],
shape
[
2
],
shp1
[
2
]
*
shp1
[
3
]))
pooled2
=
pool_2d_i2n
(
second
,
ds
=
(
ds
[
0
],
1
),
strides
=
(
strides
[
0
],
1
),
pad
=
(
pad
[
0
],
0
),
pool_function
=
pool_func
,
mode
=
mode
)
pad
=
(
pad
[
0
],
0
),
pool_function
=
pool_func
tion
,
mode
=
mode
)
shp2
=
pooled2
.
shape
return
pooled2
.
reshape
((
shape
[
0
],
shape
[
1
],
shp2
[
2
],
shp1
[
2
],
shp1
[
3
]))
...
...
@@ -241,8 +241,6 @@ def test_pooling():
func
=
T
.
max
else
:
func
=
T
.
mean
if
pad
!=
(
0
,
0
)
and
cuda
.
dnn
.
version
()
==
-
1
:
continue
if
pad
!=
(
0
,
0
)
and
func
is
T
.
mean
:
continue
...
...
@@ -418,6 +416,7 @@ def test_pooling3d():
if
not
cuda
.
dnn
.
dnn_available
()
or
cuda
.
dnn
.
version
()
<
(
3000
,
3000
):
raise
SkipTest
(
cuda
.
dnn
.
dnn_available
.
msg
)
# We force the FAST_RUN as we don't want the reference to run in DebugMode.
mode_without_gpu_ref
=
theano
.
compile
.
mode
.
get_mode
(
'FAST_RUN'
)
.
excluding
(
'gpu'
)
...
...
@@ -427,8 +426,7 @@ def test_pooling3d():
else
:
modes
=
(
'max'
,
'average_inc_pad'
,
'average_exc_pad'
)
x
=
T
.
TensorType
(
broadcastable
=
(
False
,
False
,
False
,
False
,
False
),
dtype
=
'float32'
)()
x
=
T
.
ftensor5
()
for
mode
,
pad
in
product
(
modes
,
((
0
,
0
,
0
),
(
1
,
0
,
0
),
(
0
,
1
,
0
),
(
0
,
0
,
1
),
(
2
,
3
,
2
),
(
3
,
2
,
2
),
(
2
,
2
,
3
))):
...
...
@@ -436,8 +434,6 @@ def test_pooling3d():
func
=
T
.
max
else
:
func
=
T
.
mean
if
pad
!=
(
0
,
0
,
0
)
and
cuda
.
dnn
.
version
()
==
-
1
:
continue
if
pad
!=
(
0
,
0
,
0
)
and
func
is
T
.
mean
:
continue
...
...
@@ -449,13 +445,13 @@ def test_pooling3d():
if
pad
[
0
]
>
stride
or
pad
[
1
]
>
stride
or
pad
[
2
]
>
stride
:
# Not implemented
continue
out1
=
cuda
.
dnn
.
dnn_pool
(
x
,
(
ws
,
ws
,
ws
),
stride
=
(
stride
,
stride
,
stride
),
pad
=
pad
,
mode
=
mode
)
out2
=
pool3d2d
(
x
,
ds
=
(
ws
,
ws
,
ws
),
strides
=
(
stride
,
stride
,
stride
),
pad
=
pad
,
pool_func
=
func
)
out1
=
pool_3d
(
x
,
(
ws
,
ws
,
ws
),
st
=
(
stride
,
stride
,
stride
),
ignore_border
=
True
,
padding
=
pad
,
mode
=
mode
)
out2
=
pool3d2d
(
x
,
ds
=
(
ws
,
ws
,
ws
),
strides
=
(
stride
,
stride
,
stride
),
pad
=
pad
,
pool_function
=
func
)
f1
=
theano
.
function
([
x
],
out1
,
mode
=
mode_with_gpu
)
assert
any
([
isinstance
(
node
.
op
,
cuda
.
dnn
.
GpuDnnPool
)
for
node
in
f1
.
maker
.
fgraph
.
apply_nodes
])
...
...
@@ -510,11 +506,17 @@ def test_pooling3d():
g_out
=
fg
(
data
)
# Compare again the CPU result
out
=
pool
3d2
d
(
x
,
(
ws
,
ws
,
ws
),
strides
=
(
stride
,
stride
,
stride
)
,
pad
=
pad
,
pool_func
=
func
)
out
=
pool
_3
d
(
x
,
(
ws
,
ws
,
ws
),
padding
=
pad
,
ignore_border
=
True
,
mode
=
mode
)
fc
=
theano
.
function
([
x
],
theano
.
grad
(
out
.
sum
(),
x
),
mode
=
mode_without_gpu_ref
)
if
mode
==
'max'
:
assert
any
([
isinstance
(
node
.
op
,
MaxPoolGrad
)
for
node
in
fc
.
maker
.
fgraph
.
toposort
()])
else
:
assert
any
([
isinstance
(
node
.
op
,
AveragePoolGrad
)
for
node
in
fc
.
maker
.
fgraph
.
toposort
()])
c_out
=
fc
(
data
)
utt
.
assert_allclose
(
c_out
,
g_out
)
...
...
@@ -523,6 +525,7 @@ def test_pooling_opt():
if
not
cuda
.
dnn
.
dnn_available
():
raise
SkipTest
(
cuda
.
dnn
.
dnn_available
.
msg
)
# 2D pooling
x
=
T
.
fmatrix
()
f
=
theano
.
function
(
...
...
@@ -535,6 +538,7 @@ def test_pooling_opt():
f
(
numpy
.
zeros
((
10
,
10
),
dtype
=
'float32'
))
# gradient of 2D pooling
f
=
theano
.
function
(
[
x
],
T
.
grad
(
pool_2d
(
x
,
ds
=
(
2
,
2
),
mode
=
'average_inc_pad'
,
...
...
@@ -545,6 +549,7 @@ def test_pooling_opt():
for
n
in
f
.
maker
.
fgraph
.
toposort
()])
f
(
numpy
.
zeros
((
10
,
10
),
dtype
=
'float32'
))
# Test sum pooling
f
=
theano
.
function
(
[
x
],
...
...
@@ -557,6 +562,82 @@ def test_pooling_opt():
data
=
numpy
.
random
.
rand
(
10
,
10
)
.
astype
(
'float32'
)
f
(
data
)
# 3D pooling
x
=
T
.
ftensor3
()
f
=
theano
.
function
(
[
x
],
pool_3d
(
x
,
ds
=
(
2
,
2
,
2
),
mode
=
'average_inc_pad'
,
ignore_border
=
True
),
mode
=
mode_with_gpu
)
assert
any
([
isinstance
(
n
.
op
,
cuda
.
dnn
.
GpuDnnPool
)
for
n
in
f
.
maker
.
fgraph
.
toposort
()])
f
(
numpy
.
zeros
((
10
,
10
,
10
),
dtype
=
'float32'
))
# gradient of 3D pooling
f
=
theano
.
function
(
[
x
],
T
.
grad
(
pool_3d
(
x
,
ds
=
(
2
,
2
,
2
),
mode
=
'average_inc_pad'
,
ignore_border
=
True
)
.
sum
(),
x
),
mode
=
mode_with_gpu
.
including
(
"cudnn"
))
assert
any
([
isinstance
(
n
.
op
,
cuda
.
dnn
.
GpuDnnPoolGrad
)
for
n
in
f
.
maker
.
fgraph
.
toposort
()])
f
(
numpy
.
zeros
((
10
,
10
,
10
),
dtype
=
'float32'
))
def
test_pooling_opt_arbitrary_dimensions
():
# test if input with an arbitrary number of non-pooling dimensions
# is correctly reshaped to run on the GPU
if
not
cuda
.
dnn
.
dnn_available
():
raise
SkipTest
(
cuda
.
dnn
.
dnn_available
.
msg
)
# 'average_exc_pad' is disabled for versions < 4004
if
cuda
.
dnn
.
version
()
<
(
4004
,
4004
):
modes
=
(
'max'
,
'average_inc_pad'
)
else
:
modes
=
(
'max'
,
'average_inc_pad'
,
'average_exc_pad'
)
for
n_non_pool_dims
in
(
0
,
1
,
2
,
3
):
for
ws
in
((
2
,
2
),
(
3
,
3
,
3
)):
# create input shape: non-pooling dimensions
# followed by 2 or 3 pooling dimensions
shp
=
(
2
,)
*
n_non_pool_dims
+
(
5
,)
*
len
(
ws
)
data
=
numpy
.
random
.
normal
(
0
,
1
,
shp
)
.
astype
(
'float32'
)
input
=
shared
(
data
)
for
mode
in
modes
:
out_pool
=
Pool
(
ndim
=
len
(
ws
),
mode
=
mode
,
ignore_border
=
True
)(
input
,
ws
)
out_pool_grad
=
T
.
grad
(
T
.
sum
(
out_pool
),
wrt
=
input
)
out
=
[
out_pool
,
out_pool_grad
]
# run on GPU
fg
=
theano
.
function
([],
out
,
mode
=
mode_with_gpu
)
assert
any
([
isinstance
(
node
.
op
,
cuda
.
dnn
.
GpuDnnPool
)
for
node
in
fg
.
maker
.
fgraph
.
toposort
()])
assert
any
([
isinstance
(
node
.
op
,
cuda
.
dnn
.
GpuDnnPoolGrad
)
for
node
in
fg
.
maker
.
fgraph
.
toposort
()])
res_gpu
=
fg
()
# run on CPU
fc
=
theano
.
function
([],
out
,
mode
=
mode_without_gpu
)
assert
any
([
isinstance
(
node
.
op
,
Pool
)
for
node
in
fc
.
maker
.
fgraph
.
toposort
()])
if
mode
==
'max'
:
assert
any
([
isinstance
(
node
.
op
,
MaxPoolGrad
)
for
node
in
fc
.
maker
.
fgraph
.
toposort
()])
else
:
assert
any
([
isinstance
(
node
.
op
,
AveragePoolGrad
)
for
node
in
fc
.
maker
.
fgraph
.
toposort
()])
res_cpu
=
fg
()
# check for similarity
utt
.
assert_allclose
(
res_gpu
[
0
],
res_cpu
[
0
])
utt
.
assert_allclose
(
res_gpu
[
1
],
res_cpu
[
1
])
class
test_DnnSoftMax
(
test_nnet
.
test_SoftMax
):
gpu_op
=
dnn
.
GpuDnnSoftmax
...
...
theano/tensor/signal/pool.py
浏览文件 @
a16e91f7
...
...
@@ -7,6 +7,7 @@ Pool, DownsampleAvg, DownsampleSoftmax.
from
__future__
import
absolute_import
,
print_function
,
division
# This file should move along with conv.py
import
warnings
import
itertools
import
numpy
from
six.moves
import
xrange
...
...
@@ -84,67 +85,101 @@ def pool_2d(input, ds, ignore_border=None, st=None, padding=(0, 0),
" Otherwise, the convolution will be executed on CPU."
,
stacklevel
=
2
)
ignore_border
=
False
if
input
.
ndim
==
4
:
op
=
Pool
(
ignore_border
,
mode
=
mode
)
output
=
op
(
input
,
ds
,
st
,
padding
)
return
output
op
=
Pool
(
ignore_border
,
ndim
=
2
,
mode
=
mode
)
output
=
op
(
input
,
ds
,
st
,
padding
)
return
output
# extract image dimensions
img_shape
=
input
.
shape
[
-
2
:]
# count the number of "leading" dimensions, store as dmatrix
batch_size
=
tensor
.
prod
(
input
.
shape
[:
-
2
])
batch_size
=
tensor
.
shape_padright
(
batch_size
,
1
)
def
pool_3d
(
input
,
ds
,
ignore_border
=
None
,
st
=
None
,
padding
=
(
0
,
0
,
0
),
mode
=
'max'
):
"""Downscale the input by a specified factor
# store as 4D tensor with shape: (batch_size,1,height,width)
new_shape
=
tensor
.
cast
(
tensor
.
join
(
0
,
batch_size
,
tensor
.
as_tensor
([
1
]),
img_shape
),
'int64'
)
input_4D
=
tensor
.
reshape
(
input
,
new_shape
,
ndim
=
4
)
Takes as input a N-D tensor, where N >= 3. It downscales the input image by
the specified factor, by keeping only the maximum value of non-overlapping
patches of size (ds[0],ds[1],ds[2])
# downsample mini-batch of images
op
=
Pool
(
ignore_border
,
mode
=
mode
)
output
=
op
(
input_4D
,
ds
,
st
,
padding
)
Parameters
----------
input : N-D theano tensor of input images
Input images. Max pooling will be done over the 3 last dimensions.
ds : tuple of length 3 or theano vector of ints of size 3
Factor by which to downscale (vertical ds, horizontal ds, depth ds).
(2,2,2) will halve the image in each dimension.
ignore_border : bool (default None, will print a warning and set to False)
When True, (5,5,5) input with ds=(2,2,2) will generate a (2,2,2) output.
(3,3,3) otherwise.
st : tuple of three ints or theano vector of ints of size 3
Stride size, which is the number of shifts over rows/cols/slices to get
the next pool region. If st is None, it is considered equal to ds
(no overlap on pooling regions).
padding : tuple of two ints or theano vector of ints of size 3
(pad_h, pad_w, pad_d), pad zeros to extend beyond six borders of the
images, pad_h is the size of the top and bottom margins,
pad_w is the size of the left and right margins, and pad_d is the size
of the front and back margins
mode : {'max', 'sum', 'average_inc_pad', 'average_exc_pad'}
Operation executed on each window. `max` and `sum` always exclude
the padding in the computation. `average` gives you the choice to
include or exclude it.
# restore to original shape
outshp
=
tensor
.
join
(
0
,
input
.
shape
[:
-
2
],
output
.
shape
[
-
2
:])
return
tensor
.
reshape
(
output
,
outshp
,
ndim
=
input
.
ndim
)
"""
if
input
.
ndim
<
3
:
raise
NotImplementedError
(
'pool_3d requires a dimension >= 3'
)
if
ignore_border
is
None
:
warnings
.
warn
(
"pool_3d() will have the parameter ignore_border"
" default value changed to True (currently"
" False). To have consistent behavior with all Theano"
" version, explicitly add the parameter ignore_border=True."
" On the GPU, using ignore_border=True is needed to use cuDNN."
" When using ignore_border=False and not using cuDNN, the only"
" GPU combination supported is when"
" `ds == st and padding == (0, 0, 0) and mode == 'max'`."
" Otherwise, the convolution will be executed on CPU."
,
stacklevel
=
2
)
ignore_border
=
False
op
=
Pool
(
ignore_border
,
ndim
=
3
,
mode
=
mode
)
output
=
op
(
input
,
ds
,
st
,
padding
)
return
output
class
Pool
(
OpenMPOp
):
"""
For N-dimensional tensors, consider that the last two dimensions span
images. This Op downsamples these images by taking the max, sum or average
over different patch.
The constructor takes the max, sum or average or different input patches.
This Op downsamples the last N dimensions of the input by taking the max,
sum or average over different patches.
Parameters
----------
ds : list or tuple of
two
ints
Downsample factor over rows
and column
.
ds indicates the
pool region size
.
ds : list or tuple of
N
ints
Downsample factor over rows
, columns etc
.
ds indicates the
size of the pooling region
.
ignore_border : bool
If ds doesn't divide imgshape, do we include an extra row/col
If ds doesn't divide imgshape, do we include an extra row/col
/slice
of partial downsampling (False) or ignore it (True).
st : list or tuple of
two
ints or None
Stride size, which is the number of shifts over rows/cols to get the
st : list or tuple of
N
ints or None
Stride size, which is the number of shifts over rows/cols
/slices
to get the
next pool region. If st is None, it is considered equal to ds
(no overlap on pooling regions).
padding: tuple of two ints
(pad_h, pad_w), pad zeros to extend beyond four borders of the images,
pad_h is the size of the top and bottom margins, and pad_w is the size
of the left and right margins.
padding : tuple of N ints or None
For each downsampling dimension, this specifies the number of zeros to
add as padding on both sides. For 2D and (pad_h, pad_w), pad_h specifies the
size of the top and bottom margins, pad_w specifies the size of the left and
right margins. No padding is added if padding is None.
mode : {'max', 'sum', 'average_inc_pad', 'average_exc_pad'}
('average_inc_pad' excludes the padding from the count,
'average_exc_pad' include it)
ndim : int
The number of pooling dimensions N.
If this number is not specified, the default is set to the
(input.ndim - 2), assuming that the first two dimensions of the input
are non-pooling dimensions.
"""
__props__
=
(
'ignore_border'
,
'mode'
)
__props__
=
(
'
ndim'
,
'
ignore_border'
,
'mode'
)
@staticmethod
def
out_shape
(
imgshape
,
ds
,
ignore_border
=
False
,
st
=
None
,
padding
=
(
0
,
0
)
):
def
out_shape
(
imgshape
,
ds
,
ignore_border
=
False
,
st
=
None
,
padding
=
None
,
ndim
=
None
):
"""
Return the shape of the output from this op, for input of given
shape and flags.
...
...
@@ -152,88 +187,79 @@ class Pool(OpenMPOp):
Parameters
----------
imgshape : tuple, list, or similar of integer or scalar Theano variable
The shape of a tensor of images. The last
two
elements are
The shape of a tensor of images. The last
N
elements are
interpreted as the number of rows, and the number of cols.
ds : list or tuple of two ints
Downsample factor over rows and columns this parameter indicates
the size of the pooling region.
st : list or tuple of two ints
The stride size. This is the distance between the pooling regions.
If it's set to None, it equals ds.
ds : list or tuple of N ints
Downsample factor over rows and column.
ds indicates the pool region size.
ignore_border : bool
If ds doesn't divide imgshape, do we include an extra row/col of
partial downsampling (False) or ignore it (True).
padding : tuple of two ints
(pad_h, pad_w), pad zeros to extend beyond four borders
of the images, pad_h is the size of the top and bottom margins,
and pad_w is the size of the left and right margins.
If ds doesn't divide imgshape, do we include an extra row/col/slice
of partial downsampling (False) or ignore it (True).
st : list or tuple of N ints or None
Stride size, which is the number of shifts over rows/cols/slices to get the
next pool region. If st is None, it is considered equal to ds
(no overlap on pooling regions).
padding : tuple of N ints or None
For each downsampling dimension, this specifies the number of zeros to
add as padding on both sides. For 2D and (pad_h, pad_w), pad_h specifies the
size of the top and bottom margins, pad_w specifies the size of the left and
right margins. No padding is added if padding is None.
ndim : int
The number of pooling dimensions N.
If this number is not specified, the default is set to the
(input.ndim - 2), assuming that the first two dimensions of the input
are non-pooling dimensions.
Returns
-------
list
The shape of the output from this op, for input of given shape.
This will have the same length as imgshape, but with last
two
This will have the same length as imgshape, but with last
N
elements reduced as per the downsampling & ignore_border flags.
"""
if
len
(
imgshape
)
<
2
:
raise
TypeError
(
'imgshape must have at least two elements '
'(rows, cols)'
)
if
ndim
is
None
:
ndim
=
len
(
imgshape
)
-
2
if
len
(
imgshape
)
<
ndim
:
raise
TypeError
(
'imgshape must have at least {} dimensions'
.
format
(
ndim
))
if
st
is
None
:
st
=
ds
r
,
c
=
imgshape
[
-
2
:]
r
=
tensor
.
extract_constant
(
r
)
c
=
tensor
.
extract_constant
(
c
)
if
padding
[
0
]:
r
=
r
+
padding
[
0
]
*
2
if
padding
[
1
]:
c
=
c
+
padding
[
1
]
*
2
if
ignore_border
:
if
ds
[
0
]
==
st
[
0
]:
nr
=
r
//
st
[
0
]
else
:
out_r
=
(
r
-
ds
[
0
])
//
st
[
0
]
+
1
if
isinstance
(
r
,
theano
.
Variable
):
nr
=
tensor
.
maximum
(
out_r
,
0
)
if
padding
is
None
:
padding
=
(
0
,)
*
ndim
patch_shape
=
tuple
(
tensor
.
extract_constant
(
imgshape
[
-
ndim
+
i
])
+
padding
[
i
]
*
2
for
i
in
xrange
(
ndim
))
def
compute_out
(
v
,
downsample
,
stride
):
if
ignore_border
:
if
downsample
==
stride
:
return
v
//
stride
else
:
nr
=
numpy
.
maximum
(
out_r
,
0
)
if
ds
[
1
]
==
st
[
1
]:
nc
=
c
//
st
[
1
]
out
=
(
v
-
downsample
)
//
stride
+
1
if
isinstance
(
out
,
theano
.
Variable
):
return
tensor
.
maximum
(
out
,
0
)
else
:
return
numpy
.
maximum
(
out
,
0
)
else
:
out_c
=
(
c
-
ds
[
1
])
//
st
[
1
]
+
1
if
isinstance
(
c
,
theano
.
Variable
):
nc
=
tensor
.
maximum
(
out_c
,
0
)
if
isinstance
(
v
,
theano
.
Variable
):
return
tensor
.
switch
(
tensor
.
ge
(
stride
,
downsample
),
(
v
-
1
)
//
stride
+
1
,
tensor
.
maximum
(
0
,
(
v
-
1
-
downsample
)
//
stride
+
1
)
+
1
)
elif
stride
>=
downsample
:
return
(
v
-
1
)
//
stride
+
1
else
:
nc
=
numpy
.
maximum
(
out_c
,
0
)
else
:
if
isinstance
(
r
,
theano
.
Variable
):
nr
=
tensor
.
switch
(
tensor
.
ge
(
st
[
0
],
ds
[
0
]),
(
r
-
1
)
//
st
[
0
]
+
1
,
tensor
.
maximum
(
0
,
(
r
-
1
-
ds
[
0
])
//
st
[
0
]
+
1
)
+
1
)
elif
st
[
0
]
>=
ds
[
0
]:
nr
=
(
r
-
1
)
//
st
[
0
]
+
1
else
:
nr
=
max
(
0
,
(
r
-
1
-
ds
[
0
]
+
st
[
0
])
//
st
[
0
])
+
1
if
isinstance
(
c
,
theano
.
Variable
):
nc
=
tensor
.
switch
(
tensor
.
ge
(
st
[
1
],
ds
[
1
]),
(
c
-
1
)
//
st
[
1
]
+
1
,
tensor
.
maximum
(
0
,
(
c
-
1
-
ds
[
1
])
//
st
[
1
]
+
1
)
+
1
)
elif
st
[
1
]
>=
ds
[
1
]:
nc
=
(
c
-
1
)
//
st
[
1
]
+
1
else
:
nc
=
max
(
0
,
(
c
-
1
-
ds
[
1
]
+
st
[
1
])
//
st
[
1
])
+
1
return
max
(
0
,
(
v
-
1
-
downsample
+
stride
)
//
stride
)
+
1
out_shape
=
[
compute_out
(
patch_shape
[
i
],
ds
[
i
],
st
[
i
])
for
i
in
xrange
(
ndim
)]
rval
=
list
(
imgshape
[:
-
2
])
+
[
nr
,
nc
]
rval
=
list
(
imgshape
[:
-
ndim
])
+
out_shape
return
rval
def
__init__
(
self
,
ignore_border
=
False
,
mode
=
'max'
,
openmp
=
None
):
def
__init__
(
self
,
ignore_border
=
False
,
mode
=
'max'
,
ndim
=
None
,
openmp
=
None
):
super
(
Pool
,
self
)
.
__init__
(
openmp
=
openmp
)
self
.
ndim
=
ndim
self
.
ignore_border
=
ignore_border
if
mode
not
in
[
'max'
,
'average_inc_pad'
,
'average_exc_pad'
,
'sum'
]:
raise
ValueError
(
...
...
@@ -244,6 +270,7 @@ class Pool(OpenMPOp):
def
prepare_node
(
self
,
node
,
storage_map
,
compute_map
):
if
len
(
node
.
inputs
)
==
1
:
# Old interface
self
.
ndim
=
len
(
node
.
op
.
ds
)
self
.
mode
=
node
.
op
.
mode
ws
=
theano
.
tensor
.
constant
(
node
.
op
.
ds
)
st
=
theano
.
tensor
.
constant
(
node
.
op
.
st
)
...
...
@@ -270,17 +297,22 @@ class Pool(OpenMPOp):
storage_map
[
pad
]
=
[
None
]
compute_map
[
pad
]
=
[
False
]
def
make_node
(
self
,
x
,
ws
,
stride
=
None
,
pad
=
(
0
,
0
)
):
def
make_node
(
self
,
x
,
ws
,
stride
=
None
,
pad
=
None
):
# TODO: consider restricting the dtype?
x
=
tensor
.
as_tensor_variable
(
x
)
nd
=
self
.
ndim
if
nd
is
None
:
nd
=
x
.
type
.
ndim
-
2
if
stride
is
None
:
stride
=
ws
if
isinstance
(
pad
,
(
tuple
,
list
)):
if
tuple
(
pad
)
!=
(
0
,
0
)
and
not
self
.
ignore_border
:
if
pad
is
None
:
pad
=
(
0
,)
*
nd
elif
isinstance
(
pad
,
(
tuple
,
list
)):
if
max
(
pad
)
!=
0
and
not
self
.
ignore_border
:
raise
NotImplementedError
(
'padding works only with ignore_border=True'
)
if
isinstance
(
ws
,
(
tuple
,
list
)):
if
pad
[
0
]
>=
ws
[
0
]
or
pad
[
1
]
>=
ws
[
1
]
:
if
any
(
pad
[
i
]
>=
ws
[
i
]
for
i
in
range
(
nd
))
:
raise
NotImplementedError
(
'padding_h and padding_w must be smaller than strides'
)
ws
=
tensor
.
as_tensor_variable
(
ws
)
...
...
@@ -289,7 +321,7 @@ class Pool(OpenMPOp):
assert
ws
.
ndim
==
1
assert
stride
.
ndim
==
1
assert
pad
.
ndim
==
1
if
x
.
type
.
ndim
!=
4
:
if
x
.
type
.
ndim
<
nd
:
raise
TypeError
()
if
not
ws
.
dtype
.
startswith
(
'int'
):
raise
TypeError
(
'Pool downsample parameters must be ints.'
)
...
...
@@ -298,42 +330,36 @@ class Pool(OpenMPOp):
if
not
pad
.
dtype
.
startswith
(
'int'
):
raise
TypeError
(
'Padding parameters must be ints.'
)
# If the input shape are broadcastable we can have 0 in the output shape
broad
=
x
.
broadcastable
[:
2
]
+
(
False
,
False
)
broad
=
x
.
broadcastable
[:
-
nd
]
+
(
False
,)
*
nd
out
=
tensor
.
TensorType
(
x
.
dtype
,
broad
)
return
gof
.
Apply
(
self
,
[
x
,
ws
,
stride
,
pad
],
[
out
()])
def
perform
(
self
,
node
,
inp
,
out
):
x
,
ws
,
stride
,
pad
=
inp
z
,
=
out
assert
ws
.
shape
==
stride
.
shape
==
pad
.
shape
==
(
2
,)
if
len
(
x
.
shape
)
!=
4
:
nd
=
self
.
ndim
if
nd
is
None
:
nd
=
len
(
x
.
shape
)
-
2
assert
ws
.
shape
==
stride
.
shape
==
pad
.
shape
==
(
nd
,)
if
len
(
x
.
shape
)
<
nd
:
raise
NotImplementedError
(
'Pool requires
4D input for now'
)
z_shape
=
self
.
out_shape
(
x
.
shape
,
ws
,
self
.
ignore_border
,
stride
,
pad
)
'Pool requires
input with {} or more dimensions'
.
format
(
nd
)
)
z_shape
=
self
.
out_shape
(
x
.
shape
,
ws
,
self
.
ignore_border
,
stride
,
pad
,
nd
)
if
not
self
.
ignore_border
:
assert
z_shape
[
2
]
>
0
assert
z_shape
[
3
]
>
0
assert
all
(
z
>
0
for
z
in
z_shape
[
-
nd
:])
if
(
z
[
0
]
is
None
)
or
(
z
[
0
]
.
shape
!=
z_shape
):
z
[
0
]
=
numpy
.
empty
(
z_shape
,
dtype
=
x
.
dtype
)
zz
=
z
[
0
]
# number of pooling output rows
pr
=
zz
.
shape
[
-
2
]
# number of pooling output cols
pc
=
zz
.
shape
[
-
1
]
ws0
,
ws1
=
ws
st0
,
st1
=
stride
pad_h
=
pad
[
0
]
pad_w
=
pad
[
1
]
img_rows
=
x
.
shape
[
-
2
]
+
2
*
pad_h
img_cols
=
x
.
shape
[
-
1
]
+
2
*
pad_w
# size of pooling output
pool_out_shp
=
zz
.
shape
[
-
nd
:]
img_shp
=
tuple
(
x
.
shape
[
-
nd
+
i
]
+
2
*
pad
[
i
]
for
i
in
xrange
(
nd
))
inc_pad
=
self
.
mode
==
'average_inc_pad'
# pad the image
if
(
pad_h
,
pad_w
)
!=
(
0
,
0
):
y
=
numpy
.
zeros
(
(
x
.
shape
[
0
],
x
.
shape
[
1
],
img_rows
,
img_cols
),
dtype
=
x
.
dtype
)
y
[:,
:,
pad_h
:(
img_rows
-
pad_h
),
pad_w
:(
img_cols
-
pad_w
)]
=
x
if
max
(
pad
)
!=
0
:
y
=
numpy
.
zeros
(
x
.
shape
[:
-
nd
]
+
img_shp
,
dtype
=
x
.
dtype
)
y
[(
slice
(
None
),)
*
(
len
(
x
.
shape
)
-
nd
)
+
tuple
(
slice
(
pad
[
i
],
img_shp
[
i
]
-
pad
[
i
])
for
i
in
xrange
(
nd
))]
=
x
else
:
y
=
x
func
=
numpy
.
max
...
...
@@ -342,28 +368,30 @@ class Pool(OpenMPOp):
elif
self
.
mode
!=
'max'
:
func
=
numpy
.
average
for
n
in
xrange
(
x
.
shape
[
0
]):
for
k
in
xrange
(
x
.
shape
[
1
]):
for
r
in
xrange
(
pr
):
row_st
=
r
*
st0
row_end
=
builtins
.
min
(
row_st
+
ws0
,
img_rows
)
if
not
inc_pad
:
row_st
=
builtins
.
max
(
row_st
,
pad_h
)
row_end
=
builtins
.
min
(
row_end
,
x
.
shape
[
-
2
]
+
pad_h
)
for
c
in
xrange
(
pc
):
col_st
=
c
*
st1
col_end
=
builtins
.
min
(
col_st
+
ws1
,
img_cols
)
if
not
inc_pad
:
col_st
=
builtins
.
max
(
col_st
,
pad_w
)
col_end
=
builtins
.
min
(
col_end
,
x
.
shape
[
-
1
]
+
pad_w
)
zz
[
n
,
k
,
r
,
c
]
=
func
(
y
[
n
,
k
,
row_st
:
row_end
,
col_st
:
col_end
])
# precompute the region boundaries for each dimension
region_slices
=
[[]
for
i
in
xrange
(
nd
)]
for
i
in
xrange
(
nd
):
for
j
in
xrange
(
pool_out_shp
[
i
]):
start
=
j
*
stride
[
i
]
end
=
builtins
.
min
(
start
+
ws
[
i
],
img_shp
[
i
])
if
not
inc_pad
:
start
=
builtins
.
max
(
start
,
pad
[
i
])
end
=
builtins
.
min
(
end
,
img_shp
[
i
]
-
pad
[
i
])
region_slices
[
i
]
.
append
(
slice
(
start
,
end
))
# iterate over non-pooling dimensions
for
k
in
numpy
.
ndindex
(
*
x
.
shape
[:
-
nd
]):
zzk
=
zz
[
k
]
yk
=
y
[
k
]
# iterate over pooling regions
for
r
in
numpy
.
ndindex
(
*
pool_out_shp
):
zzk
[
r
]
=
func
(
yk
[[
region_slices
[
i
][
r
[
i
]]
for
i
in
xrange
(
nd
)]])
def
infer_shape
(
self
,
node
,
in_shapes
):
ws
,
stride
,
pad
=
[
node
.
inputs
[
1
],
node
.
inputs
[
2
],
node
.
inputs
[
3
]]
shp
=
self
.
out_shape
(
in_shapes
[
0
],
ws
,
self
.
ignore_border
,
stride
,
pad
)
pad
,
self
.
ndim
)
return
[
shp
]
def
grad
(
self
,
inp
,
grads
):
...
...
@@ -372,10 +400,12 @@ class Pool(OpenMPOp):
disc
=
[
DisconnectedType
()()
for
i
in
inp
[
1
:]]
if
self
.
mode
==
'max'
:
maxout
=
self
(
x
,
ws
,
stride
,
pad
)
return
[
MaxPoolGrad
(
ignore_border
=
self
.
ignore_border
)(
return
[
MaxPoolGrad
(
ndim
=
self
.
ndim
,
ignore_border
=
self
.
ignore_border
)(
x
,
maxout
,
gz
,
ws
=
ws
,
stride
=
stride
,
pad
=
pad
)]
+
disc
else
:
return
[
AveragePoolGrad
(
ignore_border
=
self
.
ignore_border
,
return
[
AveragePoolGrad
(
ndim
=
self
.
ndim
,
ignore_border
=
self
.
ignore_border
,
mode
=
self
.
mode
)(
x
,
gz
,
ws
=
ws
,
stride
=
stride
,
pad
=
pad
)]
+
disc
...
...
@@ -392,292 +422,386 @@ class Pool(OpenMPOp):
raise
theano
.
gof
.
utils
.
MethodNotDefined
()
x
,
ws
,
stride
,
pad
=
inp
z
,
=
out
nd
=
self
.
ndim
if
nd
is
None
:
nd
=
node
.
inputs
[
0
]
.
ndim
-
2
total_ndim
=
node
.
inputs
[
0
]
.
ndim
non_pool_ndim
=
total_ndim
-
nd
fail
=
sub
[
'fail'
]
ignore_border
=
int
(
self
.
ignore_border
)
if
self
.
openmp
:
omp_parallel
=
'#pragma omp parallel for private(r_st, r_end, c_st, c_end, collector) schedule(static)'
# run in parallel over each pooling block
omp_parallel
=
'#pragma omp parallel for private(r_st, r_end, r_idx, i_idx, o_idx, collector) schedule(static)'
else
:
omp_parallel
=
''
ccode
=
"""
int ws0, ws1, st0, st1, pd0, pd1;
int typenum = PyArray_ObjectType((PyObject*)
%(x)
s, 0);
int z_r, z_c; // shape of the output
int r, c; // shape of the padded_input
if(PyArray_DIM(
%(ws)
s, 0)!=2)
if(PyArray_NDIM(
%(x)
s)!=
%(total_ndim)
s)
{
PyErr_SetString(PyExc_ValueError, "
ws must be a vector of size 2
");
PyErr_SetString(PyExc_ValueError, "
x must be a
%(total_ndim)
sD ndarray
");
%(fail)
s;
}
if(PyArray_DIM(
%(
stride)
s, 0)!=2
)
if(PyArray_DIM(
%(
ws)
s, 0)!=
%(nd)
s
)
{
PyErr_SetString(PyExc_ValueError, "
stride must be a vector of size 2
");
PyErr_SetString(PyExc_ValueError, "
ws must be a vector of size
%(nd)
s
");
%(fail)
s;
}
if(PyArray_DIM(
%(
pad)
s, 0)!=2
)
if(PyArray_DIM(
%(
stride)
s, 0)!=
%(nd)
s
)
{
PyErr_SetString(PyExc_ValueError, "
pad must be a vector of size 2
");
PyErr_SetString(PyExc_ValueError, "
stride must be a vector of size
%(nd)
s
");
%(fail)
s;
}
// Getting ws, stride and pad
ws0 = *((npy_intp*)PyArray_GETPTR1(
%(ws)
s, 0));
ws1 = *((npy_intp*)PyArray_GETPTR1(
%(ws)
s, 1));
st0 = *((npy_intp*)PyArray_GETPTR1(
%(stride)
s, 0));
st1 = *((npy_intp*)PyArray_GETPTR1(
%(stride)
s, 1));
pd0 = *((npy_intp*)PyArray_GETPTR1(
%(pad)
s, 0));
pd1 = *((npy_intp*)PyArray_GETPTR1(
%(pad)
s, 1));
if(PyArray_NDIM(
%(x)
s)!=4)
if(PyArray_DIM(
%(pad)
s, 0)!=
%(nd)
s)
{
PyErr_SetString(PyExc_ValueError, "x must be a 4d ndarray");
PyErr_SetString(PyExc_ValueError, "pad must be a vector of size
%(nd)
s");
%(fail)
s;
}
int z[
%(nd)
s]; // shape of the output
int r[
%(nd)
s]; // shape of the padded_input
int ws[
%(nd)
s];
int st[
%(nd)
s];
int pd[
%(nd)
s];
int nonzero_padding;
nonzero_padding = 0;
for (int i=0; i<
%(nd)
s; i++)
{
ws[i] = *((npy_intp*)PyArray_GETPTR1(
%(ws)
s, i));
st[i] = *((npy_intp*)PyArray_GETPTR1(
%(stride)
s, i));
pd[i] = *((npy_intp*)PyArray_GETPTR1(
%(pad)
s, i));
r[i] = PyArray_DIMS(
%(x)
s)[
%(non_pool_ndim)
s + i] + 2 * pd[i];
if (pd[i]>0)
nonzero_padding = 1;
}
if (!
%(ignore_border)
s && nonzero_padding)
{
PyErr_SetString(PyExc_ValueError,
"padding must be zero when ignore border is False");
%(fail)
s;
}
r = PyArray_DIMS(
%(x)
s)[2];
c = PyArray_DIMS(
%(x)
s)[3];
r += pd0 * 2;
c += pd1 * 2;
if (pd0 != 0 && pd1 != 0 && !
%(ignore_border)
s)
{
PyErr_SetString(PyExc_ValueError,
"padding must be (0,0) when ignore border is False");
%(fail)
s;
}
if (
%(ignore_border)
s)
{
// '/' in C is different from '/' in python
if (r - ws0 < 0)
for (int i=0; i<
%(nd)
s; i++)
{
z_r = 0;
}
else
{
z_r = (r - ws0) / st0 + 1;
}
if (c - ws1 < 0)
{
z_c = 0;
}
else
{
z_c = (c - ws1) / st1 + 1;
// '/' in C is different from '/' in python
if (r[i] - ws[i] < 0)
{
z[i] = 0;
}
else
{
z[i] = (r[i] - ws[i]) / st[i] + 1;
}
}
}
else
{
// decide how many rows the output has
if (st0 >= ws0)
for (int i=0; i<
%(nd)
s; i++)
{
z_r = (r - 1) / st0 + 1;
}
else
{
z_r = std::max(0, (r - 1 - ws0 + st0) / st0) + 1;
// decide how many rows/cols the output has
if (st[i] >= ws[i])
{
z[i] = (r[i] - 1) / st[i] + 1;
}
else
{
z[i] = std::max(0, (r[i] - 1 - ws[i] + st[i]) / st[i]) + 1;
}
assert(z[i] > 0);
}
// decide how many columns the output has
if (st1 >= ws1)
}
// memory allocation of z if necessary
int mem_nec;
mem_nec = 0;
if ((!
%(z)
s) || *PyArray_DIMS(
%(z)
s)!=
%(total_ndim)
s)
{
mem_nec = 1;
}
if (!mem_nec)
{
for (int i=0; i<
%(non_pool_ndim)
s; i++)
{
z_c = (c - 1) / st1 + 1;
if (PyArray_DIMS(
%(z)
s)[i] != PyArray_DIMS(
%(x)
s)[i])
{
mem_nec = 1;
break;
}
}
else
}
if (!mem_nec)
{
for (int i=0; i<
%(nd)
s; i++)
{
z_c = std::max(0, (c - 1 - ws1 + st0) / st1) + 1;
if (PyArray_DIMS(
%(z)
s)[
%(non_pool_ndim)
s + i] != z[i])
{
mem_nec = 1;
break;
}
}
assert(z_r > 0);
assert(z_c > 0);
}
// memory allocation of z if necessary
if ((!
%(z)
s)
|| *PyArray_DIMS(
%(z)
s)!=4
||(PyArray_DIMS(
%(z)
s)[0] != PyArray_DIMS(
%(x)
s)[0])
||(PyArray_DIMS(
%(z)
s)[1] != PyArray_DIMS(
%(x)
s)[1])
||(PyArray_DIMS(
%(z)
s)[2] != z_r)
||(PyArray_DIMS(
%(z)
s)[3] != z_c)
)
if (mem_nec)
{
if (
%(z)
s) Py_XDECREF(
%(z)
s);
npy_intp dims[4] = {0,0,0,0};
dims[0]=PyArray_DIMS(
%(x)
s)[0];
dims[1]=PyArray_DIMS(
%(x)
s)[1];
dims[2]=z_r;
dims[3]=z_c;
npy_intp dims[
%(total_ndim)
s];
for (int i=0; i<
%(non_pool_ndim)
s; i++)
{
dims[i] = PyArray_DIMS(
%(x)
s)[i];
}
for (int i=0; i<
%(nd)
s; i++)
{
dims[
%(non_pool_ndim)
s + i] = z[i];
}
//TODO: zeros not necessary
%(z)
s = (PyArrayObject*) PyArray_ZEROS(4, dims, typenum,0);
%(z)
s = (PyArrayObject*) PyArray_ZEROS(
%(total_ndim)
s, dims, typenum,0);
}
// initialize temp var for the value in a region
dtype_
%(x)
s collector;
int z_prod;
// do not run if any z[i] is zero
z_prod = 1;
for (int i=0; i<
%(nd)
s; i++)
{
z_prod *= z[i];
}
// used for indexing a pool region inside the input
dtype_
%(x)
s collector; // temp var for the value in a region
if (z_r && z_c)
if (z_prod)
{
int r_st, r_end, c_st, c_end;
// will be used to hold start and end index of a region
int r_st[
%(nd)
s];
int r_end[
%(nd)
s];
// index for iterating over the pooling regions
int r_idx[
%(nd)
s];
// placeholder for PyArray indexing (output)
long int o_idx[
%(total_ndim)
s];
// placeholder for PyArray indexing (input)
long int i_idx[
%(total_ndim)
s];
// loop over non-pooling dimensions
int non_pooling_prod = 1;
for (int i=0; i<
%(non_pool_ndim)
s; i++)
{
non_pooling_prod *= PyArray_DIMS(
%(x)
s)[i];
}
%(omp_parallel)
s
for(int t = 0; t < PyArray_DIMS(
%(x)
s)[0] * PyArray_DIMS(
%(x)
s)[1]; t++){
int b = t
%%
PyArray_DIMS(
%(x)
s)[0];
int k = t / PyArray_DIMS(
%(x)
s)[0];
for(int i=0; i < z_r; i++){
r_st = i * st0;
r_end = r_st + ws0;
// first loop over non-pooling dimensions
for (int t=0; t<non_pooling_prod; t++)
{
// compute the non-pooling index in each dimension
if (
%(non_pool_ndim)
s!=0)
{
o_idx[0] = t;
i_idx[0] = t;
for (int i=1; i<
%(non_pool_ndim)
s; i++)
{
o_idx[i] = o_idx[i - 1] / PyArray_DIMS(
%(x)
s)[i - 1];
o_idx[i - 1] = o_idx[i - 1]
%%
PyArray_DIMS(
%(x)
s)[i - 1];
i_idx[i] = o_idx[i];
i_idx[i - 1] = o_idx[i - 1];
}
}
// then loop over each region in each pooling dimension
"""
for
i
in
xrange
(
nd
):
ccode
+=
"""
for (r_idx[
%(i)
s]=0; r_idx[
%(i)
s] < z[
%(i)
s]; r_idx[
%(i)
s]++) {
r_st[
%(i)
s] = r_idx[
%(i)
s] * st[
%(i)
s];
r_end[
%(i)
s] = r_st[
%(i)
s] + ws[
%(i)
s];
// skip the padding
r_st
= r_st < pd0 ? pd0 : r_st
;
r_end
= r_end > (r - pd0) ? r - pd0 : r_end
;
r_st
[
%(i)
s] = r_st[
%(i)
s] < pd[
%(i)
s] ? pd[
%(i)
s] : r_st[
%(i)
s]
;
r_end
[
%(i)
s] = r_end[
%(i)
s] > (r[
%(i)
s] - pd[
%(i)
s]) ? r[
%(i)
s] - pd[
%(i)
s] : r_end[
%(i)
s]
;
// from padded_img space to img space
r_st
-= pd0
;
r_end
-= pd0
;
r_st
[
%(i)
s] -= pd[
%(i)
s]
;
r_end
[
%(i)
s] -= pd[
%(i)
s]
;
// handle the case where no padding, ignore border is True
if (
%(ignore_border)
s)
{
r_end
= r_end > r ? r : r_end
;
r_end
[
%(i)
s] = r_end[
%(i)
s] > r[
%(i)
s] ? r[
%(i)
s] : r_end[
%(i)
s]
;
}
for(int j=0; j<z_c; j++){
c_st = j * st1;
c_end = c_st + ws1;
// skip the padding
c_st = c_st < pd1 ? pd1 : c_st;
c_end = c_end > (c - pd1) ? c - pd1 : c_end;
dtype_
%(z)
s * z = (
(dtype_
%(z)
s*)(PyArray_GETPTR4(
%(z)
s, b, k, i, j)));
// change coordinates from padding_img space into img space
c_st -= pd1;
c_end -= pd1;
// handle the case where no padding, ignore border is True
if (
%(ignore_border)
s)
{
c_end = c_end > c ? c : c_end;
}
// use the index to find the correct position in the output
o_idx[
%(non_pool_ndim)
s +
%(i)
s] = r_idx[
%(i)
s];
"""
%
dict
(
i
=
i
,
ignore_border
=
ignore_border
,
non_pool_ndim
=
non_pool_ndim
)
ccode
+=
"""
// get a pointer to the correct position in the output
dtype_
%(z)
s * z;
if (
%(total_ndim)
s == 4)
z = ((dtype_
%(z)
s*)(PyArray_GETPTR4(
%(z)
s, o_idx[0], o_idx[1], o_idx[2], o_idx[3])));
else
z = ((dtype_
%(z)
s*)(PyArray_GetPtr(
%(z)
s, o_idx)));
"""
if
self
.
mode
==
'max'
:
for
i
in
xrange
(
nd
):
ccode
+=
"""
// set the first index of dimension
%(i)
s
i_idx[
%(non_pool_ndim)
s +
%(i)
s] = r_st[
%(i)
s];
"""
%
dict
(
i
=
i
,
non_pool_ndim
=
non_pool_ndim
)
ccode
+=
"""
// use the first element as the initial value of collector
collector = ((dtype_
%(x)
s*)(PyArray_GETPTR4(
%(x)
s,b,k,r_st,c_st)))[0];
// go through the pooled region in the unpadded input
for(int m=r_st; m<r_end; m++)
{
for(int n=c_st; n<c_end; n++)
{
dtype_
%(x)
s a = ((dtype_
%(x)
s*)(PyArray_GETPTR4(
%(x)
s,b,k,m,n)))[0];
collector = (a > collector) ? a : collector;
}
}
z[0] = collector;
// use the first element as the initial value of collector
if (
%(total_ndim)
s == 4)
collector = ((dtype_
%(x)
s*)(PyArray_GETPTR4(
%(x)
s,i_idx[0],i_idx[1],i_idx[2],i_idx[3])))[0];
else
collector = ((dtype_
%(x)
s*)(PyArray_GetPtr(
%(x)
s,i_idx)))[0];
"""
for
i
in
xrange
(
nd
):
ccode
+=
"""
// go through the pooled region in the unpadded input
for(int m
%(i)
s=r_st[
%(i)
s]; m
%(i)
s<r_end[
%(i)
s]; m
%(i)
s++)
{
i_idx[
%(non_pool_ndim)
s +
%(i)
s] = m
%(i)
s;
"""
%
dict
(
i
=
i
,
non_pool_ndim
=
non_pool_ndim
)
ccode
+=
"""
// update maximum
dtype_
%(x)
s a;
if (
%(total_ndim)
s == 4)
a = ((dtype_
%(x)
s*)(PyArray_GETPTR4(
%(x)
s,i_idx[0],i_idx[1],i_idx[2],i_idx[3])))[0];
else
a = ((dtype_
%(x)
s*)(PyArray_GetPtr(
%(x)
s,i_idx)))[0];
collector = (a > collector) ? a : collector;
"""
for
i
in
xrange
(
nd
):
ccode
+=
"""
} // for loop over region
"""
ccode
+=
"""
z[0] = collector;
"""
elif
self
.
mode
in
(
'sum'
,
'average_exc_pad'
,
'average_inc_pad'
):
ccode
+=
"""
// initialize the sum at zero
collector = ((dtype_
%(x)
s)(0));
// go through the pooled region in the unpadded input
for(int m=r_st; m<r_end; m++)
{
for(int n=c_st; n<c_end; n++)
{
dtype_
%(x)
s a = ((dtype_
%(x)
s*)(PyArray_GETPTR4(
%(x)
s,b,k,m,n)))[0];
collector += a;
}
}
// initialize the sum at zero
collector = ((dtype_
%(x)
s)(0));
"""
for
i
in
xrange
(
nd
):
ccode
+=
"""
// go through the pooled region in the unpadded input
for(int m
%(i)
s=r_st[
%(i)
s]; m
%(i)
s<r_end[
%(i)
s]; m
%(i)
s++)
{
i_idx[
%(non_pool_ndim)
s +
%(i)
s] = m
%(i)
s;
"""
%
dict
(
i
=
i
,
non_pool_ndim
=
non_pool_ndim
)
ccode
+=
"""
// update sum
dtype_
%(x)
s a;
if (
%(total_ndim)
s == 4)
a = ((dtype_
%(x)
s*)(PyArray_GETPTR4(
%(x)
s,i_idx[0],i_idx[1],i_idx[2],i_idx[3])))[0];
else
a = ((dtype_
%(x)
s*)(PyArray_GetPtr(
%(x)
s,i_idx)))[0];
collector += a;
"""
for
i
in
xrange
(
nd
):
ccode
+=
"""
} // for loop over region
"""
if
self
.
mode
==
"sum"
:
ccode
+=
"""
z[0] = collector;
z[0] = collector;
"""
elif
self
.
mode
==
'average_inc_pad'
and
self
.
ignore_border
:
# region size = product over all pooling dimensions
region_size
=
' * '
.
join
(
'ws[
%
d]'
%
i
for
i
in
xrange
(
nd
))
ccode
+=
"""
z[0] = collector / (ws0 * ws1
);
"""
z[0] = collector / (
%(region_size)
s
);
"""
%
dict
(
region_size
=
region_size
)
else
:
# region size = number elements of in this region
region_size
=
' * '
.
join
(
'(r_end[
%
d]-r_st[
%
d])'
%
(
i
,
i
)
for
i
in
xrange
(
nd
))
ccode
+=
"""
z[0] = collector / ((r_end-r_st)*(c_end-c_st));
"""
z[0] = collector / (
%(region_size)
s);
"""
%
dict
(
region_size
=
region_size
)
for
i
in
xrange
(
nd
):
ccode
+=
"""
} // loop over pooling dimension
"""
ccode
+=
"""
}
}
}
}
} // for loop over non-pooling dimensions
} // if z_prod
"""
return
ccode
%
locals
()
def
c_code_cache_version
(
self
):
return
(
0
,
6
,
8
,
6
,
self
.
openmp
)
return
(
0
,
6
,
8
,
7
,
self
.
openmp
)
class
PoolGrad
(
OpenMPOp
):
__props__
=
(
'ignore_border'
,
'mode'
)
__props__
=
(
'
ndim'
,
'
ignore_border'
,
'mode'
)
@staticmethod
def
out_shape
(
imgshape
,
ds
,
ignore_border
=
False
,
st
=
None
,
padding
=
(
0
,
0
)
):
def
out_shape
(
imgshape
,
ds
,
ignore_border
=
False
,
st
=
None
,
padding
=
None
,
ndim
=
None
):
"""Return the shape of the output from this op, for input of given
shape and flags.
Parameters
----------
imgshape : tuple of integers or scalar Theano variables
the shape of a tensor of images. The last
two
elements are
interpreted as the
number of rows, and the number of col
s.
ds : tuple of
two
ints
the shape of a tensor of images. The last
N
elements are
interpreted as the
downsampling dimension
s.
ds : tuple of
N
ints
downsample factor over rows and columns this parameter
indicates the size of the pooling region
st : tuple of two ints
the stride size. This is the distance between the pooling
regions. If it's set to None, in which case it equlas ds.
ignore_border : bool
if ds doesn't divide imgshape, do we include an extra
row/col of partial downsampling (False) or ignore it
(True).
padding : tuple of two ints
(pad_h, pad_w), pad zeros to extend beyond four borders of
the images, pad_h is the size of the top and bottom
margins, and pad_w is the size of the left and right
margins.
If ds doesn't divide imgshape, do we include an extra row/col/slice
of partial downsampling (False) or ignore it (True).
st : list or tuple of N ints or None
Stride size, which is the number of shifts over rows/cols/slices to get the
next pool region. If st is None, it is considered equal to ds
(no overlap on pooling regions).
padding : tuple of N ints or None
For each downsampling dimension, this specifies the number of zeros to
add as padding on both sides. For 2D and (pad_h, pad_w), pad_h specifies the
size of the top and bottom margins, pad_w specifies the size of the left and
right margins. No padding is added if padding is None.
ndim : int
The number of pooling dimensions N.
If this number is not specified, the default is set to the
(input.ndim - 2), assuming that the first two dimensions of the input
are non-pooling dimensions.
Returns
-------
list :
the shape of the output from this op, for input of given
shape. This will have the same length as imgshape, but
with last
two
elements reduced as per the downsampling &
with last
N
elements reduced as per the downsampling &
ignore_border flags.
"""
if
len
(
imgshape
)
<
2
:
raise
TypeError
(
'imgshape must have at least two elements '
'(rows, cols)'
)
if
ndim
is
None
:
ndim
=
len
(
imgshape
)
-
2
if
len
(
imgshape
)
<
ndim
:
raise
TypeError
(
'imgshape must have at least {} dimensions'
.
format
(
ndim
))
if
st
is
None
:
st
=
ds
r
,
c
=
imgshape
[
-
2
:]
r
+=
padding
[
0
]
*
2
c
+=
padding
[
1
]
*
2
if
ignore_border
:
out_r
=
(
r
-
ds
[
0
])
//
st
[
0
]
+
1
out_c
=
(
c
-
ds
[
1
])
//
st
[
1
]
+
1
if
isinstance
(
r
,
theano
.
Variable
):
nr
=
tensor
.
maximum
(
out_r
,
0
)
else
:
nr
=
numpy
.
maximum
(
out_r
,
0
)
if
isinstance
(
c
,
theano
.
Variable
):
nc
=
tensor
.
maximum
(
out_c
,
0
)
else
:
nc
=
numpy
.
maximum
(
out_c
,
0
)
else
:
if
isinstance
(
r
,
theano
.
Variable
):
nr
=
tensor
.
switch
(
tensor
.
ge
(
st
[
0
],
ds
[
0
]),
(
r
-
1
)
//
st
[
0
]
+
1
,
tensor
.
maximum
(
0
,
(
r
-
1
-
ds
[
0
])
//
st
[
0
]
+
1
)
+
1
)
elif
st
[
0
]
>=
ds
[
0
]:
nr
=
(
r
-
1
)
//
st
[
0
]
+
1
else
:
nr
=
max
(
0
,
(
r
-
1
-
ds
[
0
])
//
st
[
0
]
+
1
)
+
1
if
isinstance
(
c
,
theano
.
Variable
):
nc
=
tensor
.
switch
(
tensor
.
ge
(
st
[
1
],
ds
[
1
]),
(
c
-
1
)
//
st
[
1
]
+
1
,
tensor
.
maximum
(
0
,
(
c
-
1
-
ds
[
1
])
//
st
[
1
]
+
1
)
+
1
)
elif
st
[
1
]
>=
ds
[
1
]:
nc
=
(
c
-
1
)
//
st
[
1
]
+
1
if
padding
is
None
:
padding
=
(
0
,)
*
ndim
patch_shape
=
tuple
(
tensor
.
extract_constant
(
imgshape
[
-
ndim
+
i
])
+
padding
[
i
]
*
2
for
i
in
xrange
(
ndim
))
def
compute_out
(
v
,
downsample
,
stride
):
if
ignore_border
:
out
=
(
v
-
downsample
)
//
stride
+
1
if
isinstance
(
out
,
theano
.
Variable
):
return
tensor
.
maximum
(
out
,
0
)
else
:
return
numpy
.
maximum
(
out
,
0
)
else
:
nc
=
max
(
0
,
(
c
-
1
-
ds
[
1
])
//
st
[
1
]
+
1
)
+
1
if
isinstance
(
v
,
theano
.
Variable
):
return
tensor
.
switch
(
tensor
.
ge
(
stride
,
downsample
),
(
v
-
1
)
//
stride
+
1
,
tensor
.
maximum
(
0
,
(
v
-
1
-
downsample
)
//
stride
+
1
)
+
1
)
elif
stride
>=
downsample
:
return
(
v
-
1
)
//
stride
+
1
else
:
return
max
(
0
,
(
v
-
1
-
downsample
)
//
stride
+
1
)
+
1
out_shape
=
[
compute_out
(
patch_shape
[
i
],
ds
[
i
],
st
[
i
])
for
i
in
xrange
(
ndim
)]
rval
=
list
(
imgshape
[:
-
2
])
+
[
nr
,
nc
]
rval
=
list
(
imgshape
[:
-
ndim
])
+
out_shape
return
rval
def
__init__
(
self
,
ignore_border
,
mode
=
'max'
,
openmp
=
None
):
def
__init__
(
self
,
ignore_border
,
mode
=
'max'
,
ndim
=
None
,
openmp
=
None
):
self
.
ndim
=
ndim
self
.
ignore_border
=
ignore_border
if
mode
not
in
[
'max'
,
'sum'
,
'average_inc_pad'
,
'average_exc_pad'
]:
raise
ValueError
(
...
...
@@ -689,6 +813,7 @@ class PoolGrad(OpenMPOp):
def
prepare_node
(
self
,
node
,
storage_map
,
compute_map
):
if
len
(
node
.
inputs
)
<
5
:
# 5 for AveragePoolGrad, 6 for MaxPoolGrad
# Old interface
self
.
ndim
=
len
(
node
.
op
.
ds
)
self
.
mode
=
node
.
op
.
mode
ws
=
theano
.
tensor
.
constant
(
node
.
op
.
ds
)
st
=
theano
.
tensor
.
constant
(
node
.
op
.
st
)
...
...
@@ -720,26 +845,32 @@ class PoolGrad(OpenMPOp):
class
MaxPoolGrad
(
PoolGrad
):
def
__init__
(
self
,
ignore_border
,
openmp
=
None
):
PoolGrad
.
__init__
(
self
,
ignore_border
,
mode
=
'max'
,
openmp
=
openmp
)
def
__init__
(
self
,
ignore_border
,
ndim
=
None
,
openmp
=
None
):
PoolGrad
.
__init__
(
self
,
ignore_border
,
mode
=
'max'
,
ndim
=
ndim
,
openmp
=
openmp
)
def
make_node
(
self
,
x
,
maxout
,
gz
,
ws
,
stride
=
None
,
pad
=
(
0
,
0
)
):
def
make_node
(
self
,
x
,
maxout
,
gz
,
ws
,
stride
=
None
,
pad
=
None
):
# make_node should only be called by the grad function of
# Pool, so these asserts should not fail.
x
=
tensor
.
as_tensor_variable
(
x
)
maxout
=
tensor
.
as_tensor_variable
(
maxout
)
gz
=
tensor
.
as_tensor_variable
(
gz
)
nd
=
self
.
ndim
if
nd
is
None
:
nd
=
x
.
ndim
-
2
if
stride
is
None
:
stride
=
ws
if
pad
is
None
:
pad
=
(
0
,)
*
nd
ws
=
tensor
.
as_tensor_variable
(
ws
)
stride
=
tensor
.
as_tensor_variable
(
stride
)
pad
=
tensor
.
as_tensor_variable
(
pad
)
assert
isinstance
(
x
,
Variable
)
and
x
.
ndim
==
4
assert
isinstance
(
maxout
,
Variable
)
and
maxout
.
ndim
==
4
assert
isinstance
(
gz
,
Variable
)
and
gz
.
ndim
==
4
assert
isinstance
(
x
,
Variable
)
and
x
.
ndim
>=
nd
assert
isinstance
(
maxout
,
Variable
)
and
maxout
.
ndim
>=
nd
assert
isinstance
(
gz
,
Variable
)
and
gz
.
ndim
>=
nd
assert
isinstance
(
ws
,
Variable
)
and
ws
.
ndim
==
1
assert
isinstance
(
stride
,
Variable
)
and
stride
.
ndim
==
1
assert
isinstance
(
pad
,
Variable
)
and
pad
.
ndim
==
1
assert
x
.
ndim
==
maxout
.
ndim
==
gz
.
ndim
>=
nd
if
not
ws
.
dtype
.
startswith
(
'int'
):
raise
TypeError
(
'Pool downsample parameters must be ints.'
)
if
not
stride
.
dtype
.
startswith
(
'int'
):
...
...
@@ -752,41 +883,51 @@ class MaxPoolGrad(PoolGrad):
assert
self
.
mode
==
'max'
x
,
maxout
,
gz
,
ws
,
stride
,
pad
=
inp
gx_stg
,
=
out
assert
ws
.
shape
==
stride
.
shape
==
pad
.
shape
==
(
2
,)
# number of pooling output rows
pr
=
maxout
.
shape
[
-
2
]
# number of pooling output cols
pc
=
maxout
.
shape
[
-
1
]
ws0
,
ws1
=
ws
st0
,
st1
=
stride
pad_h
=
pad
[
0
]
pad_w
=
pad
[
1
]
img_rows
=
x
.
shape
[
-
2
]
+
2
*
pad_h
img_cols
=
x
.
shape
[
-
1
]
+
2
*
pad_w
nd
=
self
.
ndim
if
nd
is
None
:
nd
=
len
(
x
.
shape
)
-
2
assert
ws
.
shape
==
stride
.
shape
==
pad
.
shape
==
(
nd
,)
if
len
(
x
.
shape
)
<
nd
:
raise
NotImplementedError
(
'MaxPoolGrad requires input with {} or more dimensions'
.
format
(
nd
))
pool_out_shp
=
maxout
.
shape
[
-
nd
:]
img_shp
=
tuple
(
x
.
shape
[
-
nd
+
i
]
+
2
*
pad
[
i
]
for
i
in
xrange
(
nd
))
# pad the image
if
(
pad_h
,
pad_w
)
!=
(
0
,
0
):
y
=
numpy
.
zeros
(
(
x
.
shape
[
0
],
x
.
shape
[
1
],
img_rows
,
img_cols
),
dtype
=
x
.
dtype
)
y
[:,
:,
pad_h
:(
img_rows
-
pad_h
),
pad_w
:(
img_cols
-
pad_w
)]
=
x
if
max
(
pad
)
!=
0
:
y
=
numpy
.
zeros
(
x
.
shape
[:
-
nd
]
+
img_shp
,
dtype
=
x
.
dtype
)
y
[(
slice
(
None
),)
*
(
len
(
x
.
shape
)
-
nd
)
+
tuple
(
slice
(
pad
[
i
],
img_shp
[
i
]
-
pad
[
i
])
for
i
in
xrange
(
nd
))]
=
x
else
:
y
=
x
gx
=
numpy
.
zeros_like
(
y
)
for
n
in
xrange
(
x
.
shape
[
0
]):
for
k
in
xrange
(
x
.
shape
[
1
]):
for
r
in
xrange
(
pr
):
row_st
=
builtins
.
max
(
r
*
st0
,
pad_h
)
row_end
=
builtins
.
min
(
row_st
+
ws0
,
img_rows
)
for
c
in
xrange
(
pc
):
col_st
=
builtins
.
max
(
c
*
st1
,
pad_w
)
col_end
=
builtins
.
min
(
col_st
+
ws1
,
img_cols
)
for
row_ind
in
xrange
(
row_st
,
row_end
):
for
col_ind
in
xrange
(
col_st
,
col_end
):
if
(
maxout
[
n
,
k
,
r
,
c
]
==
y
[
n
,
k
,
row_ind
,
col_ind
]):
gx
[
n
,
k
,
row_ind
,
col_ind
]
+=
gz
[
n
,
k
,
r
,
c
]
# precompute the region boundaries for each dimension
region_ranges
=
[[]
for
i
in
xrange
(
nd
)]
for
i
in
xrange
(
nd
):
for
j
in
xrange
(
pool_out_shp
[
i
]):
start
=
builtins
.
max
(
j
*
stride
[
i
],
pad
[
i
])
end
=
builtins
.
min
(
start
+
ws
[
i
],
img_shp
[
i
])
region_ranges
[
i
]
.
append
(
xrange
(
start
,
end
))
# iterate over non-pooling dimensions
for
k
in
numpy
.
ndindex
(
*
x
.
shape
[:
-
nd
]):
gxk
=
gx
[
k
]
gzk
=
gz
[
k
]
yk
=
y
[
k
]
maxoutk
=
maxout
[
k
]
# iterate over pooling regions
for
r
in
numpy
.
ndindex
(
*
pool_out_shp
):
maxout_value
=
maxoutk
[
r
]
# iterate inside region
for
c
in
itertools
.
product
(
*
[
region_ranges
[
i
][
r
[
i
]]
for
i
in
xrange
(
nd
)]):
if
maxout_value
==
yk
[
c
]:
gxk
[
c
]
+=
gzk
[
r
]
# unpad the image
gx
=
gx
[:,
:,
pad_h
:(
img_rows
-
pad_h
),
pad_w
:(
img_cols
-
pad_w
)]
gx
=
gx
[(
slice
(
None
),)
*
(
len
(
x
.
shape
)
-
nd
)
+
tuple
(
slice
(
pad
[
i
],
img_shp
[
i
]
-
pad
[
i
])
for
i
in
xrange
(
nd
))]
gx_stg
[
0
]
=
gx
def
grad
(
self
,
inp
,
grads
):
...
...
@@ -794,7 +935,8 @@ class MaxPoolGrad(PoolGrad):
ggx
,
=
grads
return
([
theano
.
tensor
.
zeros_like
(
x
),
theano
.
tensor
.
zeros_like
(
maxout
),
DownsampleFactorMaxGradGrad
(
ignore_border
=
self
.
ignore_border
)(
DownsampleFactorMaxGradGrad
(
ndim
=
self
.
ndim
,
ignore_border
=
self
.
ignore_border
)(
x
,
maxout
,
ggx
,
ws
,
stride
,
pad
)]
+
[
DisconnectedType
()()
for
i
in
inp
[
3
:]])
...
...
@@ -805,161 +947,253 @@ class MaxPoolGrad(PoolGrad):
assert
self
.
mode
==
'max'
x
,
z
,
gz
,
ws
,
stride
,
pad
=
inp
gx
,
=
out
nd
=
self
.
ndim
if
nd
is
None
:
nd
=
node
.
inputs
[
0
]
.
ndim
-
2
total_ndim
=
node
.
inputs
[
0
]
.
ndim
non_pool_ndim
=
total_ndim
-
nd
fail
=
sub
[
'fail'
]
ignore_border
=
int
(
self
.
ignore_border
)
if
self
.
openmp
:
omp_parallel
=
'#pragma omp parallel for private(r_st, r_end, c_st, c_end, maximum) schedule(static)'
# run in parallel over each pooling block
omp_parallel
=
'#pragma omp parallel for private(r_st, r_end, r_idx, i_idx, o_idx, maximum) schedule(static)'
else
:
omp_parallel
=
''
return
"""
ccode
=
"""
// sanity checks
int x_typenum = PyArray_ObjectType((PyObject*)
%(x)
s, 0);
int z_typenum = PyArray_ObjectType((PyObject*)
%(z)
s, 0);
int gz_typenum = PyArray_ObjectType((PyObject*)
%(gz)
s, 0);
int ws0, ws1, st0, st1, pd0, pd1;
int z_r, z_c;
int r, c; // shape of the padded_input
if ((x_typenum != z_typenum) || (x_typenum != gz_typenum))
{
PyErr_SetString(PyExc_ValueError, "input types must all match");
%(fail)
s;
}
if(PyArray_NDIM(
%(x)
s)!=
4
)
if(PyArray_NDIM(
%(x)
s)!=
%(total_ndim)
s
)
{
PyErr_SetString(PyExc_ValueError, "x must be a
4d
ndarray");
PyErr_SetString(PyExc_ValueError, "x must be a
%(total_ndim)
sD
ndarray");
%(fail)
s;
}
if(PyArray_NDIM(
%(z)
s)!=
4
)
if(PyArray_NDIM(
%(z)
s)!=
%(total_ndim)
s
)
{
PyErr_SetString(PyExc_ValueError, "z must be a
4d
ndarray");
PyErr_SetString(PyExc_ValueError, "z must be a
%(total_ndim)
sD
ndarray");
%(fail)
s;
}
if(PyArray_NDIM(
%(gz)
s)!=
4
)
if(PyArray_NDIM(
%(gz)
s)!=
%(total_ndim)
s
)
{
PyErr_SetString(PyExc_ValueError, "gz must be a
4d
ndarray");
PyErr_SetString(PyExc_ValueError, "gz must be a
%(total_ndim)
sD
ndarray");
%(fail)
s;
}
if(PyArray_DIM(
%(ws)
s, 0)!=
2
)
if(PyArray_DIM(
%(ws)
s, 0)!=
%(nd)
s
)
{
PyErr_SetString(PyExc_ValueError, "ws must be a vector of size
2
");
PyErr_SetString(PyExc_ValueError, "ws must be a vector of size
%(nd)
s
");
%(fail)
s;
}
if(PyArray_DIM(
%(stride)
s, 0)!=
2
)
if(PyArray_DIM(
%(stride)
s, 0)!=
%(nd)
s
)
{
PyErr_SetString(PyExc_ValueError, "stride must be a vector of size
2
");
PyErr_SetString(PyExc_ValueError, "stride must be a vector of size
%(nd)
s
");
%(fail)
s;
}
if(PyArray_DIM(
%(pad)
s, 0)!=
2
)
if(PyArray_DIM(
%(pad)
s, 0)!=
%(nd)
s
)
{
PyErr_SetString(PyExc_ValueError, "pad must be a vector of size
2
");
PyErr_SetString(PyExc_ValueError, "pad must be a vector of size
%(nd)
s
");
%(fail)
s;
}
// Getting ws, stride and pad
ws0 = *((npy_intp*)PyArray_GETPTR1(
%(ws)
s, 0));
ws1 = *((npy_intp*)PyArray_GETPTR1(
%(ws)
s, 1));
st0 = *((npy_intp*)PyArray_GETPTR1(
%(stride)
s, 0));
st1 = *((npy_intp*)PyArray_GETPTR1(
%(stride)
s, 1));
pd0 = *((npy_intp*)PyArray_GETPTR1(
%(pad)
s, 0));
pd1 = *((npy_intp*)PyArray_GETPTR1(
%(pad)
s, 1));
z_r = PyArray_DIMS(
%(z)
s)[2];
z_c = PyArray_DIMS(
%(z)
s)[3];
r = PyArray_DIMS(
%(x)
s)[2];
c = PyArray_DIMS(
%(x)
s)[3];
r += pd0 * 2;
c += pd1 * 2;
// allocating memory for gx
if ((!
%(gx)
s)
|| !PyArray_ISCONTIGUOUS(
%(gx)
s)
|| *PyArray_DIMS(
%(gx)
s)!=4
||(PyArray_DIMS(
%(gx)
s)[0] != PyArray_DIMS(
%(x)
s)[0])
||(PyArray_DIMS(
%(gx)
s)[1] != PyArray_DIMS(
%(x)
s)[1])
||(PyArray_DIMS(
%(gx)
s)[2] != PyArray_DIMS(
%(x)
s)[2])
||(PyArray_DIMS(
%(gx)
s)[3] != PyArray_DIMS(
%(x)
s)[3])
)
int z[
%(nd)
s]; // shape of the output
int r[
%(nd)
s]; // shape of the padded_input
int ws[
%(nd)
s];
int st[
%(nd)
s];
int pd[
%(nd)
s];
int nonzero_padding;
nonzero_padding = 0;
for (int i=0; i<
%(nd)
s; i++)
{
ws[i] = *((npy_intp*)PyArray_GETPTR1(
%(ws)
s, i));
st[i] = *((npy_intp*)PyArray_GETPTR1(
%(stride)
s, i));
pd[i] = *((npy_intp*)PyArray_GETPTR1(
%(pad)
s, i));
z[i] = PyArray_DIMS(
%(z)
s)[
%(non_pool_ndim)
s + i];
r[i] = PyArray_DIMS(
%(x)
s)[
%(non_pool_ndim)
s + i] + 2 * pd[i];
if (pd[i]>0)
nonzero_padding = 1;
}
// allocating memory for output, if necessary
int mem_nec;
mem_nec = 0;
if ((!
%(gx)
s) || !PyArray_ISCONTIGUOUS(
%(gx)
s)
|| *PyArray_DIMS(
%(gx)
s)!=
%(total_ndim)
s)
{
mem_nec = 1;
}
if (!mem_nec)
{
for (int i=0; i<
%(total_ndim)
s; i++)
{
if (PyArray_DIMS(
%(gx)
s)[i] != PyArray_DIMS(
%(x)
s)[i])
{
mem_nec = 1;
break;
}
}
}
if (mem_nec)
{
Py_XDECREF(
%(gx)
s);
%(gx)
s = (PyArrayObject*) PyArray_ZEROS(
4
, PyArray_DIMS(
%(x)
s), x_typenum,0);
%(gx)
s = (PyArrayObject*) PyArray_ZEROS(
%(total_ndim)
s
, PyArray_DIMS(
%(x)
s), x_typenum,0);
}
else {
PyArray_FILLWBYTE(
%(gx)
s, 0);
}
dtype_
%(z)
s maximum; // temp var for maximum value in a region
if (z_r && z_c)
int z_prod;
// do not run if any z[i] is zero
z_prod = 1;
for (int i=0; i<
%(nd)
s; i++)
{
int r_st, r_end, c_st, c_end;
z_prod *= z[i];
}
if (z_prod)
{
// will be used to hold start and end index of a region
int r_st[
%(nd)
s];
int r_end[
%(nd)
s];
// index for iterating over the pooling regions
int r_idx[
%(nd)
s];
// placeholder for PyArray indexing (output)
long int o_idx[
%(total_ndim)
s];
// placeholder for PyArray indexing (input)
long int i_idx[
%(total_ndim)
s];
// loop over non-pooling dimensions
int non_pooling_prod = 1;
for (int i=0; i<
%(non_pool_ndim)
s; i++)
{
non_pooling_prod *= PyArray_DIMS(
%(x)
s)[i];
}
%(omp_parallel)
s
for(int t = 0; t < PyArray_DIMS(
%(x)
s)[0] * PyArray_DIMS(
%(x)
s)[1]; t++){
int b = t
%%
PyArray_DIMS(
%(x)
s)[0];
int k = t / PyArray_DIMS(
%(x)
s)[0];
for(int i=0; i < z_r; i++){
r_st = i * st0;
r_end = r_st + ws0;
// first loop over non-pooling dimensions
for (int t=0; t<non_pooling_prod; t++)
{
// compute the non-pooling index in each dimension
if (
%(non_pool_ndim)
s!=0)
{
o_idx[0] = t;
i_idx[0] = t;
for (int i=1; i<
%(non_pool_ndim)
s; i++)
{
o_idx[i] = o_idx[i - 1] / PyArray_DIMS(
%(x)
s)[i - 1];
o_idx[i - 1] =o_idx[i - 1]
%%
PyArray_DIMS(
%(x)
s)[i - 1];
i_idx[i] = o_idx[i];
i_idx[i - 1] = o_idx[i - 1];
}
}
// then loop over each region in each pooling dimension
"""
for
i
in
xrange
(
nd
):
ccode
+=
"""
for (r_idx[
%(i)
s]=0; r_idx[
%(i)
s] < z[
%(i)
s]; r_idx[
%(i)
s]++) {
r_st[
%(i)
s] = r_idx[
%(i)
s] * st[
%(i)
s];
r_end[
%(i)
s] = r_st[
%(i)
s] + ws[
%(i)
s];
// skip the padding
r_st
= r_st < pd0 ? pd0 : r_st
;
r_end
= r_end > (r - pd0) ? r - pd0 : r_end
;
r_st
[
%(i)
s] = r_st[
%(i)
s] < pd[
%(i)
s] ? pd[
%(i)
s] : r_st[
%(i)
s]
;
r_end
[
%(i)
s] = r_end[
%(i)
s] > (r[
%(i)
s] - pd[
%(i)
s]) ? r[
%(i)
s] - pd[
%(i)
s] : r_end[
%(i)
s]
;
// from padded_img space to img space
r_st -= pd0;
r_end -= pd0;
for(int j=0; j<z_c; j++){
c_st = j * st1;
c_end = c_st + ws1;
// skip the padding
c_st = c_st < pd1 ? pd1 : c_st;
c_end = c_end > (c - pd1) ? c - pd1 : c_end;
// change coordinates from padding_img space into img space
c_st -= pd1;
c_end -= pd1;
r_st[
%(i)
s] -= pd[
%(i)
s];
r_end[
%(i)
s] -= pd[
%(i)
s];
// use the index to find the correct position in the output
o_idx[
%(non_pool_ndim)
s +
%(i)
s] = r_idx[
%(i)
s];
"""
%
dict
(
i
=
i
,
non_pool_ndim
=
non_pool_ndim
)
ccode
+=
"""
dtype_
%(gz)
s * gz;
if (
%(total_ndim)
s == 4)
{
// the maximum value
maximum = ((dtype_
%(z)
s*)(PyArray_GETPTR4(
%(z)
s,o_idx[0],o_idx[1],o_idx[2],o_idx[3])))[0];
// the gradient corresponding to this maximum value in z
gz = ((dtype_
%(gz)
s*)(PyArray_GETPTR4(
%(gz)
s, o_idx[0],o_idx[1],o_idx[2],o_idx[3])));
}
else
{
// the maximum value
maximum = ((dtype_
%(z)
s*)(PyArray_G
ETPTR4(
%(z)
s,b,k,i,j
)))[0];
maximum = ((dtype_
%(z)
s*)(PyArray_G
etPtr(
%(z)
s,o_idx
)))[0];
// the gradient corresponding to this maximum value in z
dtype_
%(gz)
s * gz = (
(dtype_
%(gz)
s*)(PyArray_GETPTR4(
%(gz)
s, b, k, i, j)));
// go through the pooled region in the unpadded input
for(int m=r_st; m<r_end; m++)
gz = ((dtype_
%(gz)
s*)(PyArray_GetPtr(
%(gz)
s, o_idx)));
}
"""
for
i
in
xrange
(
nd
):
ccode
+=
"""
// go through the pooled region in the unpadded input
for(int m
%(i)
s=r_st[
%(i)
s]; m
%(i)
s<r_end[
%(i)
s]; m
%(i)
s++)
{
i_idx[
%(non_pool_ndim)
s +
%(i)
s] = m
%(i)
s;
"""
%
dict
(
i
=
i
,
non_pool_ndim
=
non_pool_ndim
)
ccode
+=
"""
dtype_
%(x)
s a;
dtype_
%(gx)
s * gx;
if (
%(total_ndim)
s == 4)
{
for(int n=c_st; n<c_end; n++)
{
dtype_
%(x)
s a = ((dtype_
%(x)
s*)(PyArray_GETPTR4(
%(x)
s,b,k,m,n)))[0];
dtype_
%(gx)
s * gx = (
(dtype_
%(gx)
s*)(PyArray_GETPTR4(
%(gx)
s, b, k, m, n)));
if (a == maximum){
gx[0] = gx[0] + gz[0];
}
}
a = ((dtype_
%(x)
s*)(PyArray_GETPTR4(
%(x)
s,i_idx[0],i_idx[1],i_idx[2],i_idx[3])))[0];
gx = ((dtype_
%(gx)
s*)(PyArray_GETPTR4(
%(gx)
s, i_idx[0],i_idx[1],i_idx[2],i_idx[3])));
}
}
}
}
}
"""
%
locals
()
else
{
a = ((dtype_
%(x)
s*)(PyArray_GetPtr(
%(x)
s,i_idx)))[0];
gx = ((dtype_
%(gx)
s*)(PyArray_GetPtr(
%(gx)
s, i_idx)));
}
if (a == maximum){
gx[0] = gx[0] + gz[0];
}
"""
for
i
in
xrange
(
nd
):
ccode
+=
"""
} // for loop over region
"""
for
i
in
xrange
(
nd
):
ccode
+=
"""
} // loop over pooling dimension
"""
ccode
+=
"""
} // for loop over non-pooling dimensions
} // if z_prod
"""
return
ccode
%
locals
()
def
c_code_cache_version
(
self
):
return
(
0
,
9
,
self
.
openmp
)
return
(
0
,
10
,
self
.
openmp
)
class
AveragePoolGrad
(
PoolGrad
):
def
__init__
(
self
,
ignore_border
,
mode
=
'average_inc_pad'
):
def
__init__
(
self
,
ignore_border
,
mode
=
'average_inc_pad'
,
ndim
=
None
):
assert
mode
in
[
'sum'
,
'average_inc_pad'
,
'average_exc_pad'
]
PoolGrad
.
__init__
(
self
,
ignore_border
,
mode
)
PoolGrad
.
__init__
(
self
,
ignore_border
,
mode
,
ndim
)
# There is an extra dummy parameter to match the parameter count
# of MaxPoolGrad. They have to keep the same interface because of
# the DownsampleFactorMaxGrad trick to keep old scripts working
# (see downsample.py for details on this).
def
make_node
(
self
,
x
,
gz
,
ws
,
stride
=
None
,
pad
=
(
0
,
0
)
,
dummy
=
None
):
def
make_node
(
self
,
x
,
gz
,
ws
,
stride
=
None
,
pad
=
None
,
dummy
=
None
):
# make_node should only be called by the grad function of
# Pool, so these asserts should not fail.
x
=
tensor
.
as_tensor_variable
(
x
)
gz
=
tensor
.
as_tensor_variable
(
gz
)
nd
=
self
.
ndim
if
nd
is
None
:
nd
=
x
.
ndim
-
2
if
stride
is
None
:
stride
=
ws
if
pad
is
None
:
pad
=
(
0
,)
*
nd
ws
=
tensor
.
as_tensor_variable
(
ws
)
stride
=
tensor
.
as_tensor_variable
(
stride
)
pad
=
tensor
.
as_tensor_variable
(
pad
)
assert
isinstance
(
x
,
Variable
)
and
x
.
ndim
==
4
assert
isinstance
(
gz
,
Variable
)
and
gz
.
ndim
==
4
assert
isinstance
(
x
,
Variable
)
and
x
.
ndim
>=
nd
assert
isinstance
(
gz
,
Variable
)
and
gz
.
ndim
>=
nd
assert
isinstance
(
ws
,
Variable
)
and
ws
.
ndim
==
1
assert
isinstance
(
stride
,
Variable
)
and
stride
.
ndim
==
1
assert
x
.
ndim
==
gz
.
ndim
>=
nd
assert
isinstance
(
pad
,
Variable
)
and
pad
.
ndim
==
1
if
not
ws
.
dtype
.
startswith
(
'int'
):
raise
TypeError
(
'Pool downsample parameters must be ints.'
)
...
...
@@ -972,96 +1206,333 @@ class AveragePoolGrad(PoolGrad):
def
perform
(
self
,
node
,
inp
,
out
):
x
,
gz
,
ws
,
stride
,
pad
=
inp
gx_stg
,
=
out
assert
ws
.
shape
==
stride
.
shape
==
pad
.
shape
==
(
2
,)
if
self
.
mode
==
'average_exc_pad'
and
pad
[
0
]
!=
0
and
pad
[
1
]
!=
0
:
nd
=
self
.
ndim
if
nd
is
None
:
nd
=
len
(
x
.
shape
)
-
2
assert
ws
.
shape
==
stride
.
shape
==
pad
.
shape
==
(
nd
,)
if
len
(
x
.
shape
)
<
nd
:
raise
NotImplementedError
(
'AveragePoolGrad requires input with {} or more dimensions'
.
format
(
nd
))
if
self
.
mode
==
'average_exc_pad'
and
max
(
pad
)
!=
0
:
raise
NotImplementedError
()
z_shape
=
self
.
out_shape
(
x
.
shape
,
ws
,
self
.
ignore_border
,
stride
,
pad
)
z_shape
=
self
.
out_shape
(
x
.
shape
,
ws
,
self
.
ignore_border
,
stride
,
pad
,
nd
)
if
(
gx_stg
[
0
]
is
None
)
or
(
gx_stg
[
0
]
.
shape
!=
z_shape
):
gx_stg
[
0
]
=
numpy
.
empty
(
z_shape
,
dtype
=
x
.
dtype
)
zz
=
gx_stg
[
0
]
# number of pooling output rows
pr
=
zz
.
shape
[
-
2
]
# number of pooling output cols
pc
=
zz
.
shape
[
-
1
]
ws0
,
ws1
=
ws
st0
,
st1
=
stride
pad_h
=
pad
[
0
]
pad_w
=
pad
[
1
]
img_rows
=
x
.
shape
[
-
2
]
+
2
*
pad_h
img_cols
=
x
.
shape
[
-
1
]
+
2
*
pad_w
# size of pooling output
pool_out_shp
=
zz
.
shape
[
-
nd
:]
img_shp
=
tuple
(
x
.
shape
[
-
nd
+
i
]
+
2
*
pad
[
i
]
for
i
in
xrange
(
nd
))
inc_pad
=
self
.
mode
==
'average_inc_pad'
sum_mode
=
self
.
mode
==
'sum'
# pad the image
if
(
pad_h
,
pad_w
)
!=
(
0
,
0
):
y
=
numpy
.
zeros
(
(
x
.
shape
[
0
],
x
.
shape
[
1
],
img_rows
,
img_cols
),
dtype
=
x
.
dtype
)
y
[:,
:,
pad_h
:(
img_rows
-
pad_h
),
pad_w
:(
img_cols
-
pad_w
)]
=
x
else
:
y
=
x
gx
=
numpy
.
zeros_like
(
y
)
for
n
in
xrange
(
x
.
shape
[
0
]):
for
k
in
xrange
(
x
.
shape
[
1
]):
for
r
in
xrange
(
pr
):
if
sum_mode
or
inc_pad
:
row_st
=
r
*
st0
else
:
row_st
=
builtins
.
max
(
r
*
st0
,
pad_h
)
row_end
=
builtins
.
min
(
row_st
+
ws0
,
img_rows
)
for
c
in
xrange
(
pc
):
if
sum_mode
or
inc_pad
:
col_st
=
c
*
st1
else
:
col_st
=
builtins
.
max
(
c
*
st1
,
pad_w
)
col_end
=
builtins
.
min
(
col_st
+
ws1
,
img_cols
)
if
sum_mode
:
val
=
gz
[
n
,
k
,
r
,
c
]
else
:
val
=
gz
[
n
,
k
,
r
,
c
]
/
((
row_end
-
row_st
)
*
(
col_end
-
col_st
))
gx
[
n
,
k
,
row_st
:
row_end
,
col_st
:
col_end
]
+=
val
# initialize the padded output
gx
=
numpy
.
zeros
((
x
.
shape
[:
-
nd
]
+
img_shp
),
dtype
=
x
.
dtype
)
# precompute the region boundaries and sizes for each dimension
region_slices
=
[[]
for
i
in
xrange
(
nd
)]
region_sizes
=
[[]
for
i
in
xrange
(
nd
)]
for
i
in
xrange
(
nd
):
for
j
in
xrange
(
pool_out_shp
[
i
]):
if
sum_mode
or
inc_pad
:
start
=
j
*
stride
[
i
]
else
:
start
=
builtins
.
max
(
j
*
stride
[
i
],
pad
[
i
])
end
=
builtins
.
min
(
start
+
ws
[
i
],
img_shp
[
i
])
region_slices
[
i
]
.
append
(
slice
(
start
,
end
))
region_sizes
[
i
]
.
append
(
end
-
start
)
# iterate over non-pooling dimensions
region_slice
=
[
None
]
*
nd
for
k
in
numpy
.
ndindex
(
*
x
.
shape
[:
-
nd
]):
gzk
=
gz
[
k
]
gxk
=
gx
[
k
]
# iterate over pooling regions
for
r
in
numpy
.
ndindex
(
*
pool_out_shp
):
region_size
=
1
for
i
in
xrange
(
nd
):
region_slice
[
i
]
=
region_slices
[
i
][
r
[
i
]]
region_size
*=
region_sizes
[
i
][
r
[
i
]]
if
sum_mode
:
val
=
gzk
[
r
]
else
:
# divide by region size
val
=
gzk
[
r
]
/
region_size
gxk
[
region_slice
]
+=
val
# unpad the image
gx
=
gx
[:,
:,
pad_h
:(
img_rows
-
pad_h
),
pad_w
:(
img_cols
-
pad_w
)]
gx
=
gx
[(
slice
(
None
),)
*
(
len
(
x
.
shape
)
-
nd
)
+
tuple
(
slice
(
pad
[
i
],
img_shp
[
i
]
-
pad
[
i
])
for
i
in
xrange
(
nd
))]
gx_stg
[
0
]
=
gx
def
grad
(
self
,
inp
,
grads
):
x
,
gz
,
ws
,
stride
,
pad
=
inp
ggx
,
=
grads
return
([
theano
.
tensor
.
zeros_like
(
x
),
Pool
(
ignore_border
=
self
.
ignore_border
,
mode
=
self
.
mode
)(
ggx
,
Pool
(
ignore_border
=
self
.
ignore_border
,
ndim
=
self
.
ndim
,
mode
=
self
.
mode
)(
ggx
,
ws
,
stride
,
pad
)]
+
[
DisconnectedType
()()
for
i
in
inp
[
2
:]])
def
connection_pattern
(
self
,
node
):
return
[[
1
],
[
1
],
[
0
],
[
0
],
[
0
]]
def
c_code
(
self
,
node
,
name
,
inp
,
out
,
sub
):
x
,
gz
,
ws
,
stride
,
pad
=
inp
gx
,
=
out
nd
=
self
.
ndim
if
nd
is
None
:
nd
=
node
.
inputs
[
0
]
.
ndim
-
2
total_ndim
=
node
.
inputs
[
0
]
.
ndim
non_pool_ndim
=
total_ndim
-
nd
fail
=
sub
[
'fail'
]
inc_pad
=
int
(
self
.
mode
==
'average_inc_pad'
)
sum_mode
=
int
(
self
.
mode
==
'sum'
)
if
self
.
openmp
:
# run in parallel over each pooling block
omp_parallel
=
'#pragma omp parallel for private(r_st, r_end, r_pad_width, r_idx, i_idx, o_idx) schedule(static)'
else
:
omp_parallel
=
''
ccode
=
"""
// sanity checks
int x_typenum = PyArray_ObjectType((PyObject*)
%(x)
s, 0);
int gz_typenum = PyArray_ObjectType((PyObject*)
%(gz)
s, 0);
if (x_typenum != gz_typenum)
{
PyErr_SetString(PyExc_ValueError, "input types must all match");
%(fail)
s;
}
if(PyArray_NDIM(
%(x)
s)!=
%(total_ndim)
s)
{
PyErr_SetString(PyExc_ValueError, "x must be a
%(total_ndim)
sD ndarray");
%(fail)
s;
}
if(PyArray_NDIM(
%(gz)
s)!=
%(total_ndim)
s)
{
PyErr_SetString(PyExc_ValueError, "gz must be a
%(total_ndim)
sD ndarray");
%(fail)
s;
}
if(PyArray_DIM(
%(ws)
s, 0)!=
%(nd)
s)
{
PyErr_SetString(PyExc_ValueError, "ws must be a vector of size
%(nd)
s");
%(fail)
s;
}
if(PyArray_DIM(
%(stride)
s, 0)!=
%(nd)
s)
{
PyErr_SetString(PyExc_ValueError, "stride must be a vector of size
%(nd)
s");
%(fail)
s;
}
if(PyArray_DIM(
%(pad)
s, 0)!=
%(nd)
s)
{
PyErr_SetString(PyExc_ValueError, "pad must be a vector of size
%(nd)
s");
%(fail)
s;
}
int z[
%(nd)
s]; // shape of the output
int r[
%(nd)
s]; // shape of the padded_input
int ws[
%(nd)
s];
int st[
%(nd)
s];
int pd[
%(nd)
s];
int nonzero_padding;
nonzero_padding = 0;
for (int i=0; i<
%(nd)
s; i++)
{
ws[i] = *((npy_intp*)PyArray_GETPTR1(
%(ws)
s, i));
st[i] = *((npy_intp*)PyArray_GETPTR1(
%(stride)
s, i));
pd[i] = *((npy_intp*)PyArray_GETPTR1(
%(pad)
s, i));
z[i] = PyArray_DIMS(
%(gz)
s)[
%(non_pool_ndim)
s + i];
r[i] = PyArray_DIMS(
%(x)
s)[
%(non_pool_ndim)
s + i] + 2 * pd[i];
if (pd[i]>0)
nonzero_padding = 1;
}
if (!
%(inc_pad)
s && !
%(sum_mode)
s && nonzero_padding)
{
PyErr_SetString(PyExc_ValueError,
"padding must be zero for average_exc_pad");
%(fail)
s;
}
// allocating memory for output, if necessary
int mem_nec;
mem_nec = 0;
if ((!
%(gx)
s) || !PyArray_ISCONTIGUOUS(
%(gx)
s)
|| *PyArray_DIMS(
%(gx)
s)!=
%(total_ndim)
s)
{
mem_nec = 1;
}
if (!mem_nec)
{
for (int i=0; i<
%(total_ndim)
s; i++)
{
if (PyArray_DIMS(
%(gx)
s)[i] != PyArray_DIMS(
%(x)
s)[i])
{
mem_nec = 1;
break;
}
}
}
if (mem_nec)
{
Py_XDECREF(
%(gx)
s);
%(gx)
s = (PyArrayObject*) PyArray_ZEROS(
%(total_ndim)
s, PyArray_DIMS(
%(x)
s), x_typenum,0);
}
else {
PyArray_FILLWBYTE(
%(gx)
s, 0);
}
int z_prod;
// do not run if any z[i] is zero
z_prod = 1;
for (int i=0; i<
%(nd)
s; i++)
{
z_prod *= z[i];
}
if (z_prod)
{
// will be used to hold start and end index of a region
int r_st[
%(nd)
s];
int r_end[
%(nd)
s];
// padded region size
int r_pad_width[
%(nd)
s];
// index for iterating over the pooling regions
int r_idx[
%(nd)
s];
// placeholder for PyArray indexing (output)
long int o_idx[
%(total_ndim)
s];
// placeholder for PyArray indexing (input)
long int i_idx[
%(total_ndim)
s];
// loop over non-pooling dimensions
int non_pooling_prod = 1;
for (int i=0; i<
%(non_pool_ndim)
s; i++)
{
non_pooling_prod *= PyArray_DIMS(
%(x)
s)[i];
}
%(omp_parallel)
s
// first loop over non-pooling dimensions
for (int t=0; t<non_pooling_prod; t++)
{
// compute the non-pooling index in each dimension
if (
%(non_pool_ndim)
s!=0)
{
o_idx[0] = t;
i_idx[0] = t;
for (int i=1; i<
%(non_pool_ndim)
s; i++)
{
o_idx[i] = o_idx[i - 1] / PyArray_DIMS(
%(x)
s)[i - 1];
o_idx[i - 1] =o_idx[i - 1]
%%
PyArray_DIMS(
%(x)
s)[i - 1];
i_idx[i] = o_idx[i];
i_idx[i - 1] = o_idx[i - 1];
}
}
// then loop over each region in each pooling dimension
"""
for
i
in
xrange
(
nd
):
ccode
+=
"""
for (r_idx[
%(i)
s]=0; r_idx[
%(i)
s] < z[
%(i)
s]; r_idx[
%(i)
s]++) {
r_st[
%(i)
s] = r_idx[
%(i)
s] * st[
%(i)
s];
if (!
%(sum_mode)
s && !
%(inc_pad)
s && r_st[
%(i)
s] < pd[
%(i)
s])
{
r_st[
%(i)
s] = pd[
%(i)
s];
}
r_end[
%(i)
s] = r_st[
%(i)
s] + ws[
%(i)
s];
r_end[
%(i)
s] = r_end[
%(i)
s] > r[
%(i)
s] ? r[
%(i)
s] : r_end[
%(i)
s];
r_pad_width[
%(i)
s] = r_end[
%(i)
s] - r_st[
%(i)
s];
// from padded_img space to img space
r_st[
%(i)
s] = r_st[
%(i)
s] - pd[
%(i)
s] > 0 ? r_st[
%(i)
s] - pd[
%(i)
s] : 0;
r_end[
%(i)
s] = r_end[
%(i)
s] > r[
%(i)
s] - pd[
%(i)
s] ? r[
%(i)
s] - 2 * pd[
%(i)
s] : r_end[
%(i)
s] - pd[
%(i)
s];
// use the index to find the correct position in the output
o_idx[
%(non_pool_ndim)
s +
%(i)
s] = r_idx[
%(i)
s];
"""
%
dict
(
i
=
i
,
sum_mode
=
sum_mode
,
inc_pad
=
inc_pad
,
non_pool_ndim
=
non_pool_ndim
)
ccode
+=
"""
dtype_
%(gz)
s * gz;
dtype_
%(gz)
s val;
if (
%(total_ndim)
s == 4)
{
// the gradient for this region
gz = ((dtype_
%(gz)
s*)(PyArray_GETPTR4(
%(gz)
s, o_idx[0],o_idx[1],o_idx[2],o_idx[3])));
}
else
{
// the gradient for this region
gz = ((dtype_
%(gz)
s*)(PyArray_GetPtr(
%(gz)
s, o_idx)));
}
// compute the contribution
if (
%(sum_mode)
s)
{
val = gz[0];
}
else
{
val = gz[0] / (
%(region_size)
s);
}
"""
region_size
=
' * '
.
join
(
'r_pad_width[
%
d]'
%
i
for
i
in
xrange
(
nd
))
for
i
in
xrange
(
nd
):
ccode
+=
"""
// go through the pooled region in the unpadded input
for(int m
%(i)
s=r_st[
%(i)
s]; m
%(i)
s<r_end[
%(i)
s]; m
%(i)
s++)
{
i_idx[
%(non_pool_ndim)
s +
%(i)
s] = m
%(i)
s;
"""
%
dict
(
i
=
i
,
non_pool_ndim
=
non_pool_ndim
)
ccode
+=
"""
dtype_
%(gx)
s * gx;
if (
%(total_ndim)
s == 4)
{
gx = ((dtype_
%(gx)
s*)(PyArray_GETPTR4(
%(gx)
s, i_idx[0],i_idx[1],i_idx[2],i_idx[3])));
}
else
{
gx = ((dtype_
%(gx)
s*)(PyArray_GetPtr(
%(gx)
s, i_idx)));
}
gx[0] = gx[0] + val;
"""
for
i
in
xrange
(
nd
):
ccode
+=
"""
} // for loop over region
"""
for
i
in
xrange
(
nd
):
ccode
+=
"""
} // loop over pooling dimension
"""
ccode
+=
"""
} // for loop over non-pooling dimensions
} // if z_prod
"""
return
ccode
%
locals
()
def
c_code_cache_version
(
self
):
return
(
0
,
3
,
self
.
openmp
)
class
DownsampleFactorMaxGradGrad
(
OpenMPOp
):
__props__
=
(
'ignore_border'
,
'mode'
)
__props__
=
(
'
ndim'
,
'
ignore_border'
,
'mode'
)
def
__init__
(
self
,
ignore_border
,
mode
=
'max'
,
openmp
=
None
):
def
__init__
(
self
,
ignore_border
,
mode
=
'max'
,
ndim
=
None
,
openmp
=
None
):
self
.
ndim
=
ndim
self
.
ignore_border
=
ignore_border
self
.
mode
=
mode
super
(
DownsampleFactorMaxGradGrad
,
self
)
.
__init__
(
openmp
=
openmp
)
assert
self
.
mode
==
'max'
def
make_node
(
self
,
x
,
maxout
,
gz
,
ws
,
stride
=
None
,
pad
=
(
0
,
0
)
):
def
make_node
(
self
,
x
,
maxout
,
gz
,
ws
,
stride
=
None
,
pad
=
None
):
# make_node should only be called by the grad function of
# MaxPoolGrad, so these asserts should not fail.
x
=
tensor
.
as_tensor_variable
(
x
)
maxout
=
tensor
.
as_tensor_variable
(
maxout
)
gz
=
tensor
.
as_tensor_variable
(
gz
)
assert
x
.
ndim
==
4
assert
maxout
.
ndim
==
4
assert
gz
.
ndim
==
4
nd
=
self
.
ndim
if
nd
is
None
:
nd
=
x
.
ndim
-
2
if
stride
is
None
:
stride
=
ws
if
isinstance
(
pad
,
(
tuple
,
list
)):
if
tuple
(
pad
)
!=
(
0
,
0
)
and
not
self
.
ignore_border
:
if
pad
is
None
:
pad
=
(
0
,)
*
nd
elif
isinstance
(
pad
,
(
tuple
,
list
)):
if
max
(
pad
)
!=
0
and
not
self
.
ignore_border
:
raise
NotImplementedError
(
'padding works only with ignore_border=True'
)
if
isinstance
(
ws
,
(
tuple
,
list
)):
if
pad
[
0
]
>=
ws
[
0
]
or
pad
[
1
]
>=
ws
[
1
]
:
if
any
(
pad
[
i
]
>=
ws
[
i
]
for
i
in
range
(
nd
))
:
raise
NotImplementedError
(
'padding_h and padding_w must be smaller than strides'
)
ws
=
tensor
.
as_tensor_variable
(
ws
)
...
...
@@ -1070,6 +1541,7 @@ class DownsampleFactorMaxGradGrad(OpenMPOp):
assert
ws
.
ndim
==
1
assert
stride
.
ndim
==
1
assert
pad
.
ndim
==
1
assert
x
.
ndim
==
maxout
.
ndim
==
gz
.
ndim
>=
nd
if
not
ws
.
dtype
.
startswith
(
'int'
):
raise
TypeError
(
'Pool downsample parameters must be ints.'
)
if
not
stride
.
dtype
.
startswith
(
'int'
):
...
...
@@ -1081,49 +1553,56 @@ class DownsampleFactorMaxGradGrad(OpenMPOp):
def
perform
(
self
,
node
,
inp
,
out
):
x
,
maxout
,
ggx
,
ws
,
stride
,
pad
=
inp
z
,
=
out
assert
ws
.
shape
==
stride
.
shape
==
pad
.
shape
==
(
2
,)
if
len
(
x
.
shape
)
!=
4
:
nd
=
self
.
ndim
if
nd
is
None
:
nd
=
len
(
x
.
shape
)
-
2
assert
ws
.
shape
==
stride
.
shape
==
pad
.
shape
==
(
nd
,)
if
len
(
x
.
shape
)
<
nd
:
raise
NotImplementedError
(
'DownsampleFactorMaxGradGrad requires 4D input for now'
)
'DownsampleFactorMaxGradGrad requires input '
'with {} or more dimensions'
.
format
(
nd
))
if
(
z
[
0
]
is
None
)
or
(
z
[
0
]
.
shape
!=
maxout
.
shape
):
z
[
0
]
=
numpy
.
zeros
(
maxout
.
shape
,
dtype
=
x
.
dtype
)
ggz
=
z
[
0
]
# grad wrt maxout_grad has the same shape as maxout
# number of pooling output rows
pr
=
ggz
.
shape
[
-
2
]
# number of pooling output cols
pc
=
ggz
.
shape
[
-
1
]
ws0
,
ws1
=
ws
st0
,
st1
=
stride
pd0
,
pd1
=
pad
img_rows
=
x
.
shape
[
-
2
]
+
2
*
pd0
img_cols
=
x
.
shape
[
-
1
]
+
2
*
pd1
# size of pooling output
pool_out_shp
=
ggz
.
shape
[
-
nd
:]
img_shp
=
tuple
(
x
.
shape
[
-
nd
+
i
]
+
2
*
pad
[
i
]
for
i
in
xrange
(
nd
))
# pad the image and its gradients
if
pd0
!=
0
and
pd1
!=
0
:
y_padded
=
numpy
.
zeros
(
(
x
.
shape
[
0
],
x
.
shape
[
1
],
img_rows
,
img_cols
),
dtype
=
x
.
dtype
)
+
x
.
min
()
-
1
y_padded
[:,
:,
pd0
:(
img_rows
-
pd0
),
pd1
:(
img_cols
-
pd1
)]
=
x
ggx_padded
=
numpy
.
zeros
(
(
x
.
shape
[
0
],
x
.
shape
[
1
],
img_rows
,
img_cols
),
dtype
=
x
.
dtype
)
ggx_padded
[:,
:,
pd0
:(
img_rows
-
pd0
),
pd1
:(
img_cols
-
pd1
)]
=
ggx
if
max
(
pad
)
>
0
:
y_padded
=
numpy
.
zeros
(
x
.
shape
[:
-
nd
]
+
img_shp
,
dtype
=
x
.
dtype
)
y_padded
[(
slice
(
None
),)
*
(
len
(
x
.
shape
)
-
nd
)
+
tuple
(
slice
(
pad
[
i
],
img_shp
[
i
]
-
pad
[
i
])
for
i
in
xrange
(
nd
))]
=
x
ggx_padded
=
numpy
.
zeros
(
x
.
shape
[:
-
nd
]
+
img_shp
,
dtype
=
x
.
dtype
)
ggx_padded
[(
slice
(
None
),)
*
(
len
(
x
.
shape
)
-
nd
)
+
tuple
(
slice
(
pad
[
i
],
img_shp
[
i
]
-
pad
[
i
])
for
i
in
xrange
(
nd
))]
=
ggx
else
:
y_padded
=
x
ggx_padded
=
ggx
for
n
in
xrange
(
x
.
shape
[
0
]):
for
k
in
xrange
(
x
.
shape
[
1
]):
for
r
in
xrange
(
pr
):
row_st
=
r
*
st0
row_end
=
builtins
.
min
(
row_st
+
ws0
,
img_rows
)
for
c
in
xrange
(
pc
):
col_st
=
c
*
st1
col_end
=
builtins
.
min
(
col_st
+
ws1
,
img_cols
)
for
row_ind
in
xrange
(
row_st
,
row_end
):
for
col_ind
in
xrange
(
col_st
,
col_end
):
if
(
maxout
[
n
,
k
,
r
,
c
]
==
y_padded
[
n
,
k
,
row_ind
,
col_ind
]):
ggz
[
n
,
k
,
r
,
c
]
=
ggx_padded
[
n
,
k
,
row_ind
,
col_ind
]
# precompute the region boundaries for each dimension
region_ranges
=
[[]
for
i
in
xrange
(
nd
)]
for
i
in
xrange
(
nd
):
for
j
in
xrange
(
pool_out_shp
[
i
]):
start
=
j
*
stride
[
i
]
end
=
builtins
.
min
(
start
+
ws
[
i
],
img_shp
[
i
])
region_ranges
[
i
]
.
append
(
xrange
(
start
,
end
))
# iterate over non-pooling dimensions
for
k
in
numpy
.
ndindex
(
*
x
.
shape
[:
-
nd
]):
ggxk
=
ggx_padded
[
k
]
ggzk
=
ggz
[
k
]
yk
=
y_padded
[
k
]
maxoutk
=
maxout
[
k
]
# iterate over pooling regions
for
r
in
numpy
.
ndindex
(
*
pool_out_shp
):
# iterate inside region
maxout_value
=
maxoutk
[
r
]
for
c
in
itertools
.
product
(
*
[
region_ranges
[
i
][
r
[
i
]]
for
i
in
xrange
(
nd
)]):
if
maxout_value
==
yk
[
c
]:
ggzk
[
r
]
+=
ggxk
[
c
]
def
infer_shape
(
self
,
node
,
in_shapes
):
return
[
in_shapes
[
1
]]
...
...
@@ -1133,8 +1612,9 @@ class DownsampleFactorMaxGradGrad(OpenMPOp):
gz
,
=
grads
return
[
theano
.
tensor
.
zeros_like
(
x
),
theano
.
tensor
.
zeros_like
(
maxout
),
MaxPoolGrad
(
ignore_border
=
self
.
ignore_border
)(
x
,
maxout
,
gz
,
ws
,
stride
,
pad
),
MaxPoolGrad
(
ignore_border
=
self
.
ignore_border
,
ndim
=
self
.
ndim
)(
x
,
maxout
,
gz
,
ws
,
stride
,
pad
),
DisconnectedType
()(),
DisconnectedType
()(),
DisconnectedType
()()]
...
...
@@ -1147,106 +1627,182 @@ class DownsampleFactorMaxGradGrad(OpenMPOp):
raise
theano
.
gof
.
utils
.
MethodNotDefined
()
x
,
maxout
,
ggx
,
ws
,
stride
,
pad
=
inp
z
,
=
out
# the grad of grad
nd
=
self
.
ndim
if
nd
is
None
:
nd
=
node
.
inputs
[
0
]
.
ndim
-
2
total_ndim
=
node
.
inputs
[
0
]
.
ndim
non_pool_ndim
=
total_ndim
-
nd
fail
=
sub
[
'fail'
]
ignore_border
=
int
(
self
.
ignore_border
)
if
self
.
openmp
:
omp_parallel
=
'#pragma omp parallel for private(r_st, r_end, c_st, c_end, maximum) schedule(static)'
# run in parallel over each pooling block
omp_parallel
=
'#pragma omp parallel for private(r_st, r_end, r_idx, i_idx, o_idx, maximum) schedule(static)'
else
:
omp_parallel
=
''
return
"""
int ws0, ws1, st0, st1, pd0, pd1;
ccode
=
"""
int z_typenum = PyArray_ObjectType((PyObject*)
%(maxout)
s, 0);
int z_r, z_c;
int r, c; // shape of the padded_input
if(PyArray_DIM(
%(ws)
s, 0)!=2)
int z[
%(nd)
s]; // shape of the output
int r[
%(nd)
s]; // shape of the padded_input
int ws[
%(nd)
s];
int st[
%(nd)
s];
int pd[
%(nd)
s];
if(PyArray_DIM(
%(ws)
s, 0)!=
%(nd)
s)
{
PyErr_SetString(PyExc_ValueError, "ws must be a vector of size
2
");
PyErr_SetString(PyExc_ValueError, "ws must be a vector of size
%(nd)
s
");
%(fail)
s;
}
if(PyArray_DIM(
%(stride)
s, 0)!=
2
)
if(PyArray_DIM(
%(stride)
s, 0)!=
%(nd)
s
)
{
PyErr_SetString(PyExc_ValueError, "stride must be a vector of size
2
");
PyErr_SetString(PyExc_ValueError, "stride must be a vector of size
%(nd)
s
");
%(fail)
s;
}
if(PyArray_DIM(
%(pad)
s, 0)!=
2
)
if(PyArray_DIM(
%(pad)
s, 0)!=
%(nd)
s
)
{
PyErr_SetString(PyExc_ValueError, "pad must be a vector of size
2
");
PyErr_SetString(PyExc_ValueError, "pad must be a vector of size
%(nd)
s
");
%(fail)
s;
}
// Getting ws, stride and pad
ws0 = *((npy_intp*)PyArray_GETPTR1(
%(ws)
s, 0));
ws1 = *((npy_intp*)PyArray_GETPTR1(
%(ws)
s, 1));
st0 = *((npy_intp*)PyArray_GETPTR1(
%(stride)
s, 0));
st1 = *((npy_intp*)PyArray_GETPTR1(
%(stride)
s, 1));
pd0 = *((npy_intp*)PyArray_GETPTR1(
%(pad)
s, 0));
pd1 = *((npy_intp*)PyArray_GETPTR1(
%(pad)
s, 1));
z_r = PyArray_DIMS(
%(maxout)
s)[2];
z_c = PyArray_DIMS(
%(maxout)
s)[3];
r = PyArray_DIMS(
%(x)
s)[2];
c = PyArray_DIMS(
%(x)
s)[3];
r += pd0 * 2;
c += pd1 * 2;
// allocating memory for output
if ((!
%(z)
s)
|| !PyArray_ISCONTIGUOUS(
%(z)
s)
|| *PyArray_DIMS(
%(z)
s)!=4
||(PyArray_DIMS(
%(z)
s)[0] != PyArray_DIMS(
%(maxout)
s)[0])
||(PyArray_DIMS(
%(z)
s)[1] != PyArray_DIMS(
%(maxout)
s)[1])
||(PyArray_DIMS(
%(z)
s)[2] != PyArray_DIMS(
%(maxout)
s)[2])
||(PyArray_DIMS(
%(z)
s)[3] != PyArray_DIMS(
%(maxout)
s)[3])
)
for (int i=0; i<
%(nd)
s; i++)
{
ws[i] = *((npy_intp*)PyArray_GETPTR1(
%(ws)
s, i));
st[i] = *((npy_intp*)PyArray_GETPTR1(
%(stride)
s, i));
pd[i] = *((npy_intp*)PyArray_GETPTR1(
%(pad)
s, i));
z[i] = PyArray_DIMS(
%(maxout)
s)[
%(non_pool_ndim)
s + i];
r[i] = PyArray_DIMS(
%(x)
s)[
%(non_pool_ndim)
s + i] + 2 * pd[i];
}
// allocating memory for output, if necessary
int mem_nec;
mem_nec = 0;
if ((!
%(z)
s) || !PyArray_ISCONTIGUOUS(
%(z)
s)
|| *PyArray_DIMS(
%(z)
s)!=
%(total_ndim)
s)
{
mem_nec = 1;
}
if (!mem_nec)
{
for (int i=0; i<
%(total_ndim)
s; i++)
{
if (PyArray_DIMS(
%(z)
s)[i] != PyArray_DIMS(
%(maxout)
s)[i])
{
mem_nec = 1;
break;
}
}
}
if (mem_nec)
{
Py_XDECREF(
%(z)
s);
%(z)
s = (PyArrayObject*) PyArray_ZEROS(
4
, PyArray_DIMS(
%(maxout)
s), z_typenum,0);
%(z)
s = (PyArrayObject*) PyArray_ZEROS(
%(total_ndim)
s
, PyArray_DIMS(
%(maxout)
s), z_typenum,0);
}
else {
PyArray_FILLWBYTE(
%(z)
s, 0);
}
dtype_
%(maxout)
s maximum; // temp var for maximum value in a region
int r_st, r_end, c_st, c_end;
// will be used to hold start and end index of a region
int r_st[
%(nd)
s];
int r_end[
%(nd)
s];
// index for iterating over the pooling regions
int r_idx[
%(nd)
s];
// placeholder for PyArray indexing (output)
long int o_idx[
%(total_ndim)
s];
// placeholder for PyArray indexing (input)
long int i_idx[
%(total_ndim)
s];
// loop over non-pooling dimensions
int non_pooling_prod;
non_pooling_prod = 1;
for (int i=0; i<
%(non_pool_ndim)
s; i++)
{
non_pooling_prod *= PyArray_DIMS(
%(x)
s)[i];
}
%(omp_parallel)
s
for(int t = 0; t < PyArray_DIMS(
%(x)
s)[0] * PyArray_DIMS(
%(x)
s)[1]; t++){
int b = t
%%
PyArray_DIMS(
%(x)
s)[0];
int k = t / PyArray_DIMS(
%(x)
s)[0];
for(int i=0; i < z_r; i++){
r_st = i * st0;
r_end = r_st + ws0;
// first loop over non-pooling dimensions
for (int t=0; t<non_pooling_prod; t++)
{
// compute the non-pooling index in each dimension
if (
%(non_pool_ndim)
s!=0)
{
o_idx[0] = t;
i_idx[0] = t;
for (int i=1; i<
%(non_pool_ndim)
s; i++)
{
o_idx[i] = o_idx[i - 1] / PyArray_DIMS(
%(x)
s)[i - 1];
o_idx[i - 1] = o_idx[i - 1]
%%
PyArray_DIMS(
%(x)
s)[i - 1];
i_idx[i] = o_idx[i];
i_idx[i - 1] = o_idx[i - 1];
}
}
// then loop over each region in each pooling dimension
"""
for
i
in
xrange
(
nd
):
ccode
+=
"""
for (r_idx[
%(i)
s]=0; r_idx[
%(i)
s] < z[
%(i)
s]; r_idx[
%(i)
s]++) {
r_st[
%(i)
s] = r_idx[
%(i)
s] * st[
%(i)
s];
r_end[
%(i)
s] = r_st[
%(i)
s] + ws[
%(i)
s];
// skip the padding
r_st
= r_st < pd0 ? pd0 : r_st
;
r_end
= r_end > (r - pd0) ? r - pd0 : r_end
;
r_st
[
%(i)
s] = r_st[
%(i)
s] < pd[
%(i)
s] ? pd[
%(i)
s] : r_st[
%(i)
s]
;
r_end
[
%(i)
s] = r_end[
%(i)
s] > (r[
%(i)
s] - pd[
%(i)
s]) ? r[
%(i)
s] - pd[
%(i)
s] : r_end[
%(i)
s]
;
// from padded_img space to img space
r_st -= pd0;
r_end -= pd0;
for(int j=0; j<z_c; j++){
c_st = j * st1;
c_end = c_st + ws1;
// skip the padding
c_st = c_st < pd1 ? pd1 : c_st;
c_end = c_end > (c - pd1) ? c - pd1 : c_end;
// from padding_img space into img space
c_st -= pd1;
c_end -= pd1;
r_st[
%(i)
s] -= pd[
%(i)
s];
r_end[
%(i)
s] -= pd[
%(i)
s];
// use the index to find the correct position in the output
o_idx[
%(non_pool_ndim)
s +
%(i)
s] = r_idx[
%(i)
s];
"""
%
dict
(
i
=
i
,
non_pool_ndim
=
non_pool_ndim
)
ccode
+=
"""
dtype_
%(z)
s * z;
if (
%(total_ndim)
s == 4)
{
// the maximum value
maximum = ((dtype_
%(maxout)
s*)(PyArray_GETPTR4(
%(maxout)
s,o_idx[0],o_idx[1],o_idx[2],o_idx[3])))[0];
// z at this position
z = ((dtype_
%(z)
s*)(PyArray_GETPTR4(
%(z)
s,o_idx[0],o_idx[1],o_idx[2],o_idx[3])));
}
else
{
// the maximum value
maximum = ((dtype_
%(maxout)
s*)(PyArray_G
ETPTR4(
%(maxout)
s,b,k,i,j
)))[0];
maximum = ((dtype_
%(maxout)
s*)(PyArray_G
etPtr(
%(maxout)
s,o_idx
)))[0];
// z at this position
dtype_
%(z)
s * z = ((dtype_
%(z)
s*)(PyArray_GETPTR4(
%(z)
s, b, k, i, j)));
// go through the pooled region in the unpadded input
for(int m=r_st; m<r_end; m++)
z = ((dtype_
%(z)
s*)(PyArray_GetPtr(
%(z)
s,o_idx)));
}
"""
for
i
in
xrange
(
nd
):
ccode
+=
"""
// go through the pooled region in the unpadded input
for(int m
%(i)
s=r_st[
%(i)
s]; m
%(i)
s<r_end[
%(i)
s]; m
%(i)
s++)
{
i_idx[
%(non_pool_ndim)
s +
%(i)
s] = m
%(i)
s;
"""
%
dict
(
i
=
i
,
non_pool_ndim
=
non_pool_ndim
)
ccode
+=
"""
dtype_
%(x)
s a;
dtype_
%(ggx)
s * ggx;
if (
%(total_ndim)
s == 4)
{
for(int n=c_st; n<c_end; n++)
{
dtype_
%(x)
s a = ((dtype_
%(x)
s*)(PyArray_GETPTR4(
%(x)
s,b,k,m,n)))[0];
dtype_
%(ggx)
s * ggx = (
(dtype_
%(ggx)
s*)(PyArray_GETPTR4(
%(ggx)
s, b, k, m, n)));
if (a == maximum){
z[0] += ggx[0];
}
}
a = ((dtype_
%(x)
s*)(PyArray_GETPTR4(
%(x)
s,i_idx[0],i_idx[1],i_idx[2],i_idx[3])))[0];
ggx = ((dtype_
%(ggx)
s*)(PyArray_GETPTR4(
%(ggx)
s,i_idx[0],i_idx[1],i_idx[2],i_idx[3])));
}
}
}
}
"""
%
locals
()
else
{
a = ((dtype_
%(x)
s*)(PyArray_GetPtr(
%(x)
s,i_idx)))[0];
ggx = ((dtype_
%(ggx)
s*)(PyArray_GetPtr(
%(ggx)
s,i_idx)));
}
if (a == maximum){
z[0] += ggx[0];
}
"""
for
i
in
xrange
(
nd
):
ccode
+=
"""
} // for loop over region
"""
for
i
in
xrange
(
nd
):
ccode
+=
"""
} // loop over pooling dimension
"""
ccode
+=
"""
} // for loop over non-pooling dimensions
"""
return
ccode
%
locals
()
def
c_code_cache_version
(
self
):
return
(
0
,
3
,
self
.
openmp
)
return
(
0
,
4
,
self
.
openmp
)
theano/tensor/signal/tests/test_pool.py
浏览文件 @
a16e91f7
from
__future__
import
absolute_import
,
print_function
,
division
from
nose.plugins.skip
import
SkipTest
from
nose_parameterized
import
parameterized
from
itertools
import
product
import
os
import
unittest
...
...
@@ -14,7 +15,7 @@ import numpy
import
theano
import
theano.tensor
as
tensor
from
theano.tests
import
unittest_tools
as
utt
from
theano.tensor.signal.pool
import
(
Pool
,
pool_2d
,
from
theano.tensor.signal.pool
import
(
Pool
,
pool_2d
,
pool_3d
,
MaxPoolGrad
,
AveragePoolGrad
,
max_pool_2d_same_size
,
DownsampleFactorMaxGradGrad
)
...
...
@@ -57,6 +58,36 @@ class TestDownsampleFactorMax(utt.InferShapeTester):
output_val
[
k
][
i
,
j
]
=
func
(
patch
)
return
output_val
@staticmethod
def
numpy_max_pool_nd
(
input
,
ds
,
ignore_border
=
False
,
mode
=
'max'
):
'''Helper function, implementing pool_nd in pure numpy'''
if
len
(
input
.
shape
)
<
len
(
ds
):
raise
NotImplementedError
(
'input should have at least
%
s dim,'
' shape is
%
s'
%
(
str
(
ds
),
str
(
input
.
shape
)))
nd
=
len
(
ds
)
si
=
[
0
]
*
nd
if
not
ignore_border
:
for
i
in
range
(
nd
):
if
input
.
shape
[
-
nd
+
i
]
%
ds
[
i
]:
si
[
i
]
+=
1
out_shp
=
list
(
input
.
shape
[:
-
nd
])
for
i
in
range
(
nd
):
out_shp
.
append
(
input
.
shape
[
-
nd
+
i
]
//
ds
[
i
]
+
si
[
i
])
output_val
=
numpy
.
zeros
(
out_shp
)
func
=
numpy
.
max
if
mode
==
'sum'
:
func
=
numpy
.
sum
elif
mode
!=
'max'
:
func
=
numpy
.
average
for
l
in
numpy
.
ndindex
(
*
input
.
shape
[:
-
nd
]):
for
r
in
numpy
.
ndindex
(
*
output_val
.
shape
[
-
nd
:]):
patch
=
input
[
l
][
tuple
(
slice
(
r
[
i
]
*
ds
[
i
],
(
r
[
i
]
+
1
)
*
ds
[
i
])
for
i
in
range
(
nd
))]
output_val
[
l
][
r
]
=
func
(
patch
)
return
output_val
@staticmethod
def
numpy_max_pool_2d_stride_padding
(
x
,
ds
,
ignore_border
=
True
,
st
=
None
,
padding
=
(
0
,
0
),
mode
=
'max'
):
...
...
@@ -111,6 +142,60 @@ class TestDownsampleFactorMax(utt.InferShapeTester):
output_val
[
k
][
i
,
j
]
=
func
(
patch
)
return
output_val
@staticmethod
def
numpy_max_pool_nd_stride_padding
(
input
,
ds
,
ignore_border
=
True
,
st
=
None
,
padding
=
None
,
mode
=
'max'
):
assert
ignore_border
nd
=
len
(
ds
)
if
padding
is
None
:
padding
=
(
0
,)
*
nd
if
st
is
None
:
st
=
(
0
,)
*
nd
assert
len
(
padding
)
==
len
(
ds
)
==
len
(
st
)
assert
all
(
ds
[
i
]
>
padding
[
i
]
for
i
in
range
(
nd
))
def
pad_img
(
x
):
# initialize padded input
y
=
numpy
.
zeros
(
x
.
shape
[
0
:
-
nd
]
+
tuple
(
x
.
shape
[
-
nd
+
i
]
+
padding
[
i
]
*
2
for
i
in
range
(
nd
)),
dtype
=
x
.
dtype
)
# place the unpadded input in the center
block
=
((
slice
(
None
),)
*
(
len
(
x
.
shape
)
-
nd
)
+
tuple
(
slice
(
padding
[
i
],
x
.
shape
[
-
nd
+
i
]
+
padding
[
i
])
for
i
in
range
(
nd
)))
y
[
block
]
=
x
return
y
pad_img_shp
=
list
(
input
.
shape
[:
-
nd
])
out_shp
=
list
(
input
.
shape
[:
-
nd
])
for
i
in
range
(
nd
):
padded_size
=
input
.
shape
[
-
nd
+
i
]
+
2
*
padding
[
i
]
pad_img_shp
.
append
(
padded_size
)
out_shp
.
append
((
padded_size
-
ds
[
i
])
//
st
[
i
]
+
1
)
output_val
=
numpy
.
zeros
(
out_shp
)
padded_input
=
pad_img
(
input
)
func
=
numpy
.
max
if
mode
==
'sum'
:
func
=
numpy
.
sum
elif
mode
!=
'max'
:
func
=
numpy
.
average
inc_pad
=
mode
==
'average_inc_pad'
for
l
in
numpy
.
ndindex
(
*
input
.
shape
[:
-
nd
]):
for
r
in
numpy
.
ndindex
(
*
output_val
.
shape
[
-
nd
:]):
region
=
[]
for
i
in
range
(
nd
):
r_st
=
r
[
i
]
*
st
[
i
]
r_end
=
builtins
.
min
(
r_st
+
ds
[
i
],
pad_img_shp
[
-
nd
+
i
])
if
not
inc_pad
:
r_st
=
builtins
.
max
(
r_st
,
padding
[
i
])
r_end
=
builtins
.
min
(
r_end
,
input
.
shape
[
-
nd
+
i
]
+
padding
[
i
])
region
.
append
(
slice
(
r_st
,
r_end
))
patch
=
padded_input
[
l
][
region
]
output_val
[
l
][
r
]
=
func
(
patch
)
return
output_val
@staticmethod
def
numpy_max_pool_2d_stride
(
input
,
ds
,
ignore_border
=
False
,
st
=
None
,
mode
=
'max'
):
...
...
@@ -174,84 +259,167 @@ class TestDownsampleFactorMax(utt.InferShapeTester):
output_val
[
k
][
i
,
j
]
=
func
(
patch
)
return
output_val
@staticmethod
def
numpy_max_pool_nd_stride
(
input
,
ds
,
ignore_border
=
False
,
st
=
None
,
mode
=
'max'
):
'''Helper function, implementing pooling in pure numpy
this function provides st input to indicate the stide size
for the pooling regions. if not indicated, st == sd.'''
nd
=
len
(
ds
)
if
st
is
None
:
st
=
ds
assert
len
(
st
)
==
len
(
ds
)
out_shp
=
list
(
input
.
shape
[:
-
nd
])
for
i
in
range
(
nd
):
out
=
0
if
input
.
shape
[
-
nd
+
i
]
-
ds
[
i
]
>=
0
:
out
=
(
input
.
shape
[
-
nd
+
i
]
-
ds
[
i
])
//
st
[
i
]
+
1
if
not
ignore_border
:
if
out
>
0
:
if
input
.
shape
[
-
nd
+
i
]
-
((
out
-
1
)
*
st
[
i
]
+
ds
[
i
])
>
0
:
if
input
.
shape
[
-
nd
+
i
]
-
out
*
st
[
i
]
>
0
:
out
+=
1
else
:
if
input
.
shape
[
-
nd
+
i
]
>
0
:
out
+=
1
out_shp
.
append
(
out
)
func
=
numpy
.
max
if
mode
==
'sum'
:
func
=
numpy
.
sum
elif
mode
!=
'max'
:
func
=
numpy
.
average
output_val
=
numpy
.
zeros
(
out_shp
)
for
l
in
numpy
.
ndindex
(
*
input
.
shape
[:
-
nd
]):
for
r
in
numpy
.
ndindex
(
*
output_val
.
shape
[
-
nd
:]):
region
=
[]
for
i
in
range
(
nd
):
r_st
=
r
[
i
]
*
st
[
i
]
r_end
=
builtins
.
min
(
r_st
+
ds
[
i
],
input
.
shape
[
-
nd
+
i
])
region
.
append
(
slice
(
r_st
,
r_end
))
patch
=
input
[
l
][
region
]
output_val
[
l
][
r
]
=
func
(
patch
)
return
output_val
def
test_DownsampleFactorMax
(
self
):
rng
=
numpy
.
random
.
RandomState
(
utt
.
fetch_seed
())
# generate random images
maxpoolshps
=
((
1
,
1
),
(
2
,
2
),
(
3
,
3
),
(
2
,
3
))
imval
=
rng
.
rand
(
4
,
2
,
16
,
16
)
images
=
tensor
.
dtensor4
()
for
maxpoolshp
,
ignore_border
,
mode
in
product
(
maxpoolshps
,
[
True
,
False
],
[
'max'
,
'sum'
,
'average_inc_pad'
,
'average_exc_pad'
]):
# print 'maxpoolshp =', maxpoolshp
# print 'ignore_border =', ignore_border
# Pure Numpy computation
numpy_output_val
=
self
.
numpy_max_pool_2d
(
imval
,
maxpoolshp
,
ignore_border
,
mode
=
mode
)
# maxpool, input size
examples
=
(
((
2
,),
(
16
,)),
((
2
,),
(
4
,
16
,)),
((
2
,),
(
4
,
2
,
16
,)),
((
1
,
1
),
(
4
,
2
,
16
,
16
)),
((
2
,
2
),
(
4
,
2
,
16
,
16
)),
((
3
,
3
),
(
4
,
2
,
16
,
16
)),
((
3
,
2
),
(
4
,
2
,
16
,
16
)),
((
3
,
2
,
2
),
(
3
,
2
,
16
,
16
,
16
)),
((
2
,
3
,
2
),
(
3
,
2
,
16
,
16
,
16
)),
((
2
,
2
,
3
),
(
3
,
2
,
16
,
16
,
16
)),
((
2
,
2
,
3
,
2
),
(
3
,
2
,
6
,
6
,
6
,
5
)),
)
for
example
,
ignore_border
,
mode
in
product
(
examples
,
[
True
,
False
],
[
'max'
,
'sum'
,
'average_inc_pad'
,
'average_exc_pad'
]):
(
maxpoolshp
,
inputsize
)
=
example
imval
=
rng
.
rand
(
*
inputsize
)
images
=
theano
.
shared
(
imval
)
# Pure Numpy computation
numpy_output_val
=
self
.
numpy_max_pool_nd
(
imval
,
maxpoolshp
,
ignore_border
,
mode
=
mode
)
# The pool_2d or pool_3d helper methods
if
len
(
maxpoolshp
)
==
2
:
output
=
pool_2d
(
images
,
maxpoolshp
,
ignore_border
,
mode
=
mode
)
f
=
function
([
images
,
],
[
output
,
])
output_val
=
f
(
imval
)
f
=
function
([],
[
output
,
])
output_val
=
f
()
utt
.
assert_allclose
(
output_val
,
numpy_output_val
)
# Pool op
maxpool_op
=
Pool
(
ignore_border
=
ignore_border
,
mode
=
mode
)(
images
,
maxpoolshp
)
output_shape
=
Pool
.
out_shape
(
imval
.
shape
,
maxpoolshp
,
ignore_border
=
ignore_border
)
utt
.
assert_allclose
(
numpy
.
asarray
(
output_shape
),
numpy_output_val
.
shape
)
f
=
function
([
images
],
maxpool_op
)
output_val
=
f
(
imval
)
elif
len
(
maxpoolshp
)
==
3
:
output
=
pool_3d
(
images
,
maxpoolshp
,
ignore_border
,
mode
=
mode
)
f
=
function
([],
[
output
,
])
output_val
=
f
()
utt
.
assert_allclose
(
output_val
,
numpy_output_val
)
# Pool op
maxpool_op
=
Pool
(
ndim
=
len
(
maxpoolshp
),
ignore_border
=
ignore_border
,
mode
=
mode
)(
images
,
maxpoolshp
)
output_shape
=
Pool
.
out_shape
(
imval
.
shape
,
maxpoolshp
,
ndim
=
len
(
maxpoolshp
),
ignore_border
=
ignore_border
)
utt
.
assert_allclose
(
numpy
.
asarray
(
output_shape
),
numpy_output_val
.
shape
)
f
=
function
([],
maxpool_op
)
output_val
=
f
()
utt
.
assert_allclose
(
output_val
,
numpy_output_val
)
def
test_DownsampleFactorMaxStride
(
self
):
rng
=
numpy
.
random
.
RandomState
(
utt
.
fetch_seed
())
maxpoolshps
=
((
1
,
1
),
(
3
,
3
),
(
5
,
3
),
(
16
,
16
))
stridesizes
=
((
1
,
1
),
(
3
,
3
),
(
5
,
7
),)
# generate random images
imval
=
rng
.
rand
(
4
,
10
,
16
,
16
)
# The same for each mode
outputshps
=
(
(
4
,
10
,
16
,
16
),
(
4
,
10
,
6
,
6
),
(
4
,
10
,
4
,
3
),
(
4
,
10
,
16
,
16
),
(
4
,
10
,
6
,
6
),
(
4
,
10
,
4
,
3
),
(
4
,
10
,
14
,
14
),
(
4
,
10
,
5
,
5
),
(
4
,
10
,
3
,
2
),
(
4
,
10
,
14
,
14
),
(
4
,
10
,
6
,
6
),
(
4
,
10
,
4
,
3
),
(
4
,
10
,
12
,
14
),
(
4
,
10
,
4
,
5
),
(
4
,
10
,
3
,
2
),
(
4
,
10
,
12
,
14
),
(
4
,
10
,
5
,
6
),
(
4
,
10
,
4
,
3
),
(
4
,
10
,
1
,
1
),
(
4
,
10
,
1
,
1
),
(
4
,
10
,
1
,
1
),
(
4
,
10
,
1
,
1
),
(
4
,
10
,
1
,
1
),
(
4
,
10
,
1
,
1
),)
images
=
tensor
.
dtensor4
()
indx
=
0
for
mode
,
maxpoolshp
,
ignore_border
in
product
([
'max'
,
'sum'
,
'average_inc_pad'
,
'average_exc_pad'
],
maxpoolshps
,
[
True
,
False
]):
for
stride
in
stridesizes
:
outputshp
=
outputshps
[
indx
%
len
(
outputshps
)]
indx
+=
1
# Pool op
numpy_output_val
=
\
self
.
numpy_max_pool_2d_stride
(
imval
,
maxpoolshp
,
ignore_border
,
stride
,
mode
)
assert
numpy_output_val
.
shape
==
outputshp
,
(
"outshape is
%
s, calculated shape is
%
s"
%
(
outputshp
,
numpy_output_val
.
shape
))
maxpool_op
=
\
Pool
(
ignore_border
=
ignore_border
,
mode
=
mode
)(
images
,
maxpoolshp
,
stride
)
f
=
function
([
images
],
maxpool_op
)
output_val
=
f
(
imval
)
utt
.
assert_allclose
(
output_val
,
numpy_output_val
)
# maxpool, stride, ignore_border, input, output sizes
examples
=
(
((
1
,
1
),
(
1
,
1
),
True
,
(
4
,
10
,
16
,
16
),
(
4
,
10
,
16
,
16
)),
((
1
,
1
),
(
3
,
3
),
True
,
(
4
,
10
,
16
,
16
),
(
4
,
10
,
6
,
6
)),
((
1
,
1
),
(
5
,
7
),
True
,
(
4
,
10
,
16
,
16
),
(
4
,
10
,
4
,
3
)),
((
1
,
1
),
(
1
,
1
),
False
,
(
4
,
10
,
16
,
16
),
(
4
,
10
,
16
,
16
)),
((
1
,
1
),
(
3
,
3
),
False
,
(
4
,
10
,
16
,
16
),
(
4
,
10
,
6
,
6
)),
((
1
,
1
),
(
5
,
7
),
False
,
(
4
,
10
,
16
,
16
),
(
4
,
10
,
4
,
3
)),
((
3
,
3
),
(
1
,
1
),
True
,
(
4
,
10
,
16
,
16
),
(
4
,
10
,
14
,
14
)),
((
3
,
3
),
(
3
,
3
),
True
,
(
4
,
10
,
16
,
16
),
(
4
,
10
,
5
,
5
)),
((
3
,
3
),
(
5
,
7
),
True
,
(
4
,
10
,
16
,
16
),
(
4
,
10
,
3
,
2
)),
((
3
,
3
),
(
1
,
1
),
False
,
(
4
,
10
,
16
,
16
),
(
4
,
10
,
14
,
14
)),
((
3
,
3
),
(
3
,
3
),
False
,
(
4
,
10
,
16
,
16
),
(
4
,
10
,
6
,
6
)),
((
3
,
3
),
(
5
,
7
),
False
,
(
4
,
10
,
16
,
16
),
(
4
,
10
,
4
,
3
)),
((
5
,
3
),
(
1
,
1
),
True
,
(
4
,
10
,
16
,
16
),
(
4
,
10
,
12
,
14
)),
((
5
,
3
),
(
3
,
3
),
True
,
(
4
,
10
,
16
,
16
),
(
4
,
10
,
4
,
5
)),
((
5
,
3
),
(
5
,
7
),
True
,
(
4
,
10
,
16
,
16
),
(
4
,
10
,
3
,
2
)),
((
5
,
3
),
(
1
,
1
),
False
,
(
4
,
10
,
16
,
16
),
(
4
,
10
,
12
,
14
)),
((
5
,
3
),
(
3
,
3
),
False
,
(
4
,
10
,
16
,
16
),
(
4
,
10
,
5
,
6
)),
((
5
,
3
),
(
5
,
7
),
False
,
(
4
,
10
,
16
,
16
),
(
4
,
10
,
4
,
3
)),
((
16
,
16
),
(
1
,
1
),
True
,
(
4
,
10
,
16
,
16
),
(
4
,
10
,
1
,
1
)),
((
16
,
16
),
(
3
,
3
),
True
,
(
4
,
10
,
16
,
16
),
(
4
,
10
,
1
,
1
)),
((
16
,
16
),
(
5
,
7
),
True
,
(
4
,
10
,
16
,
16
),
(
4
,
10
,
1
,
1
)),
((
16
,
16
),
(
1
,
1
),
False
,
(
4
,
10
,
16
,
16
),
(
4
,
10
,
1
,
1
)),
((
16
,
16
),
(
3
,
3
),
False
,
(
4
,
10
,
16
,
16
),
(
4
,
10
,
1
,
1
)),
((
16
,
16
),
(
5
,
7
),
False
,
(
4
,
10
,
16
,
16
),
(
4
,
10
,
1
,
1
)),
((
3
,),
(
5
,),
True
,
(
16
,),
(
3
,)),
((
3
,),
(
5
,),
True
,
(
2
,
16
,),
(
2
,
3
,)),
((
5
,),
(
3
,),
True
,
(
2
,
3
,
16
,),
(
2
,
3
,
4
,)),
((
5
,
1
,
3
),
(
3
,
3
,
3
),
True
,
(
2
,
16
,
16
,
16
),
(
2
,
4
,
6
,
5
)),
((
5
,
1
,
3
),
(
3
,
3
,
3
),
True
,
(
4
,
2
,
16
,
16
,
16
),
(
4
,
2
,
4
,
6
,
5
)),
)
for
example
,
mode
in
product
(
examples
,
[
'max'
,
'sum'
,
'average_inc_pad'
,
'average_exc_pad'
]):
(
maxpoolshp
,
stride
,
ignore_border
,
inputshp
,
outputshp
)
=
example
# generate random images
imval
=
rng
.
rand
(
*
inputshp
)
images
=
theano
.
shared
(
imval
)
# Pool op
numpy_output_val
=
\
self
.
numpy_max_pool_nd_stride
(
imval
,
maxpoolshp
,
ignore_border
,
stride
,
mode
)
assert
numpy_output_val
.
shape
==
outputshp
,
(
"outshape is
%
s, calculated shape is
%
s"
%
(
outputshp
,
numpy_output_val
.
shape
))
maxpool_op
=
\
Pool
(
ndim
=
len
(
maxpoolshp
),
ignore_border
=
ignore_border
,
mode
=
mode
)(
images
,
maxpoolshp
,
stride
)
f
=
function
([],
maxpool_op
)
output_val
=
f
()
utt
.
assert_allclose
(
output_val
,
numpy_output_val
)
def
test_DownsampleFactorMaxStrideExtra
(
self
):
rng
=
numpy
.
random
.
RandomState
(
utt
.
fetch_seed
())
...
...
@@ -287,7 +455,8 @@ class TestDownsampleFactorMax(utt.InferShapeTester):
"outshape is
%
s, calculated shape is
%
s"
%
(
outputshp
,
numpy_output_val
.
shape
))
maxpool_op
=
\
Pool
(
ignore_border
=
ignore_border
,
mode
=
mode
)(
Pool
(
ignore_border
=
ignore_border
,
ndim
=
len
(
maxpoolshp
),
mode
=
mode
)(
images
,
maxpoolshp
,
stride
)
f
=
function
([
images
],
maxpool_op
)
output_val
=
f
(
imval
)
...
...
@@ -296,319 +465,351 @@ class TestDownsampleFactorMax(utt.InferShapeTester):
def
test_DownsampleFactorMaxPaddingStride
(
self
):
ignore_border
=
True
# padding does not support ignore_border=False
rng
=
numpy
.
random
.
RandomState
(
utt
.
fetch_seed
())
maxpoolsizes
=
[(
3
,
3
),
(
4
,
4
),
(
3
,
4
),
(
4
,
3
),
(
2
,
2
)]
stridesizes
=
[(
2
,
2
),
(
2
,
2
),
(
1
,
1
),
(
1
,
2
),
(
2
,
2
)]
paddingsizes
=
[(
2
,
2
),
(
1
,
2
),
(
2
,
1
),
(
0
,
0
),
(
1
,
1
)]
imgsizes
=
[(
5
,
5
),
(
5
,
5
),
(
5
,
6
),
(
6
,
5
),
(
5
,
5
)]
m
=
4
# minibatch
c
=
2
# channel size
images
=
tensor
.
dtensor4
()
for
indx
,
mode
in
product
(
numpy
.
arange
(
len
(
maxpoolsizes
)),
[
'max'
,
'sum'
,
'average_inc_pad'
,
'average_exc_pad'
]):
imgsize
=
imgsizes
[
indx
]
imval
=
rng
.
rand
(
m
,
c
,
imgsize
[
0
],
imgsize
[
1
])
-
0.5
stridesize
=
stridesizes
[
indx
]
maxpoolsize
=
maxpoolsizes
[
indx
]
paddingsize
=
paddingsizes
[
indx
]
numpy_output_val
=
self
.
numpy_max_pool_2d_stride_padding
(
imval
,
maxpoolsize
,
ignore_border
,
# maxpool, stride, padding, input sizes
examples
=
(
((
3
,),
(
2
,),
(
2
,),
(
5
,)),
((
3
,),
(
2
,),
(
2
,),
(
4
,
5
)),
((
3
,),
(
2
,),
(
2
,),
(
4
,
2
,
5
)),
((
3
,),
(
2
,),
(
2
,),
(
4
,
2
,
5
,
5
)),
((
3
,
3
),
(
2
,
2
),
(
2
,
2
),
(
4
,
2
,
5
,
5
)),
((
4
,
4
),
(
2
,
2
),
(
1
,
2
),
(
4
,
2
,
5
,
5
)),
((
3
,
4
),
(
1
,
1
),
(
2
,
1
),
(
4
,
2
,
5
,
6
)),
((
4
,
3
),
(
1
,
2
),
(
0
,
0
),
(
4
,
2
,
6
,
5
)),
((
2
,
2
),
(
2
,
2
),
(
1
,
1
),
(
4
,
2
,
5
,
5
)),
((
4
,
3
,
2
),
(
1
,
2
,
2
),
(
0
,
2
,
1
),
(
4
,
6
,
6
,
5
)),
((
4
,
3
,
2
),
(
1
,
2
,
2
),
(
0
,
2
,
1
),
(
4
,
2
,
6
,
5
,
5
)),
)
for
example
,
mode
in
product
(
examples
,
[
'max'
,
'sum'
,
'average_inc_pad'
,
'average_exc_pad'
]):
(
maxpoolshp
,
stridesize
,
paddingsize
,
inputsize
)
=
example
imval
=
rng
.
rand
(
*
inputsize
)
-
0.5
images
=
theano
.
shared
(
imval
)
numpy_output_val
=
self
.
numpy_max_pool_nd_stride_padding
(
imval
,
maxpoolshp
,
ignore_border
,
stridesize
,
paddingsize
,
mode
)
maxpool_op
=
Pool
(
ignore_border
=
ignore_border
,
mode
=
mode
)(
images
,
maxpoolsize
,
stridesize
,
paddingsize
)
f
=
function
([
images
],
maxpool_op
)
output_val
=
f
(
imval
)
maxpool_op
=
Pool
(
ndim
=
len
(
maxpoolshp
),
ignore_border
=
ignore_border
,
mode
=
mode
)(
images
,
maxpoolshp
,
stridesize
,
paddingsize
)
f
=
function
([],
maxpool_op
)
output_val
=
f
()
utt
.
assert_allclose
(
output_val
,
numpy_output_val
)
def
test_DownsampleFactorMaxPaddingStride_grad
(
self
):
rng
=
numpy
.
random
.
RandomState
(
utt
.
fetch_seed
())
imgsizes
=
((
10
,
10
),
(
10
,
5
),
(
5
,
5
))
maxpoolsizes
=
((
5
,
3
),
(
3
,
5
),
(
3
,
3
))
stridesizes
=
((
3
,
2
),
(
2
,
3
),
(
3
,
3
))
paddingsizes
=
((
2
,
2
),
(
2
,
1
),
(
2
,
2
))
# maxpool, stride, padding, input sizes
examples
=
(
((
10
,),
(
5
,),
(
3
,),
(
2
,)),
((
10
,),
(
5
,),
(
3
,),
(
2
,
2
)),
((
10
,),
(
5
,),
(
3
,),
(
1
,
1
,
2
)),
((
10
,
10
),
(
5
,
3
),
(
3
,
2
),
(
1
,
1
,
2
,
2
)),
((
10
,
5
),
(
3
,
5
),
(
2
,
3
),
(
1
,
1
,
2
,
1
)),
((
5
,
5
),
(
3
,
3
),
(
3
,
3
),
(
1
,
1
,
2
,
2
)),
((
5
,
5
,
5
),
(
3
,
3
,
3
),
(
3
,
3
,
3
),
(
1
,
1
,
2
,
2
,
2
)),
)
# average_inc_pad and average_exc_pad do not
# support grad with padding
for
mode
in
[
'max'
,
'sum'
]:
for
i
in
range
(
len
(
imgsizes
)):
imgsize
=
imgsizes
[
i
]
imval
=
rng
.
rand
(
1
,
1
,
imgsize
[
0
],
imgsize
[
1
])
*
10.0
maxpoolsize
=
maxpoolsizes
[
i
]
stridesize
=
stridesizes
[
i
]
paddingsize
=
paddingsizes
[
i
]
for
example
in
examples
:
(
maxpoolshp
,
stridesize
,
paddingsize
,
inputsize
)
=
example
imval
=
rng
.
rand
(
*
inputsize
)
*
10.0
def
mp
(
input
):
return
Pool
(
ignore_border
=
True
,
mode
=
mode
)(
input
,
maxpoolsize
,
stridesize
,
paddingsize
)
return
Pool
(
ndim
=
len
(
maxpoolshp
),
ignore_border
=
True
,
mode
=
mode
,
)(
input
,
maxpoolshp
,
stridesize
,
paddingsize
)
utt
.
verify_grad
(
mp
,
[
imval
],
rng
=
rng
)
def
test_DownsampleFactorMax_grad
(
self
):
rng
=
numpy
.
random
.
RandomState
(
utt
.
fetch_seed
())
maxpoolshps
=
((
1
,
1
),
(
3
,
2
),
(
2
,
3
))
imval
=
rng
.
rand
(
2
,
3
,
3
,
4
)
*
10.0
# more variance means numeric gradient will be more accurate
for
maxpoolshp
,
ignore_border
,
mode
in
product
(
maxpoolshps
,
[
True
,
False
],
[
'max'
,
'sum'
,
'average_inc_pad'
,
'average_exc_pad'
]):
# maxpool, input sizes
examples
=
(
((
2
,),
(
3
,)),
((
2
,),
(
2
,
3
)),
((
2
,),
(
2
,
3
,
3
)),
((
1
,
1
),
(
2
,
3
,
3
,
4
)),
((
3
,
2
),
(
2
,
3
,
3
,
4
)),
((
2
,
3
),
(
2
,
3
,
3
,
4
)),
((
1
,
1
,
1
),
(
2
,
3
,
3
)),
((
3
,
2
,
2
),
(
2
,
3
,
3
,
4
)),
((
2
,
3
,
2
),
(
2
,
3
,
3
,
4
,
4
)),
((
2
,
2
,
3
),
(
2
,
3
,
3
,
4
,
4
)),
)
for
example
,
ignore_border
,
mode
in
product
(
examples
,
[
True
,
False
],
[
'max'
,
'sum'
,
'average_inc_pad'
,
'average_exc_pad'
]):
(
maxpoolshp
,
inputsize
)
=
example
imval
=
rng
.
rand
(
*
inputsize
)
*
10.0
# more variance means numeric gradient will be more accurate
def
mp
(
input
):
return
Pool
(
ignore_border
=
ignore_border
,
mode
=
mode
)(
input
,
maxpoolshp
)
return
Pool
(
ndim
=
len
(
maxpoolshp
),
ignore_border
=
ignore_border
,
mode
=
mode
)(
input
,
maxpoolshp
)
utt
.
verify_grad
(
mp
,
[
imval
],
rng
=
rng
)
def
test_DownsampleFactorMax_grad_st
(
self
):
# pool, stride, input sizes
pool_grad_st_examples
=
(
((
1
,),
(
1
,),
(
16
,)),
((
1
,),
(
3
,),
(
1
,
16
)),
((
1
,),
(
5
,),
(
1
,
2
,
16
)),
((
2
,),
(
1
,),
(
16
,)),
((
2
,),
(
3
,),
(
1
,
16
)),
((
2
,),
(
5
,),
(
1
,
2
,
16
)),
((
1
,
1
),
(
1
,
1
),
(
1
,
2
,
16
,
16
)),
((
1
,
1
),
(
3
,
3
),
(
1
,
2
,
16
,
16
)),
((
1
,
1
),
(
5
,
7
),
(
1
,
2
,
16
,
16
)),
((
3
,
3
),
(
1
,
1
),
(
1
,
2
,
16
,
16
)),
((
3
,
3
),
(
3
,
3
),
(
1
,
2
,
16
,
16
)),
((
3
,
3
),
(
5
,
7
),
(
1
,
2
,
16
,
16
)),
((
5
,
3
),
(
1
,
1
),
(
1
,
2
,
16
,
16
)),
((
5
,
3
),
(
3
,
3
),
(
1
,
2
,
16
,
16
)),
((
5
,
3
),
(
5
,
7
),
(
1
,
2
,
16
,
16
)),
((
5
,
1
,
2
),
(
1
,
1
,
1
),
(
16
,
3
,
16
)),
((
5
,
1
,
2
),
(
3
,
1
,
2
),
(
1
,
16
,
3
,
16
)),
((
5
,
1
,
2
),
(
5
,
1
,
4
),
(
1
,
2
,
16
,
3
,
16
)),
((
5
,
3
),
(
3
,
2
),
(
1
,
2
,
16
,
16
)),
((
5
,
3
),
(
7
,
5
),
(
1
,
2
,
16
,
16
)),
((
5
,
3
),
(
10
,
6
),
(
1
,
2
,
16
,
16
)),
((
5
,
5
),
(
1
,
1
),
(
1
,
2
,
8
,
5
)),
((
3
,
2
),
(
2
,
3
),
(
1
,
2
,
8
,
5
)),
((
7
,
7
),
(
10
,
10
),
(
1
,
2
,
8
,
5
)),
((
9
,
9
),
(
1
,
1
),
(
1
,
2
,
8
,
5
)),
)
@parameterized.expand
(
product
(
pool_grad_st_examples
,
[
True
,
False
],
[
'max'
,
'sum'
,
'average_inc_pad'
,
'average_exc_pad'
]),
testcase_func_name
=
utt
.
custom_name_func
)
def
test_DownsampleFactorMax_grad_st
(
self
,
example
,
ignore_border
,
mode
):
# checks the gradient for the case that stride is used
rng
=
numpy
.
random
.
RandomState
(
utt
.
fetch_seed
())
maxpoolshps
=
((
1
,
1
),
(
3
,
3
),
(
5
,
3
))
stridesizes
=
((
1
,
1
),
(
3
,
3
),
(
5
,
7
))
imval
=
rng
.
rand
(
1
,
2
,
16
,
16
)
for
maxpoolshp
,
ignore_border
,
mode
,
stride
in
product
(
maxpoolshps
,
[
True
,
False
],
[
'max'
,
'sum'
,
'average_inc_pad'
,
'average_exc_pad'
],
stridesizes
):
def
mp
(
input
):
return
Pool
(
ignore_border
=
ignore_border
,
mode
=
mode
)(
input
,
maxpoolshp
,
stride
)
utt
.
verify_grad
(
mp
,
[
imval
],
rng
=
rng
)
def
test_DownsampleFactorMax_grad_st_extra
(
self
):
# checks the gradient for the case
# that stride is used for extra examples
rng
=
numpy
.
random
.
RandomState
(
utt
.
fetch_seed
())
maxpoolshps
=
((
5
,
3
),
(
5
,
3
),
(
5
,
3
),
(
5
,
5
),
(
3
,
2
),
(
7
,
7
),
(
9
,
9
))
stridesizes
=
((
3
,
2
),
(
7
,
5
),
(
10
,
6
),
(
1
,
1
),
(
2
,
3
),
(
10
,
10
),
(
1
,
1
))
imvsizs
=
((
16
,
16
),
(
16
,
16
),
(
16
,
16
),
(
8
,
5
),
(
8
,
5
),
(
8
,
5
),
(
8
,
5
))
(
maxpoolshp
,
stridesize
,
inputsize
)
=
example
imval
=
rng
.
rand
(
*
inputsize
)
for
mode
in
[
'max'
,
'sum'
,
'average_inc_pad'
,
'average_exc_pad'
]:
for
indx
in
numpy
.
arange
(
len
(
maxpoolshps
)):
imvsize
=
imvsizs
[
indx
]
imval
=
rng
.
rand
(
1
,
2
,
imvsize
[
0
],
imvsize
[
1
])
stride
=
stridesizes
[
indx
]
maxpoolshp
=
maxpoolshps
[
indx
]
for
ignore_border
in
[
True
,
False
]:
def
mp
(
input
):
return
Pool
(
ignore_border
=
ignore_border
,
mode
=
mode
)(
input
,
maxpoolshp
,
stride
)
utt
.
verify_grad
(
mp
,
[
imval
],
rng
=
rng
)
def
mp
(
input
):
return
Pool
(
ndim
=
len
(
maxpoolshp
),
ignore_border
=
ignore_border
,
mode
=
mode
)(
input
,
maxpoolshp
,
stridesize
)
utt
.
verify_grad
(
mp
,
[
imval
],
rng
=
rng
)
def
test_DownsampleFactorMaxGrad_grad
(
self
):
rng
=
numpy
.
random
.
RandomState
(
utt
.
fetch_seed
())
maxpoolshps
=
((
1
,
1
),
(
3
,
2
),
(
2
,
3
))
imval
=
rng
.
rand
(
2
,
3
,
3
,
4
)
*
10.0
# more variance means numeric gradient will be more accurate
for
maxpoolshp
in
maxpoolshps
:
# maxpool, input sizes
examples
=
(
((
2
,),
(
2
,)),
((
2
,),
(
2
,
3
)),
((
1
,
1
),
(
2
,
3
,
3
,
4
)),
((
3
,
2
),
(
2
,
3
,
3
,
4
)),
((
2
,
3
),
(
2
,
3
,
3
,
4
)),
((
1
,
1
,
1
),
(
2
,
3
,
3
,
4
)),
((
3
,
2
,
2
),
(
2
,
3
,
3
,
4
)),
((
2
,
3
,
2
),
(
2
,
3
,
3
,
4
)),
((
2
,
2
,
3
),
(
2
,
3
,
3
,
4
)),
((
2
,
2
,
3
),
(
2
,
1
,
3
,
3
,
4
)),
)
for
(
maxpoolshp
,
inputsize
)
in
examples
:
imval
=
rng
.
rand
(
*
inputsize
)
*
10.0
# more variance means numeric gradient will be more accurate
for
ignore_border
in
[
True
,
False
]:
# print 'maxpoolshp =', maxpoolshp
# print 'ignore_border =', ignore_border
# The shape of the gradient will be the shape of the output
grad_shape
=
Pool
.
out_shape
(
imval
.
shape
,
maxpoolshp
,
ignore_border
=
ignore_border
)
imval
.
shape
,
maxpoolshp
,
ndim
=
len
(
maxpoolshp
),
ignore_border
=
ignore_border
)
grad_val
=
rng
.
rand
(
*
grad_shape
)
*
10.0
def
mp
(
input
,
grad
):
out
=
Pool
(
ignore_border
=
ignore_border
)(
input
,
maxpoolshp
)
grad_op
=
MaxPoolGrad
(
ignore_border
=
ignore_border
)
out
=
Pool
(
ndim
=
len
(
maxpoolshp
),
ignore_border
=
ignore_border
)(
input
,
maxpoolshp
)
grad_op
=
MaxPoolGrad
(
ndim
=
len
(
maxpoolshp
),
ignore_border
=
ignore_border
)
return
grad_op
(
input
,
out
,
grad
,
maxpoolshp
)
utt
.
verify_grad
(
mp
,
[
imval
,
grad_val
],
rng
=
rng
)
def
test_AveragePoolGrad_grad
(
self
):
rng
=
numpy
.
random
.
RandomState
(
utt
.
fetch_seed
())
avgpoolshps
=
((
1
,
1
),
(
3
,
2
),
(
2
,
3
))
imval
=
rng
.
rand
(
2
,
3
,
3
,
4
)
*
10.0
# more variance means numeric gradient will be more accurate
for
avgpoolshp
in
avgpoolshps
:
# avgpool, input sizes
examples
=
(
((
2
,),
(
2
,)),
((
2
,),
(
2
,
3
)),
((
1
,
1
),
(
2
,
3
,
3
,
4
)),
((
3
,
2
),
(
2
,
3
,
3
,
4
)),
((
2
,
3
),
(
2
,
3
,
3
,
4
)),
((
1
,
1
,
1
),
(
2
,
3
,
3
,
4
)),
((
3
,
2
,
2
),
(
2
,
3
,
3
,
4
)),
((
2
,
3
,
2
),
(
2
,
3
,
3
,
4
)),
((
2
,
2
,
3
),
(
2
,
3
,
3
,
4
)),
((
2
,
2
,
3
),
(
2
,
1
,
3
,
3
,
4
)),
)
for
(
avgpoolshp
,
inputsize
)
in
examples
:
imval
=
rng
.
rand
(
*
inputsize
)
*
10.0
# more variance means numeric gradient will be more accurate
for
ignore_border
in
[
True
,
False
]:
for
mode
in
[
'sum'
,
'average_inc_pad'
,
'average_exc_pad'
]:
# print 'maxpoolshp =', maxpoolshp
# print 'ignore_border =', ignore_border
# The shape of the gradient will be the shape of the output
grad_shape
=
Pool
.
out_shape
(
imval
.
shape
,
avgpoolshp
,
ignore_border
=
ignore_border
)
imval
.
shape
,
avgpoolshp
,
ndim
=
len
(
avgpoolshp
),
ignore_border
=
ignore_border
)
grad_val
=
rng
.
rand
(
*
grad_shape
)
*
10.0
def
mp
(
input
,
grad
):
grad_op
=
AveragePoolGrad
(
ignore_border
=
ignore_border
,
mode
=
mode
)
grad_op
=
AveragePoolGrad
(
ndim
=
len
(
avgpoolshp
),
ignore_border
=
ignore_border
,
mode
=
mode
)
return
grad_op
(
input
,
grad
,
avgpoolshp
)
utt
.
verify_grad
(
mp
,
[
imval
,
grad_val
],
rng
=
rng
)
def
test_DownsampleFactorMaxGrad_grad_st
(
self
):
@parameterized.expand
(
product
(
pool_grad_st_examples
,
[
True
,
False
]),
testcase_func_name
=
utt
.
custom_name_func
)
def
test_DownsampleFactorMaxGrad_grad_st
(
self
,
example
,
ignore_border
):
# checks the gradient of the gradient for
# the case that stride is used
rng
=
numpy
.
random
.
RandomState
(
utt
.
fetch_seed
())
maxpoolshps
=
((
1
,
1
),
(
3
,
3
),
(
5
,
3
))
stridesizes
=
((
1
,
1
),
(
3
,
3
),
(
5
,
7
))
imval
=
rng
.
rand
(
1
,
2
,
16
,
16
)
(
maxpoolshp
,
stride
,
inputsize
)
=
example
imval
=
rng
.
rand
(
*
inputsize
)
grad_shape
=
Pool
.
out_shape
(
imval
.
shape
,
maxpoolshp
,
ndim
=
len
(
maxpoolshp
),
ignore_border
=
ignore_border
,
st
=
stride
)
for
maxpoolshp
in
maxpoolshps
:
for
ignore_border
in
[
True
,
False
]:
for
stride
in
stridesizes
:
grad_shape
=
Pool
.
out_shape
(
imval
.
shape
,
maxpoolshp
,
ignore_border
=
ignore_border
,
st
=
stride
)
grad_val
=
rng
.
rand
(
*
grad_shape
)
# skip the grad verification when the output is empty
if
numpy
.
prod
(
grad_shape
)
!=
0
:
grad_val
=
rng
.
rand
(
*
grad_shape
)
def
mp
(
input
,
grad
):
out
=
Pool
(
ignore_border
=
ignore_border
)(
input
,
maxpoolshp
,
stride
)
grad_op
=
MaxPoolGrad
(
ignore_border
=
ignore_border
)
return
grad_op
(
input
,
out
,
grad
,
maxpoolshp
,
stride
)
def
mp
(
input
,
grad
):
out
=
Pool
(
ndim
=
len
(
maxpoolshp
),
ignore_border
=
ignore_border
)(
input
,
maxpoolshp
,
stride
)
grad_op
=
MaxPoolGrad
(
ndim
=
len
(
maxpoolshp
),
ignore_border
=
ignore_border
)
return
grad_op
(
input
,
out
,
grad
,
maxpoolshp
,
stride
)
utt
.
verify_grad
(
mp
,
[
imval
,
grad_val
],
rng
=
rng
)
utt
.
verify_grad
(
mp
,
[
imval
,
grad_val
],
rng
=
rng
)
def
test_AveragePoolGrad_grad_st
(
self
):
@parameterized.expand
(
product
(
pool_grad_st_examples
,
[
True
,
False
],
[
'sum'
,
'average_inc_pad'
,
'average_exc_pad'
]),
testcase_func_name
=
utt
.
custom_name_func
)
def
test_AveragePoolGrad_grad_st
(
self
,
example
,
ignore_border
,
mode
):
# checks the gradient of the gradient for
# the case that stride is used
rng
=
numpy
.
random
.
RandomState
(
utt
.
fetch_seed
())
avgpoolshps
=
((
1
,
1
),
(
3
,
3
),
(
5
,
3
))
stridesizes
=
((
1
,
1
),
(
3
,
3
),
(
5
,
7
))
imval
=
rng
.
rand
(
1
,
2
,
16
,
16
)
for
avgpoolshp
in
avgpoolshps
:
for
ignore_border
in
[
True
,
False
]:
for
mode
in
[
'sum'
,
'average_inc_pad'
,
'average_exc_pad'
]:
for
stride
in
stridesizes
:
grad_shape
=
Pool
.
out_shape
(
imval
.
shape
,
avgpoolshp
,
ignore_border
=
ignore_border
,
st
=
stride
)
grad_val
=
rng
.
rand
(
*
grad_shape
)
def
mp
(
input
,
grad
):
grad_op
=
AveragePoolGrad
(
ignore_border
=
ignore_border
,
mode
=
mode
)
return
grad_op
(
input
,
grad
,
avgpoolshp
,
stride
)
utt
.
verify_grad
(
mp
,
[
imval
,
grad_val
],
rng
=
rng
)
def
test_DownsampleFactorMaxGrad_grad_st_extra
(
self
):
# checks the gradient of the gradient for the case that
# stride is used for extra examples
rng
=
numpy
.
random
.
RandomState
(
utt
.
fetch_seed
())
maxpoolshps
=
((
5
,
3
),
(
5
,
3
),
(
5
,
3
),
(
5
,
5
),
(
3
,
2
),
(
7
,
7
),
(
9
,
9
))
stridesizes
=
((
3
,
2
),
(
7
,
5
),
(
10
,
6
),
(
1
,
1
),
(
2
,
3
),
(
10
,
10
),
(
1
,
1
))
imvsizs
=
((
16
,
16
),
(
16
,
16
),
(
16
,
16
),
(
8
,
5
),
(
8
,
5
),
(
8
,
5
),
(
8
,
5
))
for
indx
in
numpy
.
arange
(
len
(
maxpoolshps
)):
imvsize
=
imvsizs
[
indx
]
imval
=
rng
.
rand
(
1
,
2
,
imvsize
[
0
],
imvsize
[
1
])
stride
=
stridesizes
[
indx
]
maxpoolshp
=
maxpoolshps
[
indx
]
for
ignore_border
in
[
True
,
False
]:
grad_shape
=
Pool
.
out_shape
(
imval
.
shape
,
maxpoolshp
,
ignore_border
=
ignore_border
,
st
=
stride
)
grad_val
=
rng
.
rand
(
*
grad_shape
)
def
mp
(
input
,
grad
):
out
=
Pool
(
ignore_border
=
ignore_border
)(
input
,
maxpoolshp
,
stride
)
grad_op
=
MaxPoolGrad
(
ignore_border
=
ignore_border
)
return
grad_op
(
input
,
out
,
grad
,
maxpoolshp
,
stride
)
# skip the grad verification when the output is empty
if
numpy
.
prod
(
grad_shape
)
==
0
:
continue
utt
.
verify_grad
(
mp
,
[
imval
,
grad_val
],
rng
=
rng
)
def
test_AveragePoolGrad_grad_st_extra
(
self
):
# checks the gradient of the gradient for the case that
# stride is used for extra examples
rng
=
numpy
.
random
.
RandomState
(
utt
.
fetch_seed
())
avgpoolshps
=
((
5
,
3
),
(
5
,
3
),
(
5
,
3
),
(
5
,
5
),
(
3
,
2
),
(
7
,
7
),
(
9
,
9
))
stridesizes
=
((
3
,
2
),
(
7
,
5
),
(
10
,
6
),
(
1
,
1
),
(
2
,
3
),
(
10
,
10
),
(
1
,
1
))
imvsizs
=
((
16
,
16
),
(
16
,
16
),
(
16
,
16
),
(
8
,
5
),
(
8
,
5
),
(
8
,
5
),
(
8
,
5
))
(
avgpoolshp
,
stride
,
inputsize
)
=
example
imval
=
rng
.
rand
(
*
inputsize
)
grad_shape
=
Pool
.
out_shape
(
imval
.
shape
,
avgpoolshp
,
ndim
=
len
(
avgpoolshp
),
ignore_border
=
ignore_border
,
st
=
stride
)
for
indx
in
numpy
.
arange
(
len
(
avgpoolshps
)):
imvsize
=
imvsizs
[
indx
]
imval
=
rng
.
rand
(
1
,
2
,
imvsize
[
0
],
imvsize
[
1
])
stride
=
stridesizes
[
indx
]
avgpoolshp
=
avgpoolshps
[
indx
]
for
ignore_border
in
[
True
,
False
]:
for
mode
in
[
'sum'
,
'average_inc_pad'
,
'average_exc_pad'
]:
grad_shape
=
Pool
.
out_shape
(
imval
.
shape
,
avgpoolshp
,
ignore_border
=
ignore_border
,
st
=
stride
)
grad_val
=
rng
.
rand
(
*
grad_shape
)
# skip the grad verification when the output is empty
if
numpy
.
prod
(
grad_shape
)
!=
0
:
grad_val
=
rng
.
rand
(
*
grad_shape
)
def
mp
(
input
,
grad
):
grad_op
=
AveragePoolGrad
(
ignore_border
=
ignore_border
,
mode
=
mode
)
return
grad_op
(
input
,
grad
,
avgpoolshp
,
stride
)
def
mp
(
input
,
grad
):
grad_op
=
AveragePoolGrad
(
ndim
=
len
(
avgpoolshp
),
ignore_border
=
ignore_border
,
mode
=
mode
)
return
grad_op
(
input
,
grad
,
avgpoolshp
,
stride
)
# skip the grad verification when the output is empty
if
numpy
.
prod
(
grad_shape
)
==
0
:
continue
utt
.
verify_grad
(
mp
,
[
imval
,
grad_val
],
rng
=
rng
)
utt
.
verify_grad
(
mp
,
[
imval
,
grad_val
],
rng
=
rng
)
def
test_DownsampleFactorMaxPaddingStride_grad_grad
(
self
):
rng
=
numpy
.
random
.
RandomState
(
utt
.
fetch_seed
())
imgsizes
=
((
10
,
10
),
(
10
,
5
),
(
5
,
5
))
maxpoolsizes
=
((
5
,
3
),
(
3
,
5
),
(
3
,
3
))
stridesizes
=
((
3
,
2
),
(
2
,
3
),
(
3
,
3
))
paddingsizes
=
((
2
,
2
),
(
2
,
1
),
(
2
,
2
))
for
i
in
range
(
len
(
imgsizes
)):
imgsize
=
imgsizes
[
i
]
imval
=
rng
.
rand
(
1
,
1
,
imgsize
[
0
],
imgsize
[
1
])
*
10.0
maxpoolsize
=
maxpoolsizes
[
i
]
stridesize
=
stridesizes
[
i
]
paddingsize
=
paddingsizes
[
i
]
# maxpool, stride, padding, input sizes
examples
=
(
((
3
,),
(
2
,),
(
2
,),
(
10
,)),
((
3
,),
(
2
,),
(
2
,),
(
2
,
10
,)),
((
3
,),
(
2
,),
(
2
,),
(
2
,
1
,
10
,)),
((
5
,
3
),
(
3
,
2
),
(
2
,
2
),
(
1
,
1
,
10
,
10
)),
((
5
,
3
),
(
3
,
2
),
(
2
,
2
),
(
1
,
1
,
10
,
10
)),
((
3
,
5
),
(
2
,
3
),
(
2
,
1
),
(
1
,
1
,
10
,
5
)),
((
3
,
3
),
(
3
,
3
),
(
2
,
2
),
(
1
,
1
,
5
,
5
)),
((
5
,
3
,
3
),
(
3
,
2
,
2
),
(
2
,
2
,
2
),
(
1
,
1
,
10
,
5
,
5
)),
((
3
,
5
,
3
),
(
2
,
3
,
2
),
(
2
,
1
,
2
),
(
1
,
1
,
5
,
10
,
5
)),
((
3
,
3
,
5
),
(
2
,
2
,
3
),
(
2
,
2
,
1
),
(
1
,
1
,
5
,
5
,
10
)),
)
for
(
maxpoolshp
,
stridesize
,
paddingsize
,
inputsize
)
in
examples
:
imval
=
rng
.
rand
(
*
inputsize
)
*
10.0
grad_shape
=
Pool
.
out_shape
(
imval
.
shape
,
maxpoolsize
,
st
=
stridesize
,
maxpoolshp
,
ndim
=
len
(
maxpoolshp
),
st
=
stridesize
,
ignore_border
=
True
,
padding
=
paddingsize
)
grad_val
=
rng
.
rand
(
*
grad_shape
)
*
10.0
def
mp
(
input
,
grad
):
out
=
Pool
(
ignore_border
=
True
)(
input
,
maxpoolsize
,
stridesize
,
paddingsize
)
grad_op
=
MaxPoolGrad
(
ignore_border
=
True
)
return
grad_op
(
input
,
out
,
grad
,
maxpoolsize
,
stridesize
,
paddingsize
)
out
=
Pool
(
ndim
=
len
(
maxpoolshp
),
ignore_border
=
True
,
)(
input
,
maxpoolshp
,
stridesize
,
paddingsize
)
grad_op
=
MaxPoolGrad
(
ndim
=
len
(
maxpoolshp
),
ignore_border
=
True
)
return
grad_op
(
input
,
out
,
grad
,
maxpoolshp
,
stridesize
,
paddingsize
)
utt
.
verify_grad
(
mp
,
[
imval
,
grad_val
],
rng
=
rng
)
def
test_AveragePoolPaddingStride_grad_grad
(
self
):
rng
=
numpy
.
random
.
RandomState
(
utt
.
fetch_seed
())
imgsizes
=
((
10
,
10
),
(
10
,
5
),
(
5
,
5
))
avgpoolsizes
=
((
5
,
3
),
(
3
,
5
),
(
3
,
3
))
stridesizes
=
((
3
,
2
),
(
2
,
3
),
(
3
,
3
))
paddingsizes
=
((
2
,
2
),
(
2
,
1
),
(
2
,
2
))
for
i
in
range
(
len
(
imgsizes
)):
imgsize
=
imgsizes
[
i
]
imval
=
rng
.
rand
(
1
,
1
,
imgsize
[
0
],
imgsize
[
1
])
*
10.0
avgpoolsize
=
avgpoolsizes
[
i
]
stridesize
=
stridesizes
[
i
]
paddingsize
=
paddingsizes
[
i
]
# avgpool, stride, padding, input sizes
examples
=
(
((
3
,),
(
2
,),
(
2
,),
(
10
,)),
((
3
,),
(
2
,),
(
2
,),
(
2
,
10
,)),
((
3
,),
(
2
,),
(
2
,),
(
2
,
1
,
10
,)),
((
5
,
3
),
(
3
,
2
),
(
2
,
2
),
(
1
,
1
,
10
,
10
)),
((
5
,
3
),
(
3
,
2
),
(
2
,
2
),
(
1
,
1
,
10
,
10
)),
((
3
,
5
),
(
2
,
3
),
(
2
,
1
),
(
1
,
1
,
10
,
5
)),
((
3
,
3
),
(
3
,
3
),
(
2
,
2
),
(
1
,
1
,
5
,
5
)),
((
5
,
3
,
3
),
(
3
,
2
,
2
),
(
2
,
2
,
2
),
(
1
,
1
,
10
,
5
,
5
)),
((
3
,
5
,
3
),
(
2
,
3
,
2
),
(
2
,
1
,
2
),
(
1
,
1
,
5
,
10
,
5
)),
((
3
,
3
,
5
),
(
2
,
2
,
3
),
(
2
,
2
,
1
),
(
1
,
1
,
5
,
5
,
10
)),
)
for
(
avgpoolshp
,
stridesize
,
paddingsize
,
inputsize
)
in
examples
:
imval
=
rng
.
rand
(
*
inputsize
)
*
10.0
# 'average_exc_pad' with non-zero padding is not implemented
for
mode
in
[
'sum'
,
'average_inc_pad'
]:
grad_shape
=
Pool
.
out_shape
(
imval
.
shape
,
avgpoolsize
,
st
=
stridesize
,
ignore_border
=
True
,
padding
=
paddingsize
)
avgpoolshp
,
ndim
=
len
(
avgpoolshp
),
st
=
stridesize
,
ignore_border
=
True
,
padding
=
paddingsize
)
grad_val
=
rng
.
rand
(
*
grad_shape
)
*
10.0
def
mp
(
input
,
grad
):
grad_op
=
AveragePoolGrad
(
ignore_border
=
True
,
mode
=
mode
)
return
grad_op
(
input
,
grad
,
avgpoolsize
,
stridesize
,
paddingsize
)
grad_op
=
AveragePoolGrad
(
ndim
=
len
(
avgpoolshp
),
ignore_border
=
True
,
mode
=
mode
)
return
grad_op
(
input
,
grad
,
avgpoolshp
,
stridesize
,
paddingsize
)
utt
.
verify_grad
(
mp
,
[
imval
,
grad_val
],
rng
=
rng
)
def
test_DownsampleFactorMax_hessian
(
self
):
...
...
@@ -630,26 +831,31 @@ class TestDownsampleFactorMax(utt.InferShapeTester):
def
test_DownsampleFactorMaxGradGrad_grad
(
self
):
rng
=
numpy
.
random
.
RandomState
(
utt
.
fetch_seed
())
imgsizes
=
((
10
,
10
),
(
10
,
5
),
(
5
,
5
))
maxpoolsizes
=
((
5
,
3
),
(
3
,
5
),
(
3
,
3
))
stridesizes
=
((
3
,
2
),
(
2
,
3
),
(
3
,
3
))
paddingsizes
=
((
2
,
2
),
(
2
,
1
),
(
2
,
2
))
for
i
in
range
(
len
(
imgsizes
)):
imgsize
=
imgsizes
[
i
]
imval1
=
rng
.
rand
(
1
,
1
,
imgsize
[
0
],
imgsize
[
1
])
*
10.0
imval2
=
rng
.
rand
(
1
,
1
,
imgsize
[
0
],
imgsize
[
1
])
*
10.0
maxpoolsize
=
maxpoolsizes
[
i
]
stridesize
=
stridesizes
[
i
]
paddingsize
=
paddingsizes
[
i
]
# maxpool, stride, padding, input sizes
examples
=
(
((
3
,),
(
2
,),
(
2
,),
(
10
,)),
((
3
,),
(
2
,),
(
2
,),
(
2
,
10
,)),
((
3
,),
(
2
,),
(
2
,),
(
2
,
1
,
10
,)),
((
5
,
3
),
(
3
,
2
),
(
2
,
2
),
(
1
,
1
,
10
,
10
)),
((
5
,
3
),
(
3
,
2
),
(
2
,
2
),
(
1
,
1
,
10
,
10
)),
((
3
,
5
),
(
2
,
3
),
(
2
,
1
),
(
1
,
1
,
10
,
5
)),
((
3
,
3
),
(
3
,
3
),
(
2
,
2
),
(
1
,
1
,
5
,
5
)),
((
5
,
3
,
3
),
(
3
,
2
,
2
),
(
2
,
2
,
2
),
(
1
,
1
,
10
,
5
,
5
)),
((
3
,
5
,
3
),
(
2
,
3
,
2
),
(
2
,
1
,
2
),
(
1
,
1
,
5
,
10
,
5
)),
((
3
,
3
,
5
),
(
2
,
2
,
3
),
(
2
,
2
,
1
),
(
1
,
1
,
5
,
5
,
10
)),
)
for
(
maxpoolshp
,
stridesize
,
paddingsize
,
inputsize
)
in
examples
:
imval1
=
rng
.
rand
(
*
inputsize
)
*
10.0
imval2
=
rng
.
rand
(
*
inputsize
)
*
10.0
def
mp
(
input1
,
input2
):
op1
=
Pool
(
ignore_border
=
True
)
pooled_out
=
op1
(
input1
,
maxpools
ize
,
stride
=
stridesize
,
pad
=
paddingsize
)
op2
=
DownsampleFactorMaxGradGrad
(
ignore_border
=
True
)
out
=
op2
(
input1
,
pooled_out
,
input2
,
ws
=
maxpoolsize
,
stride
=
stridesize
,
pad
=
paddingsize
)
op1
=
Pool
(
ndim
=
len
(
maxpoolshp
),
ignore_border
=
True
)
pooled_out
=
op1
(
input1
,
maxpools
hp
,
stridesize
,
paddingsize
)
op2
=
DownsampleFactorMaxGradGrad
(
ndim
=
len
(
maxpoolshp
),
ignore_border
=
True
)
out
=
op2
(
input1
,
pooled_out
,
input2
,
maxpoolshp
,
stridesize
,
paddingsize
)
return
out
utt
.
verify_grad
(
mp
,
[
imval1
,
imval2
],
rng
=
rng
)
...
...
@@ -679,6 +885,32 @@ class TestDownsampleFactorMax(utt.InferShapeTester):
mode
=
mode
)
utt
.
verify_grad
(
mp
,
[
imval
],
rng
=
rng
)
def
test_max_pool_3d_3D
(
self
):
rng
=
numpy
.
random
.
RandomState
(
utt
.
fetch_seed
())
maxpoolshps
=
((
1
,
1
,
1
),
(
3
,
2
,
1
))
imval
=
rng
.
rand
(
4
,
5
,
6
)
images
=
tensor
.
dtensor3
()
for
maxpoolshp
,
ignore_border
,
mode
in
product
(
maxpoolshps
,
[
True
,
False
],
[
'max'
,
'sum'
,
'average_inc_pad'
,
'average_exc_pad'
]):
# print 'maxpoolshp =', maxpoolshp
# print 'ignore_border =', ignore_border
numpy_output_val
=
self
.
numpy_max_pool_nd
(
imval
,
maxpoolshp
,
ignore_border
,
mode
=
mode
)
output
=
pool_3d
(
images
,
maxpoolshp
,
ignore_border
,
mode
=
mode
)
output_val
=
function
([
images
],
output
)(
imval
)
utt
.
assert_allclose
(
output_val
,
numpy_output_val
)
def
mp
(
input
):
return
pool_3d
(
input
,
maxpoolshp
,
ignore_border
,
mode
=
mode
)
utt
.
verify_grad
(
mp
,
[
imval
],
rng
=
rng
)
def
test_max_pool_2d_2D_same_size
(
self
):
rng
=
numpy
.
random
.
RandomState
(
utt
.
fetch_seed
())
test_input_array
=
numpy
.
array
([[[
...
...
编写
预览
Markdown
格式
0%
重试
或
添加新文件
添加附件
取消
您添加了
0
人
到此讨论。请谨慎行事。
请先完成此评论的编辑!
取消
请
注册
或者
登录
后发表评论