Skip to content
项目
群组
代码片段
帮助
当前项目
正在载入...
登录 / 注册
切换导航面板
P
pytensor
项目
项目
详情
活动
周期分析
仓库
仓库
文件
提交
分支
标签
贡献者
图表
比较
统计图
议题
0
议题
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
CI / CD
CI / CD
流水线
作业
日程
统计图
Wiki
Wiki
代码片段
代码片段
成员
成员
折叠边栏
关闭边栏
活动
图像
聊天
创建新问题
作业
提交
问题看板
Open sidebar
testgroup
pytensor
Commits
a16e91f7
提交
a16e91f7
authored
9月 15, 2016
作者:
Gijs van Tulder
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
theano.tensor.signal.Pool with 3D support.
上级
172e699c
显示空白字符变更
内嵌
并排
正在显示
10 个修改的文件
包含
2286 行增加
和
937 行删除
+2286
-937
dnn.py
theano/gpuarray/dnn.py
+60
-19
opt_util.py
theano/gpuarray/opt_util.py
+47
-2
test_dnn.py
theano/gpuarray/tests/test_dnn.py
+283
-5
dnn.py
theano/sandbox/cuda/dnn.py
+63
-23
opt.py
theano/sandbox/cuda/opt.py
+54
-25
opt_util.py
theano/sandbox/cuda/opt_util.py
+47
-2
test_blas.py
theano/sandbox/cuda/tests/test_blas.py
+7
-5
test_dnn.py
theano/sandbox/cuda/tests/test_dnn.py
+102
-21
pool.py
theano/tensor/signal/pool.py
+1128
-572
test_pool.py
theano/tensor/signal/tests/test_pool.py
+495
-263
没有找到文件。
theano/gpuarray/dnn.py
浏览文件 @
a16e91f7
...
@@ -35,7 +35,7 @@ from .nnet import GpuSoftmax
...
@@ -35,7 +35,7 @@ from .nnet import GpuSoftmax
from
.opt
import
(
gpu_seqopt
,
register_opt
,
from
.opt
import
(
gpu_seqopt
,
register_opt
,
op_lifter
,
register_opt2
)
op_lifter
,
register_opt2
)
from
.opt_util
import
alpha_merge
,
output_merge
,
inplace_allocempty
from
.opt_util
import
alpha_merge
,
output_merge
,
inplace_allocempty
,
pad_dims
,
unpad_dims
from
theano.configdefaults
import
SUPPORTED_DNN_CONV_ALGO_BWD_FILTER
from
theano.configdefaults
import
SUPPORTED_DNN_CONV_ALGO_BWD_FILTER
...
@@ -1253,7 +1253,7 @@ class GpuDnnPoolGrad(DnnBase):
...
@@ -1253,7 +1253,7 @@ class GpuDnnPoolGrad(DnnBase):
return
[
shape
[
0
]]
return
[
shape
[
0
]]
def
dnn_pool
(
img
,
ws
,
stride
=
(
1
,
1
),
mode
=
'max'
,
pad
=
(
0
,
0
)
):
def
dnn_pool
(
img
,
ws
,
stride
=
None
,
mode
=
'max'
,
pad
=
None
):
"""
"""
GPU pooling using cuDNN from NVIDIA.
GPU pooling using cuDNN from NVIDIA.
...
@@ -1267,13 +1267,13 @@ def dnn_pool(img, ws, stride=(1, 1), mode='max', pad=(0, 0)):
...
@@ -1267,13 +1267,13 @@ def dnn_pool(img, ws, stride=(1, 1), mode='max', pad=(0, 0)):
img
img
Images to do the pooling over.
Images to do the pooling over.
ws : tuple
ws : tuple
Subsampling window size.
Subsampling window size.
Should have 2 or 3 elements.
stride : tuple
stride : tuple
Subsampling stride (default: (1, 1)).
Subsampling stride (default: (1, 1)
or (1, 1, 1)
).
mode : {'max', 'average_inc_pad', 'average_exc_pad', 'sum'}
mode : {'max', 'average_inc_pad', 'average_exc_pad', 'sum'}
pad : tuple
pad : tuple
(padX, padY) or (padX, padY, padZ)
(padX, padY) or (padX, padY, padZ)
default: (0, 0)
default: (0, 0)
or (0, 0, 0)
.. warning:: The cuDNN library only works with GPU that have a compute
.. warning:: The cuDNN library only works with GPU that have a compute
capability of 3.0 or higer. This means that older GPU will not
capability of 3.0 or higer. This means that older GPU will not
...
@@ -1285,6 +1285,10 @@ def dnn_pool(img, ws, stride=(1, 1), mode='max', pad=(0, 0)):
...
@@ -1285,6 +1285,10 @@ def dnn_pool(img, ws, stride=(1, 1), mode='max', pad=(0, 0)):
"""
"""
img
=
gpu_contiguous
(
img
)
img
=
gpu_contiguous
(
img
)
if
stride
is
None
:
stride
=
(
1
,)
*
len
(
ws
)
if
pad
is
None
:
pad
=
(
0
,)
*
len
(
ws
)
if
mode
==
"sum"
:
if
mode
==
"sum"
:
ret
=
GpuDnnPool
(
mode
=
"average_inc_pad"
)(
img
,
ws
,
stride
,
pad
)
ret
=
GpuDnnPool
(
mode
=
"average_inc_pad"
)(
img
,
ws
,
stride
,
pad
)
context_name
=
ret
.
type
.
context_name
context_name
=
ret
.
type
.
context_name
...
@@ -1868,9 +1872,18 @@ def local_gpua_pool_dnn_alternative(op, ctx_name, inputs, outputs):
...
@@ -1868,9 +1872,18 @@ def local_gpua_pool_dnn_alternative(op, ctx_name, inputs, outputs):
if
not
op
.
ignore_border
:
if
not
op
.
ignore_border
:
return
return
img
,
ws
,
stride
,
pad
=
inputs
img
,
ws
,
stride
,
pad
=
inputs
img
=
as_gpuarray_variable
(
img
,
ctx_name
)
nd
=
op
.
ndim
if
op
.
ndim
else
(
img
.
ndim
-
2
)
if
nd
not
in
(
2
,
3
):
return
img
=
gpu_contiguous
(
as_gpuarray_variable
(
img
,
ctx_name
))
mode
=
op
.
mode
mode
=
op
.
mode
return
dnn_pool
(
gpu_contiguous
(
img
),
ws
,
stride
=
stride
,
pad
=
pad
,
mode
=
mode
)
if
img
.
ndim
==
nd
+
2
:
return
dnn_pool
(
img
,
ws
,
stride
=
stride
,
pad
=
pad
,
mode
=
mode
)
else
:
# reshape to 4D or 5D with 2 non-pooling dimensions
img_padded
=
pad_dims
(
img
,
2
,
nd
)
ret_padded
=
dnn_pool
(
img_padded
,
ws
,
stride
=
stride
,
pad
=
pad
,
mode
=
mode
)
return
unpad_dims
(
ret_padded
,
img
,
2
,
nd
)
@register_opt
(
'cudnn'
,
'fast_compile'
)
@register_opt
(
'cudnn'
,
'fast_compile'
)
...
@@ -1882,17 +1895,33 @@ def local_gpua_pool_dnn_grad_stride(op, ctx_name, inputs, outputs):
...
@@ -1882,17 +1895,33 @@ def local_gpua_pool_dnn_grad_stride(op, ctx_name, inputs, outputs):
if
not
op
.
ignore_border
:
if
not
op
.
ignore_border
:
return
return
inp
,
out
,
out_grad
,
ws
,
stride
,
pad
=
inputs
inp
,
out
,
out_grad
,
ws
,
stride
,
pad
=
inputs
inp
=
as_gpuarray_variable
(
inp
,
ctx_name
)
nd
=
op
.
ndim
if
op
.
ndim
else
(
inp
.
ndim
-
2
)
out
=
as_gpuarray_variable
(
out
,
ctx_name
)
if
nd
not
in
(
2
,
3
):
out_grad
=
as_gpuarray_variable
(
out_grad
,
ctx_name
)
return
inp
=
gpu_contiguous
(
as_gpuarray_variable
(
inp
,
ctx_name
))
out
=
gpu_contiguous
(
as_gpuarray_variable
(
out
,
ctx_name
))
out_grad
=
gpu_contiguous
(
as_gpuarray_variable
(
out_grad
,
ctx_name
))
mode
=
op
.
mode
mode
=
op
.
mode
return
GpuDnnPoolGrad
(
mode
=
mode
)(
gpu_contiguous
(
inp
),
if
inp
.
ndim
==
nd
+
2
:
gpu_contiguous
(
out
),
return
GpuDnnPoolGrad
(
mode
=
mode
)(
inp
,
gpu_contiguous
(
out_grad
),
out
,
out_grad
,
ws
,
ws
,
stride
,
stride
,
pad
)
pad
)
else
:
# reshape to 4D or 5D with 2 non-pooling dimensions
inp_padded
=
pad_dims
(
inp
,
2
,
nd
)
out_padded
=
pad_dims
(
out
,
2
,
nd
)
out_grad_padded
=
pad_dims
(
out_grad
,
2
,
nd
)
ret_padded
=
GpuDnnPoolGrad
(
mode
=
mode
)(
inp_padded
,
out_padded
,
out_grad_padded
,
ws
,
stride
,
pad
)
return
unpad_dims
(
ret_padded
,
inp
,
2
,
nd
)
@register_opt
(
'cudnn'
,
'fast_compile'
)
@register_opt
(
'cudnn'
,
'fast_compile'
)
...
@@ -1904,16 +1933,28 @@ def local_gpua_avg_pool_dnn_grad_stride(op, ctx_name, inputs, outputs):
...
@@ -1904,16 +1933,28 @@ def local_gpua_avg_pool_dnn_grad_stride(op, ctx_name, inputs, outputs):
if
not
op
.
ignore_border
:
if
not
op
.
ignore_border
:
return
return
inp
,
out_grad
,
ws
,
stride
,
pad
=
inputs
inp
,
out_grad
,
ws
,
stride
,
pad
=
inputs
inp
=
as_gpuarray_variable
(
inp
,
ctx_name
)
nd
=
op
.
ndim
if
op
.
ndim
else
(
inp
.
ndim
-
2
)
out_grad
=
as_gpuarray_variable
(
out_grad
,
ctx_name
)
if
nd
not
in
(
2
,
3
):
return
inp
=
gpu_contiguous
(
as_gpuarray_variable
(
inp
,
ctx_name
))
out_grad
=
gpu_contiguous
(
as_gpuarray_variable
(
out_grad
,
ctx_name
))
mode
=
op
.
mode
mode
=
op
.
mode
cg
=
gpu_contiguous
(
out_grad
)
if
inp
.
ndim
==
nd
+
2
:
# We reuse out_grad because cuDNN does not use the value of the `out`
# We reuse cg because cuDNN does not use the value of the `out`
# argument but still checks its shape for average pooling. This
# argument but still checks its shape for average pooling. This
# has been observed in v2 and v3 as far as I know.
# has been observed in v2 and v3 as far as I know.
return
GpuDnnPoolGrad
(
mode
=
mode
)(
gpu_contiguous
(
inp
),
cg
,
cg
,
ws
,
stride
,
pad
)
return
GpuDnnPoolGrad
(
mode
=
mode
)(
inp
,
out_grad
,
out_grad
,
ws
,
stride
,
pad
)
else
:
inp_padded
=
pad_dims
(
inp
,
2
,
nd
)
out_grad_padded
=
pad_dims
(
out_grad
,
2
,
nd
)
ret_padded
=
GpuDnnPoolGrad
(
mode
=
mode
)(
inp_padded
,
out_grad_padded
,
out_grad_padded
,
ws
,
stride
,
pad
)
return
unpad_dims
(
ret_padded
,
inp
,
2
,
nd
)
@register_opt
(
'cudnn'
,
'fast_compile'
)
@register_opt
(
'cudnn'
,
'fast_compile'
)
...
...
theano/gpuarray/opt_util.py
浏览文件 @
a16e91f7
...
@@ -3,12 +3,12 @@ from functools import wraps
...
@@ -3,12 +3,12 @@ from functools import wraps
import
numpy
import
numpy
from
theano
import
scalar
as
scal
,
Constant
from
theano
import
tensor
,
scalar
as
scal
,
Constant
from
theano.gof
import
local_optimizer
from
theano.gof
import
local_optimizer
from
theano.tensor
import
(
DimShuffle
,
get_scalar_constant_value
,
from
theano.tensor
import
(
DimShuffle
,
get_scalar_constant_value
,
NotScalarConstantError
)
NotScalarConstantError
)
from
.basic_ops
import
GpuFromHost
,
HostFromGpu
,
GpuAllocEmpty
,
gpu_alloc_empty
from
.basic_ops
import
GpuFromHost
,
HostFromGpu
,
GpuAllocEmpty
,
GpuReshape
,
gpu_alloc_empty
from
.elemwise
import
GpuDimShuffle
,
GpuElemwise
from
.elemwise
import
GpuDimShuffle
,
GpuElemwise
_one
=
scal
.
constant
(
numpy
.
asarray
(
1.0
,
dtype
=
'float32'
))
_one
=
scal
.
constant
(
numpy
.
asarray
(
1.0
,
dtype
=
'float32'
))
...
@@ -329,3 +329,48 @@ def inplace_allocempty(op, idx):
...
@@ -329,3 +329,48 @@ def inplace_allocempty(op, idx):
return
maker
(
node
,
inputs
)
return
maker
(
node
,
inputs
)
return
opt
return
opt
return
wrapper
return
wrapper
def
pad_dims
(
input
,
leftdims
,
rightdims
):
"""Reshapes the input to a (leftdims + rightdims) tensor
This helper function is used to convert pooling inputs with arbitrary
non-pooling dimensions to the correct number of dimensions for the
GPU pooling ops.
This reduces or expands the number of dimensions of the input to
exactly `leftdims`, by adding extra dimensions on the left or by
combining some existing dimensions on the left of the input.
"""
assert
input
.
ndim
>=
rightdims
if
input
.
ndim
==
(
leftdims
+
rightdims
):
return
input
# extract image dimensions
img_shape
=
input
.
shape
[
-
rightdims
:]
# count the number of "leading" dimensions, store as dmatrix
batch_size
=
tensor
.
prod
(
input
.
shape
[:
-
rightdims
])
batch_size
=
tensor
.
shape_padright
(
batch_size
,
1
)
# store in the required shape, for example as a 4D tensor
# with shape: (batch_size,1,height,width)
new_shape
=
tensor
.
cast
(
tensor
.
join
(
0
,
batch_size
,
tensor
.
as_tensor
([
1
]
*
(
leftdims
-
1
)),
img_shape
),
'int64'
)
input_ND
=
GpuReshape
(
leftdims
+
rightdims
)(
input
,
new_shape
)
return
input_ND
def
unpad_dims
(
output
,
input
,
leftdims
,
rightdims
):
"""Reshapes the output after pad_dims.
This reverts the padding by `pad_dims`.
"""
if
output
.
ndim
==
input
.
ndim
:
return
output
# restore the output to the original shape
outshp
=
tensor
.
join
(
0
,
input
.
shape
[:
-
rightdims
],
output
.
shape
[
-
rightdims
:])
return
GpuReshape
(
input
.
ndim
)(
output
,
outshp
)
theano/gpuarray/tests/test_dnn.py
浏览文件 @
a16e91f7
...
@@ -11,11 +11,12 @@ from six import StringIO
...
@@ -11,11 +11,12 @@ from six import StringIO
import
theano.tensor
as
T
import
theano.tensor
as
T
import
theano.tests.unittest_tools
as
utt
import
theano.tests.unittest_tools
as
utt
from
theano.sandbox.neighbours
import
images2neibs
from
theano.sandbox.neighbours
import
images2neibs
from
theano.tensor.signal.pool
import
pool_2d
from
theano.tensor.signal.pool
import
pool_2d
,
pool_3d
from
theano.tensor.signal.pool
import
MaxPoolGrad
,
AveragePoolGrad
from
theano.tensor.signal.pool
import
Pool
,
MaxPoolGrad
,
AveragePoolGrad
from
..
import
dnn
from
..
import
dnn
from
..basic_ops
import
GpuAllocEmpty
from
..basic_ops
import
GpuAllocEmpty
from
..type
import
gpuarray_shared_constructor
from
.config
import
mode_with_gpu
,
mode_without_gpu
,
test_ctx_name
from
.config
import
mode_with_gpu
,
mode_without_gpu
,
test_ctx_name
from
.
import
test_nnet
from
.
import
test_nnet
...
@@ -166,6 +167,27 @@ def pool_2d_i2n(input, ds=(2, 2), strides=None,
...
@@ -166,6 +167,27 @@ def pool_2d_i2n(input, ds=(2, 2), strides=None,
return
pooled_output
return
pooled_output
def
pool3d2d
(
input
,
ds
=
(
2
,
2
,
2
),
strides
=
None
,
pad
=
(
0
,
0
,
0
),
pool_function
=
T
.
max
,
mode
=
'ignore_borders'
):
if
strides
is
None
:
strides
=
ds
shape
=
input
.
shape
# reshape to B, C*0, 1, 2 and do the pooling on 1, 2
first
=
input
.
reshape
((
shape
[
0
],
shape
[
1
]
*
shape
[
2
],
shape
[
3
],
shape
[
4
]))
pooled1
=
pool_2d_i2n
(
first
,
ds
=
ds
[
1
:],
strides
=
strides
[
1
:],
pad
=
pad
[
1
:],
pool_function
=
pool_function
,
mode
=
mode
)
shp1
=
pooled1
.
shape
# reshape to B, C, 0, 1*2 and do the pooling on 0
second
=
pooled1
.
reshape
((
shape
[
0
],
shape
[
1
],
shape
[
2
],
shp1
[
2
]
*
shp1
[
3
]))
pooled2
=
pool_2d_i2n
(
second
,
ds
=
(
ds
[
0
],
1
),
strides
=
(
strides
[
0
],
1
),
pad
=
(
pad
[
0
],
0
),
pool_function
=
pool_function
,
mode
=
mode
)
shp2
=
pooled2
.
shape
return
pooled2
.
reshape
((
shape
[
0
],
shape
[
1
],
shp2
[
2
],
shp1
[
2
],
shp1
[
3
]))
def
test_pooling
():
def
test_pooling
():
if
not
dnn
.
dnn_available
(
test_ctx_name
):
if
not
dnn
.
dnn_available
(
test_ctx_name
):
raise
SkipTest
(
dnn
.
dnn_available
.
msg
)
raise
SkipTest
(
dnn
.
dnn_available
.
msg
)
...
@@ -178,7 +200,7 @@ def test_pooling():
...
@@ -178,7 +200,7 @@ def test_pooling():
x
=
T
.
ftensor4
()
x
=
T
.
ftensor4
()
for
mode
,
pad
in
product
(
modes
,
for
mode
,
pad
in
product
(
modes
,
((
0
,
0
),
(
1
,
0
),
(
1
,
0
),
(
2
,
3
),
(
3
,
2
))):
((
0
,
0
),
(
1
,
0
),
(
0
,
1
),
(
2
,
3
),
(
3
,
2
))):
if
mode
==
'max'
:
if
mode
==
'max'
:
func
=
T
.
max
func
=
T
.
max
else
:
else
:
...
@@ -278,6 +300,119 @@ def test_pooling():
...
@@ -278,6 +300,119 @@ def test_pooling():
utt
.
assert_allclose
(
c_out
,
g_out
)
utt
.
assert_allclose
(
c_out
,
g_out
)
def
test_pooling_3d
():
if
not
dnn
.
dnn_available
(
test_ctx_name
):
raise
SkipTest
(
dnn
.
dnn_available
.
msg
)
# 'average_exc_pad' is disabled for versions < 4004
if
dnn
.
version
(
raises
=
False
)
<
4004
:
modes
=
(
'max'
,
'average_inc_pad'
)
else
:
modes
=
(
'max'
,
'average_inc_pad'
,
'average_exc_pad'
)
x
=
T
.
ftensor5
()
for
mode
,
pad
in
product
(
modes
,
((
0
,
0
,
0
),
(
1
,
0
,
0
),
(
0
,
1
,
0
),
(
0
,
0
,
1
),
(
3
,
2
,
2
),
(
2
,
3
,
2
),
(
2
,
2
,
3
))):
if
mode
==
'max'
:
func
=
T
.
max
else
:
func
=
T
.
mean
if
pad
!=
(
0
,
0
,
0
)
and
func
is
T
.
mean
:
continue
for
ws
in
(
4
,
2
,
5
):
for
stride
in
(
2
,
3
):
if
stride
>
ws
:
continue
if
pad
[
0
]
>
stride
or
pad
[
1
]
>
stride
or
pad
[
2
]
>
stride
:
# Not implemented
continue
# We will check that the opt introduced it.
out1
=
pool_3d
(
x
,
(
ws
,
ws
,
ws
),
st
=
(
stride
,
stride
,
stride
),
ignore_border
=
True
,
padding
=
pad
,
mode
=
mode
)
out2
=
pool3d2d
(
x
,
ds
=
(
ws
,
ws
,
ws
),
strides
=
(
stride
,
stride
,
stride
),
pad
=
pad
,
pool_function
=
func
)
mode_without_gpu2
=
mode_without_gpu
.
including
()
mode_without_gpu2
.
check_isfinite
=
False
f1
=
theano
.
function
([
x
],
out1
,
mode
=
mode_with_gpu
)
assert
any
([
isinstance
(
node
.
op
,
dnn
.
GpuDnnPool
)
for
node
in
f1
.
maker
.
fgraph
.
apply_nodes
])
f2
=
theano
.
function
([
x
],
out2
,
mode
=
mode_without_gpu2
)
assert
not
any
([
isinstance
(
node
.
op
,
dnn
.
GpuDnnPool
)
for
node
in
f2
.
maker
.
fgraph
.
apply_nodes
])
for
shp
in
[(
1
,
10
,
30
,
30
,
30
),
(
1
,
3
,
29
,
29
,
29
),
(
3
,
1
,
47
,
97
,
47
),
]:
data
=
numpy
.
random
.
normal
(
0
,
1
,
shp
)
.
astype
(
"float32"
)
a
=
f1
(
data
)
b
=
f2
(
data
)
utt
.
assert_allclose
(
a
,
b
)
# Test the grad
for
shp
in
[(
1
,
1
,
2
,
2
,
2
),
(
1
,
1
,
3
,
3
,
3
)]:
data
=
numpy
.
random
.
normal
(
0
,
1
,
shp
)
.
astype
(
"float32"
)
*
10
ws
=
2
stride
=
2
if
pad
[
0
]
>
stride
or
pad
[
1
]
>
stride
or
pad
[
2
]
>
stride
:
# Not implemented
continue
# This test the CPU grad + opt + GPU implemtentation
def
fn
(
x
):
return
pool_3d
(
x
,
(
ws
,
ws
,
ws
),
ignore_border
=
True
,
padding
=
pad
,
mode
=
mode
)
utt
.
verify_grad
(
fn
,
[
data
],
cast_to_output_type
=
False
,
mode
=
mode_with_gpu
)
# Confirm that the opt would have inserted it.
fg
=
theano
.
function
([
x
],
theano
.
grad
(
fn
(
x
)
.
sum
(),
x
),
mode
=
mode_with_gpu
)
assert
any
([
isinstance
(
node
.
op
,
dnn
.
GpuDnnPoolGrad
)
for
node
in
fg
.
maker
.
fgraph
.
toposort
()])
# Test the GPU grad + GPU implementation
def
fn
(
x
):
dnn_op
=
dnn
.
dnn_pool
(
x
,
ws
=
(
ws
,
ws
,
ws
),
stride
=
(
stride
,
stride
,
stride
),
pad
=
pad
,
mode
=
mode
)
return
dnn_op
utt
.
verify_grad
(
fn
,
[
data
],
cast_to_output_type
=
False
,
mode
=
mode_with_gpu
)
# Confirm that we get the good op.
fg
=
theano
.
function
([
x
],
theano
.
grad
(
fn
(
x
)
.
sum
(),
x
),
mode
=
mode_with_gpu
)
assert
any
([
isinstance
(
node
.
op
,
dnn
.
GpuDnnPoolGrad
)
for
node
in
fg
.
maker
.
fgraph
.
toposort
()])
g_out
=
fg
(
data
)
# Compare against the CPU result
out
=
pool_3d
(
x
,
(
ws
,
ws
,
ws
),
padding
=
pad
,
ignore_border
=
True
,
mode
=
mode
)
fc
=
theano
.
function
([
x
],
theano
.
grad
(
out
.
sum
(),
x
),
mode
=
mode_without_gpu
)
if
mode
==
'max'
:
assert
any
([
isinstance
(
node
.
op
,
MaxPoolGrad
)
for
node
in
fc
.
maker
.
fgraph
.
toposort
()])
else
:
assert
any
([
isinstance
(
node
.
op
,
AveragePoolGrad
)
for
node
in
fc
.
maker
.
fgraph
.
toposort
()])
c_out
=
fc
(
data
)
utt
.
assert_allclose
(
c_out
,
g_out
)
def
test_pooling_with_tensor_vars
():
def
test_pooling_with_tensor_vars
():
if
not
dnn
.
dnn_available
(
test_ctx_name
):
if
not
dnn
.
dnn_available
(
test_ctx_name
):
raise
SkipTest
(
dnn
.
dnn_available
.
msg
)
raise
SkipTest
(
dnn
.
dnn_available
.
msg
)
...
@@ -331,6 +466,7 @@ def test_pooling_opt():
...
@@ -331,6 +466,7 @@ def test_pooling_opt():
if
not
dnn
.
dnn_available
(
test_ctx_name
):
if
not
dnn
.
dnn_available
(
test_ctx_name
):
raise
SkipTest
(
dnn
.
dnn_available
.
msg
)
raise
SkipTest
(
dnn
.
dnn_available
.
msg
)
# 2D pooling
x
=
T
.
fmatrix
()
x
=
T
.
fmatrix
()
f
=
theano
.
function
(
f
=
theano
.
function
(
...
@@ -344,6 +480,7 @@ def test_pooling_opt():
...
@@ -344,6 +480,7 @@ def test_pooling_opt():
f
(
numpy
.
zeros
((
10
,
10
),
dtype
=
'float32'
))
f
(
numpy
.
zeros
((
10
,
10
),
dtype
=
'float32'
))
# gradient of 2D pooling
f
=
theano
.
function
(
f
=
theano
.
function
(
[
x
],
[
x
],
T
.
grad
(
pool_2d
(
x
,
ds
=
(
2
,
2
),
mode
=
'average_inc_pad'
,
T
.
grad
(
pool_2d
(
x
,
ds
=
(
2
,
2
),
mode
=
'average_inc_pad'
,
...
@@ -362,11 +499,86 @@ def test_pooling_opt():
...
@@ -362,11 +499,86 @@ def test_pooling_opt():
pool_2d
(
x
,
ds
=
(
2
,
3
),
mode
=
'sum'
,
pool_2d
(
x
,
ds
=
(
2
,
3
),
mode
=
'sum'
,
ignore_border
=
True
),
ignore_border
=
True
),
mode
=
mode_with_gpu
)
mode
=
mode_with_gpu
)
data
=
numpy
.
random
.
rand
(
10
,
10
)
.
astype
(
'float32'
)
f
(
data
)
# 3D pooling
x
=
T
.
ftensor3
()
f
=
theano
.
function
(
[
x
],
pool_3d
(
x
,
ds
=
(
2
,
2
,
2
),
mode
=
'average_inc_pad'
,
ignore_border
=
True
),
mode
=
mode_with_gpu
)
assert
any
([
isinstance
(
n
.
op
,
dnn
.
GpuDnnPool
)
assert
any
([
isinstance
(
n
.
op
,
dnn
.
GpuDnnPool
)
for
n
in
f
.
maker
.
fgraph
.
toposort
()])
for
n
in
f
.
maker
.
fgraph
.
toposort
()])
data
=
numpy
.
random
.
rand
(
10
,
10
)
.
astype
(
'float32'
)
f
(
data
)
f
(
numpy
.
zeros
((
10
,
10
,
10
),
dtype
=
'float32'
))
# gradient of 3D pooling
f
=
theano
.
function
(
[
x
],
T
.
grad
(
pool_3d
(
x
,
ds
=
(
2
,
2
,
2
),
mode
=
'average_inc_pad'
,
ignore_border
=
True
)
.
sum
(),
x
),
mode
=
mode_with_gpu
.
including
(
"cudnn"
))
assert
any
([
isinstance
(
n
.
op
,
dnn
.
GpuDnnPoolGrad
)
for
n
in
f
.
maker
.
fgraph
.
toposort
()])
f
(
numpy
.
zeros
((
10
,
10
,
10
),
dtype
=
'float32'
))
def
test_pooling_opt_arbitrary_dimensions
():
# test if input with an arbitrary number of non-pooling dimensions
# is correctly reshaped to run on the GPU
if
not
dnn
.
dnn_available
(
test_ctx_name
):
raise
SkipTest
(
dnn
.
dnn_available
.
msg
)
# 'average_exc_pad' is disabled for versions < 4004
if
dnn
.
version
(
raises
=
False
)
<
4004
:
modes
=
(
'max'
,
'average_inc_pad'
)
else
:
modes
=
(
'max'
,
'average_inc_pad'
,
'average_exc_pad'
)
for
n_non_pool_dims
in
(
0
,
1
,
2
,
3
):
for
ws
in
((
2
,
2
),
(
3
,
3
,
3
)):
# create input shape: non-pooling dimensions
# followed by 2 or 3 pooling dimensions
shp
=
(
2
,)
*
n_non_pool_dims
+
(
5
,)
*
len
(
ws
)
data
=
numpy
.
random
.
normal
(
0
,
1
,
shp
)
.
astype
(
'float32'
)
input
=
gpuarray_shared_constructor
(
data
)
for
mode
in
modes
:
out_pool
=
Pool
(
ndim
=
len
(
ws
),
mode
=
mode
,
ignore_border
=
True
)(
input
,
ws
)
out_pool_grad
=
T
.
grad
(
T
.
sum
(
out_pool
),
wrt
=
input
)
out
=
[
out_pool
,
out_pool_grad
]
# run on GPU
fg
=
theano
.
function
([],
out
,
mode
=
mode_with_gpu
)
assert
any
([
isinstance
(
node
.
op
,
dnn
.
GpuDnnPool
)
for
node
in
fg
.
maker
.
fgraph
.
toposort
()])
assert
any
([
isinstance
(
node
.
op
,
dnn
.
GpuDnnPoolGrad
)
for
node
in
fg
.
maker
.
fgraph
.
toposort
()])
res_gpu
=
fg
()
# run on CPU
fc
=
theano
.
function
([],
out
,
mode
=
mode_without_gpu
)
assert
any
([
isinstance
(
node
.
op
,
Pool
)
for
node
in
fc
.
maker
.
fgraph
.
toposort
()])
if
mode
==
'max'
:
assert
any
([
isinstance
(
node
.
op
,
MaxPoolGrad
)
for
node
in
fc
.
maker
.
fgraph
.
toposort
()])
else
:
assert
any
([
isinstance
(
node
.
op
,
AveragePoolGrad
)
for
node
in
fc
.
maker
.
fgraph
.
toposort
()])
res_cpu
=
fg
()
# check for similarity
utt
.
assert_allclose
(
res_gpu
[
0
],
res_cpu
[
0
])
utt
.
assert_allclose
(
res_gpu
[
1
],
res_cpu
[
1
])
def
test_dnn_tag
():
def
test_dnn_tag
():
...
@@ -625,6 +837,33 @@ class TestDnnInferShapes(utt.InferShapeTester):
...
@@ -625,6 +837,33 @@ class TestDnnInferShapes(utt.InferShapeTester):
dnn
.
GpuDnnPool
dnn
.
GpuDnnPool
)
)
def
test_pool_3d
(
self
):
if
not
dnn
.
dnn_available
(
test_ctx_name
):
raise
SkipTest
(
dnn
.
dnn_available
.
msg
)
img
=
T
.
ftensor5
(
'img'
)
img_val
=
numpy
.
asarray
(
numpy
.
random
.
rand
(
2
,
3
,
4
,
5
,
6
),
dtype
=
'float32'
)
# 'average_exc_pad' is disabled for versions < 4004
if
dnn
.
version
(
raises
=
False
)
<
4004
:
modes
=
[
'max'
,
'average_inc_pad'
]
else
:
modes
=
[
'max'
,
'average_inc_pad'
,
'average_exc_pad'
]
for
params
in
product
(
[(
1
,
1
,
1
),
(
2
,
2
,
2
),
(
3
,
3
,
3
)],
[(
1
,
1
,
1
),
(
2
,
2
,
2
),
(
3
,
3
,
3
)],
modes
):
self
.
_compile_and_check
(
[
img
],
[
dnn
.
GpuDnnPool
(
mode
=
params
[
2
])(
img
,
params
[
0
],
params
[
1
],
(
0
,
0
,
0
))],
[
img_val
],
dnn
.
GpuDnnPool
)
def
test_pool_grad
(
self
):
def
test_pool_grad
(
self
):
if
not
dnn
.
dnn_available
(
test_ctx_name
):
if
not
dnn
.
dnn_available
(
test_ctx_name
):
raise
SkipTest
(
dnn
.
dnn_available
.
msg
)
raise
SkipTest
(
dnn
.
dnn_available
.
msg
)
...
@@ -664,6 +903,45 @@ class TestDnnInferShapes(utt.InferShapeTester):
...
@@ -664,6 +903,45 @@ class TestDnnInferShapes(utt.InferShapeTester):
dnn
.
GpuDnnPoolGrad
dnn
.
GpuDnnPoolGrad
)
)
def
test_pool_3d_grad
(
self
):
if
not
dnn
.
dnn_available
(
test_ctx_name
):
raise
SkipTest
(
dnn
.
dnn_available
.
msg
)
img
=
T
.
ftensor5
(
'img'
)
img_grad
=
T
.
ftensor5
(
'img_grad'
)
out
=
T
.
ftensor5
(
'out'
)
img_val
=
numpy
.
asarray
(
numpy
.
random
.
rand
(
2
,
3
,
4
,
5
,
6
),
dtype
=
'float32'
)
img_grad_val
=
numpy
.
asarray
(
numpy
.
random
.
rand
(
2
,
3
,
4
,
5
,
6
),
dtype
=
'float32'
)
out_val
=
numpy
.
asarray
(
numpy
.
random
.
rand
(
2
,
3
,
4
,
5
,
6
),
dtype
=
'float32'
)
for
params
in
product
(
[(
1
,
1
,
1
),
(
2
,
2
,
2
),
(
3
,
3
,
3
)],
[(
1
,
1
,
1
),
(
2
,
2
,
2
),
(
3
,
3
,
3
)],
[
'max'
,
'average_inc_pad'
]
):
pool_grad
=
dnn
.
GpuDnnPoolGrad
(
mode
=
params
[
2
])(
img
,
out
,
img_grad
,
params
[
0
],
params
[
1
],
(
0
,
0
,
0
)
)
self
.
_compile_and_check
(
[
img
,
img_grad
,
out
],
[
pool_grad
],
[
img_val
,
img_grad_val
,
out_val
],
dnn
.
GpuDnnPoolGrad
)
# this has been a problem in the past
# this has been a problem in the past
def
test_dnn_conv_border_mode
():
def
test_dnn_conv_border_mode
():
...
...
theano/sandbox/cuda/dnn.py
浏览文件 @
a16e91f7
...
@@ -30,7 +30,8 @@ from theano.sandbox.cuda.basic_ops import (as_cuda_ndarray_variable,
...
@@ -30,7 +30,8 @@ from theano.sandbox.cuda.basic_ops import (as_cuda_ndarray_variable,
from
theano.sandbox.cuda.blas
import
(
GpuConv
,
GpuDownsampleFactorMax
,
from
theano.sandbox.cuda.blas
import
(
GpuConv
,
GpuDownsampleFactorMax
,
GpuDownsampleFactorMaxGrad
)
GpuDownsampleFactorMaxGrad
)
from
theano.sandbox.cuda.nnet
import
GpuSoftmax
from
theano.sandbox.cuda.nnet
import
GpuSoftmax
from
theano.sandbox.cuda.opt_util
import
alpha_merge
,
output_merge
from
theano.sandbox.cuda.opt_util
import
(
alpha_merge
,
output_merge
,
pad_dims
,
unpad_dims
)
from
theano.sandbox.cuda
import
gpu_seqopt
,
register_opt
from
theano.sandbox.cuda
import
gpu_seqopt
,
register_opt
from
theano.sandbox.cuda.nvcc_compiler
import
NVCC_compiler
from
theano.sandbox.cuda.nvcc_compiler
import
NVCC_compiler
...
@@ -1391,20 +1392,23 @@ class GpuDnnPoolDesc(GpuOp):
...
@@ -1391,20 +1392,23 @@ class GpuDnnPoolDesc(GpuOp):
def
do_constant_folding
(
self
,
node
):
def
do_constant_folding
(
self
,
node
):
return
False
return
False
def
__init__
(
self
,
ws
=
(
1
,
1
),
stride
=
(
1
,
1
),
mode
=
'max'
,
pad
=
(
0
,
0
)
):
def
__init__
(
self
,
ws
=
(
1
,
1
),
stride
=
None
,
mode
=
'max'
,
pad
=
None
):
if
mode
==
'average'
:
if
mode
==
'average'
:
mode
=
'average_inc_pad'
mode
=
'average_inc_pad'
assert
mode
in
(
'max'
,
'average_inc_pad'
,
'average_exc_pad'
)
assert
mode
in
(
'max'
,
'average_inc_pad'
,
'average_exc_pad'
)
self
.
mode
=
mode
self
.
mode
=
mode
if
stride
is
None
:
stride
=
(
1
,)
*
len
(
ws
)
if
pad
is
None
:
pad
=
(
0
,)
*
len
(
ws
)
assert
len
(
ws
)
==
len
(
stride
)
and
len
(
stride
)
==
len
(
pad
)
assert
len
(
ws
)
==
len
(
stride
)
and
len
(
stride
)
==
len
(
pad
)
assert
len
(
ws
)
in
(
2
,
3
)
assert
len
(
ws
)
in
(
2
,
3
)
self
.
ws
=
ws
self
.
ws
=
ws
self
.
stride
=
stride
self
.
stride
=
stride
self
.
pad
=
pad
self
.
pad
=
pad
if
(
pad
[
0
]
!=
0
or
pad
[
1
]
!=
0
)
and
version
()
==
-
1
:
raise
RuntimeError
(
"cuDNN pooling with padding requires cuDNN v2"
)
if
self
.
get_ndim
()
==
3
and
version
()
<
(
3000
,
3000
):
if
self
.
get_ndim
()
==
3
and
version
()
<
(
3000
,
3000
):
raise
RuntimeError
(
"cuDNN 3d pooling requires cuDNN v3"
)
raise
RuntimeError
(
"cuDNN 3d pooling requires cuDNN v3"
)
if
(
mode
==
'average_exc_pad'
and
max
(
pad
)
>
0
and
if
(
mode
==
'average_exc_pad'
and
max
(
pad
)
>
0
and
...
@@ -1418,12 +1422,9 @@ class GpuDnnPoolDesc(GpuOp):
...
@@ -1418,12 +1422,9 @@ class GpuDnnPoolDesc(GpuOp):
def
__setstate__
(
self
,
d
):
def
__setstate__
(
self
,
d
):
self
.
__dict__
.
update
(
d
)
self
.
__dict__
.
update
(
d
)
if
not
hasattr
(
self
,
'pad'
):
if
not
hasattr
(
self
,
'pad'
):
self
.
pad
=
(
0
,
0
)
self
.
pad
=
(
0
,
)
*
self
.
get_ndim
(
)
def
make_node
(
self
):
def
make_node
(
self
):
if
self
.
pad
!=
(
0
,
0
)
and
version
()
==
-
1
:
raise
RuntimeError
(
"cuDNN pooling with padding requires cuDNN v2"
)
node
=
Apply
(
self
,
[],
node
=
Apply
(
self
,
[],
[
CDataType
(
"cudnnPoolingDescriptor_t"
,
[
CDataType
(
"cudnnPoolingDescriptor_t"
,
freefunc
=
"cudnnDestroyPoolingDescriptor"
)()])
freefunc
=
"cudnnDestroyPoolingDescriptor"
)()])
...
@@ -1444,8 +1445,6 @@ class GpuDnnPoolDesc(GpuOp):
...
@@ -1444,8 +1445,6 @@ class GpuDnnPoolDesc(GpuOp):
mode_flag
=
'CUDNN_POOLING_AVERAGE_COUNT_INCLUDE_PADDING'
mode_flag
=
'CUDNN_POOLING_AVERAGE_COUNT_INCLUDE_PADDING'
elif
self
.
mode
==
"average_exc_pad"
:
elif
self
.
mode
==
"average_exc_pad"
:
mode_flag
=
'CUDNN_POOLING_AVERAGE_COUNT_EXCLUDE_PADDING'
mode_flag
=
'CUDNN_POOLING_AVERAGE_COUNT_EXCLUDE_PADDING'
if
version
()
==
-
1
:
raise
Exception
(
"cudnn v1 do not support average_exc_pad"
)
else
:
else
:
raise
NotImplementedError
(
"Unsupported pooling model."
)
raise
NotImplementedError
(
"Unsupported pooling model."
)
...
@@ -1616,8 +1615,6 @@ if (pool%(name)s != NULL) { cudnnDestroyPoolingDescriptor(pool%(name)s); }
...
@@ -1616,8 +1615,6 @@ if (pool%(name)s != NULL) { cudnnDestroyPoolingDescriptor(pool%(name)s); }
mode_flag
=
'CUDNN_POOLING_AVERAGE_COUNT_INCLUDE_PADDING'
mode_flag
=
'CUDNN_POOLING_AVERAGE_COUNT_INCLUDE_PADDING'
elif
self
.
mode
==
"average_exc_pad"
:
elif
self
.
mode
==
"average_exc_pad"
:
mode_flag
=
'CUDNN_POOLING_AVERAGE_COUNT_EXCLUDE_PADDING'
mode_flag
=
'CUDNN_POOLING_AVERAGE_COUNT_EXCLUDE_PADDING'
if
version
()
==
-
1
:
raise
Exception
(
"cudnn v1 do not support average_exc_pad"
)
else
:
else
:
raise
NotImplementedError
(
"Unsupported pooling model."
)
raise
NotImplementedError
(
"Unsupported pooling model."
)
...
@@ -1872,8 +1869,6 @@ if (pool%(name)s != NULL) { cudnnDestroyPoolingDescriptor(pool%(name)s); }
...
@@ -1872,8 +1869,6 @@ if (pool%(name)s != NULL) { cudnnDestroyPoolingDescriptor(pool%(name)s); }
mode_flag
=
'CUDNN_POOLING_AVERAGE_COUNT_INCLUDE_PADDING'
mode_flag
=
'CUDNN_POOLING_AVERAGE_COUNT_INCLUDE_PADDING'
elif
self
.
mode
==
"average_exc_pad"
:
elif
self
.
mode
==
"average_exc_pad"
:
mode_flag
=
'CUDNN_POOLING_AVERAGE_COUNT_EXCLUDE_PADDING'
mode_flag
=
'CUDNN_POOLING_AVERAGE_COUNT_EXCLUDE_PADDING'
if
version
()
==
-
1
:
raise
Exception
(
"cudnn v1 do not support average_exc_pad"
)
else
:
else
:
raise
NotImplementedError
(
"Unsupported pooling model."
)
raise
NotImplementedError
(
"Unsupported pooling model."
)
...
@@ -1976,28 +1971,33 @@ if (err%(name)s != CUDNN_STATUS_SUCCESS) {
...
@@ -1976,28 +1971,33 @@ if (err%(name)s != CUDNN_STATUS_SUCCESS) {
return
[
shape
[
0
]]
return
[
shape
[
0
]]
def
dnn_pool
(
img
,
ws
,
stride
=
(
1
,
1
),
mode
=
'max'
,
pad
=
(
0
,
0
)
):
def
dnn_pool
(
img
,
ws
,
stride
=
None
,
mode
=
'max'
,
pad
=
None
):
"""
"""
GPU pooling using cuDNN from NVIDIA.
GPU pooling using cuDNN from NVIDIA.
The memory layout to use is 'bc01', that is 'batch', 'channel',
For 2D pooling, the memory layout to use is 'bc01', that is 'batch',
'first dim', 'second dim' in that order.
'channel', 'first dim', 'second dim' in that order.
For 3D pooling, the memory layout to use is 'bc012', that is 'batch',
'channel', 'first dim', 'second dim', 'third dim'.
Parameters
Parameters
----------
----------
img
img
Images to do the pooling over.
Images to do the pooling over.
ws
ws
Subsampling window size.
Subsampling window size.
Should have 2 or 3 elements.
stride
stride
Subsampling stride (default: (1, 1)).
Subsampling stride (default: (1, 1)
or (1, 1, 1)
).
mode : {'max', 'average_inc_pad', 'average_exc_pad, 'sum'}
mode : {'max', 'average_inc_pad', 'average_exc_pad
'
, 'sum'}
pad
:
pad
(pad_h, pad_w) padding information
.
Padding: (pad_h, pad_w) for 2D or (pad_h, pad_w, pad_d) for 3D
.
pad_h is the number of zero-valued pixels added to each of the top and
pad_h is the number of zero-valued pixels added to each of the top and
bottom borders.
bottom borders.
pad_w is the number of zero-valued pixels added to each of the left
pad_w is the number of zero-valued pixels added to each of the left
and right borders.
and right borders.
pad_d is the number of zero-valued pixels added to each of the front
and back borders (3D pooling only).
.. warning:: The cuDNN library only works with GPU that have a compute
.. warning:: The cuDNN library only works with GPU that have a compute
capability of 3.0 or higer. This means that older GPU will not
capability of 3.0 or higer. This means that older GPU will not
...
@@ -2009,6 +2009,10 @@ def dnn_pool(img, ws, stride=(1, 1), mode='max', pad=(0, 0)):
...
@@ -2009,6 +2009,10 @@ def dnn_pool(img, ws, stride=(1, 1), mode='max', pad=(0, 0)):
"""
"""
img
=
gpu_contiguous
(
img
)
img
=
gpu_contiguous
(
img
)
if
stride
is
None
:
stride
=
(
1
,)
*
len
(
ws
)
if
pad
is
None
:
pad
=
(
0
,)
*
len
(
ws
)
if
mode
==
"sum"
:
if
mode
==
"sum"
:
ret
=
GpuDnnPool
(
mode
=
"average_inc_pad"
)(
img
,
ws
,
stride
,
pad
)
ret
=
GpuDnnPool
(
mode
=
"average_inc_pad"
)(
img
,
ws
,
stride
,
pad
)
window_elem
=
theano
.
tensor
.
prod
(
ws
)
.
astype
(
ret
.
dtype
)
window_elem
=
theano
.
tensor
.
prod
(
ws
)
.
astype
(
ret
.
dtype
)
...
@@ -2972,10 +2976,21 @@ if True:
...
@@ -2972,10 +2976,21 @@ if True:
if
not
node
.
op
.
ignore_border
:
if
not
node
.
op
.
ignore_border
:
return
return
img
,
ws
,
stride
,
pad
=
node
.
inputs
img
,
ws
,
stride
,
pad
=
node
.
inputs
nd
=
node
.
op
.
ndim
if
node
.
op
.
ndim
else
(
img
.
ndim
-
2
)
mode
=
node
.
op
.
mode
mode
=
node
.
op
.
mode
if
nd
not
in
(
2
,
3
):
return
if
(
img
.
owner
and
isinstance
(
img
.
owner
.
op
,
HostFromGpu
)):
if
(
img
.
owner
and
isinstance
(
img
.
owner
.
op
,
HostFromGpu
)):
if
img
.
ndim
==
nd
+
2
:
ret
=
dnn_pool
(
gpu_contiguous
(
img
.
owner
.
inputs
[
0
]),
ret
=
dnn_pool
(
gpu_contiguous
(
img
.
owner
.
inputs
[
0
]),
ws
,
stride
=
stride
,
pad
=
pad
,
mode
=
mode
)
ws
,
stride
=
stride
,
pad
=
pad
,
mode
=
mode
)
else
:
input
=
gpu_contiguous
(
img
.
owner
.
inputs
[
0
])
# reshape to 4D or 5D with 2 non-pooling dimensions
input_padded
=
pad_dims
(
input
,
2
,
nd
)
ret_padded
=
dnn_pool
(
input_padded
,
ws
,
stride
=
stride
,
pad
=
pad
,
mode
=
mode
)
ret
=
unpad_dims
(
ret_padded
,
input
,
2
,
nd
)
return
[
host_from_gpu
(
ret
)]
return
[
host_from_gpu
(
ret
)]
@register_opt
(
'cudnn'
)
@register_opt
(
'cudnn'
)
...
@@ -3003,17 +3018,30 @@ if True:
...
@@ -3003,17 +3018,30 @@ if True:
if
not
node
.
op
.
ignore_border
:
if
not
node
.
op
.
ignore_border
:
return
return
inp
,
out
,
inp_grad
,
ws
,
stride
,
pad
=
node
.
inputs
inp
,
out
,
inp_grad
,
ws
,
stride
,
pad
=
node
.
inputs
nd
=
node
.
op
.
ndim
if
node
.
op
.
ndim
else
(
inp
.
ndim
-
2
)
mode
=
node
.
op
.
mode
mode
=
node
.
op
.
mode
if
nd
not
in
(
2
,
3
):
return
if
((
inp
.
owner
and
isinstance
(
inp
.
owner
.
op
,
HostFromGpu
))
or
if
((
inp
.
owner
and
isinstance
(
inp
.
owner
.
op
,
HostFromGpu
))
or
(
out
.
owner
and
isinstance
(
out
.
owner
.
op
,
HostFromGpu
))
or
(
out
.
owner
and
isinstance
(
out
.
owner
.
op
,
HostFromGpu
))
or
(
inp_grad
.
owner
and
isinstance
(
inp_grad
.
owner
.
op
,
(
inp_grad
.
owner
and
isinstance
(
inp_grad
.
owner
.
op
,
HostFromGpu
))):
HostFromGpu
))):
if
inp
.
ndim
==
nd
+
2
:
ret
=
GpuDnnPoolGrad
(
mode
=
mode
)(
gpu_contiguous
(
inp
),
ret
=
GpuDnnPoolGrad
(
mode
=
mode
)(
gpu_contiguous
(
inp
),
gpu_contiguous
(
out
),
gpu_contiguous
(
out
),
gpu_contiguous
(
inp_grad
),
gpu_contiguous
(
inp_grad
),
ws
,
stride
,
pad
)
ws
,
stride
,
pad
)
else
:
# reshape to 4D or 5D with 2 non-pooling dimensions
inp_padded
=
pad_dims
(
gpu_contiguous
(
inp
),
2
,
nd
)
out_padded
=
pad_dims
(
gpu_contiguous
(
out
),
2
,
nd
)
inp_grad_padded
=
pad_dims
(
gpu_contiguous
(
inp_grad
),
2
,
nd
)
ret_padded
=
GpuDnnPoolGrad
(
mode
=
mode
)(
inp_padded
,
out_padded
,
inp_grad_padded
,
ws
,
stride
,
pad
)
ret
=
unpad_dims
(
ret_padded
,
inp
,
2
,
nd
)
return
[
host_from_gpu
(
ret
)]
return
[
host_from_gpu
(
ret
)]
@register_opt
(
'cudnn'
)
@register_opt
(
'cudnn'
)
...
@@ -3025,16 +3053,28 @@ if True:
...
@@ -3025,16 +3053,28 @@ if True:
if
not
node
.
op
.
ignore_border
:
if
not
node
.
op
.
ignore_border
:
return
return
inp
,
inp_grad
,
ws
,
stride
,
pad
=
node
.
inputs
inp
,
inp_grad
,
ws
,
stride
,
pad
=
node
.
inputs
nd
=
node
.
op
.
ndim
if
node
.
op
.
ndim
else
(
inp
.
ndim
-
2
)
mode
=
node
.
op
.
mode
mode
=
node
.
op
.
mode
if
nd
not
in
(
2
,
3
):
return
if
((
inp
.
owner
and
isinstance
(
inp
.
owner
.
op
,
HostFromGpu
))
or
if
((
inp
.
owner
and
isinstance
(
inp
.
owner
.
op
,
HostFromGpu
))
or
(
inp_grad
.
owner
and
isinstance
(
inp_grad
.
owner
.
op
,
(
inp_grad
.
owner
and
isinstance
(
inp_grad
.
owner
.
op
,
HostFromGpu
))):
HostFromGpu
))):
if
inp
.
ndim
==
nd
+
2
:
contiguous_inp_grad
=
gpu_contiguous
(
inp_grad
)
contiguous_inp_grad
=
gpu_contiguous
(
inp_grad
)
ret
=
GpuDnnPoolGrad
(
mode
=
mode
)(
gpu_contiguous
(
inp
),
ret
=
GpuDnnPoolGrad
(
mode
=
mode
)(
gpu_contiguous
(
inp
),
contiguous_inp_grad
,
contiguous_inp_grad
,
contiguous_inp_grad
,
contiguous_inp_grad
,
ws
,
stride
,
pad
)
ws
,
stride
,
pad
)
else
:
inp_padded
=
pad_dims
(
gpu_contiguous
(
inp
),
2
,
nd
)
inp_grad_padded
=
pad_dims
(
gpu_contiguous
(
inp_grad
),
2
,
nd
)
ret_padded
=
GpuDnnPoolGrad
(
mode
=
mode
)(
inp_padded
,
inp_grad_padded
,
inp_grad_padded
,
ws
,
stride
,
pad
)
ret
=
unpad_dims
(
ret_padded
,
inp
,
2
,
nd
)
return
[
host_from_gpu
(
ret
)]
return
[
host_from_gpu
(
ret
)]
@register_opt
(
'cudnn'
)
@register_opt
(
'cudnn'
)
...
...
theano/sandbox/cuda/opt.py
浏览文件 @
a16e91f7
...
@@ -40,6 +40,7 @@ from theano.sandbox.cuda.basic_ops import (
...
@@ -40,6 +40,7 @@ from theano.sandbox.cuda.basic_ops import (
GpuSubtensor
,
GpuAdvancedSubtensor1
,
GpuSubtensor
,
GpuAdvancedSubtensor1
,
GpuAdvancedIncSubtensor1
,
GpuAdvancedIncSubtensor1_dev20
,
GpuAdvancedIncSubtensor1
,
GpuAdvancedIncSubtensor1_dev20
,
GpuIncSubtensor
,
gpu_alloc
,
GpuAlloc
,
gpu_shape
,
GpuSplit
,
GpuAllocEmpty
)
GpuIncSubtensor
,
gpu_alloc
,
GpuAlloc
,
gpu_shape
,
GpuSplit
,
GpuAllocEmpty
)
from
theano.sandbox.cuda.opt_util
import
pad_dims
,
unpad_dims
from
theano.sandbox.cuda.type
import
CudaNdarrayType
from
theano.sandbox.cuda.type
import
CudaNdarrayType
from
theano.sandbox.cuda.blas
import
(
from
theano.sandbox.cuda.blas
import
(
...
@@ -1891,15 +1892,12 @@ def local_convtransp3d_gemm(node):
...
@@ -1891,15 +1892,12 @@ def local_convtransp3d_gemm(node):
gpu_optimizer
.
register
(
"convtransp3d_gemm"
,
local_convtransp3d_gemm
)
gpu_optimizer
.
register
(
"convtransp3d_gemm"
,
local_convtransp3d_gemm
)
def
_check_constant_args_pool
(
ws
,
stride
,
pad
,
node
):
def
_check_constant_args_pool
(
ndim
,
ws
,
stride
,
pad
,
node
):
"""Check if the args of pool are constants. Warns if not."""
"""Check if the args of pool are constants. Warns if not."""
try
:
try
:
ws_w
=
tensor
.
get_scalar_constant_value
(
ws
[
0
])
ws
=
tuple
(
tensor
.
get_scalar_constant_value
(
ws
[
i
])
for
i
in
range
(
ndim
))
ws_h
=
tensor
.
get_scalar_constant_value
(
ws
[
1
])
stride
=
tuple
(
tensor
.
get_scalar_constant_value
(
stride
[
i
])
for
i
in
range
(
ndim
))
stride_w
=
tensor
.
get_scalar_constant_value
(
stride
[
0
])
pad
=
tuple
(
tensor
.
get_scalar_constant_value
(
pad
[
i
])
for
i
in
range
(
ndim
))
stride_h
=
tensor
.
get_scalar_constant_value
(
stride
[
1
])
pad_w
=
tensor
.
get_scalar_constant_value
(
pad
[
0
])
pad_h
=
tensor
.
get_scalar_constant_value
(
pad
[
1
])
except
tensor
.
NotScalarConstantError
:
except
tensor
.
NotScalarConstantError
:
msg
=
(
"Pool with tensor variable for the window size, stride or "
msg
=
(
"Pool with tensor variable for the window size, stride or "
"padding is only supported in the new GPU backend, so this op "
"padding is only supported in the new GPU backend, so this op "
...
@@ -1909,65 +1907,96 @@ def _check_constant_args_pool(ws, stride, pad, node):
...
@@ -1909,65 +1907,96 @@ def _check_constant_args_pool(ws, stride, pad, node):
elif
config
.
assert_no_cpu_op
==
"raise"
:
elif
config
.
assert_no_cpu_op
==
"raise"
:
raise
AssertionError
(
msg
)
raise
AssertionError
(
msg
)
return
None
return
None
ws
=
(
ws_w
,
ws_h
)
stride
=
(
stride_w
,
stride_h
)
pad
=
(
pad_w
,
pad_h
)
return
ws
,
stride
,
pad
return
ws
,
stride
,
pad
@register_opt
()
@register_opt
()
@local_optimizer
([
pool
.
Pool
])
@local_optimizer
([
pool
.
Pool
])
def
local_gpu_downsample_factor_max
(
node
):
def
local_gpu_downsample_factor_max
(
node
):
if
isinstance
(
node
.
op
,
pool
.
Pool
):
if
(
isinstance
(
node
.
op
,
pool
.
Pool
)
):
assert
node
.
op
.
__props__
==
(
'ignore_border'
,
'mode'
)
assert
node
.
op
.
__props__
==
(
'
ndim'
,
'
ignore_border'
,
'mode'
)
x
,
ws
,
stride
,
pad
=
node
.
inputs
x
,
ws
,
stride
,
pad
=
node
.
inputs
ret
=
_check_constant_args_pool
(
ws
,
stride
,
pad
,
node
)
nd
=
node
.
op
.
ndim
if
node
.
op
.
ndim
else
(
x
.
ndim
-
2
)
ret
=
_check_constant_args_pool
(
nd
,
ws
,
stride
,
pad
,
node
)
if
ret
is
None
:
if
ret
is
None
:
return
return
ws
,
stride
,
pad
=
ret
ws
,
stride
,
pad
=
ret
if
(
pad
)
!=
(
0
,
0
)
or
node
.
op
.
mode
!=
'max'
or
stride
!=
ws
:
if
(
nd
!=
2
or
max
(
node
.
op
.
padding
)
!=
0
or
node
.
op
.
mode
!=
'max'
or
stride
!=
ws
):
return
return
if
(
x
.
owner
and
isinstance
(
x
.
owner
.
op
,
HostFromGpu
)):
if
(
x
.
owner
and
isinstance
(
x
.
owner
.
op
,
HostFromGpu
)):
gpu_ds
=
GpuDownsampleFactorMax
(
ws
,
node
.
op
.
ignore_border
)
gpu_ws
=
GpuDownsampleFactorMax
(
ws
,
node
.
op
.
ignore_border
)
return
[
host_from_gpu
(
gpu_ds
(
x
.
owner
.
inputs
[
0
]))]
if
node
.
inputs
[
0
]
.
ndim
==
4
:
return
[
host_from_gpu
(
gpu_ws
(
x
.
owner
.
inputs
[
0
]))]
else
:
input_4D
=
pad_dims
(
x
.
owner
.
inputs
[
0
],
2
,
2
)
output_4D
=
gpu_ws
(
input_4D
)
output
=
unpad_dims
(
output_4D
,
x
.
owner
.
inputs
[
0
],
2
,
2
)
return
[
host_from_gpu
(
output
)]
@register_opt
()
@register_opt
()
@local_optimizer
([
pool
.
MaxPoolGrad
])
@local_optimizer
([
pool
.
MaxPoolGrad
])
def
local_gpu_downsample_factor_max_grad
(
node
):
def
local_gpu_downsample_factor_max_grad
(
node
):
if
isinstance
(
node
.
op
,
pool
.
MaxPoolGrad
):
if
(
isinstance
(
node
.
op
,
pool
.
MaxPoolGrad
)
):
assert
node
.
op
.
__props__
==
(
'ignore_border'
,
'mode'
)
assert
node
.
op
.
__props__
==
(
'
ndim'
,
'
ignore_border'
,
'mode'
)
x
,
z
,
gz
,
ws
,
stride
,
pad
=
node
.
inputs
x
,
z
,
gz
,
ws
,
stride
,
pad
=
node
.
inputs
ret
=
_check_constant_args_pool
(
ws
,
stride
,
pad
,
node
)
nd
=
node
.
op
.
ndim
if
node
.
op
.
ndim
else
(
x
.
ndim
-
2
)
ret
=
_check_constant_args_pool
(
nd
,
ws
,
stride
,
pad
,
node
)
if
ret
is
None
:
if
ret
is
None
:
return
return
ws
,
stride
,
pad
=
ret
ws
,
stride
,
pad
=
ret
if
pad
!=
(
0
,
0
)
or
node
.
op
.
mode
!=
'max'
or
stride
!=
ws
:
if
(
nd
!=
2
or
max
(
node
.
op
.
padding
)
!=
0
or
node
.
op
.
mode
!=
'max'
or
stride
!=
ws
):
return
return
if
(
x
.
owner
and
isinstance
(
x
.
owner
.
op
,
HostFromGpu
)):
if
(
x
.
owner
and
isinstance
(
x
.
owner
.
op
,
HostFromGpu
)):
gpu_ds_grad
=
GpuDownsampleFactorMaxGrad
(
ws
,
node
.
op
.
ignore_border
)
gpu_ws_grad
=
GpuDownsampleFactorMaxGrad
(
ws
,
node
.
op
.
ignore_border
)
return
[
host_from_gpu
(
gpu_ds_grad
(
x
.
owner
.
inputs
[
0
],
if
node
.
inputs
[
0
]
.
ndim
==
4
:
return
[
host_from_gpu
(
gpu_ws_grad
(
x
.
owner
.
inputs
[
0
],
as_cuda_ndarray_variable
(
z
),
as_cuda_ndarray_variable
(
z
),
as_cuda_ndarray_variable
(
gz
)))]
as_cuda_ndarray_variable
(
gz
)))]
else
:
x_4D
=
pad_dims
(
x
.
owner
.
inputs
[
0
],
2
,
2
)
z_4D
=
pad_dims
(
as_cuda_ndarray_variable
(
z
),
2
,
2
)
gz_4D
=
pad_dims
(
as_cuda_ndarray_variable
(
gz
),
2
,
2
)
output_4D
=
gpu_ws_grad
(
x_4D
,
z_4D
,
gz_4D
)
output
=
unpad_dims
(
output_4D
,
x
.
owner
.
inputs
[
0
],
2
,
2
)
return
[
host_from_gpu
(
output
)]
@register_opt
()
@register_opt
()
@local_optimizer
([
pool
.
DownsampleFactorMaxGradGrad
])
@local_optimizer
([
pool
.
DownsampleFactorMaxGradGrad
])
def
local_gpu_downsample_factor_max_grad_grad
(
node
):
def
local_gpu_downsample_factor_max_grad_grad
(
node
):
if
isinstance
(
node
.
op
,
pool
.
DownsampleFactorMaxGradGrad
):
if
isinstance
(
node
.
op
,
pool
.
DownsampleFactorMaxGradGrad
):
assert
node
.
op
.
__props__
==
(
'ignore_border'
,
'mode'
)
assert
node
.
op
.
__props__
==
(
'
ndim'
,
'
ignore_border'
,
'mode'
)
x
,
z
,
gx
,
ws
,
stride
,
pad
=
node
.
inputs
x
,
z
,
gx
,
ws
,
stride
,
pad
=
node
.
inputs
ret
=
_check_constant_args_pool
(
ws
,
stride
,
pad
,
node
)
nd
=
node
.
op
.
ndim
if
node
.
op
.
ndim
else
(
x
.
ndim
-
2
)
ret
=
_check_constant_args_pool
(
nd
,
ws
,
stride
,
pad
,
node
)
if
ret
is
None
:
if
ret
is
None
:
return
return
ws
,
stride
,
pad
=
ret
ws
,
stride
,
pad
=
ret
if
pad
!=
(
0
,
0
)
or
node
.
op
.
mode
!=
'max'
or
stride
!=
ws
:
if
(
nd
!=
2
or
max
(
node
.
op
.
padding
)
!=
0
or
node
.
op
.
mode
!=
'max'
or
stride
!=
ws
):
return
return
if
(
x
.
owner
and
isinstance
(
x
.
owner
.
op
,
HostFromGpu
)):
if
(
x
.
owner
and
isinstance
(
x
.
owner
.
op
,
HostFromGpu
)):
op
=
GpuDownsampleFactorMaxGradGrad
(
ws
,
node
.
op
.
ignore_border
)
op
=
GpuDownsampleFactorMaxGradGrad
(
ws
,
node
.
op
.
ignore_border
)
if
node
.
inputs
[
0
]
.
ndim
==
4
:
return
[
host_from_gpu
(
op
(
x
.
owner
.
inputs
[
0
],
return
[
host_from_gpu
(
op
(
x
.
owner
.
inputs
[
0
],
as_cuda_ndarray_variable
(
z
),
as_cuda_ndarray_variable
(
z
),
as_cuda_ndarray_variable
(
gx
)))]
as_cuda_ndarray_variable
(
gx
)))]
else
:
x_4D
=
pad_dims
(
x
.
owner
.
inputs
[
0
],
2
,
2
)
z_4D
=
pad_dims
(
as_cuda_ndarray_variable
(
z
),
2
,
2
)
gx_4D
=
pad_dims
(
as_cuda_ndarray_variable
(
gx
),
2
,
2
)
output_4D
=
op
(
x_4D
,
z_4D
,
gx_4D
)
output
=
unpad_dims
(
output_4D
,
x
.
owner
.
inputs
[
0
],
2
,
2
)
return
[
host_from_gpu
(
output
)]
@register_opt
()
@register_opt
()
...
...
theano/sandbox/cuda/opt_util.py
浏览文件 @
a16e91f7
...
@@ -3,13 +3,13 @@ from functools import wraps
...
@@ -3,13 +3,13 @@ from functools import wraps
import
numpy
import
numpy
from
theano
import
scalar
as
scal
,
Constant
from
theano
import
tensor
,
scalar
as
scal
,
Constant
from
theano.gof
import
local_optimizer
from
theano.gof
import
local_optimizer
from
theano.tensor
import
(
DimShuffle
,
get_scalar_constant_value
,
from
theano.tensor
import
(
DimShuffle
,
get_scalar_constant_value
,
NotScalarConstantError
)
NotScalarConstantError
)
from
theano.sandbox.cuda.basic_ops
import
(
from
theano.sandbox.cuda.basic_ops
import
(
GpuFromHost
,
HostFromGpu
,
host_from_gpu
,
GpuDimShuffle
,
GpuElemwise
)
GpuFromHost
,
HostFromGpu
,
host_from_gpu
,
GpuDimShuffle
,
GpuElemwise
,
GpuReshape
)
_one
=
scal
.
constant
(
numpy
.
asarray
(
1.0
,
dtype
=
'float32'
))
_one
=
scal
.
constant
(
numpy
.
asarray
(
1.0
,
dtype
=
'float32'
))
...
@@ -126,3 +126,48 @@ def output_merge(cls, alpha_in, beta_in, out_in):
...
@@ -126,3 +126,48 @@ def output_merge(cls, alpha_in, beta_in, out_in):
return
maker
(
targ
,
*
inputs
)
return
maker
(
targ
,
*
inputs
)
return
opt
return
opt
return
wrapper
return
wrapper
def
pad_dims
(
input
,
leftdims
,
rightdims
):
"""Reshapes the input to a (leftdims + rightdims) tensor
This helper function is used to convert pooling inputs with arbitrary
non-pooling dimensions to the correct number of dimensions for the
GPU pooling ops.
This reduces or expands the number of dimensions of the input to
exactly `leftdims`, by adding extra dimensions on the left or by
combining some existing dimensions on the left of the input.
"""
assert
input
.
ndim
>=
rightdims
if
input
.
ndim
==
(
leftdims
+
rightdims
):
return
input
# extract image dimensions
img_shape
=
input
.
shape
[
-
rightdims
:]
# count the number of "leading" dimensions, store as dmatrix
batch_size
=
tensor
.
prod
(
input
.
shape
[:
-
rightdims
])
batch_size
=
tensor
.
shape_padright
(
batch_size
,
1
)
# store in the required shape, for example as a 4D tensor
# with shape: (batch_size,1,height,width)
new_shape
=
tensor
.
cast
(
tensor
.
join
(
0
,
batch_size
,
tensor
.
as_tensor
([
1
]
*
(
leftdims
-
1
)),
img_shape
),
'int64'
)
input_ND
=
GpuReshape
(
leftdims
+
rightdims
)(
input
,
new_shape
)
return
input_ND
def
unpad_dims
(
output
,
input
,
leftdims
,
rightdims
):
"""Reshapes the output after pad_dims.
This reverts the padding by `pad_dims`.
"""
if
output
.
ndim
==
input
.
ndim
:
return
output
# restore the output to the original shape
outshp
=
tensor
.
join
(
0
,
input
.
shape
[:
-
rightdims
],
output
.
shape
[
-
rightdims
:])
return
GpuReshape
(
input
.
ndim
)(
output
,
outshp
)
theano/sandbox/cuda/tests/test_blas.py
浏览文件 @
a16e91f7
...
@@ -326,7 +326,9 @@ if 0:
...
@@ -326,7 +326,9 @@ if 0:
def
test_downsample
():
def
test_downsample
():
shps
=
[(
1
,
1
,
1
,
12
),
shps
=
[(
1
,
12
),
(
1
,
1
,
12
),
(
1
,
1
,
1
,
12
),
(
1
,
1
,
2
,
2
),
(
1
,
1
,
2
,
2
),
(
1
,
1
,
1
,
1
),
(
1
,
1
,
1
,
1
),
(
1
,
1
,
4
,
4
),
(
1
,
1
,
4
,
4
),
...
@@ -359,17 +361,17 @@ def test_downsample():
...
@@ -359,17 +361,17 @@ def test_downsample():
for
shp
in
shps
:
for
shp
in
shps
:
for
ds
in
(
2
,
2
),
(
3
,
2
),
(
1
,
1
):
for
ds
in
(
2
,
2
),
(
3
,
2
),
(
1
,
1
):
if
ds
[
0
]
>
shp
[
2
]:
if
ds
[
0
]
>
shp
[
-
2
]:
continue
continue
if
ds
[
1
]
>
shp
[
3
]:
if
ds
[
1
]
>
shp
[
-
1
]:
continue
continue
# GpuDownsampleFactorMax doesn't like having more than 512 columns
# GpuDownsampleFactorMax doesn't like having more than 512 columns
# in the output tensor.
# in the output tensor.
if
float
(
shp
[
3
])
/
ds
[
1
]
>
512
:
if
float
(
shp
[
-
1
])
/
ds
[
1
]
>
512
:
continue
continue
for
ignore_border
in
(
True
,
False
):
for
ignore_border
in
(
True
,
False
):
# print 'test_downsample', shp, ds, ignore_border
# print 'test_downsample', shp, ds, ignore_border
ds_op
=
Pool
(
ignore_border
=
ignore_border
)
ds_op
=
Pool
(
ndim
=
len
(
ds
),
ignore_border
=
ignore_border
)
a
=
tcn
.
shared_constructor
(
my_rand
(
*
shp
),
'a'
)
a
=
tcn
.
shared_constructor
(
my_rand
(
*
shp
),
'a'
)
f
=
pfunc
([],
ds_op
(
tensor
.
as_tensor_variable
(
a
),
ds
),
f
=
pfunc
([],
ds_op
(
tensor
.
as_tensor_variable
(
a
),
ds
),
...
...
theano/sandbox/cuda/tests/test_dnn.py
浏览文件 @
a16e91f7
...
@@ -15,8 +15,8 @@ import theano
...
@@ -15,8 +15,8 @@ import theano
import
theano.tensor
as
T
import
theano.tensor
as
T
import
theano.tests.unittest_tools
as
utt
import
theano.tests.unittest_tools
as
utt
from
theano.sandbox.neighbours
import
images2neibs
from
theano.sandbox.neighbours
import
images2neibs
from
theano.tensor.signal.pool
import
pool_2d
from
theano.tensor.signal.pool
import
pool_2d
,
pool_3d
from
theano.tensor.signal.pool
import
MaxPoolGrad
,
AveragePoolGrad
from
theano.tensor.signal.pool
import
Pool
,
MaxPoolGrad
,
AveragePoolGrad
import
theano.sandbox.cuda.dnn
as
dnn
import
theano.sandbox.cuda.dnn
as
dnn
from
theano.sandbox.cuda.basic_ops
import
GpuAllocEmpty
,
gpu_alloc_empty
from
theano.sandbox.cuda.basic_ops
import
GpuAllocEmpty
,
gpu_alloc_empty
from
theano.sandbox.cuda
import
float32_shared_constructor
as
shared
from
theano.sandbox.cuda
import
float32_shared_constructor
as
shared
...
@@ -170,7 +170,7 @@ def test_dnn_conv_inplace():
...
@@ -170,7 +170,7 @@ def test_dnn_conv_inplace():
def
pool3d2d
(
input
,
ds
=
(
2
,
2
,
2
),
strides
=
None
,
pad
=
(
0
,
0
,
0
),
def
pool3d2d
(
input
,
ds
=
(
2
,
2
,
2
),
strides
=
None
,
pad
=
(
0
,
0
,
0
),
pool_func
=
T
.
max
,
mode
=
'ignore_borders'
):
pool_func
tion
=
T
.
max
,
mode
=
'ignore_borders'
):
if
strides
is
None
:
if
strides
is
None
:
strides
=
ds
strides
=
ds
...
@@ -179,13 +179,13 @@ def pool3d2d(input, ds=(2, 2, 2), strides=None, pad=(0, 0, 0),
...
@@ -179,13 +179,13 @@ def pool3d2d(input, ds=(2, 2, 2), strides=None, pad=(0, 0, 0),
# resahpe to B, C*0, 1, 2 and do the pooling on 1, 2
# resahpe to B, C*0, 1, 2 and do the pooling on 1, 2
first
=
input
.
reshape
((
shape
[
0
],
shape
[
1
]
*
shape
[
2
],
shape
[
3
],
shape
[
4
]))
first
=
input
.
reshape
((
shape
[
0
],
shape
[
1
]
*
shape
[
2
],
shape
[
3
],
shape
[
4
]))
pooled1
=
pool_2d_i2n
(
first
,
ds
=
ds
[
1
:],
strides
=
strides
[
1
:],
pad
=
pad
[
1
:],
pooled1
=
pool_2d_i2n
(
first
,
ds
=
ds
[
1
:],
strides
=
strides
[
1
:],
pad
=
pad
[
1
:],
pool_function
=
pool_func
,
mode
=
mode
)
pool_function
=
pool_func
tion
,
mode
=
mode
)
shp1
=
pooled1
.
shape
shp1
=
pooled1
.
shape
# reshape to B, C, 0, 1*2 and do the pooling on 0
# reshape to B, C, 0, 1*2 and do the pooling on 0
second
=
pooled1
.
reshape
((
shape
[
0
],
shape
[
1
],
shape
[
2
],
shp1
[
2
]
*
shp1
[
3
]))
second
=
pooled1
.
reshape
((
shape
[
0
],
shape
[
1
],
shape
[
2
],
shp1
[
2
]
*
shp1
[
3
]))
pooled2
=
pool_2d_i2n
(
second
,
ds
=
(
ds
[
0
],
1
),
strides
=
(
strides
[
0
],
1
),
pooled2
=
pool_2d_i2n
(
second
,
ds
=
(
ds
[
0
],
1
),
strides
=
(
strides
[
0
],
1
),
pad
=
(
pad
[
0
],
0
),
pool_function
=
pool_func
,
mode
=
mode
)
pad
=
(
pad
[
0
],
0
),
pool_function
=
pool_func
tion
,
mode
=
mode
)
shp2
=
pooled2
.
shape
shp2
=
pooled2
.
shape
return
pooled2
.
reshape
((
shape
[
0
],
shape
[
1
],
shp2
[
2
],
shp1
[
2
],
shp1
[
3
]))
return
pooled2
.
reshape
((
shape
[
0
],
shape
[
1
],
shp2
[
2
],
shp1
[
2
],
shp1
[
3
]))
...
@@ -241,8 +241,6 @@ def test_pooling():
...
@@ -241,8 +241,6 @@ def test_pooling():
func
=
T
.
max
func
=
T
.
max
else
:
else
:
func
=
T
.
mean
func
=
T
.
mean
if
pad
!=
(
0
,
0
)
and
cuda
.
dnn
.
version
()
==
-
1
:
continue
if
pad
!=
(
0
,
0
)
and
func
is
T
.
mean
:
if
pad
!=
(
0
,
0
)
and
func
is
T
.
mean
:
continue
continue
...
@@ -418,6 +416,7 @@ def test_pooling3d():
...
@@ -418,6 +416,7 @@ def test_pooling3d():
if
not
cuda
.
dnn
.
dnn_available
()
or
cuda
.
dnn
.
version
()
<
(
3000
,
3000
):
if
not
cuda
.
dnn
.
dnn_available
()
or
cuda
.
dnn
.
version
()
<
(
3000
,
3000
):
raise
SkipTest
(
cuda
.
dnn
.
dnn_available
.
msg
)
raise
SkipTest
(
cuda
.
dnn
.
dnn_available
.
msg
)
# We force the FAST_RUN as we don't want the reference to run in DebugMode.
mode_without_gpu_ref
=
theano
.
compile
.
mode
.
get_mode
(
mode_without_gpu_ref
=
theano
.
compile
.
mode
.
get_mode
(
'FAST_RUN'
)
.
excluding
(
'gpu'
)
'FAST_RUN'
)
.
excluding
(
'gpu'
)
...
@@ -427,8 +426,7 @@ def test_pooling3d():
...
@@ -427,8 +426,7 @@ def test_pooling3d():
else
:
else
:
modes
=
(
'max'
,
'average_inc_pad'
,
'average_exc_pad'
)
modes
=
(
'max'
,
'average_inc_pad'
,
'average_exc_pad'
)
x
=
T
.
TensorType
(
broadcastable
=
(
False
,
False
,
False
,
False
,
False
),
x
=
T
.
ftensor5
()
dtype
=
'float32'
)()
for
mode
,
pad
in
product
(
modes
,
for
mode
,
pad
in
product
(
modes
,
((
0
,
0
,
0
),
(
1
,
0
,
0
),
(
0
,
1
,
0
),
(
0
,
0
,
1
),
((
0
,
0
,
0
),
(
1
,
0
,
0
),
(
0
,
1
,
0
),
(
0
,
0
,
1
),
(
2
,
3
,
2
),
(
3
,
2
,
2
),
(
2
,
2
,
3
))):
(
2
,
3
,
2
),
(
3
,
2
,
2
),
(
2
,
2
,
3
))):
...
@@ -436,8 +434,6 @@ def test_pooling3d():
...
@@ -436,8 +434,6 @@ def test_pooling3d():
func
=
T
.
max
func
=
T
.
max
else
:
else
:
func
=
T
.
mean
func
=
T
.
mean
if
pad
!=
(
0
,
0
,
0
)
and
cuda
.
dnn
.
version
()
==
-
1
:
continue
if
pad
!=
(
0
,
0
,
0
)
and
func
is
T
.
mean
:
if
pad
!=
(
0
,
0
,
0
)
and
func
is
T
.
mean
:
continue
continue
...
@@ -449,13 +445,13 @@ def test_pooling3d():
...
@@ -449,13 +445,13 @@ def test_pooling3d():
if
pad
[
0
]
>
stride
or
pad
[
1
]
>
stride
or
pad
[
2
]
>
stride
:
if
pad
[
0
]
>
stride
or
pad
[
1
]
>
stride
or
pad
[
2
]
>
stride
:
# Not implemented
# Not implemented
continue
continue
out1
=
cuda
.
dnn
.
dnn_pool
(
x
,
(
ws
,
ws
,
ws
),
out1
=
pool_3d
(
x
,
(
ws
,
ws
,
ws
),
stride
=
(
stride
,
stride
,
stride
),
st
=
(
stride
,
stride
,
stride
),
pad
=
pad
,
mode
=
mode
)
ignore_border
=
True
,
out2
=
pool3d2d
(
x
,
ds
=
(
ws
,
ws
,
ws
),
padding
=
pad
,
mode
=
mode
)
strides
=
(
stride
,
stride
,
stride
),
out2
=
pool3d2d
(
x
,
ds
=
(
ws
,
ws
,
ws
),
strides
=
(
stride
,
stride
,
stride
),
pad
=
pad
,
pool_func
=
func
)
pad
=
pad
,
pool_function
=
func
)
f1
=
theano
.
function
([
x
],
out1
,
mode
=
mode_with_gpu
)
f1
=
theano
.
function
([
x
],
out1
,
mode
=
mode_with_gpu
)
assert
any
([
isinstance
(
node
.
op
,
cuda
.
dnn
.
GpuDnnPool
)
assert
any
([
isinstance
(
node
.
op
,
cuda
.
dnn
.
GpuDnnPool
)
for
node
in
f1
.
maker
.
fgraph
.
apply_nodes
])
for
node
in
f1
.
maker
.
fgraph
.
apply_nodes
])
...
@@ -510,11 +506,17 @@ def test_pooling3d():
...
@@ -510,11 +506,17 @@ def test_pooling3d():
g_out
=
fg
(
data
)
g_out
=
fg
(
data
)
# Compare again the CPU result
# Compare again the CPU result
out
=
pool
3d2
d
(
x
,
(
ws
,
ws
,
ws
),
out
=
pool
_3
d
(
x
,
(
ws
,
ws
,
ws
),
strides
=
(
stride
,
stride
,
stride
)
,
padding
=
pad
,
pad
=
pad
,
pool_func
=
func
)
ignore_border
=
True
,
mode
=
mode
)
fc
=
theano
.
function
([
x
],
theano
.
grad
(
out
.
sum
(),
x
),
fc
=
theano
.
function
([
x
],
theano
.
grad
(
out
.
sum
(),
x
),
mode
=
mode_without_gpu_ref
)
mode
=
mode_without_gpu_ref
)
if
mode
==
'max'
:
assert
any
([
isinstance
(
node
.
op
,
MaxPoolGrad
)
for
node
in
fc
.
maker
.
fgraph
.
toposort
()])
else
:
assert
any
([
isinstance
(
node
.
op
,
AveragePoolGrad
)
for
node
in
fc
.
maker
.
fgraph
.
toposort
()])
c_out
=
fc
(
data
)
c_out
=
fc
(
data
)
utt
.
assert_allclose
(
c_out
,
g_out
)
utt
.
assert_allclose
(
c_out
,
g_out
)
...
@@ -523,6 +525,7 @@ def test_pooling_opt():
...
@@ -523,6 +525,7 @@ def test_pooling_opt():
if
not
cuda
.
dnn
.
dnn_available
():
if
not
cuda
.
dnn
.
dnn_available
():
raise
SkipTest
(
cuda
.
dnn
.
dnn_available
.
msg
)
raise
SkipTest
(
cuda
.
dnn
.
dnn_available
.
msg
)
# 2D pooling
x
=
T
.
fmatrix
()
x
=
T
.
fmatrix
()
f
=
theano
.
function
(
f
=
theano
.
function
(
...
@@ -535,6 +538,7 @@ def test_pooling_opt():
...
@@ -535,6 +538,7 @@ def test_pooling_opt():
f
(
numpy
.
zeros
((
10
,
10
),
dtype
=
'float32'
))
f
(
numpy
.
zeros
((
10
,
10
),
dtype
=
'float32'
))
# gradient of 2D pooling
f
=
theano
.
function
(
f
=
theano
.
function
(
[
x
],
[
x
],
T
.
grad
(
pool_2d
(
x
,
ds
=
(
2
,
2
),
mode
=
'average_inc_pad'
,
T
.
grad
(
pool_2d
(
x
,
ds
=
(
2
,
2
),
mode
=
'average_inc_pad'
,
...
@@ -545,6 +549,7 @@ def test_pooling_opt():
...
@@ -545,6 +549,7 @@ def test_pooling_opt():
for
n
in
f
.
maker
.
fgraph
.
toposort
()])
for
n
in
f
.
maker
.
fgraph
.
toposort
()])
f
(
numpy
.
zeros
((
10
,
10
),
dtype
=
'float32'
))
f
(
numpy
.
zeros
((
10
,
10
),
dtype
=
'float32'
))
# Test sum pooling
# Test sum pooling
f
=
theano
.
function
(
f
=
theano
.
function
(
[
x
],
[
x
],
...
@@ -557,6 +562,82 @@ def test_pooling_opt():
...
@@ -557,6 +562,82 @@ def test_pooling_opt():
data
=
numpy
.
random
.
rand
(
10
,
10
)
.
astype
(
'float32'
)
data
=
numpy
.
random
.
rand
(
10
,
10
)
.
astype
(
'float32'
)
f
(
data
)
f
(
data
)
# 3D pooling
x
=
T
.
ftensor3
()
f
=
theano
.
function
(
[
x
],
pool_3d
(
x
,
ds
=
(
2
,
2
,
2
),
mode
=
'average_inc_pad'
,
ignore_border
=
True
),
mode
=
mode_with_gpu
)
assert
any
([
isinstance
(
n
.
op
,
cuda
.
dnn
.
GpuDnnPool
)
for
n
in
f
.
maker
.
fgraph
.
toposort
()])
f
(
numpy
.
zeros
((
10
,
10
,
10
),
dtype
=
'float32'
))
# gradient of 3D pooling
f
=
theano
.
function
(
[
x
],
T
.
grad
(
pool_3d
(
x
,
ds
=
(
2
,
2
,
2
),
mode
=
'average_inc_pad'
,
ignore_border
=
True
)
.
sum
(),
x
),
mode
=
mode_with_gpu
.
including
(
"cudnn"
))
assert
any
([
isinstance
(
n
.
op
,
cuda
.
dnn
.
GpuDnnPoolGrad
)
for
n
in
f
.
maker
.
fgraph
.
toposort
()])
f
(
numpy
.
zeros
((
10
,
10
,
10
),
dtype
=
'float32'
))
def
test_pooling_opt_arbitrary_dimensions
():
# test if input with an arbitrary number of non-pooling dimensions
# is correctly reshaped to run on the GPU
if
not
cuda
.
dnn
.
dnn_available
():
raise
SkipTest
(
cuda
.
dnn
.
dnn_available
.
msg
)
# 'average_exc_pad' is disabled for versions < 4004
if
cuda
.
dnn
.
version
()
<
(
4004
,
4004
):
modes
=
(
'max'
,
'average_inc_pad'
)
else
:
modes
=
(
'max'
,
'average_inc_pad'
,
'average_exc_pad'
)
for
n_non_pool_dims
in
(
0
,
1
,
2
,
3
):
for
ws
in
((
2
,
2
),
(
3
,
3
,
3
)):
# create input shape: non-pooling dimensions
# followed by 2 or 3 pooling dimensions
shp
=
(
2
,)
*
n_non_pool_dims
+
(
5
,)
*
len
(
ws
)
data
=
numpy
.
random
.
normal
(
0
,
1
,
shp
)
.
astype
(
'float32'
)
input
=
shared
(
data
)
for
mode
in
modes
:
out_pool
=
Pool
(
ndim
=
len
(
ws
),
mode
=
mode
,
ignore_border
=
True
)(
input
,
ws
)
out_pool_grad
=
T
.
grad
(
T
.
sum
(
out_pool
),
wrt
=
input
)
out
=
[
out_pool
,
out_pool_grad
]
# run on GPU
fg
=
theano
.
function
([],
out
,
mode
=
mode_with_gpu
)
assert
any
([
isinstance
(
node
.
op
,
cuda
.
dnn
.
GpuDnnPool
)
for
node
in
fg
.
maker
.
fgraph
.
toposort
()])
assert
any
([
isinstance
(
node
.
op
,
cuda
.
dnn
.
GpuDnnPoolGrad
)
for
node
in
fg
.
maker
.
fgraph
.
toposort
()])
res_gpu
=
fg
()
# run on CPU
fc
=
theano
.
function
([],
out
,
mode
=
mode_without_gpu
)
assert
any
([
isinstance
(
node
.
op
,
Pool
)
for
node
in
fc
.
maker
.
fgraph
.
toposort
()])
if
mode
==
'max'
:
assert
any
([
isinstance
(
node
.
op
,
MaxPoolGrad
)
for
node
in
fc
.
maker
.
fgraph
.
toposort
()])
else
:
assert
any
([
isinstance
(
node
.
op
,
AveragePoolGrad
)
for
node
in
fc
.
maker
.
fgraph
.
toposort
()])
res_cpu
=
fg
()
# check for similarity
utt
.
assert_allclose
(
res_gpu
[
0
],
res_cpu
[
0
])
utt
.
assert_allclose
(
res_gpu
[
1
],
res_cpu
[
1
])
class
test_DnnSoftMax
(
test_nnet
.
test_SoftMax
):
class
test_DnnSoftMax
(
test_nnet
.
test_SoftMax
):
gpu_op
=
dnn
.
GpuDnnSoftmax
gpu_op
=
dnn
.
GpuDnnSoftmax
...
...
theano/tensor/signal/pool.py
浏览文件 @
a16e91f7
...
@@ -7,6 +7,7 @@ Pool, DownsampleAvg, DownsampleSoftmax.
...
@@ -7,6 +7,7 @@ Pool, DownsampleAvg, DownsampleSoftmax.
from
__future__
import
absolute_import
,
print_function
,
division
from
__future__
import
absolute_import
,
print_function
,
division
# This file should move along with conv.py
# This file should move along with conv.py
import
warnings
import
warnings
import
itertools
import
numpy
import
numpy
from
six.moves
import
xrange
from
six.moves
import
xrange
...
@@ -84,67 +85,101 @@ def pool_2d(input, ds, ignore_border=None, st=None, padding=(0, 0),
...
@@ -84,67 +85,101 @@ def pool_2d(input, ds, ignore_border=None, st=None, padding=(0, 0),
" Otherwise, the convolution will be executed on CPU."
,
" Otherwise, the convolution will be executed on CPU."
,
stacklevel
=
2
)
stacklevel
=
2
)
ignore_border
=
False
ignore_border
=
False
if
input
.
ndim
==
4
:
op
=
Pool
(
ignore_border
,
ndim
=
2
,
mode
=
mode
)
op
=
Pool
(
ignore_border
,
mode
=
mode
)
output
=
op
(
input
,
ds
,
st
,
padding
)
output
=
op
(
input
,
ds
,
st
,
padding
)
return
output
return
output
# extract image dimensions
img_shape
=
input
.
shape
[
-
2
:]
# count the number of "leading" dimensions, store as dmatrix
def
pool_3d
(
input
,
ds
,
ignore_border
=
None
,
st
=
None
,
padding
=
(
0
,
0
,
0
),
batch_size
=
tensor
.
prod
(
input
.
shape
[:
-
2
])
mode
=
'max'
):
batch_size
=
tensor
.
shape_padright
(
batch_size
,
1
)
"""Downscale the input by a specified factor
# store as 4D tensor with shape: (batch_size,1,height,width)
Takes as input a N-D tensor, where N >= 3. It downscales the input image by
new_shape
=
tensor
.
cast
(
tensor
.
join
(
0
,
batch_size
,
the specified factor, by keeping only the maximum value of non-overlapping
tensor
.
as_tensor
([
1
]),
patches of size (ds[0],ds[1],ds[2])
img_shape
),
'int64'
)
input_4D
=
tensor
.
reshape
(
input
,
new_shape
,
ndim
=
4
)
# downsample mini-batch of images
Parameters
op
=
Pool
(
ignore_border
,
mode
=
mode
)
----------
output
=
op
(
input_4D
,
ds
,
st
,
padding
)
input : N-D theano tensor of input images
Input images. Max pooling will be done over the 3 last dimensions.
ds : tuple of length 3 or theano vector of ints of size 3
Factor by which to downscale (vertical ds, horizontal ds, depth ds).
(2,2,2) will halve the image in each dimension.
ignore_border : bool (default None, will print a warning and set to False)
When True, (5,5,5) input with ds=(2,2,2) will generate a (2,2,2) output.
(3,3,3) otherwise.
st : tuple of three ints or theano vector of ints of size 3
Stride size, which is the number of shifts over rows/cols/slices to get
the next pool region. If st is None, it is considered equal to ds
(no overlap on pooling regions).
padding : tuple of two ints or theano vector of ints of size 3
(pad_h, pad_w, pad_d), pad zeros to extend beyond six borders of the
images, pad_h is the size of the top and bottom margins,
pad_w is the size of the left and right margins, and pad_d is the size
of the front and back margins
mode : {'max', 'sum', 'average_inc_pad', 'average_exc_pad'}
Operation executed on each window. `max` and `sum` always exclude
the padding in the computation. `average` gives you the choice to
include or exclude it.
# restore to original shape
"""
outshp
=
tensor
.
join
(
0
,
input
.
shape
[:
-
2
],
output
.
shape
[
-
2
:])
if
input
.
ndim
<
3
:
return
tensor
.
reshape
(
output
,
outshp
,
ndim
=
input
.
ndim
)
raise
NotImplementedError
(
'pool_3d requires a dimension >= 3'
)
if
ignore_border
is
None
:
warnings
.
warn
(
"pool_3d() will have the parameter ignore_border"
" default value changed to True (currently"
" False). To have consistent behavior with all Theano"
" version, explicitly add the parameter ignore_border=True."
" On the GPU, using ignore_border=True is needed to use cuDNN."
" When using ignore_border=False and not using cuDNN, the only"
" GPU combination supported is when"
" `ds == st and padding == (0, 0, 0) and mode == 'max'`."
" Otherwise, the convolution will be executed on CPU."
,
stacklevel
=
2
)
ignore_border
=
False
op
=
Pool
(
ignore_border
,
ndim
=
3
,
mode
=
mode
)
output
=
op
(
input
,
ds
,
st
,
padding
)
return
output
class
Pool
(
OpenMPOp
):
class
Pool
(
OpenMPOp
):
"""
"""
For N-dimensional tensors, consider that the last two dimensions span
This Op downsamples the last N dimensions of the input by taking the max,
images. This Op downsamples these images by taking the max, sum or average
sum or average over different patches.
over different patch.
The constructor takes the max, sum or average or different input patches.
Parameters
Parameters
----------
----------
ds : list or tuple of
two
ints
ds : list or tuple of
N
ints
Downsample factor over rows
and column
.
Downsample factor over rows
, columns etc
.
ds indicates the
pool region size
.
ds indicates the
size of the pooling region
.
ignore_border : bool
ignore_border : bool
If ds doesn't divide imgshape, do we include an extra row/col
If ds doesn't divide imgshape, do we include an extra row/col
/slice
of partial downsampling (False) or ignore it (True).
of partial downsampling (False) or ignore it (True).
st : list or tuple of
two
ints or None
st : list or tuple of
N
ints or None
Stride size, which is the number of shifts over rows/cols to get the
Stride size, which is the number of shifts over rows/cols
/slices
to get the
next pool region. If st is None, it is considered equal to ds
next pool region. If st is None, it is considered equal to ds
(no overlap on pooling regions).
(no overlap on pooling regions).
padding: tuple of two ints
padding : tuple of N ints or None
(pad_h, pad_w), pad zeros to extend beyond four borders of the images,
For each downsampling dimension, this specifies the number of zeros to
pad_h is the size of the top and bottom margins, and pad_w is the size
add as padding on both sides. For 2D and (pad_h, pad_w), pad_h specifies the
of the left and right margins.
size of the top and bottom margins, pad_w specifies the size of the left and
right margins. No padding is added if padding is None.
mode : {'max', 'sum', 'average_inc_pad', 'average_exc_pad'}
mode : {'max', 'sum', 'average_inc_pad', 'average_exc_pad'}
('average_inc_pad' excludes the padding from the count,
('average_inc_pad' excludes the padding from the count,
'average_exc_pad' include it)
'average_exc_pad' include it)
ndim : int
The number of pooling dimensions N.
If this number is not specified, the default is set to the
(input.ndim - 2), assuming that the first two dimensions of the input
are non-pooling dimensions.
"""
"""
__props__
=
(
'ignore_border'
,
'mode'
)
__props__
=
(
'
ndim'
,
'
ignore_border'
,
'mode'
)
@staticmethod
@staticmethod
def
out_shape
(
imgshape
,
ds
,
ignore_border
=
False
,
st
=
None
,
padding
=
(
0
,
0
)
):
def
out_shape
(
imgshape
,
ds
,
ignore_border
=
False
,
st
=
None
,
padding
=
None
,
ndim
=
None
):
"""
"""
Return the shape of the output from this op, for input of given
Return the shape of the output from this op, for input of given
shape and flags.
shape and flags.
...
@@ -152,88 +187,79 @@ class Pool(OpenMPOp):
...
@@ -152,88 +187,79 @@ class Pool(OpenMPOp):
Parameters
Parameters
----------
----------
imgshape : tuple, list, or similar of integer or scalar Theano variable
imgshape : tuple, list, or similar of integer or scalar Theano variable
The shape of a tensor of images. The last
two
elements are
The shape of a tensor of images. The last
N
elements are
interpreted as the number of rows, and the number of cols.
interpreted as the number of rows, and the number of cols.
ds : list or tuple of two ints
ds : list or tuple of N ints
Downsample factor over rows and columns this parameter indicates
Downsample factor over rows and column.
the size of the pooling region.
ds indicates the pool region size.
st : list or tuple of two ints
The stride size. This is the distance between the pooling regions.
If it's set to None, it equals ds.
ignore_border : bool
ignore_border : bool
If ds doesn't divide imgshape, do we include an extra row/col of
If ds doesn't divide imgshape, do we include an extra row/col/slice
partial downsampling (False) or ignore it (True).
of partial downsampling (False) or ignore it (True).
padding : tuple of two ints
st : list or tuple of N ints or None
(pad_h, pad_w), pad zeros to extend beyond four borders
Stride size, which is the number of shifts over rows/cols/slices to get the
of the images, pad_h is the size of the top and bottom margins,
next pool region. If st is None, it is considered equal to ds
and pad_w is the size of the left and right margins.
(no overlap on pooling regions).
padding : tuple of N ints or None
For each downsampling dimension, this specifies the number of zeros to
add as padding on both sides. For 2D and (pad_h, pad_w), pad_h specifies the
size of the top and bottom margins, pad_w specifies the size of the left and
right margins. No padding is added if padding is None.
ndim : int
The number of pooling dimensions N.
If this number is not specified, the default is set to the
(input.ndim - 2), assuming that the first two dimensions of the input
are non-pooling dimensions.
Returns
Returns
-------
-------
list
list
The shape of the output from this op, for input of given shape.
The shape of the output from this op, for input of given shape.
This will have the same length as imgshape, but with last
two
This will have the same length as imgshape, but with last
N
elements reduced as per the downsampling & ignore_border flags.
elements reduced as per the downsampling & ignore_border flags.
"""
"""
if
len
(
imgshape
)
<
2
:
if
ndim
is
None
:
raise
TypeError
(
'imgshape must have at least two elements '
ndim
=
len
(
imgshape
)
-
2
'(rows, cols)'
)
if
len
(
imgshape
)
<
ndim
:
raise
TypeError
(
'imgshape must have at least {} dimensions'
.
format
(
ndim
))
if
st
is
None
:
if
st
is
None
:
st
=
ds
st
=
ds
r
,
c
=
imgshape
[
-
2
:]
if
padding
is
None
:
r
=
tensor
.
extract_constant
(
r
)
padding
=
(
0
,)
*
ndim
c
=
tensor
.
extract_constant
(
c
)
patch_shape
=
tuple
(
tensor
.
extract_constant
(
imgshape
[
-
ndim
+
i
])
+
padding
[
i
]
*
2
if
padding
[
0
]:
for
i
in
xrange
(
ndim
))
r
=
r
+
padding
[
0
]
*
2
if
padding
[
1
]:
c
=
c
+
padding
[
1
]
*
2
def
compute_out
(
v
,
downsample
,
stride
):
if
ignore_border
:
if
ignore_border
:
if
ds
[
0
]
==
st
[
0
]:
if
downsample
==
stride
:
nr
=
r
//
st
[
0
]
return
v
//
stride
else
:
out_r
=
(
r
-
ds
[
0
])
//
st
[
0
]
+
1
if
isinstance
(
r
,
theano
.
Variable
):
nr
=
tensor
.
maximum
(
out_r
,
0
)
else
:
nr
=
numpy
.
maximum
(
out_r
,
0
)
if
ds
[
1
]
==
st
[
1
]:
nc
=
c
//
st
[
1
]
else
:
out_c
=
(
c
-
ds
[
1
])
//
st
[
1
]
+
1
if
isinstance
(
c
,
theano
.
Variable
):
nc
=
tensor
.
maximum
(
out_c
,
0
)
else
:
else
:
nc
=
numpy
.
maximum
(
out_c
,
0
)
out
=
(
v
-
downsample
)
//
stride
+
1
if
isinstance
(
out
,
theano
.
Variable
):
return
tensor
.
maximum
(
out
,
0
)
else
:
else
:
if
isinstance
(
r
,
theano
.
Variable
):
return
numpy
.
maximum
(
out
,
0
)
nr
=
tensor
.
switch
(
tensor
.
ge
(
st
[
0
],
ds
[
0
]),
(
r
-
1
)
//
st
[
0
]
+
1
,
tensor
.
maximum
(
0
,
(
r
-
1
-
ds
[
0
])
//
st
[
0
]
+
1
)
+
1
)
elif
st
[
0
]
>=
ds
[
0
]:
nr
=
(
r
-
1
)
//
st
[
0
]
+
1
else
:
else
:
nr
=
max
(
0
,
(
r
-
1
-
ds
[
0
]
+
st
[
0
])
//
st
[
0
])
+
1
if
isinstance
(
v
,
theano
.
Variable
):
return
tensor
.
switch
(
tensor
.
ge
(
stride
,
downsample
),
if
isinstance
(
c
,
theano
.
Variable
):
(
v
-
1
)
//
stride
+
1
,
nc
=
tensor
.
switch
(
tensor
.
ge
(
st
[
1
],
ds
[
1
]),
tensor
.
maximum
(
0
,
(
v
-
1
-
downsample
)
//
(
c
-
1
)
//
st
[
1
]
+
1
,
stride
+
1
)
+
1
)
tensor
.
maximum
(
0
,
(
c
-
1
-
ds
[
1
])
//
elif
stride
>=
downsample
:
st
[
1
]
+
1
)
+
1
)
return
(
v
-
1
)
//
stride
+
1
elif
st
[
1
]
>=
ds
[
1
]:
nc
=
(
c
-
1
)
//
st
[
1
]
+
1
else
:
else
:
nc
=
max
(
0
,
(
c
-
1
-
ds
[
1
]
+
st
[
1
])
//
st
[
1
]
)
+
1
return
max
(
0
,
(
v
-
1
-
downsample
+
stride
)
//
stride
)
+
1
rval
=
list
(
imgshape
[:
-
2
])
+
[
nr
,
nc
]
out_shape
=
[
compute_out
(
patch_shape
[
i
],
ds
[
i
],
st
[
i
])
for
i
in
xrange
(
ndim
)]
rval
=
list
(
imgshape
[:
-
ndim
])
+
out_shape
return
rval
return
rval
def
__init__
(
self
,
ignore_border
=
False
,
mode
=
'max'
,
openmp
=
None
):
def
__init__
(
self
,
ignore_border
=
False
,
mode
=
'max'
,
ndim
=
None
,
openmp
=
None
):
super
(
Pool
,
self
)
.
__init__
(
openmp
=
openmp
)
super
(
Pool
,
self
)
.
__init__
(
openmp
=
openmp
)
self
.
ndim
=
ndim
self
.
ignore_border
=
ignore_border
self
.
ignore_border
=
ignore_border
if
mode
not
in
[
'max'
,
'average_inc_pad'
,
'average_exc_pad'
,
'sum'
]:
if
mode
not
in
[
'max'
,
'average_inc_pad'
,
'average_exc_pad'
,
'sum'
]:
raise
ValueError
(
raise
ValueError
(
...
@@ -244,6 +270,7 @@ class Pool(OpenMPOp):
...
@@ -244,6 +270,7 @@ class Pool(OpenMPOp):
def
prepare_node
(
self
,
node
,
storage_map
,
compute_map
):
def
prepare_node
(
self
,
node
,
storage_map
,
compute_map
):
if
len
(
node
.
inputs
)
==
1
:
if
len
(
node
.
inputs
)
==
1
:
# Old interface
# Old interface
self
.
ndim
=
len
(
node
.
op
.
ds
)
self
.
mode
=
node
.
op
.
mode
self
.
mode
=
node
.
op
.
mode
ws
=
theano
.
tensor
.
constant
(
node
.
op
.
ds
)
ws
=
theano
.
tensor
.
constant
(
node
.
op
.
ds
)
st
=
theano
.
tensor
.
constant
(
node
.
op
.
st
)
st
=
theano
.
tensor
.
constant
(
node
.
op
.
st
)
...
@@ -270,17 +297,22 @@ class Pool(OpenMPOp):
...
@@ -270,17 +297,22 @@ class Pool(OpenMPOp):
storage_map
[
pad
]
=
[
None
]
storage_map
[
pad
]
=
[
None
]
compute_map
[
pad
]
=
[
False
]
compute_map
[
pad
]
=
[
False
]
def
make_node
(
self
,
x
,
ws
,
stride
=
None
,
pad
=
(
0
,
0
)
):
def
make_node
(
self
,
x
,
ws
,
stride
=
None
,
pad
=
None
):
# TODO: consider restricting the dtype?
# TODO: consider restricting the dtype?
x
=
tensor
.
as_tensor_variable
(
x
)
x
=
tensor
.
as_tensor_variable
(
x
)
nd
=
self
.
ndim
if
nd
is
None
:
nd
=
x
.
type
.
ndim
-
2
if
stride
is
None
:
if
stride
is
None
:
stride
=
ws
stride
=
ws
if
isinstance
(
pad
,
(
tuple
,
list
)):
if
pad
is
None
:
if
tuple
(
pad
)
!=
(
0
,
0
)
and
not
self
.
ignore_border
:
pad
=
(
0
,)
*
nd
elif
isinstance
(
pad
,
(
tuple
,
list
)):
if
max
(
pad
)
!=
0
and
not
self
.
ignore_border
:
raise
NotImplementedError
(
raise
NotImplementedError
(
'padding works only with ignore_border=True'
)
'padding works only with ignore_border=True'
)
if
isinstance
(
ws
,
(
tuple
,
list
)):
if
isinstance
(
ws
,
(
tuple
,
list
)):
if
pad
[
0
]
>=
ws
[
0
]
or
pad
[
1
]
>=
ws
[
1
]
:
if
any
(
pad
[
i
]
>=
ws
[
i
]
for
i
in
range
(
nd
))
:
raise
NotImplementedError
(
raise
NotImplementedError
(
'padding_h and padding_w must be smaller than strides'
)
'padding_h and padding_w must be smaller than strides'
)
ws
=
tensor
.
as_tensor_variable
(
ws
)
ws
=
tensor
.
as_tensor_variable
(
ws
)
...
@@ -289,7 +321,7 @@ class Pool(OpenMPOp):
...
@@ -289,7 +321,7 @@ class Pool(OpenMPOp):
assert
ws
.
ndim
==
1
assert
ws
.
ndim
==
1
assert
stride
.
ndim
==
1
assert
stride
.
ndim
==
1
assert
pad
.
ndim
==
1
assert
pad
.
ndim
==
1
if
x
.
type
.
ndim
!=
4
:
if
x
.
type
.
ndim
<
nd
:
raise
TypeError
()
raise
TypeError
()
if
not
ws
.
dtype
.
startswith
(
'int'
):
if
not
ws
.
dtype
.
startswith
(
'int'
):
raise
TypeError
(
'Pool downsample parameters must be ints.'
)
raise
TypeError
(
'Pool downsample parameters must be ints.'
)
...
@@ -298,42 +330,36 @@ class Pool(OpenMPOp):
...
@@ -298,42 +330,36 @@ class Pool(OpenMPOp):
if
not
pad
.
dtype
.
startswith
(
'int'
):
if
not
pad
.
dtype
.
startswith
(
'int'
):
raise
TypeError
(
'Padding parameters must be ints.'
)
raise
TypeError
(
'Padding parameters must be ints.'
)
# If the input shape are broadcastable we can have 0 in the output shape
# If the input shape are broadcastable we can have 0 in the output shape
broad
=
x
.
broadcastable
[:
2
]
+
(
False
,
False
)
broad
=
x
.
broadcastable
[:
-
nd
]
+
(
False
,)
*
nd
out
=
tensor
.
TensorType
(
x
.
dtype
,
broad
)
out
=
tensor
.
TensorType
(
x
.
dtype
,
broad
)
return
gof
.
Apply
(
self
,
[
x
,
ws
,
stride
,
pad
],
[
out
()])
return
gof
.
Apply
(
self
,
[
x
,
ws
,
stride
,
pad
],
[
out
()])
def
perform
(
self
,
node
,
inp
,
out
):
def
perform
(
self
,
node
,
inp
,
out
):
x
,
ws
,
stride
,
pad
=
inp
x
,
ws
,
stride
,
pad
=
inp
z
,
=
out
z
,
=
out
assert
ws
.
shape
==
stride
.
shape
==
pad
.
shape
==
(
2
,)
nd
=
self
.
ndim
if
len
(
x
.
shape
)
!=
4
:
if
nd
is
None
:
nd
=
len
(
x
.
shape
)
-
2
assert
ws
.
shape
==
stride
.
shape
==
pad
.
shape
==
(
nd
,)
if
len
(
x
.
shape
)
<
nd
:
raise
NotImplementedError
(
raise
NotImplementedError
(
'Pool requires
4D input for now'
)
'Pool requires
input with {} or more dimensions'
.
format
(
nd
)
)
z_shape
=
self
.
out_shape
(
x
.
shape
,
ws
,
self
.
ignore_border
,
stride
,
pad
)
z_shape
=
self
.
out_shape
(
x
.
shape
,
ws
,
self
.
ignore_border
,
stride
,
pad
,
nd
)
if
not
self
.
ignore_border
:
if
not
self
.
ignore_border
:
assert
z_shape
[
2
]
>
0
assert
all
(
z
>
0
for
z
in
z_shape
[
-
nd
:])
assert
z_shape
[
3
]
>
0
if
(
z
[
0
]
is
None
)
or
(
z
[
0
]
.
shape
!=
z_shape
):
if
(
z
[
0
]
is
None
)
or
(
z
[
0
]
.
shape
!=
z_shape
):
z
[
0
]
=
numpy
.
empty
(
z_shape
,
dtype
=
x
.
dtype
)
z
[
0
]
=
numpy
.
empty
(
z_shape
,
dtype
=
x
.
dtype
)
zz
=
z
[
0
]
zz
=
z
[
0
]
# number of pooling output rows
# size of pooling output
pr
=
zz
.
shape
[
-
2
]
pool_out_shp
=
zz
.
shape
[
-
nd
:]
# number of pooling output cols
img_shp
=
tuple
(
x
.
shape
[
-
nd
+
i
]
+
2
*
pad
[
i
]
for
i
in
xrange
(
nd
))
pc
=
zz
.
shape
[
-
1
]
ws0
,
ws1
=
ws
st0
,
st1
=
stride
pad_h
=
pad
[
0
]
pad_w
=
pad
[
1
]
img_rows
=
x
.
shape
[
-
2
]
+
2
*
pad_h
img_cols
=
x
.
shape
[
-
1
]
+
2
*
pad_w
inc_pad
=
self
.
mode
==
'average_inc_pad'
inc_pad
=
self
.
mode
==
'average_inc_pad'
# pad the image
# pad the image
if
(
pad_h
,
pad_w
)
!=
(
0
,
0
):
if
max
(
pad
)
!=
0
:
y
=
numpy
.
zeros
(
y
=
numpy
.
zeros
(
x
.
shape
[:
-
nd
]
+
img_shp
,
dtype
=
x
.
dtype
)
(
x
.
shape
[
0
],
x
.
shape
[
1
],
img_rows
,
img_cols
),
y
[(
slice
(
None
),)
*
(
len
(
x
.
shape
)
-
nd
)
+
dtype
=
x
.
dtype
)
tuple
(
slice
(
pad
[
i
],
img_shp
[
i
]
-
pad
[
i
])
for
i
in
xrange
(
nd
))]
=
x
y
[:,
:,
pad_h
:(
img_rows
-
pad_h
),
pad_w
:(
img_cols
-
pad_w
)]
=
x
else
:
else
:
y
=
x
y
=
x
func
=
numpy
.
max
func
=
numpy
.
max
...
@@ -342,28 +368,30 @@ class Pool(OpenMPOp):
...
@@ -342,28 +368,30 @@ class Pool(OpenMPOp):
elif
self
.
mode
!=
'max'
:
elif
self
.
mode
!=
'max'
:
func
=
numpy
.
average
func
=
numpy
.
average
for
n
in
xrange
(
x
.
shape
[
0
]):
# precompute the region boundaries for each dimension
for
k
in
xrange
(
x
.
shape
[
1
]):
region_slices
=
[[]
for
i
in
xrange
(
nd
)]
for
r
in
xrange
(
pr
):
for
i
in
xrange
(
nd
):
row_st
=
r
*
st0
for
j
in
xrange
(
pool_out_shp
[
i
]):
row_end
=
builtins
.
min
(
row_st
+
ws0
,
img_rows
)
start
=
j
*
stride
[
i
]
if
not
inc_pad
:
end
=
builtins
.
min
(
start
+
ws
[
i
],
img_shp
[
i
])
row_st
=
builtins
.
max
(
row_st
,
pad_h
)
row_end
=
builtins
.
min
(
row_end
,
x
.
shape
[
-
2
]
+
pad_h
)
for
c
in
xrange
(
pc
):
col_st
=
c
*
st1
col_end
=
builtins
.
min
(
col_st
+
ws1
,
img_cols
)
if
not
inc_pad
:
if
not
inc_pad
:
col_st
=
builtins
.
max
(
col_st
,
pad_w
)
start
=
builtins
.
max
(
start
,
pad
[
i
])
col_end
=
builtins
.
min
(
col_end
,
end
=
builtins
.
min
(
end
,
img_shp
[
i
]
-
pad
[
i
])
x
.
shape
[
-
1
]
+
pad_w
)
region_slices
[
i
]
.
append
(
slice
(
start
,
end
))
zz
[
n
,
k
,
r
,
c
]
=
func
(
y
[
n
,
k
,
row_st
:
row_end
,
col_st
:
col_end
])
# iterate over non-pooling dimensions
for
k
in
numpy
.
ndindex
(
*
x
.
shape
[:
-
nd
]):
zzk
=
zz
[
k
]
yk
=
y
[
k
]
# iterate over pooling regions
for
r
in
numpy
.
ndindex
(
*
pool_out_shp
):
zzk
[
r
]
=
func
(
yk
[[
region_slices
[
i
][
r
[
i
]]
for
i
in
xrange
(
nd
)]])
def
infer_shape
(
self
,
node
,
in_shapes
):
def
infer_shape
(
self
,
node
,
in_shapes
):
ws
,
stride
,
pad
=
[
node
.
inputs
[
1
],
node
.
inputs
[
2
],
node
.
inputs
[
3
]]
ws
,
stride
,
pad
=
[
node
.
inputs
[
1
],
node
.
inputs
[
2
],
node
.
inputs
[
3
]]
shp
=
self
.
out_shape
(
in_shapes
[
0
],
ws
,
self
.
ignore_border
,
stride
,
shp
=
self
.
out_shape
(
in_shapes
[
0
],
ws
,
self
.
ignore_border
,
stride
,
pad
)
pad
,
self
.
ndim
)
return
[
shp
]
return
[
shp
]
def
grad
(
self
,
inp
,
grads
):
def
grad
(
self
,
inp
,
grads
):
...
@@ -372,10 +400,12 @@ class Pool(OpenMPOp):
...
@@ -372,10 +400,12 @@ class Pool(OpenMPOp):
disc
=
[
DisconnectedType
()()
for
i
in
inp
[
1
:]]
disc
=
[
DisconnectedType
()()
for
i
in
inp
[
1
:]]
if
self
.
mode
==
'max'
:
if
self
.
mode
==
'max'
:
maxout
=
self
(
x
,
ws
,
stride
,
pad
)
maxout
=
self
(
x
,
ws
,
stride
,
pad
)
return
[
MaxPoolGrad
(
ignore_border
=
self
.
ignore_border
)(
return
[
MaxPoolGrad
(
ndim
=
self
.
ndim
,
ignore_border
=
self
.
ignore_border
)(
x
,
maxout
,
gz
,
ws
=
ws
,
stride
=
stride
,
pad
=
pad
)]
+
disc
x
,
maxout
,
gz
,
ws
=
ws
,
stride
=
stride
,
pad
=
pad
)]
+
disc
else
:
else
:
return
[
AveragePoolGrad
(
ignore_border
=
self
.
ignore_border
,
return
[
AveragePoolGrad
(
ndim
=
self
.
ndim
,
ignore_border
=
self
.
ignore_border
,
mode
=
self
.
mode
)(
mode
=
self
.
mode
)(
x
,
gz
,
ws
=
ws
,
stride
=
stride
,
pad
=
pad
)]
+
disc
x
,
gz
,
ws
=
ws
,
stride
=
stride
,
pad
=
pad
)]
+
disc
...
@@ -392,292 +422,386 @@ class Pool(OpenMPOp):
...
@@ -392,292 +422,386 @@ class Pool(OpenMPOp):
raise
theano
.
gof
.
utils
.
MethodNotDefined
()
raise
theano
.
gof
.
utils
.
MethodNotDefined
()
x
,
ws
,
stride
,
pad
=
inp
x
,
ws
,
stride
,
pad
=
inp
z
,
=
out
z
,
=
out
nd
=
self
.
ndim
if
nd
is
None
:
nd
=
node
.
inputs
[
0
]
.
ndim
-
2
total_ndim
=
node
.
inputs
[
0
]
.
ndim
non_pool_ndim
=
total_ndim
-
nd
fail
=
sub
[
'fail'
]
fail
=
sub
[
'fail'
]
ignore_border
=
int
(
self
.
ignore_border
)
ignore_border
=
int
(
self
.
ignore_border
)
if
self
.
openmp
:
if
self
.
openmp
:
omp_parallel
=
'#pragma omp parallel for private(r_st, r_end, c_st, c_end, collector) schedule(static)'
# run in parallel over each pooling block
omp_parallel
=
'#pragma omp parallel for private(r_st, r_end, r_idx, i_idx, o_idx, collector) schedule(static)'
else
:
else
:
omp_parallel
=
''
omp_parallel
=
''
ccode
=
"""
ccode
=
"""
int ws0, ws1, st0, st1, pd0, pd1;
int typenum = PyArray_ObjectType((PyObject*)
%(x)
s, 0);
int typenum = PyArray_ObjectType((PyObject*)
%(x)
s, 0);
int z_r, z_c; // shape of the output
if(PyArray_NDIM(
%(x)
s)!=
%(total_ndim)
s)
int r, c; // shape of the padded_input
if(PyArray_DIM(
%(ws)
s, 0)!=2)
{
{
PyErr_SetString(PyExc_ValueError, "
ws must be a vector of size 2
");
PyErr_SetString(PyExc_ValueError, "
x must be a
%(total_ndim)
sD ndarray
");
%(fail)
s;
%(fail)
s;
}
}
if(PyArray_DIM(
%(
stride)
s, 0)!=2
)
if(PyArray_DIM(
%(
ws)
s, 0)!=
%(nd)
s
)
{
{
PyErr_SetString(PyExc_ValueError, "
stride must be a vector of size 2
");
PyErr_SetString(PyExc_ValueError, "
ws must be a vector of size
%(nd)
s
");
%(fail)
s;
%(fail)
s;
}
}
if(PyArray_DIM(
%(
pad)
s, 0)!=2
)
if(PyArray_DIM(
%(
stride)
s, 0)!=
%(nd)
s
)
{
{
PyErr_SetString(PyExc_ValueError, "
pad must be a vector of size 2
");
PyErr_SetString(PyExc_ValueError, "
stride must be a vector of size
%(nd)
s
");
%(fail)
s;
%(fail)
s;
}
}
// Getting ws, stride and pad
if(PyArray_DIM(
%(pad)
s, 0)!=
%(nd)
s)
ws0 = *((npy_intp*)PyArray_GETPTR1(
%(ws)
s, 0));
ws1 = *((npy_intp*)PyArray_GETPTR1(
%(ws)
s, 1));
st0 = *((npy_intp*)PyArray_GETPTR1(
%(stride)
s, 0));
st1 = *((npy_intp*)PyArray_GETPTR1(
%(stride)
s, 1));
pd0 = *((npy_intp*)PyArray_GETPTR1(
%(pad)
s, 0));
pd1 = *((npy_intp*)PyArray_GETPTR1(
%(pad)
s, 1));
if(PyArray_NDIM(
%(x)
s)!=4)
{
{
PyErr_SetString(PyExc_ValueError, "
x must be a 4d ndarray
");
PyErr_SetString(PyExc_ValueError, "
pad must be a vector of size
%(nd)
s
");
%(fail)
s;
%(fail)
s;
}
}
r = PyArray_DIMS(
%(x)
s)[2];
int z[
%(nd)
s]; // shape of the output
c = PyArray_DIMS(
%(x)
s)[3];
int r[
%(nd)
s]; // shape of the padded_input
r += pd0 * 2;
int ws[
%(nd)
s];
c += pd1 * 2;
int st[
%(nd)
s];
if (pd0 != 0 && pd1 != 0 && !
%(ignore_border)
s)
int pd[
%(nd)
s];
int nonzero_padding;
nonzero_padding = 0;
for (int i=0; i<
%(nd)
s; i++)
{
ws[i] = *((npy_intp*)PyArray_GETPTR1(
%(ws)
s, i));
st[i] = *((npy_intp*)PyArray_GETPTR1(
%(stride)
s, i));
pd[i] = *((npy_intp*)PyArray_GETPTR1(
%(pad)
s, i));
r[i] = PyArray_DIMS(
%(x)
s)[
%(non_pool_ndim)
s + i] + 2 * pd[i];
if (pd[i]>0)
nonzero_padding = 1;
}
if (!
%(ignore_border)
s && nonzero_padding)
{
{
PyErr_SetString(PyExc_ValueError,
PyErr_SetString(PyExc_ValueError,
"padding must be (0,0)
when ignore border is False");
"padding must be zero
when ignore border is False");
%(fail)
s;
%(fail)
s;
}
}
if (
%(ignore_border)
s)
if (
%(ignore_border)
s)
{
for (int i=0; i<
%(nd)
s; i++)
{
{
// '/' in C is different from '/' in python
// '/' in C is different from '/' in python
if (r - ws0
< 0)
if (r[i] - ws[i]
< 0)
{
{
z_r
= 0;
z[i]
= 0;
}
}
else
else
{
{
z_r = (r - ws0) / st0
+ 1;
z[i] = (r[i] - ws[i]) / st[i]
+ 1;
}
}
if (c - ws1 < 0)
{
z_c = 0;
}
else
{
z_c = (c - ws1) / st1 + 1;
}
}
}
}
else
else
{
{
// decide how many rows the output has
for (int i=0; i<
%(nd)
s; i++)
if (st0 >= ws0)
{
// decide how many rows/cols the output has
if (st[i] >= ws[i])
{
{
z_r = (r - 1) / st0
+ 1;
z[i] = (r[i] - 1) / st[i]
+ 1;
}
}
else
else
{
{
z_r = std::max(0, (r - 1 - ws0 + st0) / st0) + 1;
z[i] = std::max(0, (r[i] - 1 - ws[i] + st[i]) / st[i]) + 1;
}
assert(z[i] > 0);
}
}
// decide how many columns the output has
}
if (st1 >= ws1)
// memory allocation of z if necessary
int mem_nec;
mem_nec = 0;
if ((!
%(z)
s) || *PyArray_DIMS(
%(z)
s)!=
%(total_ndim)
s)
{
{
z_c = (c - 1) / st1 +
1;
mem_nec =
1;
}
}
else
if (!mem_nec)
{
for (int i=0; i<
%(non_pool_ndim)
s; i++)
{
if (PyArray_DIMS(
%(z)
s)[i] != PyArray_DIMS(
%(x)
s)[i])
{
{
z_c = std::max(0, (c - 1 - ws1 + st0) / st1) + 1;
mem_nec = 1;
break;
}
}
assert(z_r > 0);
assert(z_c > 0);
}
}
// memory allocation of z if necessary
}
if ((!
%(z)
s)
if (!mem_nec)
|| *PyArray_DIMS(
%(z)
s)!=4
{
||(PyArray_DIMS(
%(z)
s)[0] != PyArray_DIMS(
%(x)
s)[0])
for (int i=0; i<
%(nd)
s; i++)
||(PyArray_DIMS(
%(z)
s)[1] != PyArray_DIMS(
%(x)
s)[1])
{
||(PyArray_DIMS(
%(z)
s)[2] != z_r)
if (PyArray_DIMS(
%(z)
s)[
%(non_pool_ndim)
s + i] != z[i])
||(PyArray_DIMS(
%(z)
s)[3] != z_c)
{
)
mem_nec = 1;
break;
}
}
}
if (mem_nec)
{
{
if (
%(z)
s) Py_XDECREF(
%(z)
s);
if (
%(z)
s) Py_XDECREF(
%(z)
s);
npy_intp dims[4] = {0,0,0,0};
npy_intp dims[
%(total_ndim)
s];
dims[0]=PyArray_DIMS(
%(x)
s)[0];
for (int i=0; i<
%(non_pool_ndim)
s; i++)
dims[1]=PyArray_DIMS(
%(x)
s)[1];
{
dims[2]=z_r;
dims[i] = PyArray_DIMS(
%(x)
s)[i];
dims[3]=z_c;
}
for (int i=0; i<
%(nd)
s; i++)
{
dims[
%(non_pool_ndim)
s + i] = z[i];
}
//TODO: zeros not necessary
//TODO: zeros not necessary
%(z)
s = (PyArrayObject*) PyArray_ZEROS(4, dims, typenum,0);
%(z)
s = (PyArrayObject*) PyArray_ZEROS(
%(total_ndim)
s, dims, typenum,0);
}
// initialize temp var for the value in a region
dtype_
%(x)
s collector;
int z_prod;
// do not run if any z[i] is zero
z_prod = 1;
for (int i=0; i<
%(nd)
s; i++)
{
z_prod *= z[i];
}
}
// used for indexing a pool region inside the input
if (z_prod)
dtype_
%(x)
s collector; // temp var for the value in a region
if (z_r && z_c)
{
{
int r_st, r_end, c_st, c_end;
// will be used to hold start and end index of a region
int r_st[
%(nd)
s];
int r_end[
%(nd)
s];
// index for iterating over the pooling regions
int r_idx[
%(nd)
s];
// placeholder for PyArray indexing (output)
long int o_idx[
%(total_ndim)
s];
// placeholder for PyArray indexing (input)
long int i_idx[
%(total_ndim)
s];
// loop over non-pooling dimensions
int non_pooling_prod = 1;
for (int i=0; i<
%(non_pool_ndim)
s; i++)
{
non_pooling_prod *= PyArray_DIMS(
%(x)
s)[i];
}
%(omp_parallel)
s
%(omp_parallel)
s
for(int t = 0; t < PyArray_DIMS(
%(x)
s)[0] * PyArray_DIMS(
%(x)
s)[1]; t++){
// first loop over non-pooling dimensions
int b = t
%%
PyArray_DIMS(
%(x)
s)[0];
for (int t=0; t<non_pooling_prod; t++)
int k = t / PyArray_DIMS(
%(x)
s)[0];
{
for(int i=0; i < z_r; i++){
// compute the non-pooling index in each dimension
r_st = i * st0;
if (
%(non_pool_ndim)
s!=0)
r_end = r_st + ws0;
{
// skip the padding
o_idx[0] = t;
r_st = r_st < pd0 ? pd0 : r_st;
i_idx[0] = t;
r_end = r_end > (r - pd0) ? r - pd0 : r_end;
for (int i=1; i<
%(non_pool_ndim)
s; i++)
// from padded_img space to img space
r_st -= pd0;
r_end -= pd0;
// handle the case where no padding, ignore border is True
if (
%(ignore_border)
s)
{
{
r_end = r_end > r ? r : r_end;
o_idx[i] = o_idx[i - 1] / PyArray_DIMS(
%(x)
s)[i - 1];
o_idx[i - 1] = o_idx[i - 1]
%%
PyArray_DIMS(
%(x)
s)[i - 1];
i_idx[i] = o_idx[i];
i_idx[i - 1] = o_idx[i - 1];
}
}
for(int j=0; j<z_c; j++){
}
c_st = j * st1;
c_end = c_st + ws1;
// then loop over each region in each pooling dimension
"""
for
i
in
xrange
(
nd
):
ccode
+=
"""
for (r_idx[
%(i)
s]=0; r_idx[
%(i)
s] < z[
%(i)
s]; r_idx[
%(i)
s]++) {
r_st[
%(i)
s] = r_idx[
%(i)
s] * st[
%(i)
s];
r_end[
%(i)
s] = r_st[
%(i)
s] + ws[
%(i)
s];
// skip the padding
// skip the padding
c_st = c_st < pd1 ? pd1 : c_st;
r_st[
%(i)
s] = r_st[
%(i)
s] < pd[
%(i)
s] ? pd[
%(i)
s] : r_st[
%(i)
s];
c_end = c_end > (c - pd1) ? c - pd1 : c_end;
r_end[
%(i)
s] = r_end[
%(i)
s] > (r[
%(i)
s] - pd[
%(i)
s]) ? r[
%(i)
s] - pd[
%(i)
s] : r_end[
%(i)
s];
dtype_
%(z)
s * z = (
// from padded_img space to img space
(dtype_
%(z)
s*)(PyArray_GETPTR4(
%(z)
s, b, k, i, j)));
r_st[
%(i)
s] -= pd[
%(i)
s];
// change coordinates from padding_img space into img space
r_end[
%(i)
s] -= pd[
%(i)
s];
c_st -= pd1;
c_end -= pd1;
// handle the case where no padding, ignore border is True
// handle the case where no padding, ignore border is True
if (
%(ignore_border)
s)
if (
%(ignore_border)
s)
{
{
c_end = c_end > c ? c : c_end
;
r_end[
%(i)
s] = r_end[
%(i)
s] > r[
%(i)
s] ? r[
%(i)
s] : r_end[
%(i)
s]
;
}
}
// use the index to find the correct position in the output
o_idx[
%(non_pool_ndim)
s +
%(i)
s] = r_idx[
%(i)
s];
"""
%
dict
(
i
=
i
,
ignore_border
=
ignore_border
,
non_pool_ndim
=
non_pool_ndim
)
ccode
+=
"""
// get a pointer to the correct position in the output
dtype_
%(z)
s * z;
if (
%(total_ndim)
s == 4)
z = ((dtype_
%(z)
s*)(PyArray_GETPTR4(
%(z)
s, o_idx[0], o_idx[1], o_idx[2], o_idx[3])));
else
z = ((dtype_
%(z)
s*)(PyArray_GetPtr(
%(z)
s, o_idx)));
"""
"""
if
self
.
mode
==
'max'
:
if
self
.
mode
==
'max'
:
for
i
in
xrange
(
nd
):
ccode
+=
"""
// set the first index of dimension
%(i)
s
i_idx[
%(non_pool_ndim)
s +
%(i)
s] = r_st[
%(i)
s];
"""
%
dict
(
i
=
i
,
non_pool_ndim
=
non_pool_ndim
)
ccode
+=
"""
ccode
+=
"""
// use the first element as the initial value of collector
// use the first element as the initial value of collector
collector = ((dtype_
%(x)
s*)(PyArray_GETPTR4(
%(x)
s,b,k,r_st,c_st)))[0];
if (
%(total_ndim)
s == 4)
collector = ((dtype_
%(x)
s*)(PyArray_GETPTR4(
%(x)
s,i_idx[0],i_idx[1],i_idx[2],i_idx[3])))[0];
else
collector = ((dtype_
%(x)
s*)(PyArray_GetPtr(
%(x)
s,i_idx)))[0];
"""
for
i
in
xrange
(
nd
):
ccode
+=
"""
// go through the pooled region in the unpadded input
// go through the pooled region in the unpadded input
for(int m=r_st; m<r_end; m
++)
for(int m
%(i)
s=r_st[
%(i)
s]; m
%(i)
s<r_end[
%(i)
s]; m
%(i)
s
++)
{
{
for(int n=c_st; n<c_end; n++)
i_idx[
%(non_pool_ndim)
s +
%(i)
s] = m
%(i)
s;
{
"""
%
dict
(
i
=
i
,
non_pool_ndim
=
non_pool_ndim
)
dtype_
%(x)
s a = ((dtype_
%(x)
s*)(PyArray_GETPTR4(
%(x)
s,b,k,m,n)))[0];
ccode
+=
"""
// update maximum
dtype_
%(x)
s a;
if (
%(total_ndim)
s == 4)
a = ((dtype_
%(x)
s*)(PyArray_GETPTR4(
%(x)
s,i_idx[0],i_idx[1],i_idx[2],i_idx[3])))[0];
else
a = ((dtype_
%(x)
s*)(PyArray_GetPtr(
%(x)
s,i_idx)))[0];
collector = (a > collector) ? a : collector;
collector = (a > collector) ? a : collector;
}
"""
}
for
i
in
xrange
(
nd
):
ccode
+=
"""
} // for loop over region
"""
ccode
+=
"""
z[0] = collector;
z[0] = collector;
"""
"""
elif
self
.
mode
in
(
'sum'
,
'average_exc_pad'
,
'average_inc_pad'
):
elif
self
.
mode
in
(
'sum'
,
'average_exc_pad'
,
'average_inc_pad'
):
ccode
+=
"""
ccode
+=
"""
// initialize the sum at zero
// initialize the sum at zero
collector = ((dtype_
%(x)
s)(0));
collector = ((dtype_
%(x)
s)(0));
"""
for
i
in
xrange
(
nd
):
ccode
+=
"""
// go through the pooled region in the unpadded input
// go through the pooled region in the unpadded input
for(int m=r_st; m<r_end; m
++)
for(int m
%(i)
s=r_st[
%(i)
s]; m
%(i)
s<r_end[
%(i)
s]; m
%(i)
s
++)
{
{
for(int n=c_st; n<c_end; n++)
i_idx[
%(non_pool_ndim)
s +
%(i)
s] = m
%(i)
s;
{
"""
%
dict
(
i
=
i
,
non_pool_ndim
=
non_pool_ndim
)
dtype_
%(x)
s a = ((dtype_
%(x)
s*)(PyArray_GETPTR4(
%(x)
s,b,k,m,n)))[0];
ccode
+=
"""
// update sum
dtype_
%(x)
s a;
if (
%(total_ndim)
s == 4)
a = ((dtype_
%(x)
s*)(PyArray_GETPTR4(
%(x)
s,i_idx[0],i_idx[1],i_idx[2],i_idx[3])))[0];
else
a = ((dtype_
%(x)
s*)(PyArray_GetPtr(
%(x)
s,i_idx)))[0];
collector += a;
collector += a;
}
"""
}
for
i
in
xrange
(
nd
):
ccode
+=
"""
} // for loop over region
"""
"""
if
self
.
mode
==
"sum"
:
if
self
.
mode
==
"sum"
:
ccode
+=
"""
ccode
+=
"""
z[0] = collector;
z[0] = collector;
"""
"""
elif
self
.
mode
==
'average_inc_pad'
and
self
.
ignore_border
:
elif
self
.
mode
==
'average_inc_pad'
and
self
.
ignore_border
:
# region size = product over all pooling dimensions
region_size
=
' * '
.
join
(
'ws[
%
d]'
%
i
for
i
in
xrange
(
nd
))
ccode
+=
"""
ccode
+=
"""
z[0] = collector / (ws0 * ws1
);
z[0] = collector / (
%(region_size)
s
);
"""
"""
%
dict
(
region_size
=
region_size
)
else
:
else
:
# region size = number elements of in this region
region_size
=
' * '
.
join
(
'(r_end[
%
d]-r_st[
%
d])'
%
(
i
,
i
)
for
i
in
xrange
(
nd
))
ccode
+=
"""
ccode
+=
"""
z[0] = collector / ((r_end-r_st)*(c_end-c_st));
z[0] = collector / (
%(region_size)
s);
"""
%
dict
(
region_size
=
region_size
)
for
i
in
xrange
(
nd
):
ccode
+=
"""
} // loop over pooling dimension
"""
"""
ccode
+=
"""
ccode
+=
"""
}
} // for loop over non-pooling dimensions
}
} // if z_prod
}
}
"""
"""
return
ccode
%
locals
()
return
ccode
%
locals
()
def
c_code_cache_version
(
self
):
def
c_code_cache_version
(
self
):
return
(
0
,
6
,
8
,
6
,
self
.
openmp
)
return
(
0
,
6
,
8
,
7
,
self
.
openmp
)
class
PoolGrad
(
OpenMPOp
):
class
PoolGrad
(
OpenMPOp
):
__props__
=
(
'ignore_border'
,
'mode'
)
__props__
=
(
'
ndim'
,
'
ignore_border'
,
'mode'
)
@staticmethod
@staticmethod
def
out_shape
(
imgshape
,
ds
,
ignore_border
=
False
,
st
=
None
,
padding
=
(
0
,
0
)
):
def
out_shape
(
imgshape
,
ds
,
ignore_border
=
False
,
st
=
None
,
padding
=
None
,
ndim
=
None
):
"""Return the shape of the output from this op, for input of given
"""Return the shape of the output from this op, for input of given
shape and flags.
shape and flags.
Parameters
Parameters
----------
----------
imgshape : tuple of integers or scalar Theano variables
imgshape : tuple of integers or scalar Theano variables
the shape of a tensor of images. The last
two
elements are
the shape of a tensor of images. The last
N
elements are
interpreted as the
number of rows, and the number of col
s.
interpreted as the
downsampling dimension
s.
ds : tuple of
two
ints
ds : tuple of
N
ints
downsample factor over rows and columns this parameter
downsample factor over rows and columns this parameter
indicates the size of the pooling region
indicates the size of the pooling region
st : tuple of two ints
the stride size. This is the distance between the pooling
regions. If it's set to None, in which case it equlas ds.
ignore_border : bool
ignore_border : bool
if ds doesn't divide imgshape, do we include an extra
If ds doesn't divide imgshape, do we include an extra row/col/slice
row/col of partial downsampling (False) or ignore it
of partial downsampling (False) or ignore it (True).
(True).
st : list or tuple of N ints or None
padding : tuple of two ints
Stride size, which is the number of shifts over rows/cols/slices to get the
(pad_h, pad_w), pad zeros to extend beyond four borders of
next pool region. If st is None, it is considered equal to ds
the images, pad_h is the size of the top and bottom
(no overlap on pooling regions).
margins, and pad_w is the size of the left and right
padding : tuple of N ints or None
margins.
For each downsampling dimension, this specifies the number of zeros to
add as padding on both sides. For 2D and (pad_h, pad_w), pad_h specifies the
size of the top and bottom margins, pad_w specifies the size of the left and
right margins. No padding is added if padding is None.
ndim : int
The number of pooling dimensions N.
If this number is not specified, the default is set to the
(input.ndim - 2), assuming that the first two dimensions of the input
are non-pooling dimensions.
Returns
Returns
-------
-------
list :
list :
the shape of the output from this op, for input of given
the shape of the output from this op, for input of given
shape. This will have the same length as imgshape, but
shape. This will have the same length as imgshape, but
with last
two
elements reduced as per the downsampling &
with last
N
elements reduced as per the downsampling &
ignore_border flags.
ignore_border flags.
"""
"""
if
len
(
imgshape
)
<
2
:
if
ndim
is
None
:
raise
TypeError
(
'imgshape must have at least two elements '
ndim
=
len
(
imgshape
)
-
2
'(rows, cols)'
)
if
len
(
imgshape
)
<
ndim
:
raise
TypeError
(
'imgshape must have at least {} dimensions'
.
format
(
ndim
))
if
st
is
None
:
if
st
is
None
:
st
=
ds
st
=
ds
r
,
c
=
imgshape
[
-
2
:]
if
padding
is
None
:
r
+=
padding
[
0
]
*
2
padding
=
(
0
,)
*
ndim
c
+=
padding
[
1
]
*
2
patch_shape
=
tuple
(
tensor
.
extract_constant
(
imgshape
[
-
ndim
+
i
])
+
padding
[
i
]
*
2
for
i
in
xrange
(
ndim
))
def
compute_out
(
v
,
downsample
,
stride
):
if
ignore_border
:
if
ignore_border
:
out_r
=
(
r
-
ds
[
0
])
//
st
[
0
]
+
1
out
=
(
v
-
downsample
)
//
stride
+
1
out_c
=
(
c
-
ds
[
1
])
//
st
[
1
]
+
1
if
isinstance
(
out
,
theano
.
Variable
):
if
isinstance
(
r
,
theano
.
Variable
):
return
tensor
.
maximum
(
out
,
0
)
nr
=
tensor
.
maximum
(
out_r
,
0
)
else
:
else
:
nr
=
numpy
.
maximum
(
out_r
,
0
)
return
numpy
.
maximum
(
out
,
0
)
if
isinstance
(
c
,
theano
.
Variable
):
nc
=
tensor
.
maximum
(
out_c
,
0
)
else
:
else
:
nc
=
numpy
.
maximum
(
out_c
,
0
)
if
isinstance
(
v
,
theano
.
Variable
):
return
tensor
.
switch
(
tensor
.
ge
(
stride
,
downsample
),
(
v
-
1
)
//
stride
+
1
,
tensor
.
maximum
(
0
,
(
v
-
1
-
downsample
)
//
stride
+
1
)
+
1
)
elif
stride
>=
downsample
:
return
(
v
-
1
)
//
stride
+
1
else
:
else
:
if
isinstance
(
r
,
theano
.
Variable
):
return
max
(
0
,
(
v
-
1
-
downsample
)
//
stride
+
1
)
+
1
nr
=
tensor
.
switch
(
tensor
.
ge
(
st
[
0
],
ds
[
0
]),
(
r
-
1
)
//
st
[
0
]
+
1
,
out_shape
=
[
compute_out
(
patch_shape
[
i
],
ds
[
i
],
st
[
i
])
for
i
in
xrange
(
ndim
)]
tensor
.
maximum
(
0
,
(
r
-
1
-
ds
[
0
])
//
st
[
0
]
+
1
)
+
1
)
elif
st
[
0
]
>=
ds
[
0
]:
nr
=
(
r
-
1
)
//
st
[
0
]
+
1
else
:
nr
=
max
(
0
,
(
r
-
1
-
ds
[
0
])
//
st
[
0
]
+
1
)
+
1
if
isinstance
(
c
,
theano
.
Variable
):
nc
=
tensor
.
switch
(
tensor
.
ge
(
st
[
1
],
ds
[
1
]),
(
c
-
1
)
//
st
[
1
]
+
1
,
tensor
.
maximum
(
0
,
(
c
-
1
-
ds
[
1
])
//
st
[
1
]
+
1
)
+
1
)
elif
st
[
1
]
>=
ds
[
1
]:
nc
=
(
c
-
1
)
//
st
[
1
]
+
1
else
:
nc
=
max
(
0
,
(
c
-
1
-
ds
[
1
])
//
st
[
1
]
+
1
)
+
1
rval
=
list
(
imgshape
[:
-
2
])
+
[
nr
,
nc
]
rval
=
list
(
imgshape
[:
-
ndim
])
+
out_shape
return
rval
return
rval
def
__init__
(
self
,
ignore_border
,
mode
=
'max'
,
openmp
=
None
):
def
__init__
(
self
,
ignore_border
,
mode
=
'max'
,
ndim
=
None
,
openmp
=
None
):
self
.
ndim
=
ndim
self
.
ignore_border
=
ignore_border
self
.
ignore_border
=
ignore_border
if
mode
not
in
[
'max'
,
'sum'
,
'average_inc_pad'
,
'average_exc_pad'
]:
if
mode
not
in
[
'max'
,
'sum'
,
'average_inc_pad'
,
'average_exc_pad'
]:
raise
ValueError
(
raise
ValueError
(
...
@@ -689,6 +813,7 @@ class PoolGrad(OpenMPOp):
...
@@ -689,6 +813,7 @@ class PoolGrad(OpenMPOp):
def
prepare_node
(
self
,
node
,
storage_map
,
compute_map
):
def
prepare_node
(
self
,
node
,
storage_map
,
compute_map
):
if
len
(
node
.
inputs
)
<
5
:
# 5 for AveragePoolGrad, 6 for MaxPoolGrad
if
len
(
node
.
inputs
)
<
5
:
# 5 for AveragePoolGrad, 6 for MaxPoolGrad
# Old interface
# Old interface
self
.
ndim
=
len
(
node
.
op
.
ds
)
self
.
mode
=
node
.
op
.
mode
self
.
mode
=
node
.
op
.
mode
ws
=
theano
.
tensor
.
constant
(
node
.
op
.
ds
)
ws
=
theano
.
tensor
.
constant
(
node
.
op
.
ds
)
st
=
theano
.
tensor
.
constant
(
node
.
op
.
st
)
st
=
theano
.
tensor
.
constant
(
node
.
op
.
st
)
...
@@ -720,26 +845,32 @@ class PoolGrad(OpenMPOp):
...
@@ -720,26 +845,32 @@ class PoolGrad(OpenMPOp):
class
MaxPoolGrad
(
PoolGrad
):
class
MaxPoolGrad
(
PoolGrad
):
def
__init__
(
self
,
ignore_border
,
openmp
=
None
):
def
__init__
(
self
,
ignore_border
,
ndim
=
None
,
openmp
=
None
):
PoolGrad
.
__init__
(
self
,
ignore_border
,
mode
=
'max'
,
openmp
=
openmp
)
PoolGrad
.
__init__
(
self
,
ignore_border
,
mode
=
'max'
,
ndim
=
ndim
,
openmp
=
openmp
)
def
make_node
(
self
,
x
,
maxout
,
gz
,
ws
,
stride
=
None
,
pad
=
(
0
,
0
)
):
def
make_node
(
self
,
x
,
maxout
,
gz
,
ws
,
stride
=
None
,
pad
=
None
):
# make_node should only be called by the grad function of
# make_node should only be called by the grad function of
# Pool, so these asserts should not fail.
# Pool, so these asserts should not fail.
x
=
tensor
.
as_tensor_variable
(
x
)
x
=
tensor
.
as_tensor_variable
(
x
)
maxout
=
tensor
.
as_tensor_variable
(
maxout
)
maxout
=
tensor
.
as_tensor_variable
(
maxout
)
gz
=
tensor
.
as_tensor_variable
(
gz
)
gz
=
tensor
.
as_tensor_variable
(
gz
)
nd
=
self
.
ndim
if
nd
is
None
:
nd
=
x
.
ndim
-
2
if
stride
is
None
:
if
stride
is
None
:
stride
=
ws
stride
=
ws
if
pad
is
None
:
pad
=
(
0
,)
*
nd
ws
=
tensor
.
as_tensor_variable
(
ws
)
ws
=
tensor
.
as_tensor_variable
(
ws
)
stride
=
tensor
.
as_tensor_variable
(
stride
)
stride
=
tensor
.
as_tensor_variable
(
stride
)
pad
=
tensor
.
as_tensor_variable
(
pad
)
pad
=
tensor
.
as_tensor_variable
(
pad
)
assert
isinstance
(
x
,
Variable
)
and
x
.
ndim
==
4
assert
isinstance
(
x
,
Variable
)
and
x
.
ndim
>=
nd
assert
isinstance
(
maxout
,
Variable
)
and
maxout
.
ndim
==
4
assert
isinstance
(
maxout
,
Variable
)
and
maxout
.
ndim
>=
nd
assert
isinstance
(
gz
,
Variable
)
and
gz
.
ndim
==
4
assert
isinstance
(
gz
,
Variable
)
and
gz
.
ndim
>=
nd
assert
isinstance
(
ws
,
Variable
)
and
ws
.
ndim
==
1
assert
isinstance
(
ws
,
Variable
)
and
ws
.
ndim
==
1
assert
isinstance
(
stride
,
Variable
)
and
stride
.
ndim
==
1
assert
isinstance
(
stride
,
Variable
)
and
stride
.
ndim
==
1
assert
isinstance
(
pad
,
Variable
)
and
pad
.
ndim
==
1
assert
isinstance
(
pad
,
Variable
)
and
pad
.
ndim
==
1
assert
x
.
ndim
==
maxout
.
ndim
==
gz
.
ndim
>=
nd
if
not
ws
.
dtype
.
startswith
(
'int'
):
if
not
ws
.
dtype
.
startswith
(
'int'
):
raise
TypeError
(
'Pool downsample parameters must be ints.'
)
raise
TypeError
(
'Pool downsample parameters must be ints.'
)
if
not
stride
.
dtype
.
startswith
(
'int'
):
if
not
stride
.
dtype
.
startswith
(
'int'
):
...
@@ -752,41 +883,51 @@ class MaxPoolGrad(PoolGrad):
...
@@ -752,41 +883,51 @@ class MaxPoolGrad(PoolGrad):
assert
self
.
mode
==
'max'
assert
self
.
mode
==
'max'
x
,
maxout
,
gz
,
ws
,
stride
,
pad
=
inp
x
,
maxout
,
gz
,
ws
,
stride
,
pad
=
inp
gx_stg
,
=
out
gx_stg
,
=
out
assert
ws
.
shape
==
stride
.
shape
==
pad
.
shape
==
(
2
,)
nd
=
self
.
ndim
# number of pooling output rows
if
nd
is
None
:
pr
=
maxout
.
shape
[
-
2
]
nd
=
len
(
x
.
shape
)
-
2
# number of pooling output cols
assert
ws
.
shape
==
stride
.
shape
==
pad
.
shape
==
(
nd
,)
pc
=
maxout
.
shape
[
-
1
]
if
len
(
x
.
shape
)
<
nd
:
ws0
,
ws1
=
ws
raise
NotImplementedError
(
st0
,
st1
=
stride
'MaxPoolGrad requires input with {} or more dimensions'
.
format
(
nd
))
pad_h
=
pad
[
0
]
pool_out_shp
=
maxout
.
shape
[
-
nd
:]
pad_w
=
pad
[
1
]
img_shp
=
tuple
(
x
.
shape
[
-
nd
+
i
]
+
2
*
pad
[
i
]
for
i
in
xrange
(
nd
))
img_rows
=
x
.
shape
[
-
2
]
+
2
*
pad_h
img_cols
=
x
.
shape
[
-
1
]
+
2
*
pad_w
# pad the image
# pad the image
if
(
pad_h
,
pad_w
)
!=
(
0
,
0
):
if
max
(
pad
)
!=
0
:
y
=
numpy
.
zeros
(
y
=
numpy
.
zeros
(
x
.
shape
[:
-
nd
]
+
img_shp
,
dtype
=
x
.
dtype
)
(
x
.
shape
[
0
],
x
.
shape
[
1
],
img_rows
,
img_cols
),
y
[(
slice
(
None
),)
*
(
len
(
x
.
shape
)
-
nd
)
+
dtype
=
x
.
dtype
)
tuple
(
slice
(
pad
[
i
],
img_shp
[
i
]
-
pad
[
i
])
for
i
in
xrange
(
nd
))]
=
x
y
[:,
:,
pad_h
:(
img_rows
-
pad_h
),
pad_w
:(
img_cols
-
pad_w
)]
=
x
else
:
else
:
y
=
x
y
=
x
gx
=
numpy
.
zeros_like
(
y
)
gx
=
numpy
.
zeros_like
(
y
)
for
n
in
xrange
(
x
.
shape
[
0
]):
for
k
in
xrange
(
x
.
shape
[
1
]):
# precompute the region boundaries for each dimension
for
r
in
xrange
(
pr
):
region_ranges
=
[[]
for
i
in
xrange
(
nd
)]
row_st
=
builtins
.
max
(
r
*
st0
,
pad_h
)
for
i
in
xrange
(
nd
):
row_end
=
builtins
.
min
(
row_st
+
ws0
,
img_rows
)
for
j
in
xrange
(
pool_out_shp
[
i
]):
for
c
in
xrange
(
pc
):
start
=
builtins
.
max
(
j
*
stride
[
i
],
pad
[
i
])
col_st
=
builtins
.
max
(
c
*
st1
,
pad_w
)
end
=
builtins
.
min
(
start
+
ws
[
i
],
img_shp
[
i
])
col_end
=
builtins
.
min
(
col_st
+
ws1
,
img_cols
)
region_ranges
[
i
]
.
append
(
xrange
(
start
,
end
))
for
row_ind
in
xrange
(
row_st
,
row_end
):
for
col_ind
in
xrange
(
col_st
,
col_end
):
# iterate over non-pooling dimensions
if
(
maxout
[
n
,
k
,
r
,
c
]
==
y
[
n
,
k
,
row_ind
,
col_ind
]):
for
k
in
numpy
.
ndindex
(
*
x
.
shape
[:
-
nd
]):
gx
[
n
,
k
,
row_ind
,
col_ind
]
+=
gz
[
n
,
k
,
r
,
c
]
gxk
=
gx
[
k
]
gzk
=
gz
[
k
]
yk
=
y
[
k
]
maxoutk
=
maxout
[
k
]
# iterate over pooling regions
for
r
in
numpy
.
ndindex
(
*
pool_out_shp
):
maxout_value
=
maxoutk
[
r
]
# iterate inside region
for
c
in
itertools
.
product
(
*
[
region_ranges
[
i
][
r
[
i
]]
for
i
in
xrange
(
nd
)]):
if
maxout_value
==
yk
[
c
]:
gxk
[
c
]
+=
gzk
[
r
]
# unpad the image
# unpad the image
gx
=
gx
[:,
:,
pad_h
:(
img_rows
-
pad_h
),
pad_w
:(
img_cols
-
pad_w
)]
gx
=
gx
[(
slice
(
None
),)
*
(
len
(
x
.
shape
)
-
nd
)
+
tuple
(
slice
(
pad
[
i
],
img_shp
[
i
]
-
pad
[
i
])
for
i
in
xrange
(
nd
))]
gx_stg
[
0
]
=
gx
gx_stg
[
0
]
=
gx
def
grad
(
self
,
inp
,
grads
):
def
grad
(
self
,
inp
,
grads
):
...
@@ -794,7 +935,8 @@ class MaxPoolGrad(PoolGrad):
...
@@ -794,7 +935,8 @@ class MaxPoolGrad(PoolGrad):
ggx
,
=
grads
ggx
,
=
grads
return
([
theano
.
tensor
.
zeros_like
(
x
),
return
([
theano
.
tensor
.
zeros_like
(
x
),
theano
.
tensor
.
zeros_like
(
maxout
),
theano
.
tensor
.
zeros_like
(
maxout
),
DownsampleFactorMaxGradGrad
(
ignore_border
=
self
.
ignore_border
)(
DownsampleFactorMaxGradGrad
(
ndim
=
self
.
ndim
,
ignore_border
=
self
.
ignore_border
)(
x
,
maxout
,
ggx
,
ws
,
stride
,
pad
)]
+
x
,
maxout
,
ggx
,
ws
,
stride
,
pad
)]
+
[
DisconnectedType
()()
for
i
in
inp
[
3
:]])
[
DisconnectedType
()()
for
i
in
inp
[
3
:]])
...
@@ -805,161 +947,253 @@ class MaxPoolGrad(PoolGrad):
...
@@ -805,161 +947,253 @@ class MaxPoolGrad(PoolGrad):
assert
self
.
mode
==
'max'
assert
self
.
mode
==
'max'
x
,
z
,
gz
,
ws
,
stride
,
pad
=
inp
x
,
z
,
gz
,
ws
,
stride
,
pad
=
inp
gx
,
=
out
gx
,
=
out
nd
=
self
.
ndim
if
nd
is
None
:
nd
=
node
.
inputs
[
0
]
.
ndim
-
2
total_ndim
=
node
.
inputs
[
0
]
.
ndim
non_pool_ndim
=
total_ndim
-
nd
fail
=
sub
[
'fail'
]
fail
=
sub
[
'fail'
]
ignore_border
=
int
(
self
.
ignore_border
)
ignore_border
=
int
(
self
.
ignore_border
)
if
self
.
openmp
:
if
self
.
openmp
:
omp_parallel
=
'#pragma omp parallel for private(r_st, r_end, c_st, c_end, maximum) schedule(static)'
# run in parallel over each pooling block
omp_parallel
=
'#pragma omp parallel for private(r_st, r_end, r_idx, i_idx, o_idx, maximum) schedule(static)'
else
:
else
:
omp_parallel
=
''
omp_parallel
=
''
return
"""
ccode
=
"""
// sanity checks
// sanity checks
int x_typenum = PyArray_ObjectType((PyObject*)
%(x)
s, 0);
int x_typenum = PyArray_ObjectType((PyObject*)
%(x)
s, 0);
int z_typenum = PyArray_ObjectType((PyObject*)
%(z)
s, 0);
int z_typenum = PyArray_ObjectType((PyObject*)
%(z)
s, 0);
int gz_typenum = PyArray_ObjectType((PyObject*)
%(gz)
s, 0);
int gz_typenum = PyArray_ObjectType((PyObject*)
%(gz)
s, 0);
int ws0, ws1, st0, st1, pd0, pd1;
int z_r, z_c;
int r, c; // shape of the padded_input
if ((x_typenum != z_typenum) || (x_typenum != gz_typenum))
if ((x_typenum != z_typenum) || (x_typenum != gz_typenum))
{
{
PyErr_SetString(PyExc_ValueError, "input types must all match");
PyErr_SetString(PyExc_ValueError, "input types must all match");
%(fail)
s;
%(fail)
s;
}
}
if(PyArray_NDIM(
%(x)
s)!=
4
)
if(PyArray_NDIM(
%(x)
s)!=
%(total_ndim)
s
)
{
{
PyErr_SetString(PyExc_ValueError, "x must be a
4d
ndarray");
PyErr_SetString(PyExc_ValueError, "x must be a
%(total_ndim)
sD
ndarray");
%(fail)
s;
%(fail)
s;
}
}
if(PyArray_NDIM(
%(z)
s)!=
4
)
if(PyArray_NDIM(
%(z)
s)!=
%(total_ndim)
s
)
{
{
PyErr_SetString(PyExc_ValueError, "z must be a
4d
ndarray");
PyErr_SetString(PyExc_ValueError, "z must be a
%(total_ndim)
sD
ndarray");
%(fail)
s;
%(fail)
s;
}
}
if(PyArray_NDIM(
%(gz)
s)!=
4
)
if(PyArray_NDIM(
%(gz)
s)!=
%(total_ndim)
s
)
{
{
PyErr_SetString(PyExc_ValueError, "gz must be a
4d
ndarray");
PyErr_SetString(PyExc_ValueError, "gz must be a
%(total_ndim)
sD
ndarray");
%(fail)
s;
%(fail)
s;
}
}
if(PyArray_DIM(
%(ws)
s, 0)!=
2
)
if(PyArray_DIM(
%(ws)
s, 0)!=
%(nd)
s
)
{
{
PyErr_SetString(PyExc_ValueError, "ws must be a vector of size
2
");
PyErr_SetString(PyExc_ValueError, "ws must be a vector of size
%(nd)
s
");
%(fail)
s;
%(fail)
s;
}
}
if(PyArray_DIM(
%(stride)
s, 0)!=
2
)
if(PyArray_DIM(
%(stride)
s, 0)!=
%(nd)
s
)
{
{
PyErr_SetString(PyExc_ValueError, "stride must be a vector of size
2
");
PyErr_SetString(PyExc_ValueError, "stride must be a vector of size
%(nd)
s
");
%(fail)
s;
%(fail)
s;
}
}
if(PyArray_DIM(
%(pad)
s, 0)!=
2
)
if(PyArray_DIM(
%(pad)
s, 0)!=
%(nd)
s
)
{
{
PyErr_SetString(PyExc_ValueError, "pad must be a vector of size
2
");
PyErr_SetString(PyExc_ValueError, "pad must be a vector of size
%(nd)
s
");
%(fail)
s;
%(fail)
s;
}
}
// Getting ws, stride and pad
int z[
%(nd)
s]; // shape of the output
ws0 = *((npy_intp*)PyArray_GETPTR1(
%(ws)
s, 0));
int r[
%(nd)
s]; // shape of the padded_input
ws1 = *((npy_intp*)PyArray_GETPTR1(
%(ws)
s, 1));
int ws[
%(nd)
s];
st0 = *((npy_intp*)PyArray_GETPTR1(
%(stride)
s, 0));
int st[
%(nd)
s];
st1 = *((npy_intp*)PyArray_GETPTR1(
%(stride)
s, 1));
int pd[
%(nd)
s];
pd0 = *((npy_intp*)PyArray_GETPTR1(
%(pad)
s, 0));
int nonzero_padding;
pd1 = *((npy_intp*)PyArray_GETPTR1(
%(pad)
s, 1));
nonzero_padding = 0;
z_r = PyArray_DIMS(
%(z)
s)[2];
for (int i=0; i<
%(nd)
s; i++)
z_c = PyArray_DIMS(
%(z)
s)[3];
{
r = PyArray_DIMS(
%(x)
s)[2];
ws[i] = *((npy_intp*)PyArray_GETPTR1(
%(ws)
s, i));
c = PyArray_DIMS(
%(x)
s)[3];
st[i] = *((npy_intp*)PyArray_GETPTR1(
%(stride)
s, i));
r += pd0 * 2;
pd[i] = *((npy_intp*)PyArray_GETPTR1(
%(pad)
s, i));
c += pd1 * 2;
z[i] = PyArray_DIMS(
%(z)
s)[
%(non_pool_ndim)
s + i];
// allocating memory for gx
r[i] = PyArray_DIMS(
%(x)
s)[
%(non_pool_ndim)
s + i] + 2 * pd[i];
if ((!
%(gx)
s)
if (pd[i]>0)
|| !PyArray_ISCONTIGUOUS(
%(gx)
s)
nonzero_padding = 1;
|| *PyArray_DIMS(
%(gx)
s)!=4
}
||(PyArray_DIMS(
%(gx)
s)[0] != PyArray_DIMS(
%(x)
s)[0])
// allocating memory for output, if necessary
||(PyArray_DIMS(
%(gx)
s)[1] != PyArray_DIMS(
%(x)
s)[1])
int mem_nec;
||(PyArray_DIMS(
%(gx)
s)[2] != PyArray_DIMS(
%(x)
s)[2])
mem_nec = 0;
||(PyArray_DIMS(
%(gx)
s)[3] != PyArray_DIMS(
%(x)
s)[3])
if ((!
%(gx)
s) || !PyArray_ISCONTIGUOUS(
%(gx)
s)
)
|| *PyArray_DIMS(
%(gx)
s)!=
%(total_ndim)
s)
{
mem_nec = 1;
}
if (!mem_nec)
{
for (int i=0; i<
%(total_ndim)
s; i++)
{
if (PyArray_DIMS(
%(gx)
s)[i] != PyArray_DIMS(
%(x)
s)[i])
{
mem_nec = 1;
break;
}
}
}
if (mem_nec)
{
{
Py_XDECREF(
%(gx)
s);
Py_XDECREF(
%(gx)
s);
%(gx)
s = (PyArrayObject*) PyArray_ZEROS(
4
, PyArray_DIMS(
%(x)
s), x_typenum,0);
%(gx)
s = (PyArrayObject*) PyArray_ZEROS(
%(total_ndim)
s
, PyArray_DIMS(
%(x)
s), x_typenum,0);
}
}
else {
else {
PyArray_FILLWBYTE(
%(gx)
s, 0);
PyArray_FILLWBYTE(
%(gx)
s, 0);
}
}
dtype_
%(z)
s maximum; // temp var for maximum value in a region
dtype_
%(z)
s maximum; // temp var for maximum value in a region
if (z_r && z_c)
int z_prod;
// do not run if any z[i] is zero
z_prod = 1;
for (int i=0; i<
%(nd)
s; i++)
{
z_prod *= z[i];
}
if (z_prod)
{
{
int r_st, r_end, c_st, c_end;
// will be used to hold start and end index of a region
int r_st[
%(nd)
s];
int r_end[
%(nd)
s];
// index for iterating over the pooling regions
int r_idx[
%(nd)
s];
// placeholder for PyArray indexing (output)
long int o_idx[
%(total_ndim)
s];
// placeholder for PyArray indexing (input)
long int i_idx[
%(total_ndim)
s];
// loop over non-pooling dimensions
int non_pooling_prod = 1;
for (int i=0; i<
%(non_pool_ndim)
s; i++)
{
non_pooling_prod *= PyArray_DIMS(
%(x)
s)[i];
}
%(omp_parallel)
s
%(omp_parallel)
s
for(int t = 0; t < PyArray_DIMS(
%(x)
s)[0] * PyArray_DIMS(
%(x)
s)[1]; t++){
// first loop over non-pooling dimensions
int b = t
%%
PyArray_DIMS(
%(x)
s)[0];
for (int t=0; t<non_pooling_prod; t++)
int k = t / PyArray_DIMS(
%(x)
s)[0];
{
for(int i=0; i < z_r; i++){
// compute the non-pooling index in each dimension
r_st = i * st0;
if (
%(non_pool_ndim)
s!=0)
r_end = r_st + ws0;
{
o_idx[0] = t;
i_idx[0] = t;
for (int i=1; i<
%(non_pool_ndim)
s; i++)
{
o_idx[i] = o_idx[i - 1] / PyArray_DIMS(
%(x)
s)[i - 1];
o_idx[i - 1] =o_idx[i - 1]
%%
PyArray_DIMS(
%(x)
s)[i - 1];
i_idx[i] = o_idx[i];
i_idx[i - 1] = o_idx[i - 1];
}
}
// then loop over each region in each pooling dimension
"""
for
i
in
xrange
(
nd
):
ccode
+=
"""
for (r_idx[
%(i)
s]=0; r_idx[
%(i)
s] < z[
%(i)
s]; r_idx[
%(i)
s]++) {
r_st[
%(i)
s] = r_idx[
%(i)
s] * st[
%(i)
s];
r_end[
%(i)
s] = r_st[
%(i)
s] + ws[
%(i)
s];
// skip the padding
// skip the padding
r_st
= r_st < pd0 ? pd0 : r_st
;
r_st
[
%(i)
s] = r_st[
%(i)
s] < pd[
%(i)
s] ? pd[
%(i)
s] : r_st[
%(i)
s]
;
r_end
= r_end > (r - pd0) ? r - pd0 : r_end
;
r_end
[
%(i)
s] = r_end[
%(i)
s] > (r[
%(i)
s] - pd[
%(i)
s]) ? r[
%(i)
s] - pd[
%(i)
s] : r_end[
%(i)
s]
;
// from padded_img space to img space
// from padded_img space to img space
r_st -= pd0;
r_st[
%(i)
s] -= pd[
%(i)
s];
r_end -= pd0;
r_end[
%(i)
s] -= pd[
%(i)
s];
for(int j=0; j<z_c; j++){
// use the index to find the correct position in the output
c_st = j * st1;
o_idx[
%(non_pool_ndim)
s +
%(i)
s] = r_idx[
%(i)
s];
c_end = c_st + ws1;
"""
%
dict
(
i
=
i
,
non_pool_ndim
=
non_pool_ndim
)
// skip the padding
c_st = c_st < pd1 ? pd1 : c_st;
ccode
+=
"""
c_end = c_end > (c - pd1) ? c - pd1 : c_end;
dtype_
%(gz)
s * gz;
// change coordinates from padding_img space into img space
if (
%(total_ndim)
s == 4)
c_st -= pd1;
{
c_end -= pd1;
// the maximum value
// the maximum value
maximum = ((dtype_
%(z)
s*)(PyArray_GETPTR4(
%(z)
s,
b,k,i,j
)))[0];
maximum = ((dtype_
%(z)
s*)(PyArray_GETPTR4(
%(z)
s,
o_idx[0],o_idx[1],o_idx[2],o_idx[3]
)))[0];
// the gradient corresponding to this maximum value in z
// the gradient corresponding to this maximum value in z
dtype_
%(gz)
s * gz = (
gz = ((dtype_
%(gz)
s*)(PyArray_GETPTR4(
%(gz)
s, o_idx[0],o_idx[1],o_idx[2],o_idx[3])));
(dtype_
%(gz)
s*)(PyArray_GETPTR4(
%(gz)
s, b, k, i, j)));
}
else
{
// the maximum value
maximum = ((dtype_
%(z)
s*)(PyArray_GetPtr(
%(z)
s,o_idx)))[0];
// the gradient corresponding to this maximum value in z
gz = ((dtype_
%(gz)
s*)(PyArray_GetPtr(
%(gz)
s, o_idx)));
}
"""
for
i
in
xrange
(
nd
):
ccode
+=
"""
// go through the pooled region in the unpadded input
// go through the pooled region in the unpadded input
for(int m=r_st; m<r_end; m++)
for(int m
%(i)
s=r_st[
%(i)
s]; m
%(i)
s<r_end[
%(i)
s]; m
%(i)
s++)
{
i_idx[
%(non_pool_ndim)
s +
%(i)
s] = m
%(i)
s;
"""
%
dict
(
i
=
i
,
non_pool_ndim
=
non_pool_ndim
)
ccode
+=
"""
dtype_
%(x)
s a;
dtype_
%(gx)
s * gx;
if (
%(total_ndim)
s == 4)
{
{
for(int n=c_st; n<c_end; n++)
a = ((dtype_
%(x)
s*)(PyArray_GETPTR4(
%(x)
s,i_idx[0],i_idx[1],i_idx[2],i_idx[3])))[0];
gx = ((dtype_
%(gx)
s*)(PyArray_GETPTR4(
%(gx)
s, i_idx[0],i_idx[1],i_idx[2],i_idx[3])));
}
else
{
{
dtype_
%(x)
s a = ((dtype_
%(x)
s*)(PyArray_GETPTR4(
%(x)
s,b,k,m,n
)))[0];
a = ((dtype_
%(x)
s*)(PyArray_GetPtr(
%(x)
s,i_idx
)))[0];
dtype_
%(gx)
s * gx = (
gx = ((dtype_
%(gx)
s*)(PyArray_GetPtr(
%(gx)
s, i_idx)));
(dtype_
%(gx)
s*)(PyArray_GETPTR4(
%(gx)
s, b, k, m, n)));
}
if (a == maximum){
if (a == maximum){
gx[0] = gx[0] + gz[0];
gx[0] = gx[0] + gz[0];
}
}
}
"""
}
for
i
in
xrange
(
nd
):
}
ccode
+=
"""
}
} // for loop over region
}
"""
}
for
i
in
xrange
(
nd
):
"""
%
locals
()
ccode
+=
"""
} // loop over pooling dimension
"""
ccode
+=
"""
} // for loop over non-pooling dimensions
} // if z_prod
"""
return
ccode
%
locals
()
def
c_code_cache_version
(
self
):
def
c_code_cache_version
(
self
):
return
(
0
,
9
,
self
.
openmp
)
return
(
0
,
10
,
self
.
openmp
)
class
AveragePoolGrad
(
PoolGrad
):
class
AveragePoolGrad
(
PoolGrad
):
def
__init__
(
self
,
ignore_border
,
mode
=
'average_inc_pad'
):
def
__init__
(
self
,
ignore_border
,
mode
=
'average_inc_pad'
,
ndim
=
None
):
assert
mode
in
[
'sum'
,
'average_inc_pad'
,
'average_exc_pad'
]
assert
mode
in
[
'sum'
,
'average_inc_pad'
,
'average_exc_pad'
]
PoolGrad
.
__init__
(
self
,
ignore_border
,
mode
)
PoolGrad
.
__init__
(
self
,
ignore_border
,
mode
,
ndim
)
# There is an extra dummy parameter to match the parameter count
# There is an extra dummy parameter to match the parameter count
# of MaxPoolGrad. They have to keep the same interface because of
# of MaxPoolGrad. They have to keep the same interface because of
# the DownsampleFactorMaxGrad trick to keep old scripts working
# the DownsampleFactorMaxGrad trick to keep old scripts working
# (see downsample.py for details on this).
# (see downsample.py for details on this).
def
make_node
(
self
,
x
,
gz
,
ws
,
stride
=
None
,
pad
=
(
0
,
0
)
,
dummy
=
None
):
def
make_node
(
self
,
x
,
gz
,
ws
,
stride
=
None
,
pad
=
None
,
dummy
=
None
):
# make_node should only be called by the grad function of
# make_node should only be called by the grad function of
# Pool, so these asserts should not fail.
# Pool, so these asserts should not fail.
x
=
tensor
.
as_tensor_variable
(
x
)
x
=
tensor
.
as_tensor_variable
(
x
)
gz
=
tensor
.
as_tensor_variable
(
gz
)
gz
=
tensor
.
as_tensor_variable
(
gz
)
nd
=
self
.
ndim
if
nd
is
None
:
nd
=
x
.
ndim
-
2
if
stride
is
None
:
if
stride
is
None
:
stride
=
ws
stride
=
ws
if
pad
is
None
:
pad
=
(
0
,)
*
nd
ws
=
tensor
.
as_tensor_variable
(
ws
)
ws
=
tensor
.
as_tensor_variable
(
ws
)
stride
=
tensor
.
as_tensor_variable
(
stride
)
stride
=
tensor
.
as_tensor_variable
(
stride
)
pad
=
tensor
.
as_tensor_variable
(
pad
)
pad
=
tensor
.
as_tensor_variable
(
pad
)
assert
isinstance
(
x
,
Variable
)
and
x
.
ndim
==
4
assert
isinstance
(
x
,
Variable
)
and
x
.
ndim
>=
nd
assert
isinstance
(
gz
,
Variable
)
and
gz
.
ndim
==
4
assert
isinstance
(
gz
,
Variable
)
and
gz
.
ndim
>=
nd
assert
isinstance
(
ws
,
Variable
)
and
ws
.
ndim
==
1
assert
isinstance
(
ws
,
Variable
)
and
ws
.
ndim
==
1
assert
isinstance
(
stride
,
Variable
)
and
stride
.
ndim
==
1
assert
isinstance
(
stride
,
Variable
)
and
stride
.
ndim
==
1
assert
x
.
ndim
==
gz
.
ndim
>=
nd
assert
isinstance
(
pad
,
Variable
)
and
pad
.
ndim
==
1
assert
isinstance
(
pad
,
Variable
)
and
pad
.
ndim
==
1
if
not
ws
.
dtype
.
startswith
(
'int'
):
if
not
ws
.
dtype
.
startswith
(
'int'
):
raise
TypeError
(
'Pool downsample parameters must be ints.'
)
raise
TypeError
(
'Pool downsample parameters must be ints.'
)
...
@@ -972,96 +1206,333 @@ class AveragePoolGrad(PoolGrad):
...
@@ -972,96 +1206,333 @@ class AveragePoolGrad(PoolGrad):
def
perform
(
self
,
node
,
inp
,
out
):
def
perform
(
self
,
node
,
inp
,
out
):
x
,
gz
,
ws
,
stride
,
pad
=
inp
x
,
gz
,
ws
,
stride
,
pad
=
inp
gx_stg
,
=
out
gx_stg
,
=
out
assert
ws
.
shape
==
stride
.
shape
==
pad
.
shape
==
(
2
,)
nd
=
self
.
ndim
if
self
.
mode
==
'average_exc_pad'
and
pad
[
0
]
!=
0
and
pad
[
1
]
!=
0
:
if
nd
is
None
:
nd
=
len
(
x
.
shape
)
-
2
assert
ws
.
shape
==
stride
.
shape
==
pad
.
shape
==
(
nd
,)
if
len
(
x
.
shape
)
<
nd
:
raise
NotImplementedError
(
'AveragePoolGrad requires input with {} or more dimensions'
.
format
(
nd
))
if
self
.
mode
==
'average_exc_pad'
and
max
(
pad
)
!=
0
:
raise
NotImplementedError
()
raise
NotImplementedError
()
z_shape
=
self
.
out_shape
(
x
.
shape
,
ws
,
self
.
ignore_border
,
stride
,
pad
)
z_shape
=
self
.
out_shape
(
x
.
shape
,
ws
,
self
.
ignore_border
,
stride
,
pad
,
nd
)
if
(
gx_stg
[
0
]
is
None
)
or
(
gx_stg
[
0
]
.
shape
!=
z_shape
):
if
(
gx_stg
[
0
]
is
None
)
or
(
gx_stg
[
0
]
.
shape
!=
z_shape
):
gx_stg
[
0
]
=
numpy
.
empty
(
z_shape
,
dtype
=
x
.
dtype
)
gx_stg
[
0
]
=
numpy
.
empty
(
z_shape
,
dtype
=
x
.
dtype
)
zz
=
gx_stg
[
0
]
zz
=
gx_stg
[
0
]
# number of pooling output rows
# size of pooling output
pr
=
zz
.
shape
[
-
2
]
pool_out_shp
=
zz
.
shape
[
-
nd
:]
# number of pooling output cols
img_shp
=
tuple
(
x
.
shape
[
-
nd
+
i
]
+
2
*
pad
[
i
]
for
i
in
xrange
(
nd
))
pc
=
zz
.
shape
[
-
1
]
ws0
,
ws1
=
ws
st0
,
st1
=
stride
pad_h
=
pad
[
0
]
pad_w
=
pad
[
1
]
img_rows
=
x
.
shape
[
-
2
]
+
2
*
pad_h
img_cols
=
x
.
shape
[
-
1
]
+
2
*
pad_w
inc_pad
=
self
.
mode
==
'average_inc_pad'
inc_pad
=
self
.
mode
==
'average_inc_pad'
sum_mode
=
self
.
mode
==
'sum'
sum_mode
=
self
.
mode
==
'sum'
# pad the image
# initialize the padded output
if
(
pad_h
,
pad_w
)
!=
(
0
,
0
):
gx
=
numpy
.
zeros
((
x
.
shape
[:
-
nd
]
+
img_shp
),
dtype
=
x
.
dtype
)
y
=
numpy
.
zeros
(
(
x
.
shape
[
0
],
x
.
shape
[
1
],
img_rows
,
img_cols
),
# precompute the region boundaries and sizes for each dimension
dtype
=
x
.
dtype
)
region_slices
=
[[]
for
i
in
xrange
(
nd
)]
y
[:,
:,
pad_h
:(
img_rows
-
pad_h
),
pad_w
:(
img_cols
-
pad_w
)]
=
x
region_sizes
=
[[]
for
i
in
xrange
(
nd
)]
else
:
for
i
in
xrange
(
nd
):
y
=
x
for
j
in
xrange
(
pool_out_shp
[
i
]):
gx
=
numpy
.
zeros_like
(
y
)
for
n
in
xrange
(
x
.
shape
[
0
]):
for
k
in
xrange
(
x
.
shape
[
1
]):
for
r
in
xrange
(
pr
):
if
sum_mode
or
inc_pad
:
row_st
=
r
*
st0
else
:
row_st
=
builtins
.
max
(
r
*
st0
,
pad_h
)
row_end
=
builtins
.
min
(
row_st
+
ws0
,
img_rows
)
for
c
in
xrange
(
pc
):
if
sum_mode
or
inc_pad
:
if
sum_mode
or
inc_pad
:
col_st
=
c
*
st1
start
=
j
*
stride
[
i
]
else
:
else
:
col_st
=
builtins
.
max
(
c
*
st1
,
pad_w
)
start
=
builtins
.
max
(
j
*
stride
[
i
],
pad
[
i
])
col_end
=
builtins
.
min
(
col_st
+
ws1
,
img_cols
)
end
=
builtins
.
min
(
start
+
ws
[
i
],
img_shp
[
i
])
region_slices
[
i
]
.
append
(
slice
(
start
,
end
))
region_sizes
[
i
]
.
append
(
end
-
start
)
# iterate over non-pooling dimensions
region_slice
=
[
None
]
*
nd
for
k
in
numpy
.
ndindex
(
*
x
.
shape
[:
-
nd
]):
gzk
=
gz
[
k
]
gxk
=
gx
[
k
]
# iterate over pooling regions
for
r
in
numpy
.
ndindex
(
*
pool_out_shp
):
region_size
=
1
for
i
in
xrange
(
nd
):
region_slice
[
i
]
=
region_slices
[
i
][
r
[
i
]]
region_size
*=
region_sizes
[
i
][
r
[
i
]]
if
sum_mode
:
if
sum_mode
:
val
=
gz
[
n
,
k
,
r
,
c
]
val
=
gzk
[
r
]
else
:
else
:
val
=
gz
[
n
,
k
,
r
,
c
]
/
((
row_end
-
row_st
)
*
# divide by region size
(
col_end
-
col_st
))
val
=
gzk
[
r
]
/
region_size
gx
[
n
,
k
,
row_st
:
row_end
,
col_st
:
col_end
]
+=
val
gxk
[
region_slice
]
+=
val
# unpad the image
# unpad the image
gx
=
gx
[:,
:,
pad_h
:(
img_rows
-
pad_h
),
pad_w
:(
img_cols
-
pad_w
)]
gx
=
gx
[(
slice
(
None
),)
*
(
len
(
x
.
shape
)
-
nd
)
+
tuple
(
slice
(
pad
[
i
],
img_shp
[
i
]
-
pad
[
i
])
for
i
in
xrange
(
nd
))]
gx_stg
[
0
]
=
gx
gx_stg
[
0
]
=
gx
def
grad
(
self
,
inp
,
grads
):
def
grad
(
self
,
inp
,
grads
):
x
,
gz
,
ws
,
stride
,
pad
=
inp
x
,
gz
,
ws
,
stride
,
pad
=
inp
ggx
,
=
grads
ggx
,
=
grads
return
([
theano
.
tensor
.
zeros_like
(
x
),
return
([
theano
.
tensor
.
zeros_like
(
x
),
Pool
(
ignore_border
=
self
.
ignore_border
,
mode
=
self
.
mode
)(
ggx
,
Pool
(
ignore_border
=
self
.
ignore_border
,
ndim
=
self
.
ndim
,
mode
=
self
.
mode
)(
ggx
,
ws
,
stride
,
pad
)]
+
[
DisconnectedType
()()
for
i
in
inp
[
2
:]])
ws
,
stride
,
pad
)]
+
[
DisconnectedType
()()
for
i
in
inp
[
2
:]])
def
connection_pattern
(
self
,
node
):
def
connection_pattern
(
self
,
node
):
return
[[
1
],
[
1
],
[
0
],
[
0
],
[
0
]]
return
[[
1
],
[
1
],
[
0
],
[
0
],
[
0
]]
def
c_code
(
self
,
node
,
name
,
inp
,
out
,
sub
):
x
,
gz
,
ws
,
stride
,
pad
=
inp
gx
,
=
out
nd
=
self
.
ndim
if
nd
is
None
:
nd
=
node
.
inputs
[
0
]
.
ndim
-
2
total_ndim
=
node
.
inputs
[
0
]
.
ndim
non_pool_ndim
=
total_ndim
-
nd
fail
=
sub
[
'fail'
]
inc_pad
=
int
(
self
.
mode
==
'average_inc_pad'
)
sum_mode
=
int
(
self
.
mode
==
'sum'
)
if
self
.
openmp
:
# run in parallel over each pooling block
omp_parallel
=
'#pragma omp parallel for private(r_st, r_end, r_pad_width, r_idx, i_idx, o_idx) schedule(static)'
else
:
omp_parallel
=
''
ccode
=
"""
// sanity checks
int x_typenum = PyArray_ObjectType((PyObject*)
%(x)
s, 0);
int gz_typenum = PyArray_ObjectType((PyObject*)
%(gz)
s, 0);
if (x_typenum != gz_typenum)
{
PyErr_SetString(PyExc_ValueError, "input types must all match");
%(fail)
s;
}
if(PyArray_NDIM(
%(x)
s)!=
%(total_ndim)
s)
{
PyErr_SetString(PyExc_ValueError, "x must be a
%(total_ndim)
sD ndarray");
%(fail)
s;
}
if(PyArray_NDIM(
%(gz)
s)!=
%(total_ndim)
s)
{
PyErr_SetString(PyExc_ValueError, "gz must be a
%(total_ndim)
sD ndarray");
%(fail)
s;
}
if(PyArray_DIM(
%(ws)
s, 0)!=
%(nd)
s)
{
PyErr_SetString(PyExc_ValueError, "ws must be a vector of size
%(nd)
s");
%(fail)
s;
}
if(PyArray_DIM(
%(stride)
s, 0)!=
%(nd)
s)
{
PyErr_SetString(PyExc_ValueError, "stride must be a vector of size
%(nd)
s");
%(fail)
s;
}
if(PyArray_DIM(
%(pad)
s, 0)!=
%(nd)
s)
{
PyErr_SetString(PyExc_ValueError, "pad must be a vector of size
%(nd)
s");
%(fail)
s;
}
int z[
%(nd)
s]; // shape of the output
int r[
%(nd)
s]; // shape of the padded_input
int ws[
%(nd)
s];
int st[
%(nd)
s];
int pd[
%(nd)
s];
int nonzero_padding;
nonzero_padding = 0;
for (int i=0; i<
%(nd)
s; i++)
{
ws[i] = *((npy_intp*)PyArray_GETPTR1(
%(ws)
s, i));
st[i] = *((npy_intp*)PyArray_GETPTR1(
%(stride)
s, i));
pd[i] = *((npy_intp*)PyArray_GETPTR1(
%(pad)
s, i));
z[i] = PyArray_DIMS(
%(gz)
s)[
%(non_pool_ndim)
s + i];
r[i] = PyArray_DIMS(
%(x)
s)[
%(non_pool_ndim)
s + i] + 2 * pd[i];
if (pd[i]>0)
nonzero_padding = 1;
}
if (!
%(inc_pad)
s && !
%(sum_mode)
s && nonzero_padding)
{
PyErr_SetString(PyExc_ValueError,
"padding must be zero for average_exc_pad");
%(fail)
s;
}
// allocating memory for output, if necessary
int mem_nec;
mem_nec = 0;
if ((!
%(gx)
s) || !PyArray_ISCONTIGUOUS(
%(gx)
s)
|| *PyArray_DIMS(
%(gx)
s)!=
%(total_ndim)
s)
{
mem_nec = 1;
}
if (!mem_nec)
{
for (int i=0; i<
%(total_ndim)
s; i++)
{
if (PyArray_DIMS(
%(gx)
s)[i] != PyArray_DIMS(
%(x)
s)[i])
{
mem_nec = 1;
break;
}
}
}
if (mem_nec)
{
Py_XDECREF(
%(gx)
s);
%(gx)
s = (PyArrayObject*) PyArray_ZEROS(
%(total_ndim)
s, PyArray_DIMS(
%(x)
s), x_typenum,0);
}
else {
PyArray_FILLWBYTE(
%(gx)
s, 0);
}
int z_prod;
// do not run if any z[i] is zero
z_prod = 1;
for (int i=0; i<
%(nd)
s; i++)
{
z_prod *= z[i];
}
if (z_prod)
{
// will be used to hold start and end index of a region
int r_st[
%(nd)
s];
int r_end[
%(nd)
s];
// padded region size
int r_pad_width[
%(nd)
s];
// index for iterating over the pooling regions
int r_idx[
%(nd)
s];
// placeholder for PyArray indexing (output)
long int o_idx[
%(total_ndim)
s];
// placeholder for PyArray indexing (input)
long int i_idx[
%(total_ndim)
s];
// loop over non-pooling dimensions
int non_pooling_prod = 1;
for (int i=0; i<
%(non_pool_ndim)
s; i++)
{
non_pooling_prod *= PyArray_DIMS(
%(x)
s)[i];
}
%(omp_parallel)
s
// first loop over non-pooling dimensions
for (int t=0; t<non_pooling_prod; t++)
{
// compute the non-pooling index in each dimension
if (
%(non_pool_ndim)
s!=0)
{
o_idx[0] = t;
i_idx[0] = t;
for (int i=1; i<
%(non_pool_ndim)
s; i++)
{
o_idx[i] = o_idx[i - 1] / PyArray_DIMS(
%(x)
s)[i - 1];
o_idx[i - 1] =o_idx[i - 1]
%%
PyArray_DIMS(
%(x)
s)[i - 1];
i_idx[i] = o_idx[i];
i_idx[i - 1] = o_idx[i - 1];
}
}
// then loop over each region in each pooling dimension
"""
for
i
in
xrange
(
nd
):
ccode
+=
"""
for (r_idx[
%(i)
s]=0; r_idx[
%(i)
s] < z[
%(i)
s]; r_idx[
%(i)
s]++) {
r_st[
%(i)
s] = r_idx[
%(i)
s] * st[
%(i)
s];
if (!
%(sum_mode)
s && !
%(inc_pad)
s && r_st[
%(i)
s] < pd[
%(i)
s])
{
r_st[
%(i)
s] = pd[
%(i)
s];
}
r_end[
%(i)
s] = r_st[
%(i)
s] + ws[
%(i)
s];
r_end[
%(i)
s] = r_end[
%(i)
s] > r[
%(i)
s] ? r[
%(i)
s] : r_end[
%(i)
s];
r_pad_width[
%(i)
s] = r_end[
%(i)
s] - r_st[
%(i)
s];
// from padded_img space to img space
r_st[
%(i)
s] = r_st[
%(i)
s] - pd[
%(i)
s] > 0 ? r_st[
%(i)
s] - pd[
%(i)
s] : 0;
r_end[
%(i)
s] = r_end[
%(i)
s] > r[
%(i)
s] - pd[
%(i)
s] ? r[
%(i)
s] - 2 * pd[
%(i)
s] : r_end[
%(i)
s] - pd[
%(i)
s];
// use the index to find the correct position in the output
o_idx[
%(non_pool_ndim)
s +
%(i)
s] = r_idx[
%(i)
s];
"""
%
dict
(
i
=
i
,
sum_mode
=
sum_mode
,
inc_pad
=
inc_pad
,
non_pool_ndim
=
non_pool_ndim
)
ccode
+=
"""
dtype_
%(gz)
s * gz;
dtype_
%(gz)
s val;
if (
%(total_ndim)
s == 4)
{
// the gradient for this region
gz = ((dtype_
%(gz)
s*)(PyArray_GETPTR4(
%(gz)
s, o_idx[0],o_idx[1],o_idx[2],o_idx[3])));
}
else
{
// the gradient for this region
gz = ((dtype_
%(gz)
s*)(PyArray_GetPtr(
%(gz)
s, o_idx)));
}
// compute the contribution
if (
%(sum_mode)
s)
{
val = gz[0];
}
else
{
val = gz[0] / (
%(region_size)
s);
}
"""
region_size
=
' * '
.
join
(
'r_pad_width[
%
d]'
%
i
for
i
in
xrange
(
nd
))
for
i
in
xrange
(
nd
):
ccode
+=
"""
// go through the pooled region in the unpadded input
for(int m
%(i)
s=r_st[
%(i)
s]; m
%(i)
s<r_end[
%(i)
s]; m
%(i)
s++)
{
i_idx[
%(non_pool_ndim)
s +
%(i)
s] = m
%(i)
s;
"""
%
dict
(
i
=
i
,
non_pool_ndim
=
non_pool_ndim
)
ccode
+=
"""
dtype_
%(gx)
s * gx;
if (
%(total_ndim)
s == 4)
{
gx = ((dtype_
%(gx)
s*)(PyArray_GETPTR4(
%(gx)
s, i_idx[0],i_idx[1],i_idx[2],i_idx[3])));
}
else
{
gx = ((dtype_
%(gx)
s*)(PyArray_GetPtr(
%(gx)
s, i_idx)));
}
gx[0] = gx[0] + val;
"""
for
i
in
xrange
(
nd
):
ccode
+=
"""
} // for loop over region
"""
for
i
in
xrange
(
nd
):
ccode
+=
"""
} // loop over pooling dimension
"""
ccode
+=
"""
} // for loop over non-pooling dimensions
} // if z_prod
"""
return
ccode
%
locals
()
def
c_code_cache_version
(
self
):
return
(
0
,
3
,
self
.
openmp
)
class
DownsampleFactorMaxGradGrad
(
OpenMPOp
):
class
DownsampleFactorMaxGradGrad
(
OpenMPOp
):
__props__
=
(
'ignore_border'
,
'mode'
)
__props__
=
(
'
ndim'
,
'
ignore_border'
,
'mode'
)
def
__init__
(
self
,
ignore_border
,
mode
=
'max'
,
openmp
=
None
):
def
__init__
(
self
,
ignore_border
,
mode
=
'max'
,
ndim
=
None
,
openmp
=
None
):
self
.
ndim
=
ndim
self
.
ignore_border
=
ignore_border
self
.
ignore_border
=
ignore_border
self
.
mode
=
mode
self
.
mode
=
mode
super
(
DownsampleFactorMaxGradGrad
,
self
)
.
__init__
(
openmp
=
openmp
)
super
(
DownsampleFactorMaxGradGrad
,
self
)
.
__init__
(
openmp
=
openmp
)
assert
self
.
mode
==
'max'
assert
self
.
mode
==
'max'
def
make_node
(
self
,
x
,
maxout
,
gz
,
ws
,
stride
=
None
,
pad
=
(
0
,
0
)
):
def
make_node
(
self
,
x
,
maxout
,
gz
,
ws
,
stride
=
None
,
pad
=
None
):
# make_node should only be called by the grad function of
# make_node should only be called by the grad function of
# MaxPoolGrad, so these asserts should not fail.
# MaxPoolGrad, so these asserts should not fail.
x
=
tensor
.
as_tensor_variable
(
x
)
x
=
tensor
.
as_tensor_variable
(
x
)
maxout
=
tensor
.
as_tensor_variable
(
maxout
)
maxout
=
tensor
.
as_tensor_variable
(
maxout
)
gz
=
tensor
.
as_tensor_variable
(
gz
)
gz
=
tensor
.
as_tensor_variable
(
gz
)
assert
x
.
ndim
==
4
nd
=
self
.
ndim
assert
maxout
.
ndim
==
4
if
nd
is
None
:
assert
gz
.
ndim
==
4
nd
=
x
.
ndim
-
2
if
stride
is
None
:
if
stride
is
None
:
stride
=
ws
stride
=
ws
if
isinstance
(
pad
,
(
tuple
,
list
)):
if
pad
is
None
:
if
tuple
(
pad
)
!=
(
0
,
0
)
and
not
self
.
ignore_border
:
pad
=
(
0
,)
*
nd
elif
isinstance
(
pad
,
(
tuple
,
list
)):
if
max
(
pad
)
!=
0
and
not
self
.
ignore_border
:
raise
NotImplementedError
(
raise
NotImplementedError
(
'padding works only with ignore_border=True'
)
'padding works only with ignore_border=True'
)
if
isinstance
(
ws
,
(
tuple
,
list
)):
if
isinstance
(
ws
,
(
tuple
,
list
)):
if
pad
[
0
]
>=
ws
[
0
]
or
pad
[
1
]
>=
ws
[
1
]
:
if
any
(
pad
[
i
]
>=
ws
[
i
]
for
i
in
range
(
nd
))
:
raise
NotImplementedError
(
raise
NotImplementedError
(
'padding_h and padding_w must be smaller than strides'
)
'padding_h and padding_w must be smaller than strides'
)
ws
=
tensor
.
as_tensor_variable
(
ws
)
ws
=
tensor
.
as_tensor_variable
(
ws
)
...
@@ -1070,6 +1541,7 @@ class DownsampleFactorMaxGradGrad(OpenMPOp):
...
@@ -1070,6 +1541,7 @@ class DownsampleFactorMaxGradGrad(OpenMPOp):
assert
ws
.
ndim
==
1
assert
ws
.
ndim
==
1
assert
stride
.
ndim
==
1
assert
stride
.
ndim
==
1
assert
pad
.
ndim
==
1
assert
pad
.
ndim
==
1
assert
x
.
ndim
==
maxout
.
ndim
==
gz
.
ndim
>=
nd
if
not
ws
.
dtype
.
startswith
(
'int'
):
if
not
ws
.
dtype
.
startswith
(
'int'
):
raise
TypeError
(
'Pool downsample parameters must be ints.'
)
raise
TypeError
(
'Pool downsample parameters must be ints.'
)
if
not
stride
.
dtype
.
startswith
(
'int'
):
if
not
stride
.
dtype
.
startswith
(
'int'
):
...
@@ -1081,49 +1553,56 @@ class DownsampleFactorMaxGradGrad(OpenMPOp):
...
@@ -1081,49 +1553,56 @@ class DownsampleFactorMaxGradGrad(OpenMPOp):
def
perform
(
self
,
node
,
inp
,
out
):
def
perform
(
self
,
node
,
inp
,
out
):
x
,
maxout
,
ggx
,
ws
,
stride
,
pad
=
inp
x
,
maxout
,
ggx
,
ws
,
stride
,
pad
=
inp
z
,
=
out
z
,
=
out
assert
ws
.
shape
==
stride
.
shape
==
pad
.
shape
==
(
2
,)
nd
=
self
.
ndim
if
len
(
x
.
shape
)
!=
4
:
if
nd
is
None
:
nd
=
len
(
x
.
shape
)
-
2
assert
ws
.
shape
==
stride
.
shape
==
pad
.
shape
==
(
nd
,)
if
len
(
x
.
shape
)
<
nd
:
raise
NotImplementedError
(
raise
NotImplementedError
(
'DownsampleFactorMaxGradGrad requires 4D input for now'
)
'DownsampleFactorMaxGradGrad requires input '
'with {} or more dimensions'
.
format
(
nd
))
if
(
z
[
0
]
is
None
)
or
(
z
[
0
]
.
shape
!=
maxout
.
shape
):
if
(
z
[
0
]
is
None
)
or
(
z
[
0
]
.
shape
!=
maxout
.
shape
):
z
[
0
]
=
numpy
.
zeros
(
maxout
.
shape
,
dtype
=
x
.
dtype
)
z
[
0
]
=
numpy
.
zeros
(
maxout
.
shape
,
dtype
=
x
.
dtype
)
ggz
=
z
[
0
]
# grad wrt maxout_grad has the same shape as maxout
ggz
=
z
[
0
]
# grad wrt maxout_grad has the same shape as maxout
# number of pooling output rows
# size of pooling output
pr
=
ggz
.
shape
[
-
2
]
pool_out_shp
=
ggz
.
shape
[
-
nd
:]
# number of pooling output cols
img_shp
=
tuple
(
x
.
shape
[
-
nd
+
i
]
+
2
*
pad
[
i
]
for
i
in
xrange
(
nd
))
pc
=
ggz
.
shape
[
-
1
]
ws0
,
ws1
=
ws
st0
,
st1
=
stride
pd0
,
pd1
=
pad
img_rows
=
x
.
shape
[
-
2
]
+
2
*
pd0
img_cols
=
x
.
shape
[
-
1
]
+
2
*
pd1
# pad the image and its gradients
# pad the image and its gradients
if
pd0
!=
0
and
pd1
!=
0
:
if
max
(
pad
)
>
0
:
y_padded
=
numpy
.
zeros
(
y_padded
=
numpy
.
zeros
(
x
.
shape
[:
-
nd
]
+
img_shp
,
dtype
=
x
.
dtype
)
(
x
.
shape
[
0
],
x
.
shape
[
1
],
img_rows
,
img_cols
),
y_padded
[(
slice
(
None
),)
*
(
len
(
x
.
shape
)
-
nd
)
+
dtype
=
x
.
dtype
)
+
x
.
min
()
-
1
tuple
(
slice
(
pad
[
i
],
img_shp
[
i
]
-
pad
[
i
])
for
i
in
xrange
(
nd
))]
=
x
y_padded
[:,
:,
pd0
:(
img_rows
-
pd0
),
pd1
:(
img_cols
-
pd1
)]
=
x
ggx_padded
=
numpy
.
zeros
(
x
.
shape
[:
-
nd
]
+
img_shp
,
dtype
=
x
.
dtype
)
ggx_padded
=
numpy
.
zeros
(
ggx_padded
[(
slice
(
None
),)
*
(
len
(
x
.
shape
)
-
nd
)
+
(
x
.
shape
[
0
],
x
.
shape
[
1
],
img_rows
,
img_cols
),
tuple
(
slice
(
pad
[
i
],
img_shp
[
i
]
-
pad
[
i
])
for
i
in
xrange
(
nd
))]
=
ggx
dtype
=
x
.
dtype
)
ggx_padded
[:,
:,
pd0
:(
img_rows
-
pd0
),
pd1
:(
img_cols
-
pd1
)]
=
ggx
else
:
else
:
y_padded
=
x
y_padded
=
x
ggx_padded
=
ggx
ggx_padded
=
ggx
for
n
in
xrange
(
x
.
shape
[
0
]):
for
k
in
xrange
(
x
.
shape
[
1
]):
# precompute the region boundaries for each dimension
for
r
in
xrange
(
pr
):
region_ranges
=
[[]
for
i
in
xrange
(
nd
)]
row_st
=
r
*
st0
for
i
in
xrange
(
nd
):
row_end
=
builtins
.
min
(
row_st
+
ws0
,
img_rows
)
for
j
in
xrange
(
pool_out_shp
[
i
]):
for
c
in
xrange
(
pc
):
start
=
j
*
stride
[
i
]
col_st
=
c
*
st1
end
=
builtins
.
min
(
start
+
ws
[
i
],
img_shp
[
i
])
col_end
=
builtins
.
min
(
col_st
+
ws1
,
img_cols
)
region_ranges
[
i
]
.
append
(
xrange
(
start
,
end
))
for
row_ind
in
xrange
(
row_st
,
row_end
):
for
col_ind
in
xrange
(
col_st
,
col_end
):
# iterate over non-pooling dimensions
if
(
maxout
[
n
,
k
,
r
,
c
]
==
y_padded
[
n
,
k
,
row_ind
,
col_ind
]):
for
k
in
numpy
.
ndindex
(
*
x
.
shape
[:
-
nd
]):
ggz
[
n
,
k
,
r
,
c
]
=
ggx_padded
[
n
,
k
,
row_ind
,
col_ind
]
ggxk
=
ggx_padded
[
k
]
ggzk
=
ggz
[
k
]
yk
=
y_padded
[
k
]
maxoutk
=
maxout
[
k
]
# iterate over pooling regions
for
r
in
numpy
.
ndindex
(
*
pool_out_shp
):
# iterate inside region
maxout_value
=
maxoutk
[
r
]
for
c
in
itertools
.
product
(
*
[
region_ranges
[
i
][
r
[
i
]]
for
i
in
xrange
(
nd
)]):
if
maxout_value
==
yk
[
c
]:
ggzk
[
r
]
+=
ggxk
[
c
]
def
infer_shape
(
self
,
node
,
in_shapes
):
def
infer_shape
(
self
,
node
,
in_shapes
):
return
[
in_shapes
[
1
]]
return
[
in_shapes
[
1
]]
...
@@ -1133,7 +1612,8 @@ class DownsampleFactorMaxGradGrad(OpenMPOp):
...
@@ -1133,7 +1612,8 @@ class DownsampleFactorMaxGradGrad(OpenMPOp):
gz
,
=
grads
gz
,
=
grads
return
[
theano
.
tensor
.
zeros_like
(
x
),
return
[
theano
.
tensor
.
zeros_like
(
x
),
theano
.
tensor
.
zeros_like
(
maxout
),
theano
.
tensor
.
zeros_like
(
maxout
),
MaxPoolGrad
(
ignore_border
=
self
.
ignore_border
)(
x
,
maxout
,
gz
,
MaxPoolGrad
(
ignore_border
=
self
.
ignore_border
,
ndim
=
self
.
ndim
)(
x
,
maxout
,
gz
,
ws
,
stride
,
pad
),
ws
,
stride
,
pad
),
DisconnectedType
()(),
DisconnectedType
()(),
DisconnectedType
()(),
DisconnectedType
()(),
...
@@ -1147,106 +1627,182 @@ class DownsampleFactorMaxGradGrad(OpenMPOp):
...
@@ -1147,106 +1627,182 @@ class DownsampleFactorMaxGradGrad(OpenMPOp):
raise
theano
.
gof
.
utils
.
MethodNotDefined
()
raise
theano
.
gof
.
utils
.
MethodNotDefined
()
x
,
maxout
,
ggx
,
ws
,
stride
,
pad
=
inp
x
,
maxout
,
ggx
,
ws
,
stride
,
pad
=
inp
z
,
=
out
# the grad of grad
z
,
=
out
# the grad of grad
nd
=
self
.
ndim
if
nd
is
None
:
nd
=
node
.
inputs
[
0
]
.
ndim
-
2
total_ndim
=
node
.
inputs
[
0
]
.
ndim
non_pool_ndim
=
total_ndim
-
nd
fail
=
sub
[
'fail'
]
fail
=
sub
[
'fail'
]
ignore_border
=
int
(
self
.
ignore_border
)
ignore_border
=
int
(
self
.
ignore_border
)
if
self
.
openmp
:
if
self
.
openmp
:
omp_parallel
=
'#pragma omp parallel for private(r_st, r_end, c_st, c_end, maximum) schedule(static)'
# run in parallel over each pooling block
omp_parallel
=
'#pragma omp parallel for private(r_st, r_end, r_idx, i_idx, o_idx, maximum) schedule(static)'
else
:
else
:
omp_parallel
=
''
omp_parallel
=
''
return
"""
ccode
=
"""
int ws0, ws1, st0, st1, pd0, pd1;
int z_typenum = PyArray_ObjectType((PyObject*)
%(maxout)
s, 0);
int z_typenum = PyArray_ObjectType((PyObject*)
%(maxout)
s, 0);
int z_r, z_c;
int z[
%(nd)
s]; // shape of the output
int r, c; // shape of the padded_input
int r[
%(nd)
s]; // shape of the padded_input
if(PyArray_DIM(
%(ws)
s, 0)!=2)
int ws[
%(nd)
s];
int st[
%(nd)
s];
int pd[
%(nd)
s];
if(PyArray_DIM(
%(ws)
s, 0)!=
%(nd)
s)
{
{
PyErr_SetString(PyExc_ValueError, "ws must be a vector of size
2
");
PyErr_SetString(PyExc_ValueError, "ws must be a vector of size
%(nd)
s
");
%(fail)
s;
%(fail)
s;
}
}
if(PyArray_DIM(
%(stride)
s, 0)!=
2
)
if(PyArray_DIM(
%(stride)
s, 0)!=
%(nd)
s
)
{
{
PyErr_SetString(PyExc_ValueError, "stride must be a vector of size
2
");
PyErr_SetString(PyExc_ValueError, "stride must be a vector of size
%(nd)
s
");
%(fail)
s;
%(fail)
s;
}
}
if(PyArray_DIM(
%(pad)
s, 0)!=
2
)
if(PyArray_DIM(
%(pad)
s, 0)!=
%(nd)
s
)
{
{
PyErr_SetString(PyExc_ValueError, "pad must be a vector of size
2
");
PyErr_SetString(PyExc_ValueError, "pad must be a vector of size
%(nd)
s
");
%(fail)
s;
%(fail)
s;
}
}
// Getting ws, stride and pad
for (int i=0; i<
%(nd)
s; i++)
ws0 = *((npy_intp*)PyArray_GETPTR1(
%(ws)
s, 0));
{
ws1 = *((npy_intp*)PyArray_GETPTR1(
%(ws)
s, 1));
ws[i] = *((npy_intp*)PyArray_GETPTR1(
%(ws)
s, i));
st0 = *((npy_intp*)PyArray_GETPTR1(
%(stride)
s, 0));
st[i] = *((npy_intp*)PyArray_GETPTR1(
%(stride)
s, i));
st1 = *((npy_intp*)PyArray_GETPTR1(
%(stride)
s, 1));
pd[i] = *((npy_intp*)PyArray_GETPTR1(
%(pad)
s, i));
pd0 = *((npy_intp*)PyArray_GETPTR1(
%(pad)
s, 0));
z[i] = PyArray_DIMS(
%(maxout)
s)[
%(non_pool_ndim)
s + i];
pd1 = *((npy_intp*)PyArray_GETPTR1(
%(pad)
s, 1));
r[i] = PyArray_DIMS(
%(x)
s)[
%(non_pool_ndim)
s + i] + 2 * pd[i];
z_r = PyArray_DIMS(
%(maxout)
s)[2];
}
z_c = PyArray_DIMS(
%(maxout)
s)[3];
// allocating memory for output, if necessary
r = PyArray_DIMS(
%(x)
s)[2];
int mem_nec;
c = PyArray_DIMS(
%(x)
s)[3];
mem_nec = 0;
r += pd0 * 2;
if ((!
%(z)
s) || !PyArray_ISCONTIGUOUS(
%(z)
s)
c += pd1 * 2;
|| *PyArray_DIMS(
%(z)
s)!=
%(total_ndim)
s)
// allocating memory for output
{
if ((!
%(z)
s)
mem_nec = 1;
|| !PyArray_ISCONTIGUOUS(
%(z)
s)
}
|| *PyArray_DIMS(
%(z)
s)!=4
if (!mem_nec)
||(PyArray_DIMS(
%(z)
s)[0] != PyArray_DIMS(
%(maxout)
s)[0])
{
||(PyArray_DIMS(
%(z)
s)[1] != PyArray_DIMS(
%(maxout)
s)[1])
for (int i=0; i<
%(total_ndim)
s; i++)
||(PyArray_DIMS(
%(z)
s)[2] != PyArray_DIMS(
%(maxout)
s)[2])
{
||(PyArray_DIMS(
%(z)
s)[3] != PyArray_DIMS(
%(maxout)
s)[3])
if (PyArray_DIMS(
%(z)
s)[i] != PyArray_DIMS(
%(maxout)
s)[i])
)
{
mem_nec = 1;
break;
}
}
}
if (mem_nec)
{
{
Py_XDECREF(
%(z)
s);
Py_XDECREF(
%(z)
s);
%(z)
s = (PyArrayObject*) PyArray_ZEROS(
4
, PyArray_DIMS(
%(maxout)
s), z_typenum,0);
%(z)
s = (PyArrayObject*) PyArray_ZEROS(
%(total_ndim)
s
, PyArray_DIMS(
%(maxout)
s), z_typenum,0);
}
}
else {
else {
PyArray_FILLWBYTE(
%(z)
s, 0);
PyArray_FILLWBYTE(
%(z)
s, 0);
}
}
dtype_
%(maxout)
s maximum; // temp var for maximum value in a region
dtype_
%(maxout)
s maximum; // temp var for maximum value in a region
int r_st, r_end, c_st, c_end;
// will be used to hold start and end index of a region
int r_st[
%(nd)
s];
int r_end[
%(nd)
s];
// index for iterating over the pooling regions
int r_idx[
%(nd)
s];
// placeholder for PyArray indexing (output)
long int o_idx[
%(total_ndim)
s];
// placeholder for PyArray indexing (input)
long int i_idx[
%(total_ndim)
s];
// loop over non-pooling dimensions
int non_pooling_prod;
non_pooling_prod = 1;
for (int i=0; i<
%(non_pool_ndim)
s; i++)
{
non_pooling_prod *= PyArray_DIMS(
%(x)
s)[i];
}
%(omp_parallel)
s
%(omp_parallel)
s
for(int t = 0; t < PyArray_DIMS(
%(x)
s)[0] * PyArray_DIMS(
%(x)
s)[1]; t++){
// first loop over non-pooling dimensions
int b = t
%%
PyArray_DIMS(
%(x)
s)[0];
for (int t=0; t<non_pooling_prod; t++)
int k = t / PyArray_DIMS(
%(x)
s)[0];
{
for(int i=0; i < z_r; i++){
// compute the non-pooling index in each dimension
r_st = i * st0;
if (
%(non_pool_ndim)
s!=0)
r_end = r_st + ws0;
{
o_idx[0] = t;
i_idx[0] = t;
for (int i=1; i<
%(non_pool_ndim)
s; i++)
{
o_idx[i] = o_idx[i - 1] / PyArray_DIMS(
%(x)
s)[i - 1];
o_idx[i - 1] = o_idx[i - 1]
%%
PyArray_DIMS(
%(x)
s)[i - 1];
i_idx[i] = o_idx[i];
i_idx[i - 1] = o_idx[i - 1];
}
}
// then loop over each region in each pooling dimension
"""
for
i
in
xrange
(
nd
):
ccode
+=
"""
for (r_idx[
%(i)
s]=0; r_idx[
%(i)
s] < z[
%(i)
s]; r_idx[
%(i)
s]++) {
r_st[
%(i)
s] = r_idx[
%(i)
s] * st[
%(i)
s];
r_end[
%(i)
s] = r_st[
%(i)
s] + ws[
%(i)
s];
// skip the padding
// skip the padding
r_st
= r_st < pd0 ? pd0 : r_st
;
r_st
[
%(i)
s] = r_st[
%(i)
s] < pd[
%(i)
s] ? pd[
%(i)
s] : r_st[
%(i)
s]
;
r_end
= r_end > (r - pd0) ? r - pd0 : r_end
;
r_end
[
%(i)
s] = r_end[
%(i)
s] > (r[
%(i)
s] - pd[
%(i)
s]) ? r[
%(i)
s] - pd[
%(i)
s] : r_end[
%(i)
s]
;
// from padded_img space to img space
// from padded_img space to img space
r_st -= pd0;
r_st[
%(i)
s] -= pd[
%(i)
s];
r_end -= pd0;
r_end[
%(i)
s] -= pd[
%(i)
s];
for(int j=0; j<z_c; j++){
// use the index to find the correct position in the output
c_st = j * st1;
o_idx[
%(non_pool_ndim)
s +
%(i)
s] = r_idx[
%(i)
s];
c_end = c_st + ws1;
"""
%
dict
(
i
=
i
,
non_pool_ndim
=
non_pool_ndim
)
// skip the padding
c_st = c_st < pd1 ? pd1 : c_st;
ccode
+=
"""
c_end = c_end > (c - pd1) ? c - pd1 : c_end;
dtype_
%(z)
s * z;
// from padding_img space into img space
if (
%(total_ndim)
s == 4)
c_st -= pd1;
{
c_end -= pd1;
// the maximum value
// the maximum value
maximum = ((dtype_
%(maxout)
s*)(PyArray_GETPTR4(
%(maxout)
s,
b,k,i,j
)))[0];
maximum = ((dtype_
%(maxout)
s*)(PyArray_GETPTR4(
%(maxout)
s,
o_idx[0],o_idx[1],o_idx[2],o_idx[3]
)))[0];
// z at this position
// z at this position
dtype_
%(z)
s * z = ((dtype_
%(z)
s*)(PyArray_GETPTR4(
%(z)
s, b, k, i, j)));
z = ((dtype_
%(z)
s*)(PyArray_GETPTR4(
%(z)
s,o_idx[0],o_idx[1],o_idx[2],o_idx[3])));
}
else
{
// the maximum value
maximum = ((dtype_
%(maxout)
s*)(PyArray_GetPtr(
%(maxout)
s,o_idx)))[0];
// z at this position
z = ((dtype_
%(z)
s*)(PyArray_GetPtr(
%(z)
s,o_idx)));
}
"""
for
i
in
xrange
(
nd
):
ccode
+=
"""
// go through the pooled region in the unpadded input
// go through the pooled region in the unpadded input
for(int m=r_st; m<r_end; m
++)
for(int m
%(i)
s=r_st[
%(i)
s]; m
%(i)
s<r_end[
%(i)
s]; m
%(i)
s
++)
{
{
for(int n=c_st; n<c_end; n++)
i_idx[
%(non_pool_ndim)
s +
%(i)
s] = m
%(i)
s;
"""
%
dict
(
i
=
i
,
non_pool_ndim
=
non_pool_ndim
)
ccode
+=
"""
dtype_
%(x)
s a;
dtype_
%(ggx)
s * ggx;
if (
%(total_ndim)
s == 4)
{
{
dtype_
%(x)
s a = ((dtype_
%(x)
s*)(PyArray_GETPTR4(
%(x)
s,b,k,m,n)))[0];
a = ((dtype_
%(x)
s*)(PyArray_GETPTR4(
%(x)
s,i_idx[0],i_idx[1],i_idx[2],i_idx[3])))[0];
dtype_
%(ggx)
s * ggx = (
ggx = ((dtype_
%(ggx)
s*)(PyArray_GETPTR4(
%(ggx)
s,i_idx[0],i_idx[1],i_idx[2],i_idx[3])));
(dtype_
%(ggx)
s*)(PyArray_GETPTR4(
%(ggx)
s, b, k, m, n)));
if (a == maximum){
z[0] += ggx[0];
}
}
}
}
}
else
{
a = ((dtype_
%(x)
s*)(PyArray_GetPtr(
%(x)
s,i_idx)))[0];
ggx = ((dtype_
%(ggx)
s*)(PyArray_GetPtr(
%(ggx)
s,i_idx)));
}
}
if (a == maximum){
z[0] += ggx[0];
}
}
"""
%
locals
()
"""
for
i
in
xrange
(
nd
):
ccode
+=
"""
} // for loop over region
"""
for
i
in
xrange
(
nd
):
ccode
+=
"""
} // loop over pooling dimension
"""
ccode
+=
"""
} // for loop over non-pooling dimensions
"""
return
ccode
%
locals
()
def
c_code_cache_version
(
self
):
def
c_code_cache_version
(
self
):
return
(
0
,
3
,
self
.
openmp
)
return
(
0
,
4
,
self
.
openmp
)
theano/tensor/signal/tests/test_pool.py
浏览文件 @
a16e91f7
from
__future__
import
absolute_import
,
print_function
,
division
from
__future__
import
absolute_import
,
print_function
,
division
from
nose.plugins.skip
import
SkipTest
from
nose.plugins.skip
import
SkipTest
from
nose_parameterized
import
parameterized
from
itertools
import
product
from
itertools
import
product
import
os
import
os
import
unittest
import
unittest
...
@@ -14,7 +15,7 @@ import numpy
...
@@ -14,7 +15,7 @@ import numpy
import
theano
import
theano
import
theano.tensor
as
tensor
import
theano.tensor
as
tensor
from
theano.tests
import
unittest_tools
as
utt
from
theano.tests
import
unittest_tools
as
utt
from
theano.tensor.signal.pool
import
(
Pool
,
pool_2d
,
from
theano.tensor.signal.pool
import
(
Pool
,
pool_2d
,
pool_3d
,
MaxPoolGrad
,
AveragePoolGrad
,
MaxPoolGrad
,
AveragePoolGrad
,
max_pool_2d_same_size
,
max_pool_2d_same_size
,
DownsampleFactorMaxGradGrad
)
DownsampleFactorMaxGradGrad
)
...
@@ -57,6 +58,36 @@ class TestDownsampleFactorMax(utt.InferShapeTester):
...
@@ -57,6 +58,36 @@ class TestDownsampleFactorMax(utt.InferShapeTester):
output_val
[
k
][
i
,
j
]
=
func
(
patch
)
output_val
[
k
][
i
,
j
]
=
func
(
patch
)
return
output_val
return
output_val
@staticmethod
def
numpy_max_pool_nd
(
input
,
ds
,
ignore_border
=
False
,
mode
=
'max'
):
'''Helper function, implementing pool_nd in pure numpy'''
if
len
(
input
.
shape
)
<
len
(
ds
):
raise
NotImplementedError
(
'input should have at least
%
s dim,'
' shape is
%
s'
%
(
str
(
ds
),
str
(
input
.
shape
)))
nd
=
len
(
ds
)
si
=
[
0
]
*
nd
if
not
ignore_border
:
for
i
in
range
(
nd
):
if
input
.
shape
[
-
nd
+
i
]
%
ds
[
i
]:
si
[
i
]
+=
1
out_shp
=
list
(
input
.
shape
[:
-
nd
])
for
i
in
range
(
nd
):
out_shp
.
append
(
input
.
shape
[
-
nd
+
i
]
//
ds
[
i
]
+
si
[
i
])
output_val
=
numpy
.
zeros
(
out_shp
)
func
=
numpy
.
max
if
mode
==
'sum'
:
func
=
numpy
.
sum
elif
mode
!=
'max'
:
func
=
numpy
.
average
for
l
in
numpy
.
ndindex
(
*
input
.
shape
[:
-
nd
]):
for
r
in
numpy
.
ndindex
(
*
output_val
.
shape
[
-
nd
:]):
patch
=
input
[
l
][
tuple
(
slice
(
r
[
i
]
*
ds
[
i
],
(
r
[
i
]
+
1
)
*
ds
[
i
])
for
i
in
range
(
nd
))]
output_val
[
l
][
r
]
=
func
(
patch
)
return
output_val
@staticmethod
@staticmethod
def
numpy_max_pool_2d_stride_padding
(
def
numpy_max_pool_2d_stride_padding
(
x
,
ds
,
ignore_border
=
True
,
st
=
None
,
padding
=
(
0
,
0
),
mode
=
'max'
):
x
,
ds
,
ignore_border
=
True
,
st
=
None
,
padding
=
(
0
,
0
),
mode
=
'max'
):
...
@@ -111,6 +142,60 @@ class TestDownsampleFactorMax(utt.InferShapeTester):
...
@@ -111,6 +142,60 @@ class TestDownsampleFactorMax(utt.InferShapeTester):
output_val
[
k
][
i
,
j
]
=
func
(
patch
)
output_val
[
k
][
i
,
j
]
=
func
(
patch
)
return
output_val
return
output_val
@staticmethod
def
numpy_max_pool_nd_stride_padding
(
input
,
ds
,
ignore_border
=
True
,
st
=
None
,
padding
=
None
,
mode
=
'max'
):
assert
ignore_border
nd
=
len
(
ds
)
if
padding
is
None
:
padding
=
(
0
,)
*
nd
if
st
is
None
:
st
=
(
0
,)
*
nd
assert
len
(
padding
)
==
len
(
ds
)
==
len
(
st
)
assert
all
(
ds
[
i
]
>
padding
[
i
]
for
i
in
range
(
nd
))
def
pad_img
(
x
):
# initialize padded input
y
=
numpy
.
zeros
(
x
.
shape
[
0
:
-
nd
]
+
tuple
(
x
.
shape
[
-
nd
+
i
]
+
padding
[
i
]
*
2
for
i
in
range
(
nd
)),
dtype
=
x
.
dtype
)
# place the unpadded input in the center
block
=
((
slice
(
None
),)
*
(
len
(
x
.
shape
)
-
nd
)
+
tuple
(
slice
(
padding
[
i
],
x
.
shape
[
-
nd
+
i
]
+
padding
[
i
])
for
i
in
range
(
nd
)))
y
[
block
]
=
x
return
y
pad_img_shp
=
list
(
input
.
shape
[:
-
nd
])
out_shp
=
list
(
input
.
shape
[:
-
nd
])
for
i
in
range
(
nd
):
padded_size
=
input
.
shape
[
-
nd
+
i
]
+
2
*
padding
[
i
]
pad_img_shp
.
append
(
padded_size
)
out_shp
.
append
((
padded_size
-
ds
[
i
])
//
st
[
i
]
+
1
)
output_val
=
numpy
.
zeros
(
out_shp
)
padded_input
=
pad_img
(
input
)
func
=
numpy
.
max
if
mode
==
'sum'
:
func
=
numpy
.
sum
elif
mode
!=
'max'
:
func
=
numpy
.
average
inc_pad
=
mode
==
'average_inc_pad'
for
l
in
numpy
.
ndindex
(
*
input
.
shape
[:
-
nd
]):
for
r
in
numpy
.
ndindex
(
*
output_val
.
shape
[
-
nd
:]):
region
=
[]
for
i
in
range
(
nd
):
r_st
=
r
[
i
]
*
st
[
i
]
r_end
=
builtins
.
min
(
r_st
+
ds
[
i
],
pad_img_shp
[
-
nd
+
i
])
if
not
inc_pad
:
r_st
=
builtins
.
max
(
r_st
,
padding
[
i
])
r_end
=
builtins
.
min
(
r_end
,
input
.
shape
[
-
nd
+
i
]
+
padding
[
i
])
region
.
append
(
slice
(
r_st
,
r_end
))
patch
=
padded_input
[
l
][
region
]
output_val
[
l
][
r
]
=
func
(
patch
)
return
output_val
@staticmethod
@staticmethod
def
numpy_max_pool_2d_stride
(
input
,
ds
,
ignore_border
=
False
,
st
=
None
,
def
numpy_max_pool_2d_stride
(
input
,
ds
,
ignore_border
=
False
,
st
=
None
,
mode
=
'max'
):
mode
=
'max'
):
...
@@ -174,83 +259,166 @@ class TestDownsampleFactorMax(utt.InferShapeTester):
...
@@ -174,83 +259,166 @@ class TestDownsampleFactorMax(utt.InferShapeTester):
output_val
[
k
][
i
,
j
]
=
func
(
patch
)
output_val
[
k
][
i
,
j
]
=
func
(
patch
)
return
output_val
return
output_val
@staticmethod
def
numpy_max_pool_nd_stride
(
input
,
ds
,
ignore_border
=
False
,
st
=
None
,
mode
=
'max'
):
'''Helper function, implementing pooling in pure numpy
this function provides st input to indicate the stide size
for the pooling regions. if not indicated, st == sd.'''
nd
=
len
(
ds
)
if
st
is
None
:
st
=
ds
assert
len
(
st
)
==
len
(
ds
)
out_shp
=
list
(
input
.
shape
[:
-
nd
])
for
i
in
range
(
nd
):
out
=
0
if
input
.
shape
[
-
nd
+
i
]
-
ds
[
i
]
>=
0
:
out
=
(
input
.
shape
[
-
nd
+
i
]
-
ds
[
i
])
//
st
[
i
]
+
1
if
not
ignore_border
:
if
out
>
0
:
if
input
.
shape
[
-
nd
+
i
]
-
((
out
-
1
)
*
st
[
i
]
+
ds
[
i
])
>
0
:
if
input
.
shape
[
-
nd
+
i
]
-
out
*
st
[
i
]
>
0
:
out
+=
1
else
:
if
input
.
shape
[
-
nd
+
i
]
>
0
:
out
+=
1
out_shp
.
append
(
out
)
func
=
numpy
.
max
if
mode
==
'sum'
:
func
=
numpy
.
sum
elif
mode
!=
'max'
:
func
=
numpy
.
average
output_val
=
numpy
.
zeros
(
out_shp
)
for
l
in
numpy
.
ndindex
(
*
input
.
shape
[:
-
nd
]):
for
r
in
numpy
.
ndindex
(
*
output_val
.
shape
[
-
nd
:]):
region
=
[]
for
i
in
range
(
nd
):
r_st
=
r
[
i
]
*
st
[
i
]
r_end
=
builtins
.
min
(
r_st
+
ds
[
i
],
input
.
shape
[
-
nd
+
i
])
region
.
append
(
slice
(
r_st
,
r_end
))
patch
=
input
[
l
][
region
]
output_val
[
l
][
r
]
=
func
(
patch
)
return
output_val
def
test_DownsampleFactorMax
(
self
):
def
test_DownsampleFactorMax
(
self
):
rng
=
numpy
.
random
.
RandomState
(
utt
.
fetch_seed
())
rng
=
numpy
.
random
.
RandomState
(
utt
.
fetch_seed
())
# generate random images
# maxpool, input size
maxpoolshps
=
((
1
,
1
),
(
2
,
2
),
(
3
,
3
),
(
2
,
3
))
examples
=
(
imval
=
rng
.
rand
(
4
,
2
,
16
,
16
)
((
2
,),
(
16
,)),
images
=
tensor
.
dtensor4
()
((
2
,),
(
4
,
16
,)),
for
maxpoolshp
,
ignore_border
,
mode
in
product
(
maxpoolshps
,
((
2
,),
(
4
,
2
,
16
,)),
((
1
,
1
),
(
4
,
2
,
16
,
16
)),
((
2
,
2
),
(
4
,
2
,
16
,
16
)),
((
3
,
3
),
(
4
,
2
,
16
,
16
)),
((
3
,
2
),
(
4
,
2
,
16
,
16
)),
((
3
,
2
,
2
),
(
3
,
2
,
16
,
16
,
16
)),
((
2
,
3
,
2
),
(
3
,
2
,
16
,
16
,
16
)),
((
2
,
2
,
3
),
(
3
,
2
,
16
,
16
,
16
)),
((
2
,
2
,
3
,
2
),
(
3
,
2
,
6
,
6
,
6
,
5
)),
)
for
example
,
ignore_border
,
mode
in
product
(
examples
,
[
True
,
False
],
[
True
,
False
],
[
'max'
,
[
'max'
,
'sum'
,
'sum'
,
'average_inc_pad'
,
'average_inc_pad'
,
'average_exc_pad'
]):
'average_exc_pad'
]):
# print 'maxpoolshp =', maxpoolshp
(
maxpoolshp
,
inputsize
)
=
example
# print 'ignore_border =', ignore_border
imval
=
rng
.
rand
(
*
inputsize
)
images
=
theano
.
shared
(
imval
)
# Pure Numpy computation
# Pure Numpy computation
numpy_output_val
=
self
.
numpy_max_pool_2
d
(
imval
,
maxpoolshp
,
numpy_output_val
=
self
.
numpy_max_pool_n
d
(
imval
,
maxpoolshp
,
ignore_border
,
ignore_border
,
mode
=
mode
)
mode
=
mode
)
# The pool_2d or pool_3d helper methods
if
len
(
maxpoolshp
)
==
2
:
output
=
pool_2d
(
images
,
maxpoolshp
,
ignore_border
,
output
=
pool_2d
(
images
,
maxpoolshp
,
ignore_border
,
mode
=
mode
)
mode
=
mode
)
f
=
function
([
images
,
],
[
output
,
])
f
=
function
([],
[
output
,
])
output_val
=
f
(
imval
)
output_val
=
f
()
utt
.
assert_allclose
(
output_val
,
numpy_output_val
)
elif
len
(
maxpoolshp
)
==
3
:
output
=
pool_3d
(
images
,
maxpoolshp
,
ignore_border
,
mode
=
mode
)
f
=
function
([],
[
output
,
])
output_val
=
f
()
utt
.
assert_allclose
(
output_val
,
numpy_output_val
)
utt
.
assert_allclose
(
output_val
,
numpy_output_val
)
# Pool op
# Pool op
maxpool_op
=
Pool
(
ignore_border
=
ignore_border
,
maxpool_op
=
Pool
(
ndim
=
len
(
maxpoolshp
),
ignore_border
=
ignore_border
,
mode
=
mode
)(
images
,
maxpoolshp
)
mode
=
mode
)(
images
,
maxpoolshp
)
output_shape
=
Pool
.
out_shape
(
imval
.
shape
,
maxpoolshp
,
output_shape
=
Pool
.
out_shape
(
imval
.
shape
,
maxpoolshp
,
ndim
=
len
(
maxpoolshp
),
ignore_border
=
ignore_border
)
ignore_border
=
ignore_border
)
utt
.
assert_allclose
(
numpy
.
asarray
(
output_shape
),
numpy_output_val
.
shape
)
utt
.
assert_allclose
(
numpy
.
asarray
(
output_shape
),
numpy_output_val
.
shape
)
f
=
function
([
images
],
maxpool_op
)
f
=
function
([
],
maxpool_op
)
output_val
=
f
(
imval
)
output_val
=
f
(
)
utt
.
assert_allclose
(
output_val
,
numpy_output_val
)
utt
.
assert_allclose
(
output_val
,
numpy_output_val
)
def
test_DownsampleFactorMaxStride
(
self
):
def
test_DownsampleFactorMaxStride
(
self
):
rng
=
numpy
.
random
.
RandomState
(
utt
.
fetch_seed
())
rng
=
numpy
.
random
.
RandomState
(
utt
.
fetch_seed
())
maxpoolshps
=
((
1
,
1
),
(
3
,
3
),
(
5
,
3
),
(
16
,
16
))
# maxpool, stride, ignore_border, input, output sizes
stridesizes
=
((
1
,
1
),
(
3
,
3
),
(
5
,
7
),)
examples
=
(
# generate random images
((
1
,
1
),
(
1
,
1
),
True
,
(
4
,
10
,
16
,
16
),
(
4
,
10
,
16
,
16
)),
imval
=
rng
.
rand
(
4
,
10
,
16
,
16
)
((
1
,
1
),
(
3
,
3
),
True
,
(
4
,
10
,
16
,
16
),
(
4
,
10
,
6
,
6
)),
# The same for each mode
((
1
,
1
),
(
5
,
7
),
True
,
(
4
,
10
,
16
,
16
),
(
4
,
10
,
4
,
3
)),
outputshps
=
(
((
1
,
1
),
(
1
,
1
),
False
,
(
4
,
10
,
16
,
16
),
(
4
,
10
,
16
,
16
)),
(
4
,
10
,
16
,
16
),
(
4
,
10
,
6
,
6
),
(
4
,
10
,
4
,
3
),
((
1
,
1
),
(
3
,
3
),
False
,
(
4
,
10
,
16
,
16
),
(
4
,
10
,
6
,
6
)),
(
4
,
10
,
16
,
16
),
(
4
,
10
,
6
,
6
),
(
4
,
10
,
4
,
3
),
((
1
,
1
),
(
5
,
7
),
False
,
(
4
,
10
,
16
,
16
),
(
4
,
10
,
4
,
3
)),
(
4
,
10
,
14
,
14
),
(
4
,
10
,
5
,
5
),
(
4
,
10
,
3
,
2
),
((
3
,
3
),
(
1
,
1
),
True
,
(
4
,
10
,
16
,
16
),
(
4
,
10
,
14
,
14
)),
(
4
,
10
,
14
,
14
),
(
4
,
10
,
6
,
6
),
(
4
,
10
,
4
,
3
),
((
3
,
3
),
(
3
,
3
),
True
,
(
4
,
10
,
16
,
16
),
(
4
,
10
,
5
,
5
)),
(
4
,
10
,
12
,
14
),
(
4
,
10
,
4
,
5
),
(
4
,
10
,
3
,
2
),
((
3
,
3
),
(
5
,
7
),
True
,
(
4
,
10
,
16
,
16
),
(
4
,
10
,
3
,
2
)),
(
4
,
10
,
12
,
14
),
(
4
,
10
,
5
,
6
),
(
4
,
10
,
4
,
3
),
((
3
,
3
),
(
1
,
1
),
False
,
(
4
,
10
,
16
,
16
),
(
4
,
10
,
14
,
14
)),
(
4
,
10
,
1
,
1
),
(
4
,
10
,
1
,
1
),
(
4
,
10
,
1
,
1
),
((
3
,
3
),
(
3
,
3
),
False
,
(
4
,
10
,
16
,
16
),
(
4
,
10
,
6
,
6
)),
(
4
,
10
,
1
,
1
),
(
4
,
10
,
1
,
1
),
(
4
,
10
,
1
,
1
),)
((
3
,
3
),
(
5
,
7
),
False
,
(
4
,
10
,
16
,
16
),
(
4
,
10
,
4
,
3
)),
((
5
,
3
),
(
1
,
1
),
True
,
(
4
,
10
,
16
,
16
),
(
4
,
10
,
12
,
14
)),
images
=
tensor
.
dtensor4
()
((
5
,
3
),
(
3
,
3
),
True
,
(
4
,
10
,
16
,
16
),
(
4
,
10
,
4
,
5
)),
indx
=
0
((
5
,
3
),
(
5
,
7
),
True
,
(
4
,
10
,
16
,
16
),
(
4
,
10
,
3
,
2
)),
for
mode
,
maxpoolshp
,
ignore_border
in
product
([
'max'
,
((
5
,
3
),
(
1
,
1
),
False
,
(
4
,
10
,
16
,
16
),
(
4
,
10
,
12
,
14
)),
((
5
,
3
),
(
3
,
3
),
False
,
(
4
,
10
,
16
,
16
),
(
4
,
10
,
5
,
6
)),
((
5
,
3
),
(
5
,
7
),
False
,
(
4
,
10
,
16
,
16
),
(
4
,
10
,
4
,
3
)),
((
16
,
16
),
(
1
,
1
),
True
,
(
4
,
10
,
16
,
16
),
(
4
,
10
,
1
,
1
)),
((
16
,
16
),
(
3
,
3
),
True
,
(
4
,
10
,
16
,
16
),
(
4
,
10
,
1
,
1
)),
((
16
,
16
),
(
5
,
7
),
True
,
(
4
,
10
,
16
,
16
),
(
4
,
10
,
1
,
1
)),
((
16
,
16
),
(
1
,
1
),
False
,
(
4
,
10
,
16
,
16
),
(
4
,
10
,
1
,
1
)),
((
16
,
16
),
(
3
,
3
),
False
,
(
4
,
10
,
16
,
16
),
(
4
,
10
,
1
,
1
)),
((
16
,
16
),
(
5
,
7
),
False
,
(
4
,
10
,
16
,
16
),
(
4
,
10
,
1
,
1
)),
((
3
,),
(
5
,),
True
,
(
16
,),
(
3
,)),
((
3
,),
(
5
,),
True
,
(
2
,
16
,),
(
2
,
3
,)),
((
5
,),
(
3
,),
True
,
(
2
,
3
,
16
,),
(
2
,
3
,
4
,)),
((
5
,
1
,
3
),
(
3
,
3
,
3
),
True
,
(
2
,
16
,
16
,
16
),
(
2
,
4
,
6
,
5
)),
((
5
,
1
,
3
),
(
3
,
3
,
3
),
True
,
(
4
,
2
,
16
,
16
,
16
),
(
4
,
2
,
4
,
6
,
5
)),
)
for
example
,
mode
in
product
(
examples
,
[
'max'
,
'sum'
,
'sum'
,
'average_inc_pad'
,
'average_inc_pad'
,
'average_exc_pad'
],
'average_exc_pad'
]):
maxpoolshps
,
(
maxpoolshp
,
stride
,
ignore_border
,
inputshp
,
outputshp
)
=
example
[
True
,
False
]):
# generate random images
for
stride
in
stridesizes
:
imval
=
rng
.
rand
(
*
inputshp
)
outputshp
=
outputshps
[
indx
%
len
(
outputshps
)]
images
=
theano
.
shared
(
imval
)
indx
+=
1
# Pool op
# Pool op
numpy_output_val
=
\
numpy_output_val
=
\
self
.
numpy_max_pool_2
d_stride
(
imval
,
maxpoolshp
,
self
.
numpy_max_pool_n
d_stride
(
imval
,
maxpoolshp
,
ignore_border
,
stride
,
ignore_border
,
stride
,
mode
)
mode
)
assert
numpy_output_val
.
shape
==
outputshp
,
(
assert
numpy_output_val
.
shape
==
outputshp
,
(
"outshape is
%
s, calculated shape is
%
s"
"outshape is
%
s, calculated shape is
%
s"
%
(
outputshp
,
numpy_output_val
.
shape
))
%
(
outputshp
,
numpy_output_val
.
shape
))
maxpool_op
=
\
maxpool_op
=
\
Pool
(
ignore_border
=
ignore_border
,
mode
=
mode
)(
Pool
(
ndim
=
len
(
maxpoolshp
),
images
,
maxpoolshp
,
stride
)
ignore_border
=
ignore_border
,
f
=
function
([
images
],
maxpool_op
)
mode
=
mode
)(
images
,
maxpoolshp
,
stride
)
output_val
=
f
(
imval
)
f
=
function
([],
maxpool_op
)
output_val
=
f
()
utt
.
assert_allclose
(
output_val
,
numpy_output_val
)
utt
.
assert_allclose
(
output_val
,
numpy_output_val
)
def
test_DownsampleFactorMaxStrideExtra
(
self
):
def
test_DownsampleFactorMaxStrideExtra
(
self
):
...
@@ -287,7 +455,8 @@ class TestDownsampleFactorMax(utt.InferShapeTester):
...
@@ -287,7 +455,8 @@ class TestDownsampleFactorMax(utt.InferShapeTester):
"outshape is
%
s, calculated shape is
%
s"
"outshape is
%
s, calculated shape is
%
s"
%
(
outputshp
,
numpy_output_val
.
shape
))
%
(
outputshp
,
numpy_output_val
.
shape
))
maxpool_op
=
\
maxpool_op
=
\
Pool
(
ignore_border
=
ignore_border
,
mode
=
mode
)(
Pool
(
ignore_border
=
ignore_border
,
ndim
=
len
(
maxpoolshp
),
mode
=
mode
)(
images
,
maxpoolshp
,
stride
)
images
,
maxpoolshp
,
stride
)
f
=
function
([
images
],
maxpool_op
)
f
=
function
([
images
],
maxpool_op
)
output_val
=
f
(
imval
)
output_val
=
f
(
imval
)
...
@@ -296,319 +465,351 @@ class TestDownsampleFactorMax(utt.InferShapeTester):
...
@@ -296,319 +465,351 @@ class TestDownsampleFactorMax(utt.InferShapeTester):
def
test_DownsampleFactorMaxPaddingStride
(
self
):
def
test_DownsampleFactorMaxPaddingStride
(
self
):
ignore_border
=
True
# padding does not support ignore_border=False
ignore_border
=
True
# padding does not support ignore_border=False
rng
=
numpy
.
random
.
RandomState
(
utt
.
fetch_seed
())
rng
=
numpy
.
random
.
RandomState
(
utt
.
fetch_seed
())
maxpoolsizes
=
[(
3
,
3
),
(
4
,
4
),
(
3
,
4
),
(
4
,
3
),
(
2
,
2
)]
# maxpool, stride, padding, input sizes
stridesizes
=
[(
2
,
2
),
(
2
,
2
),
(
1
,
1
),
(
1
,
2
),
(
2
,
2
)]
examples
=
(
paddingsizes
=
[(
2
,
2
),
(
1
,
2
),
(
2
,
1
),
(
0
,
0
),
(
1
,
1
)]
((
3
,),
(
2
,),
(
2
,),
(
5
,)),
imgsizes
=
[(
5
,
5
),
(
5
,
5
),
(
5
,
6
),
(
6
,
5
),
(
5
,
5
)]
((
3
,),
(
2
,),
(
2
,),
(
4
,
5
)),
m
=
4
# minibatch
((
3
,),
(
2
,),
(
2
,),
(
4
,
2
,
5
)),
c
=
2
# channel size
((
3
,),
(
2
,),
(
2
,),
(
4
,
2
,
5
,
5
)),
images
=
tensor
.
dtensor4
()
((
3
,
3
),
(
2
,
2
),
(
2
,
2
),
(
4
,
2
,
5
,
5
)),
for
indx
,
mode
in
product
(
numpy
.
arange
(
len
(
maxpoolsizes
)),
((
4
,
4
),
(
2
,
2
),
(
1
,
2
),
(
4
,
2
,
5
,
5
)),
((
3
,
4
),
(
1
,
1
),
(
2
,
1
),
(
4
,
2
,
5
,
6
)),
((
4
,
3
),
(
1
,
2
),
(
0
,
0
),
(
4
,
2
,
6
,
5
)),
((
2
,
2
),
(
2
,
2
),
(
1
,
1
),
(
4
,
2
,
5
,
5
)),
((
4
,
3
,
2
),
(
1
,
2
,
2
),
(
0
,
2
,
1
),
(
4
,
6
,
6
,
5
)),
((
4
,
3
,
2
),
(
1
,
2
,
2
),
(
0
,
2
,
1
),
(
4
,
2
,
6
,
5
,
5
)),
)
for
example
,
mode
in
product
(
examples
,
[
'max'
,
'sum'
,
'average_inc_pad'
,
[
'max'
,
'sum'
,
'average_inc_pad'
,
'average_exc_pad'
]):
'average_exc_pad'
]):
imgsize
=
imgsizes
[
indx
]
(
maxpoolshp
,
stridesize
,
paddingsize
,
inputsize
)
=
example
imval
=
rng
.
rand
(
m
,
c
,
imgsize
[
0
],
imgsize
[
1
])
-
0.5
imval
=
rng
.
rand
(
*
inputsize
)
-
0.5
images
=
theano
.
shared
(
imval
)
stridesize
=
stridesizes
[
indx
]
maxpoolsize
=
maxpoolsizes
[
indx
]
numpy_output_val
=
self
.
numpy_max_pool_nd_stride_padding
(
paddingsize
=
paddingsizes
[
indx
]
imval
,
maxpoolshp
,
ignore_border
,
numpy_output_val
=
self
.
numpy_max_pool_2d_stride_padding
(
imval
,
maxpoolsize
,
ignore_border
,
stridesize
,
paddingsize
,
mode
)
stridesize
,
paddingsize
,
mode
)
maxpool_op
=
Pool
(
ignore_border
=
ignore_border
,
mode
=
mode
)(
maxpool_op
=
Pool
(
images
,
maxpoolsize
,
stridesize
,
paddingsize
)
ndim
=
len
(
maxpoolshp
),
f
=
function
([
images
],
maxpool_op
)
ignore_border
=
ignore_border
,
output_val
=
f
(
imval
)
mode
=
mode
)(
images
,
maxpoolshp
,
stridesize
,
paddingsize
)
f
=
function
([],
maxpool_op
)
output_val
=
f
()
utt
.
assert_allclose
(
output_val
,
numpy_output_val
)
utt
.
assert_allclose
(
output_val
,
numpy_output_val
)
def
test_DownsampleFactorMaxPaddingStride_grad
(
self
):
def
test_DownsampleFactorMaxPaddingStride_grad
(
self
):
rng
=
numpy
.
random
.
RandomState
(
utt
.
fetch_seed
())
rng
=
numpy
.
random
.
RandomState
(
utt
.
fetch_seed
())
imgsizes
=
((
10
,
10
),
(
10
,
5
),
(
5
,
5
))
# maxpool, stride, padding, input sizes
maxpoolsizes
=
((
5
,
3
),
(
3
,
5
),
(
3
,
3
))
examples
=
(
stridesizes
=
((
3
,
2
),
(
2
,
3
),
(
3
,
3
))
((
10
,),
(
5
,),
(
3
,),
(
2
,)),
paddingsizes
=
((
2
,
2
),
(
2
,
1
),
(
2
,
2
))
((
10
,),
(
5
,),
(
3
,),
(
2
,
2
)),
((
10
,),
(
5
,),
(
3
,),
(
1
,
1
,
2
)),
((
10
,
10
),
(
5
,
3
),
(
3
,
2
),
(
1
,
1
,
2
,
2
)),
((
10
,
5
),
(
3
,
5
),
(
2
,
3
),
(
1
,
1
,
2
,
1
)),
((
5
,
5
),
(
3
,
3
),
(
3
,
3
),
(
1
,
1
,
2
,
2
)),
((
5
,
5
,
5
),
(
3
,
3
,
3
),
(
3
,
3
,
3
),
(
1
,
1
,
2
,
2
,
2
)),
)
# average_inc_pad and average_exc_pad do not
# average_inc_pad and average_exc_pad do not
# support grad with padding
# support grad with padding
for
mode
in
[
'max'
,
'sum'
]:
for
mode
in
[
'max'
,
'sum'
]:
for
i
in
range
(
len
(
imgsizes
)):
for
example
in
examples
:
imgsize
=
imgsizes
[
i
]
(
maxpoolshp
,
stridesize
,
paddingsize
,
inputsize
)
=
example
imval
=
rng
.
rand
(
1
,
1
,
imgsize
[
0
],
imgsize
[
1
])
*
10.0
imval
=
rng
.
rand
(
*
inputsize
)
*
10.0
maxpoolsize
=
maxpoolsizes
[
i
]
stridesize
=
stridesizes
[
i
]
paddingsize
=
paddingsizes
[
i
]
def
mp
(
input
):
def
mp
(
input
):
return
Pool
(
ignore_border
=
True
,
mode
=
mode
)(
return
Pool
(
input
,
maxpoolsize
,
stridesize
,
paddingsize
)
ndim
=
len
(
maxpoolshp
),
ignore_border
=
True
,
mode
=
mode
,
)(
input
,
maxpoolshp
,
stridesize
,
paddingsize
)
utt
.
verify_grad
(
mp
,
[
imval
],
rng
=
rng
)
utt
.
verify_grad
(
mp
,
[
imval
],
rng
=
rng
)
def
test_DownsampleFactorMax_grad
(
self
):
def
test_DownsampleFactorMax_grad
(
self
):
rng
=
numpy
.
random
.
RandomState
(
utt
.
fetch_seed
())
rng
=
numpy
.
random
.
RandomState
(
utt
.
fetch_seed
())
maxpoolshps
=
((
1
,
1
),
(
3
,
2
),
(
2
,
3
))
# maxpool, input sizes
imval
=
rng
.
rand
(
2
,
3
,
3
,
4
)
*
10.0
examples
=
(
# more variance means numeric gradient will be more accurate
((
2
,),
(
3
,)),
((
2
,),
(
2
,
3
)),
for
maxpoolshp
,
ignore_border
,
mode
in
product
(
maxpoolshps
,
((
2
,),
(
2
,
3
,
3
)),
((
1
,
1
),
(
2
,
3
,
3
,
4
)),
((
3
,
2
),
(
2
,
3
,
3
,
4
)),
((
2
,
3
),
(
2
,
3
,
3
,
4
)),
((
1
,
1
,
1
),
(
2
,
3
,
3
)),
((
3
,
2
,
2
),
(
2
,
3
,
3
,
4
)),
((
2
,
3
,
2
),
(
2
,
3
,
3
,
4
,
4
)),
((
2
,
2
,
3
),
(
2
,
3
,
3
,
4
,
4
)),
)
for
example
,
ignore_border
,
mode
in
product
(
examples
,
[
True
,
False
],
[
True
,
False
],
[
'max'
,
[
'max'
,
'sum'
,
'sum'
,
'average_inc_pad'
,
'average_inc_pad'
,
'average_exc_pad'
]):
'average_exc_pad'
]):
(
maxpoolshp
,
inputsize
)
=
example
imval
=
rng
.
rand
(
*
inputsize
)
*
10.0
# more variance means numeric gradient will be more accurate
def
mp
(
input
):
def
mp
(
input
):
return
Pool
(
ignore_border
=
ignore_border
,
mode
=
mode
)(
return
Pool
(
ndim
=
len
(
maxpoolshp
),
input
,
maxpoolshp
)
ignore_border
=
ignore_border
,
mode
=
mode
)(
input
,
maxpoolshp
)
utt
.
verify_grad
(
mp
,
[
imval
],
rng
=
rng
)
utt
.
verify_grad
(
mp
,
[
imval
],
rng
=
rng
)
def
test_DownsampleFactorMax_grad_st
(
self
):
# pool, stride, input sizes
# checks the gradient for the case that stride is used
pool_grad_st_examples
=
(
rng
=
numpy
.
random
.
RandomState
(
utt
.
fetch_seed
())
((
1
,),
(
1
,),
(
16
,)),
maxpoolshps
=
((
1
,
1
),
(
3
,
3
),
(
5
,
3
))
((
1
,),
(
3
,),
(
1
,
16
)),
stridesizes
=
((
1
,
1
),
(
3
,
3
),
(
5
,
7
))
((
1
,),
(
5
,),
(
1
,
2
,
16
)),
imval
=
rng
.
rand
(
1
,
2
,
16
,
16
)
((
2
,),
(
1
,),
(
16
,)),
((
2
,),
(
3
,),
(
1
,
16
)),
for
maxpoolshp
,
ignore_border
,
mode
,
stride
in
product
(
maxpoolshps
,
((
2
,),
(
5
,),
(
1
,
2
,
16
)),
((
1
,
1
),
(
1
,
1
),
(
1
,
2
,
16
,
16
)),
((
1
,
1
),
(
3
,
3
),
(
1
,
2
,
16
,
16
)),
((
1
,
1
),
(
5
,
7
),
(
1
,
2
,
16
,
16
)),
((
3
,
3
),
(
1
,
1
),
(
1
,
2
,
16
,
16
)),
((
3
,
3
),
(
3
,
3
),
(
1
,
2
,
16
,
16
)),
((
3
,
3
),
(
5
,
7
),
(
1
,
2
,
16
,
16
)),
((
5
,
3
),
(
1
,
1
),
(
1
,
2
,
16
,
16
)),
((
5
,
3
),
(
3
,
3
),
(
1
,
2
,
16
,
16
)),
((
5
,
3
),
(
5
,
7
),
(
1
,
2
,
16
,
16
)),
((
5
,
1
,
2
),
(
1
,
1
,
1
),
(
16
,
3
,
16
)),
((
5
,
1
,
2
),
(
3
,
1
,
2
),
(
1
,
16
,
3
,
16
)),
((
5
,
1
,
2
),
(
5
,
1
,
4
),
(
1
,
2
,
16
,
3
,
16
)),
((
5
,
3
),
(
3
,
2
),
(
1
,
2
,
16
,
16
)),
((
5
,
3
),
(
7
,
5
),
(
1
,
2
,
16
,
16
)),
((
5
,
3
),
(
10
,
6
),
(
1
,
2
,
16
,
16
)),
((
5
,
5
),
(
1
,
1
),
(
1
,
2
,
8
,
5
)),
((
3
,
2
),
(
2
,
3
),
(
1
,
2
,
8
,
5
)),
((
7
,
7
),
(
10
,
10
),
(
1
,
2
,
8
,
5
)),
((
9
,
9
),
(
1
,
1
),
(
1
,
2
,
8
,
5
)),
)
@parameterized.expand
(
product
(
pool_grad_st_examples
,
[
True
,
False
],
[
True
,
False
],
[
'max'
,
[
'max'
,
'sum'
,
'sum'
,
'average_inc_pad'
,
'average_inc_pad'
,
'average_exc_pad'
],
'average_exc_pad'
]),
stridesizes
):
testcase_func_name
=
utt
.
custom_name_func
)
def
mp
(
input
):
def
test_DownsampleFactorMax_grad_st
(
self
,
example
,
ignore_border
,
mode
):
return
Pool
(
ignore_border
=
ignore_border
,
mode
=
mode
)(
# checks the gradient for the case that stride is used
input
,
maxpoolshp
,
stride
)
utt
.
verify_grad
(
mp
,
[
imval
],
rng
=
rng
)
def
test_DownsampleFactorMax_grad_st_extra
(
self
):
# checks the gradient for the case
# that stride is used for extra examples
rng
=
numpy
.
random
.
RandomState
(
utt
.
fetch_seed
())
rng
=
numpy
.
random
.
RandomState
(
utt
.
fetch_seed
())
maxpoolshps
=
((
5
,
3
),
(
5
,
3
),
(
5
,
3
),
(
5
,
5
),
(
3
,
2
),
(
7
,
7
),
(
9
,
9
))
stridesizes
=
((
3
,
2
),
(
7
,
5
),
(
10
,
6
),
(
1
,
1
),
(
2
,
3
),
(
10
,
10
),
(
1
,
1
))
imvsizs
=
((
16
,
16
),
(
16
,
16
),
(
16
,
16
),
(
8
,
5
),
(
8
,
5
),
(
8
,
5
),
(
8
,
5
))
for
mode
in
[
'max'
,
'sum'
,
'average_inc_pad'
,
'average_exc_pad'
]:
(
maxpoolshp
,
stridesize
,
inputsize
)
=
example
for
indx
in
numpy
.
arange
(
len
(
maxpoolshps
)):
imval
=
rng
.
rand
(
*
inputsize
)
imvsize
=
imvsizs
[
indx
]
imval
=
rng
.
rand
(
1
,
2
,
imvsize
[
0
],
imvsize
[
1
])
stride
=
stridesizes
[
indx
]
maxpoolshp
=
maxpoolshps
[
indx
]
for
ignore_border
in
[
True
,
False
]:
def
mp
(
input
):
def
mp
(
input
):
return
Pool
(
ignore_border
=
ignore_border
,
mode
=
mode
)(
return
Pool
(
ndim
=
len
(
maxpoolshp
),
input
,
maxpoolshp
,
stride
)
ignore_border
=
ignore_border
,
mode
=
mode
)(
input
,
maxpoolshp
,
stridesize
)
utt
.
verify_grad
(
mp
,
[
imval
],
rng
=
rng
)
utt
.
verify_grad
(
mp
,
[
imval
],
rng
=
rng
)
def
test_DownsampleFactorMaxGrad_grad
(
self
):
def
test_DownsampleFactorMaxGrad_grad
(
self
):
rng
=
numpy
.
random
.
RandomState
(
utt
.
fetch_seed
())
rng
=
numpy
.
random
.
RandomState
(
utt
.
fetch_seed
())
maxpoolshps
=
((
1
,
1
),
(
3
,
2
),
(
2
,
3
))
# maxpool, input sizes
imval
=
rng
.
rand
(
2
,
3
,
3
,
4
)
*
10.0
examples
=
(
((
2
,),
(
2
,)),
((
2
,),
(
2
,
3
)),
((
1
,
1
),
(
2
,
3
,
3
,
4
)),
((
3
,
2
),
(
2
,
3
,
3
,
4
)),
((
2
,
3
),
(
2
,
3
,
3
,
4
)),
((
1
,
1
,
1
),
(
2
,
3
,
3
,
4
)),
((
3
,
2
,
2
),
(
2
,
3
,
3
,
4
)),
((
2
,
3
,
2
),
(
2
,
3
,
3
,
4
)),
((
2
,
2
,
3
),
(
2
,
3
,
3
,
4
)),
((
2
,
2
,
3
),
(
2
,
1
,
3
,
3
,
4
)),
)
for
(
maxpoolshp
,
inputsize
)
in
examples
:
imval
=
rng
.
rand
(
*
inputsize
)
*
10.0
# more variance means numeric gradient will be more accurate
# more variance means numeric gradient will be more accurate
for
maxpoolshp
in
maxpoolshps
:
for
ignore_border
in
[
True
,
False
]:
for
ignore_border
in
[
True
,
False
]:
# print 'maxpoolshp =', maxpoolshp
# print 'maxpoolshp =', maxpoolshp
# print 'ignore_border =', ignore_border
# print 'ignore_border =', ignore_border
# The shape of the gradient will be the shape of the output
# The shape of the gradient will be the shape of the output
grad_shape
=
Pool
.
out_shape
(
grad_shape
=
Pool
.
out_shape
(
imval
.
shape
,
maxpoolshp
,
ignore_border
=
ignore_border
)
imval
.
shape
,
maxpoolshp
,
ndim
=
len
(
maxpoolshp
),
ignore_border
=
ignore_border
)
grad_val
=
rng
.
rand
(
*
grad_shape
)
*
10.0
grad_val
=
rng
.
rand
(
*
grad_shape
)
*
10.0
def
mp
(
input
,
grad
):
def
mp
(
input
,
grad
):
out
=
Pool
(
ignore_border
=
ignore_border
)(
input
,
maxpoolshp
)
out
=
Pool
(
grad_op
=
MaxPoolGrad
(
ignore_border
=
ignore_border
)
ndim
=
len
(
maxpoolshp
),
ignore_border
=
ignore_border
)(
input
,
maxpoolshp
)
grad_op
=
MaxPoolGrad
(
ndim
=
len
(
maxpoolshp
),
ignore_border
=
ignore_border
)
return
grad_op
(
input
,
out
,
grad
,
maxpoolshp
)
return
grad_op
(
input
,
out
,
grad
,
maxpoolshp
)
utt
.
verify_grad
(
mp
,
[
imval
,
grad_val
],
rng
=
rng
)
utt
.
verify_grad
(
mp
,
[
imval
,
grad_val
],
rng
=
rng
)
def
test_AveragePoolGrad_grad
(
self
):
def
test_AveragePoolGrad_grad
(
self
):
rng
=
numpy
.
random
.
RandomState
(
utt
.
fetch_seed
())
rng
=
numpy
.
random
.
RandomState
(
utt
.
fetch_seed
())
avgpoolshps
=
((
1
,
1
),
(
3
,
2
),
(
2
,
3
))
# avgpool, input sizes
imval
=
rng
.
rand
(
2
,
3
,
3
,
4
)
*
10.0
examples
=
(
((
2
,),
(
2
,)),
((
2
,),
(
2
,
3
)),
((
1
,
1
),
(
2
,
3
,
3
,
4
)),
((
3
,
2
),
(
2
,
3
,
3
,
4
)),
((
2
,
3
),
(
2
,
3
,
3
,
4
)),
((
1
,
1
,
1
),
(
2
,
3
,
3
,
4
)),
((
3
,
2
,
2
),
(
2
,
3
,
3
,
4
)),
((
2
,
3
,
2
),
(
2
,
3
,
3
,
4
)),
((
2
,
2
,
3
),
(
2
,
3
,
3
,
4
)),
((
2
,
2
,
3
),
(
2
,
1
,
3
,
3
,
4
)),
)
for
(
avgpoolshp
,
inputsize
)
in
examples
:
imval
=
rng
.
rand
(
*
inputsize
)
*
10.0
# more variance means numeric gradient will be more accurate
# more variance means numeric gradient will be more accurate
for
avgpoolshp
in
avgpoolshps
:
for
ignore_border
in
[
True
,
False
]:
for
ignore_border
in
[
True
,
False
]:
for
mode
in
[
'sum'
,
'average_inc_pad'
,
'average_exc_pad'
]:
for
mode
in
[
'sum'
,
'average_inc_pad'
,
'average_exc_pad'
]:
# print 'maxpoolshp =', maxpoolshp
# print 'maxpoolshp =', maxpoolshp
# print 'ignore_border =', ignore_border
# print 'ignore_border =', ignore_border
# The shape of the gradient will be the shape of the output
# The shape of the gradient will be the shape of the output
grad_shape
=
Pool
.
out_shape
(
grad_shape
=
Pool
.
out_shape
(
imval
.
shape
,
avgpoolshp
,
ignore_border
=
ignore_border
)
imval
.
shape
,
avgpoolshp
,
ndim
=
len
(
avgpoolshp
),
ignore_border
=
ignore_border
)
grad_val
=
rng
.
rand
(
*
grad_shape
)
*
10.0
grad_val
=
rng
.
rand
(
*
grad_shape
)
*
10.0
def
mp
(
input
,
grad
):
def
mp
(
input
,
grad
):
grad_op
=
AveragePoolGrad
(
ignore_border
=
ignore_border
,
grad_op
=
AveragePoolGrad
(
mode
=
mode
)
ndim
=
len
(
avgpoolshp
),
ignore_border
=
ignore_border
,
mode
=
mode
)
return
grad_op
(
input
,
grad
,
avgpoolshp
)
return
grad_op
(
input
,
grad
,
avgpoolshp
)
utt
.
verify_grad
(
mp
,
[
imval
,
grad_val
],
rng
=
rng
)
utt
.
verify_grad
(
mp
,
[
imval
,
grad_val
],
rng
=
rng
)
def
test_DownsampleFactorMaxGrad_grad_st
(
self
):
@parameterized.expand
(
product
(
pool_grad_st_examples
,
[
True
,
False
]),
testcase_func_name
=
utt
.
custom_name_func
)
def
test_DownsampleFactorMaxGrad_grad_st
(
self
,
example
,
ignore_border
):
# checks the gradient of the gradient for
# checks the gradient of the gradient for
# the case that stride is used
# the case that stride is used
rng
=
numpy
.
random
.
RandomState
(
utt
.
fetch_seed
())
rng
=
numpy
.
random
.
RandomState
(
utt
.
fetch_seed
())
maxpoolshps
=
((
1
,
1
),
(
3
,
3
),
(
5
,
3
))
(
maxpoolshp
,
stride
,
inputsize
)
=
example
stridesizes
=
((
1
,
1
),
(
3
,
3
),
(
5
,
7
))
imval
=
rng
.
rand
(
*
inputsize
)
imval
=
rng
.
rand
(
1
,
2
,
16
,
16
)
for
maxpoolshp
in
maxpoolshps
:
for
ignore_border
in
[
True
,
False
]:
for
stride
in
stridesizes
:
grad_shape
=
Pool
.
out_shape
(
grad_shape
=
Pool
.
out_shape
(
imval
.
shape
,
maxpoolshp
,
imval
.
shape
,
maxpoolshp
,
ndim
=
len
(
maxpoolshp
)
,
ignore_border
=
ignore_border
,
st
=
stride
)
ignore_border
=
ignore_border
,
st
=
stride
)
# skip the grad verification when the output is empty
if
numpy
.
prod
(
grad_shape
)
!=
0
:
grad_val
=
rng
.
rand
(
*
grad_shape
)
grad_val
=
rng
.
rand
(
*
grad_shape
)
def
mp
(
input
,
grad
):
def
mp
(
input
,
grad
):
out
=
Pool
(
ignore_border
=
ignore_border
)(
out
=
Pool
(
input
,
maxpoolshp
,
stride
)
ndim
=
len
(
maxpoolshp
),
grad_op
=
MaxPoolGrad
(
ignore_border
=
ignore_border
)
ignore_border
=
ignore_border
)(
input
,
maxpoolshp
,
stride
)
grad_op
=
MaxPoolGrad
(
ndim
=
len
(
maxpoolshp
),
ignore_border
=
ignore_border
)
return
grad_op
(
input
,
out
,
grad
,
maxpoolshp
,
stride
)
return
grad_op
(
input
,
out
,
grad
,
maxpoolshp
,
stride
)
utt
.
verify_grad
(
mp
,
[
imval
,
grad_val
],
rng
=
rng
)
utt
.
verify_grad
(
mp
,
[
imval
,
grad_val
],
rng
=
rng
)
def
test_AveragePoolGrad_grad_st
(
self
):
@parameterized.expand
(
product
(
pool_grad_st_examples
,
[
True
,
False
],
[
'sum'
,
'average_inc_pad'
,
'average_exc_pad'
]),
testcase_func_name
=
utt
.
custom_name_func
)
def
test_AveragePoolGrad_grad_st
(
self
,
example
,
ignore_border
,
mode
):
# checks the gradient of the gradient for
# checks the gradient of the gradient for
# the case that stride is used
# the case that stride is used
rng
=
numpy
.
random
.
RandomState
(
utt
.
fetch_seed
())
rng
=
numpy
.
random
.
RandomState
(
utt
.
fetch_seed
())
avgpoolshps
=
((
1
,
1
),
(
3
,
3
),
(
5
,
3
))
(
avgpoolshp
,
stride
,
inputsize
)
=
example
stridesizes
=
((
1
,
1
),
(
3
,
3
),
(
5
,
7
))
imval
=
rng
.
rand
(
*
inputsize
)
imval
=
rng
.
rand
(
1
,
2
,
16
,
16
)
for
avgpoolshp
in
avgpoolshps
:
for
ignore_border
in
[
True
,
False
]:
for
mode
in
[
'sum'
,
'average_inc_pad'
,
'average_exc_pad'
]:
for
stride
in
stridesizes
:
grad_shape
=
Pool
.
out_shape
(
grad_shape
=
Pool
.
out_shape
(
imval
.
shape
,
avgpoolshp
,
imval
.
shape
,
avgpoolshp
,
ndim
=
len
(
avgpoolshp
),
ignore_border
=
ignore_border
,
st
=
stride
)
ignore_border
=
ignore_border
,
st
=
stride
)
grad_val
=
rng
.
rand
(
*
grad_shape
)
def
mp
(
input
,
grad
):
grad_op
=
AveragePoolGrad
(
ignore_border
=
ignore_border
,
mode
=
mode
)
return
grad_op
(
input
,
grad
,
avgpoolshp
,
stride
)
utt
.
verify_grad
(
mp
,
[
imval
,
grad_val
],
rng
=
rng
)
def
test_DownsampleFactorMaxGrad_grad_st_extra
(
self
):
# checks the gradient of the gradient for the case that
# stride is used for extra examples
rng
=
numpy
.
random
.
RandomState
(
utt
.
fetch_seed
())
maxpoolshps
=
((
5
,
3
),
(
5
,
3
),
(
5
,
3
),
(
5
,
5
),
(
3
,
2
),
(
7
,
7
),
(
9
,
9
))
stridesizes
=
((
3
,
2
),
(
7
,
5
),
(
10
,
6
),
(
1
,
1
),
(
2
,
3
),
(
10
,
10
),
(
1
,
1
))
imvsizs
=
((
16
,
16
),
(
16
,
16
),
(
16
,
16
),
(
8
,
5
),
(
8
,
5
),
(
8
,
5
),
(
8
,
5
))
for
indx
in
numpy
.
arange
(
len
(
maxpoolshps
)):
imvsize
=
imvsizs
[
indx
]
imval
=
rng
.
rand
(
1
,
2
,
imvsize
[
0
],
imvsize
[
1
])
stride
=
stridesizes
[
indx
]
maxpoolshp
=
maxpoolshps
[
indx
]
for
ignore_border
in
[
True
,
False
]:
grad_shape
=
Pool
.
out_shape
(
imval
.
shape
,
maxpoolshp
,
ignore_border
=
ignore_border
,
st
=
stride
)
grad_val
=
rng
.
rand
(
*
grad_shape
)
def
mp
(
input
,
grad
):
out
=
Pool
(
ignore_border
=
ignore_border
)(
input
,
maxpoolshp
,
stride
)
grad_op
=
MaxPoolGrad
(
ignore_border
=
ignore_border
)
return
grad_op
(
input
,
out
,
grad
,
maxpoolshp
,
stride
)
# skip the grad verification when the output is empty
# skip the grad verification when the output is empty
if
numpy
.
prod
(
grad_shape
)
==
0
:
if
numpy
.
prod
(
grad_shape
)
!=
0
:
continue
utt
.
verify_grad
(
mp
,
[
imval
,
grad_val
],
rng
=
rng
)
def
test_AveragePoolGrad_grad_st_extra
(
self
):
# checks the gradient of the gradient for the case that
# stride is used for extra examples
rng
=
numpy
.
random
.
RandomState
(
utt
.
fetch_seed
())
avgpoolshps
=
((
5
,
3
),
(
5
,
3
),
(
5
,
3
),
(
5
,
5
),
(
3
,
2
),
(
7
,
7
),
(
9
,
9
))
stridesizes
=
((
3
,
2
),
(
7
,
5
),
(
10
,
6
),
(
1
,
1
),
(
2
,
3
),
(
10
,
10
),
(
1
,
1
))
imvsizs
=
((
16
,
16
),
(
16
,
16
),
(
16
,
16
),
(
8
,
5
),
(
8
,
5
),
(
8
,
5
),
(
8
,
5
))
for
indx
in
numpy
.
arange
(
len
(
avgpoolshps
)):
imvsize
=
imvsizs
[
indx
]
imval
=
rng
.
rand
(
1
,
2
,
imvsize
[
0
],
imvsize
[
1
])
stride
=
stridesizes
[
indx
]
avgpoolshp
=
avgpoolshps
[
indx
]
for
ignore_border
in
[
True
,
False
]:
for
mode
in
[
'sum'
,
'average_inc_pad'
,
'average_exc_pad'
]:
grad_shape
=
Pool
.
out_shape
(
imval
.
shape
,
avgpoolshp
,
ignore_border
=
ignore_border
,
st
=
stride
)
grad_val
=
rng
.
rand
(
*
grad_shape
)
grad_val
=
rng
.
rand
(
*
grad_shape
)
def
mp
(
input
,
grad
):
def
mp
(
input
,
grad
):
grad_op
=
AveragePoolGrad
(
ignore_border
=
ignore_border
,
grad_op
=
AveragePoolGrad
(
ndim
=
len
(
avgpoolshp
),
ignore_border
=
ignore_border
,
mode
=
mode
)
mode
=
mode
)
return
grad_op
(
input
,
grad
,
avgpoolshp
,
stride
)
return
grad_op
(
input
,
grad
,
avgpoolshp
,
stride
)
# skip the grad verification when the output is empty
if
numpy
.
prod
(
grad_shape
)
==
0
:
continue
utt
.
verify_grad
(
mp
,
[
imval
,
grad_val
],
rng
=
rng
)
utt
.
verify_grad
(
mp
,
[
imval
,
grad_val
],
rng
=
rng
)
def
test_DownsampleFactorMaxPaddingStride_grad_grad
(
self
):
def
test_DownsampleFactorMaxPaddingStride_grad_grad
(
self
):
rng
=
numpy
.
random
.
RandomState
(
utt
.
fetch_seed
())
rng
=
numpy
.
random
.
RandomState
(
utt
.
fetch_seed
())
imgsizes
=
((
10
,
10
),
(
10
,
5
),
(
5
,
5
))
# maxpool, stride, padding, input sizes
maxpoolsizes
=
((
5
,
3
),
(
3
,
5
),
(
3
,
3
))
examples
=
(
stridesizes
=
((
3
,
2
),
(
2
,
3
),
(
3
,
3
))
((
3
,),
(
2
,),
(
2
,),
(
10
,)),
paddingsizes
=
((
2
,
2
),
(
2
,
1
),
(
2
,
2
))
((
3
,),
(
2
,),
(
2
,),
(
2
,
10
,)),
((
3
,),
(
2
,),
(
2
,),
(
2
,
1
,
10
,)),
for
i
in
range
(
len
(
imgsizes
)):
((
5
,
3
),
(
3
,
2
),
(
2
,
2
),
(
1
,
1
,
10
,
10
)),
imgsize
=
imgsizes
[
i
]
((
5
,
3
),
(
3
,
2
),
(
2
,
2
),
(
1
,
1
,
10
,
10
)),
imval
=
rng
.
rand
(
1
,
1
,
imgsize
[
0
],
imgsize
[
1
])
*
10.0
((
3
,
5
),
(
2
,
3
),
(
2
,
1
),
(
1
,
1
,
10
,
5
)),
maxpoolsize
=
maxpoolsizes
[
i
]
((
3
,
3
),
(
3
,
3
),
(
2
,
2
),
(
1
,
1
,
5
,
5
)),
stridesize
=
stridesizes
[
i
]
((
5
,
3
,
3
),
(
3
,
2
,
2
),
(
2
,
2
,
2
),
(
1
,
1
,
10
,
5
,
5
)),
paddingsize
=
paddingsizes
[
i
]
((
3
,
5
,
3
),
(
2
,
3
,
2
),
(
2
,
1
,
2
),
(
1
,
1
,
5
,
10
,
5
)),
((
3
,
3
,
5
),
(
2
,
2
,
3
),
(
2
,
2
,
1
),
(
1
,
1
,
5
,
5
,
10
)),
)
for
(
maxpoolshp
,
stridesize
,
paddingsize
,
inputsize
)
in
examples
:
imval
=
rng
.
rand
(
*
inputsize
)
*
10.0
grad_shape
=
Pool
.
out_shape
(
imval
.
shape
,
grad_shape
=
Pool
.
out_shape
(
imval
.
shape
,
maxpoolsize
,
st
=
stridesize
,
maxpoolshp
,
ndim
=
len
(
maxpoolshp
),
st
=
stridesize
,
ignore_border
=
True
,
ignore_border
=
True
,
padding
=
paddingsize
)
padding
=
paddingsize
)
grad_val
=
rng
.
rand
(
*
grad_shape
)
*
10.0
grad_val
=
rng
.
rand
(
*
grad_shape
)
*
10.0
def
mp
(
input
,
grad
):
def
mp
(
input
,
grad
):
out
=
Pool
(
ignore_border
=
True
)(
input
,
maxpoolsize
,
stridesize
,
out
=
Pool
(
paddingsize
)
ndim
=
len
(
maxpoolshp
),
grad_op
=
MaxPoolGrad
(
ignore_border
=
True
)
ignore_border
=
True
,
return
grad_op
(
input
,
out
,
grad
,
maxpoolsize
,
stridesize
,
)(
input
,
maxpoolshp
,
stridesize
,
paddingsize
)
paddingsize
)
grad_op
=
MaxPoolGrad
(
ndim
=
len
(
maxpoolshp
),
ignore_border
=
True
)
return
grad_op
(
input
,
out
,
grad
,
maxpoolshp
,
stridesize
,
paddingsize
)
utt
.
verify_grad
(
mp
,
[
imval
,
grad_val
],
rng
=
rng
)
utt
.
verify_grad
(
mp
,
[
imval
,
grad_val
],
rng
=
rng
)
def
test_AveragePoolPaddingStride_grad_grad
(
self
):
def
test_AveragePoolPaddingStride_grad_grad
(
self
):
rng
=
numpy
.
random
.
RandomState
(
utt
.
fetch_seed
())
rng
=
numpy
.
random
.
RandomState
(
utt
.
fetch_seed
())
imgsizes
=
((
10
,
10
),
(
10
,
5
),
(
5
,
5
))
# avgpool, stride, padding, input sizes
avgpoolsizes
=
((
5
,
3
),
(
3
,
5
),
(
3
,
3
))
examples
=
(
stridesizes
=
((
3
,
2
),
(
2
,
3
),
(
3
,
3
))
((
3
,),
(
2
,),
(
2
,),
(
10
,)),
paddingsizes
=
((
2
,
2
),
(
2
,
1
),
(
2
,
2
))
((
3
,),
(
2
,),
(
2
,),
(
2
,
10
,)),
((
3
,),
(
2
,),
(
2
,),
(
2
,
1
,
10
,)),
for
i
in
range
(
len
(
imgsizes
)):
((
5
,
3
),
(
3
,
2
),
(
2
,
2
),
(
1
,
1
,
10
,
10
)),
imgsize
=
imgsizes
[
i
]
((
5
,
3
),
(
3
,
2
),
(
2
,
2
),
(
1
,
1
,
10
,
10
)),
imval
=
rng
.
rand
(
1
,
1
,
imgsize
[
0
],
imgsize
[
1
])
*
10.0
((
3
,
5
),
(
2
,
3
),
(
2
,
1
),
(
1
,
1
,
10
,
5
)),
avgpoolsize
=
avgpoolsizes
[
i
]
((
3
,
3
),
(
3
,
3
),
(
2
,
2
),
(
1
,
1
,
5
,
5
)),
stridesize
=
stridesizes
[
i
]
((
5
,
3
,
3
),
(
3
,
2
,
2
),
(
2
,
2
,
2
),
(
1
,
1
,
10
,
5
,
5
)),
paddingsize
=
paddingsizes
[
i
]
((
3
,
5
,
3
),
(
2
,
3
,
2
),
(
2
,
1
,
2
),
(
1
,
1
,
5
,
10
,
5
)),
((
3
,
3
,
5
),
(
2
,
2
,
3
),
(
2
,
2
,
1
),
(
1
,
1
,
5
,
5
,
10
)),
)
for
(
avgpoolshp
,
stridesize
,
paddingsize
,
inputsize
)
in
examples
:
imval
=
rng
.
rand
(
*
inputsize
)
*
10.0
# 'average_exc_pad' with non-zero padding is not implemented
# 'average_exc_pad' with non-zero padding is not implemented
for
mode
in
[
'sum'
,
'average_inc_pad'
]:
for
mode
in
[
'sum'
,
'average_inc_pad'
]:
grad_shape
=
Pool
.
out_shape
(
imval
.
shape
,
grad_shape
=
Pool
.
out_shape
(
imval
.
shape
,
avgpoolsize
,
st
=
stridesize
,
avgpoolshp
,
ignore_border
=
True
,
padding
=
paddingsize
)
ndim
=
len
(
avgpoolshp
),
st
=
stridesize
,
ignore_border
=
True
,
padding
=
paddingsize
)
grad_val
=
rng
.
rand
(
*
grad_shape
)
*
10.0
grad_val
=
rng
.
rand
(
*
grad_shape
)
*
10.0
def
mp
(
input
,
grad
):
def
mp
(
input
,
grad
):
grad_op
=
AveragePoolGrad
(
ignore_border
=
True
,
mode
=
mode
)
grad_op
=
AveragePoolGrad
(
ndim
=
len
(
avgpoolshp
),
return
grad_op
(
input
,
grad
,
avgpoolsize
,
stridesize
,
paddingsize
)
ignore_border
=
True
,
mode
=
mode
)
return
grad_op
(
input
,
grad
,
avgpoolshp
,
stridesize
,
paddingsize
)
utt
.
verify_grad
(
mp
,
[
imval
,
grad_val
],
rng
=
rng
)
utt
.
verify_grad
(
mp
,
[
imval
,
grad_val
],
rng
=
rng
)
def
test_DownsampleFactorMax_hessian
(
self
):
def
test_DownsampleFactorMax_hessian
(
self
):
...
@@ -630,26 +831,31 @@ class TestDownsampleFactorMax(utt.InferShapeTester):
...
@@ -630,26 +831,31 @@ class TestDownsampleFactorMax(utt.InferShapeTester):
def
test_DownsampleFactorMaxGradGrad_grad
(
self
):
def
test_DownsampleFactorMaxGradGrad_grad
(
self
):
rng
=
numpy
.
random
.
RandomState
(
utt
.
fetch_seed
())
rng
=
numpy
.
random
.
RandomState
(
utt
.
fetch_seed
())
imgsizes
=
((
10
,
10
),
(
10
,
5
),
(
5
,
5
))
# maxpool, stride, padding, input sizes
maxpoolsizes
=
((
5
,
3
),
(
3
,
5
),
(
3
,
3
))
examples
=
(
stridesizes
=
((
3
,
2
),
(
2
,
3
),
(
3
,
3
))
((
3
,),
(
2
,),
(
2
,),
(
10
,)),
paddingsizes
=
((
2
,
2
),
(
2
,
1
),
(
2
,
2
))
((
3
,),
(
2
,),
(
2
,),
(
2
,
10
,)),
((
3
,),
(
2
,),
(
2
,),
(
2
,
1
,
10
,)),
for
i
in
range
(
len
(
imgsizes
)):
((
5
,
3
),
(
3
,
2
),
(
2
,
2
),
(
1
,
1
,
10
,
10
)),
imgsize
=
imgsizes
[
i
]
((
5
,
3
),
(
3
,
2
),
(
2
,
2
),
(
1
,
1
,
10
,
10
)),
imval1
=
rng
.
rand
(
1
,
1
,
imgsize
[
0
],
imgsize
[
1
])
*
10.0
((
3
,
5
),
(
2
,
3
),
(
2
,
1
),
(
1
,
1
,
10
,
5
)),
imval2
=
rng
.
rand
(
1
,
1
,
imgsize
[
0
],
imgsize
[
1
])
*
10.0
((
3
,
3
),
(
3
,
3
),
(
2
,
2
),
(
1
,
1
,
5
,
5
)),
maxpoolsize
=
maxpoolsizes
[
i
]
((
5
,
3
,
3
),
(
3
,
2
,
2
),
(
2
,
2
,
2
),
(
1
,
1
,
10
,
5
,
5
)),
stridesize
=
stridesizes
[
i
]
((
3
,
5
,
3
),
(
2
,
3
,
2
),
(
2
,
1
,
2
),
(
1
,
1
,
5
,
10
,
5
)),
paddingsize
=
paddingsizes
[
i
]
((
3
,
3
,
5
),
(
2
,
2
,
3
),
(
2
,
2
,
1
),
(
1
,
1
,
5
,
5
,
10
)),
)
for
(
maxpoolshp
,
stridesize
,
paddingsize
,
inputsize
)
in
examples
:
imval1
=
rng
.
rand
(
*
inputsize
)
*
10.0
imval2
=
rng
.
rand
(
*
inputsize
)
*
10.0
def
mp
(
input1
,
input2
):
def
mp
(
input1
,
input2
):
op1
=
Pool
(
ignore_border
=
True
)
op1
=
Pool
(
ndim
=
len
(
maxpoolshp
),
ignore_border
=
True
)
pooled_out
=
op1
(
input1
,
maxpools
ize
,
stride
=
stridesize
,
pooled_out
=
op1
(
input1
,
maxpools
hp
,
stridesize
,
paddingsize
)
pad
=
paddingsize
)
op2
=
DownsampleFactorMaxGradGrad
(
op2
=
DownsampleFactorMaxGradGrad
(
ignore_border
=
True
)
ndim
=
len
(
maxpoolshp
),
out
=
op2
(
input1
,
pooled_out
,
input2
,
ws
=
maxpoolsize
,
ignore_border
=
True
)
stride
=
stridesize
,
pad
=
paddingsize
)
out
=
op2
(
input1
,
pooled_out
,
input2
,
maxpoolshp
,
stridesize
,
paddingsize
)
return
out
return
out
utt
.
verify_grad
(
mp
,
[
imval1
,
imval2
],
rng
=
rng
)
utt
.
verify_grad
(
mp
,
[
imval1
,
imval2
],
rng
=
rng
)
...
@@ -679,6 +885,32 @@ class TestDownsampleFactorMax(utt.InferShapeTester):
...
@@ -679,6 +885,32 @@ class TestDownsampleFactorMax(utt.InferShapeTester):
mode
=
mode
)
mode
=
mode
)
utt
.
verify_grad
(
mp
,
[
imval
],
rng
=
rng
)
utt
.
verify_grad
(
mp
,
[
imval
],
rng
=
rng
)
def
test_max_pool_3d_3D
(
self
):
rng
=
numpy
.
random
.
RandomState
(
utt
.
fetch_seed
())
maxpoolshps
=
((
1
,
1
,
1
),
(
3
,
2
,
1
))
imval
=
rng
.
rand
(
4
,
5
,
6
)
images
=
tensor
.
dtensor3
()
for
maxpoolshp
,
ignore_border
,
mode
in
product
(
maxpoolshps
,
[
True
,
False
],
[
'max'
,
'sum'
,
'average_inc_pad'
,
'average_exc_pad'
]):
# print 'maxpoolshp =', maxpoolshp
# print 'ignore_border =', ignore_border
numpy_output_val
=
self
.
numpy_max_pool_nd
(
imval
,
maxpoolshp
,
ignore_border
,
mode
=
mode
)
output
=
pool_3d
(
images
,
maxpoolshp
,
ignore_border
,
mode
=
mode
)
output_val
=
function
([
images
],
output
)(
imval
)
utt
.
assert_allclose
(
output_val
,
numpy_output_val
)
def
mp
(
input
):
return
pool_3d
(
input
,
maxpoolshp
,
ignore_border
,
mode
=
mode
)
utt
.
verify_grad
(
mp
,
[
imval
],
rng
=
rng
)
def
test_max_pool_2d_2D_same_size
(
self
):
def
test_max_pool_2d_2D_same_size
(
self
):
rng
=
numpy
.
random
.
RandomState
(
utt
.
fetch_seed
())
rng
=
numpy
.
random
.
RandomState
(
utt
.
fetch_seed
())
test_input_array
=
numpy
.
array
([[[
test_input_array
=
numpy
.
array
([[[
...
...
编写
预览
Markdown
格式
0%
重试
或
添加新文件
添加附件
取消
您添加了
0
人
到此讨论。请谨慎行事。
请先完成此评论的编辑!
取消
请
注册
或者
登录
后发表评论