Skip to content
项目
群组
代码片段
帮助
当前项目
正在载入...
登录 / 注册
切换导航面板
P
pytensor
项目
项目
详情
活动
周期分析
仓库
仓库
文件
提交
分支
标签
贡献者
图表
比较
统计图
议题
0
议题
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
CI / CD
CI / CD
流水线
作业
日程
统计图
Wiki
Wiki
代码片段
代码片段
成员
成员
折叠边栏
关闭边栏
活动
图像
聊天
创建新问题
作业
提交
问题看板
Open sidebar
testgroup
pytensor
Commits
0c599015
提交
0c599015
authored
10月 26, 2015
作者:
Pascal Lamblin
浏览文件
操作
浏览文件
下载
差异文件
Merge pull request #3285 from JesseLivezey/cpu_corr3
[WIP] CpuCorrMM closes #3026 - redux
上级
4736c9b3
bbe6cb61
隐藏空白字符变更
内嵌
并排
正在显示
4 个修改的文件
包含
1335 行增加
和
0 行删除
+1335
-0
conv.txt
doc/library/tensor/nnet/conv.txt
+7
-0
corr.py
theano/tensor/nnet/corr.py
+619
-0
corr_gemm.c
theano/tensor/nnet/corr_gemm.c
+360
-0
test_corr.py
theano/tensor/nnet/tests/test_corr.py
+349
-0
没有找到文件。
doc/library/tensor/nnet/conv.txt
浏览文件 @
0c599015
...
@@ -120,6 +120,13 @@ TODO: Give examples on how to use these things! They are pretty complicated.
...
@@ -120,6 +120,13 @@ TODO: Give examples on how to use these things! They are pretty complicated.
available. To explicitly disable the graph optimizer, set
available. To explicitly disable the graph optimizer, set
``THEANO_FLAGS=optimizer_excluding=conv_gemm`` in your environment.
``THEANO_FLAGS=optimizer_excluding=conv_gemm`` in your environment.
If using it, please see the warning about a bug in CUDA 5.0 to 6.0 below.
If using it, please see the warning about a bug in CUDA 5.0 to 6.0 below.
- :func:`CorrMM <theano.tensor.nnet.corr.CorrMM>`
This is a CPU-only 2d correlation implementation taken from
`caffe <https://github.com/BVLC/caffe/blob/master/src/caffe/layers/conv_layer.cpp>`_
and also used by Torch. It does not flip the kernel. As it provides a gradient,
you can use it as a replacement for nnet.conv2d. There is currently no
optimization to move this to GPU. This will be added when the new convolution
interface is finished.
- :func:`dnn_conv <theano.sandbox.cuda.dnn.dnn_conv>` GPU-only
- :func:`dnn_conv <theano.sandbox.cuda.dnn.dnn_conv>` GPU-only
convolution using NVIDIA's cuDNN library. This requires that you have
convolution using NVIDIA's cuDNN library. This requires that you have
cuDNN installed and available, which in turn requires CUDA 6.5 and a GPU
cuDNN installed and available, which in turn requires CUDA 6.5 and a GPU
...
...
theano/tensor/nnet/corr.py
0 → 100644
浏览文件 @
0c599015
import os
import logging

import theano
from theano import Apply
from theano import gof
from theano.tensor import as_tensor_variable, TensorType
from theano.tensor.blas_headers import blas_header_text
from theano.tensor.blas import ldflags

# Module-level logger, following the Theano convention of one logger per
# module keyed on __name__.
_logger = logging.getLogger(__name__)
class BaseCorrMM(gof.Op):
    """
    Base class for `CorrMM`, `CorrMM_gradWeights` and
    `CorrMM_gradInputs`. Cannot be used directly.

    Parameters
    ----------
    border_mode : {'valid', 'full', 'half'}
        Additionally, the padding size could be directly specified by an integer
        or a pair of integers.
    subsample
        Perform subsampling of the output (default: (1, 1)).
    """

    # Tell Theano not to enforce broadcastable patterns on the inputs.
    check_broadcast = False
    # These two attributes fully determine the Op's identity (used for
    # __eq__ / __hash__ generated from __props__).
    __props__ = ('border_mode', 'subsample')

    def __init__(self, border_mode="valid", subsample=(1, 1)):
        # A single non-negative integer is shorthand for the same padding
        # on both axes; normalize it to a pair.
        if isinstance(border_mode, int):
            if border_mode < 0:
                raise ValueError(
                    'invalid border_mode {}, which must be a '
                    'non-negative integer'.format(border_mode))
            border_mode = (border_mode, border_mode)
        if isinstance(border_mode, tuple):
            if (len(border_mode) != 2 or
                    border_mode[0] < 0 or border_mode[1] < 0):
                raise ValueError(
                    'invalid border_mode {}, which must be a '
                    'pair of non-negative integers'.format(border_mode))
            pad_h, pad_w = map(int, border_mode)
            border_mode = (pad_h, pad_w)
        # At this point border_mode must be a valid pair or one of the
        # three string shortcuts.
        if not ((isinstance(border_mode, tuple) and min(border_mode) >= 0) or
                border_mode in ('valid', 'full', 'half')):
            raise ValueError(
                'invalid border_mode {}, which must be either '
                '"valid", "full", "half", an integer or a pair of'
                ' integers'.format(border_mode))
        self.border_mode = border_mode
        if len(subsample) != 2:
            raise ValueError("subsample must have two elements")
        self.subsample = subsample

    @property
    def pad(self):
        """Return the explicit padding pair, or (0, 0) for 'valid'.

        NOTE(review): for the string modes 'full' and 'half' this returns
        the string itself, not a resolved pair — confirm callers expect that.
        """
        if self.border_mode != 'valid':
            return self.border_mode
        return (0, 0)

    def __str__(self):
        return '%s{%s,%s}' % (
            self.__class__.__name__,
            self.border_mode,
            str(self.subsample))

    def c_support_code(self):
        # BLAS prototypes needed by the generated gemm calls.
        return blas_header_text()

    def c_libraries(self):
        return ldflags()

    def c_compile_args(self):
        return ldflags(libs=False, flags=True)

    def c_lib_dirs(self):
        return ldflags(libs=False, libs_dir=True)

    def c_header_dirs(self):
        return ldflags(libs=False, include_dir=True)

    def c_headers(self):
        return ['<stdio.h>']

    def c_code_cache_version(self):
        # raise this whenever modifying any of the support_code_files
        return (1, 0)

    def c_support_code_apply(self, node, nodename):
        # REMEMBER TO RAISE c_code_cache_version when changing any of
        # these files
        sub = {}
        dtype = str(node.__dict__['inputs'][0].dtype)
        assert dtype in ('float32', 'float64')
        # Select the BLAS routine and C types matching the input dtype.
        if dtype == 'float32':
            sub['gemm'] = 'sgemm_'
            sub['float_type'] = 'npy_float'
            sub['float_typenum'] = 'NPY_FLOAT'
            sub['n_bytes'] = 4
            sub['c_float_type'] = 'float'
        else:
            sub['gemm'] = 'dgemm_'
            sub['float_type'] = 'npy_double'
            sub['float_typenum'] = 'NPY_DOUBLE'
            sub['n_bytes'] = 8
            sub['c_float_type'] = 'double'
        files = ['corr_gemm.c']
        # FIX: the original did open(...).read() in a comprehension, leaking
        # the file handles, and accumulated the pieces with '+=' (quadratic).
        # Use a context manager and a single join instead.
        codes = []
        for f in files:
            path = os.path.join(os.path.split(__file__)[0], f)
            with open(path) as support_file:
                codes.append(support_file.read())
        return ''.join(codes) % sub

    def c_code_helper(self, bottom, weights, top, direction, sub,
                      height=None, width=None):
        """
        This generates the C code for CorrMM (direction="forward"),
        CorrMM_gradWeights (direction="backprop weights"), and
        CorrMM_gradInputs (direction="backprop inputs").
        Depending on the direction, one of bottom, weights, top will
        receive the output, while the other two serve as inputs.

        :param bottom: Variable name of the input images in the forward pass,
            or the gradient of the input images in backprop wrt. inputs
        :param weights: Variable name of the filters in the forward pass,
            or the gradient of the filters in backprop wrt. weights
        :param top: Variable name of the output images / feature maps in the
            forward pass, or the gradient of the outputs in the backprop passes
        :param direction: "forward" to correlate bottom with weights and store
            results in top,
            "backprop weights" to do a valid convolution of bottom with top
            (swapping the first two dimensions) and store results in weights,
            and "backprop inputs" to do a full convolution of top with weights
            (swapping the first two dimensions) and store results in bottom.
        :param sub: Dictionary of substitutions useable to help generating the
            C code.
        :param height: If self.subsample[0] != 1, a variable giving the height
            of the filters for direction="backprop weights" or the height of
            the input images for direction="backprop inputs".
            If self.border_mode == 'half', a variable giving the height of the
            filters for direction="backprop weights". Ignored otherwise.
        :param width: If self.subsample[1] != 1, a variable giving the width
            of the filters for direction="backprop weights" or the width of the
            input images for direction="backprop inputs".
            If self.border_mode == 'half', a variable giving the width of the
            filters for direction="backprop weights". Ignored otherwise.
        """
        dH, dW = self.subsample
        # Encode the string border modes as negative sentinel values that
        # the generated C code resolves once the kernel shape is known:
        # -1 means "half", -2 means "full".
        if self.border_mode == "half":
            padH = padW = -1
        elif self.border_mode == "full":
            padH = padW = -2
        elif isinstance(self.border_mode, tuple):
            padH, padW = self.border_mode
        else:
            assert self.border_mode == "valid"
            padH = padW = 0
        # Map the symbolic direction onto the integer codes used by the
        # C-level corrMM() entry point, and pick the output variable.
        if direction == "forward":
            direction = 0
            out = top
        elif direction == "backprop weights":
            direction = 1
            out = weights
        elif direction == "backprop inputs":
            direction = 2
            out = bottom
        else:
            raise ValueError("direction must be one of 'forward', "
                             "'backprop weights', 'backprop inputs'")
        # When subsampling, we cannot unambiguously infer the height and width
        # of bottom and weights from top, so we require them to be given.
        # Similarly, when border_mode="half", we cannot infer the weight size.
        if ((direction != 0) and (dH != 1)) or ((direction == 1) and (padH == -1)):
            if not height:
                raise ValueError("height must be given for backprop with vertical sampling or border_mode='half'")
            height = '(*(npy_int*)(PyArray_DATA(%s)))' % height
        else:
            height = 'NULL'
        if ((direction != 0) and (dW != 1)) or ((direction == 1) and (padW == -1)):
            if not width:
                raise ValueError("width must be given for backprop with horizontal sampling or border_mode='half'")
            width = '(*(npy_int*)(PyArray_DATA(%s)))' % width
        else:
            width = 'NULL'
        # Merge the local variables (direction, dH, dW, padH, padW, bottom,
        # weights, top, out, height, width) into the substitution dict used
        # by the %-template below.
        sub = sub.copy()
        sub.update(locals())

        return """
    // Mandatory args
    int direction = %(direction)s;  // forward, bprop weights, bprop inputs

    // Optional args
    int dH = %(dH)s;
    int dW = %(dW)s;
    int padH = %(padH)s;
    int padW = %(padW)s;

    PyArrayObject * bottom = %(bottom)s;
    PyArrayObject * weights = %(weights)s;
    PyArrayObject * top = %(top)s;
    PyArrayObject * out2 = NULL;

    // Obtain or infer kernel width and height
    // (we need to know it early to be able to handle auto-padding)
    int kH, kW;
    if (direction != 1) {
        // weight is an input variable, we can just read its shape
        kH = PyArray_DIMS(weights)[2];
        kW = PyArray_DIMS(weights)[3];
    }
    else {
        if ((dH != 1) || (padH == -1)) {
            // vertical subsampling or half padding, kernel height is specified
            kH = %(height)s;
        }
        else if (padH == -2) {
            // vertical full padding, we can infer the kernel height
            kH = 2 - PyArray_DIMS(bottom)[2] + (PyArray_DIMS(top)[2] - 1) * dH;
        }
        else {
            // explicit padding, we can infer the kernel height
            kH = PyArray_DIMS(bottom)[2] + 2*padH - (PyArray_DIMS(top)[2] - 1) * dH;
        }
        if ((dW != 1) || (padW == -1)) {
            kW = %(width)s;
        }
        else if (padW == -2) {
            kW = 2 - PyArray_DIMS(bottom)[3] + (PyArray_DIMS(top)[3] - 1) * dW;
        }
        else {
            kW = PyArray_DIMS(bottom)[3] + 2*padW - (PyArray_DIMS(top)[3] - 1) * dW;
        }
    }

    // Auto-padding if requested
    if (padH == -1) {  // vertical half padding
        padH = kH / 2;
    }
    else if (padH == -2) {  // vertical full padding
        padH = kH - 1;
    }
    else if (padH < 0) {
        PyErr_SetString(PyExc_ValueError, "BaseCorrMM: padH must be >= -2");
        %(fail)s
    }
    if (padW == -1) {  // horizontal half padding
        padW = kW / 2;
    }
    else if (padW == -2) {  // horizontal full padding
        padW = kW - 1;
    }
    else if (padW < 0) {
        PyErr_SetString(PyExc_ValueError, "BaseCorrMM: padW must be >= -2");
        %(fail)s
    }

    // Infer output shape
    npy_intp out_dim[4];
    switch(direction) {
    case 0:  // forward pass
        // output is top: (batchsize, num_filters, height, width)
        // height and width: top = (bottom + 2*pad - weight) / sample + 1
        out_dim[0] = (npy_intp)PyArray_DIMS(bottom)[0];
        out_dim[1] = (npy_intp)PyArray_DIMS(weights)[0];
        out_dim[2] = (npy_intp)((PyArray_DIMS(bottom)[2] + 2*padH - PyArray_DIMS(weights)[2]) / dH + 1);
        out_dim[3] = (npy_intp)((PyArray_DIMS(bottom)[3] + 2*padW - PyArray_DIMS(weights)[3]) / dW + 1);
        break;
    case 1:  // backprop wrt. weights
        // output is weights: (num_filters, num_channels, height, width)
        // height and width: weights = bottom + 2*pad - (top - 1) * sample
        out_dim[0] = (npy_intp)PyArray_DIMS(top)[1];
        out_dim[1] = (npy_intp)PyArray_DIMS(bottom)[1];
        out_dim[2] = (npy_intp)kH;  // already inferred further above
        out_dim[3] = (npy_intp)kW;  // how convenient
        break;
    case 2:  // backprop wrt. inputs
        // output is bottom: (batchsize, num_channels, height, width)
        // height and width: bottom = (top - 1) * sample + weights - 2*pad
        out_dim[0] = (npy_intp)PyArray_DIMS(top)[0];
        out_dim[1] = (npy_intp)PyArray_DIMS(weights)[1];
        out_dim[2] = (npy_intp)((dH != 1) ? %(height)s : (PyArray_DIMS(top)[2] - 1) * dH + PyArray_DIMS(weights)[2] - 2*padH);
        out_dim[3] = (npy_intp)((dW != 1) ? %(width)s : (PyArray_DIMS(top)[3] - 1) * dW + PyArray_DIMS(weights)[3] - 2*padW);
        break;
    default:
        PyErr_SetString(PyExc_ValueError, "BaseCorrMM: direction must be 0, 1, or 2\\n");
        %(fail)s
    }

    // Prepare output array
    int typenum;
    if ( !(%(out)s
           && PyArray_NDIM(%(out)s)==4
           && PyArray_IS_C_CONTIGUOUS(%(out)s)
           && PyArray_DIMS(%(out)s)[0]==out_dim[0]
           && PyArray_DIMS(%(out)s)[1]==out_dim[1]
           && PyArray_DIMS(%(out)s)[2]==out_dim[2]
           && PyArray_DIMS(%(out)s)[3]==out_dim[3]))
    {
        Py_XDECREF(%(out)s);
        if (direction != 1) {
          typenum = PyArray_TYPE(weights);
        }
        else {
          typenum = PyArray_TYPE(bottom);
        }
        %(out)s = (PyArrayObject*)PyArray_EMPTY(4,
                                                out_dim,
                                                typenum,
                                                0);
        if (NULL == %(out)s)
        {
            PyErr_Format(PyExc_RuntimeError,
                    "BaseCorrMM: Failed to allocate output of %%d x %%d x %%d x %%d",
                    out_dim[0], out_dim[1], out_dim[2], out_dim[3]);
            %(fail)s
        }
    }

    // Call corrMM code
    out2 = corrMM(%(bottom)s, %(weights)s, %(top)s, direction, dH, dW, padH, padW);
    if (out2==NULL){
        %(fail)s
    }
    assert (out2 == %(out)s);

""" % sub
class CorrMM(BaseCorrMM):
    """
    CPU correlation implementation using Matrix Multiplication.

    Parameters
    ----------
    border_mode
        The width of a border of implicit zeros to pad the
        input with. Must be a tuple with 2 elements giving the numbers of rows
        and columns to pad on each side, or a single integer to pad the same
        on all sides, or a string shortcut setting the padding at runtime:
        ``'valid'`` for ``(0, 0)`` (valid convolution, no padding), ``'full'``
        for ``(kernel_rows - 1, kernel_columns - 1)`` (full convolution),
        ``'half'`` for ``(kernel_rows // 2, kernel_columns // 2)`` (same
        convolution for odd-sized kernels). Note that the two widths are each
        applied twice, once per side (left and right, top and bottom).
    subsample
        The subsample operation applied to each output image.
        Should be a tuple with 2 elements.
        `(sv, sh)` is equivalent to `CorrMM(...)(...)[:,:,::sv, ::sh]`,
        but faster.
        Set to `(1, 1)` to disable subsampling.
    """

    def __init__(self, border_mode="valid", subsample=(1, 1)):
        super(CorrMM, self).__init__(border_mode, subsample)

    def make_node(self, img, kern):
        # Build the Apply node for the forward correlation.
        # Both inputs must be 4D: (batch, channels, rows, columns).
        img = as_tensor_variable(img)
        kern = as_tensor_variable(kern)
        if img.type.ndim != 4:
            raise TypeError('img must be 4D tensor')
        if kern.type.ndim != 4:
            raise TypeError('kern must be 4D tensor')
        # Output broadcastability: batch dim follows img, filter dim
        # follows kern; spatial dims are never broadcastable.
        broadcastable = [img.type.broadcastable[0],
                         kern.type.broadcastable[0],
                         False, False]
        dtype = img.type.dtype
        return Apply(self, [img, kern], [TensorType(dtype, broadcastable)()])

    def infer_shape(self, node, input_shape):
        # Symbolic shape inference mirroring the C code: first resolve the
        # string border modes to the sentinel values -1 ('half') / -2 ('full').
        if self.border_mode == "half":
            padH = padW = -1
        elif self.border_mode == "full":
            padH = padW = -2
        elif isinstance(self.border_mode, tuple):
            padH, padW = self.border_mode
        else:
            assert self.border_mode == "valid"
            padH = padW = 0
        dH, dW = self.subsample
        imshp = input_shape[0]
        kshp = input_shape[1]
        # Split off batch size / filter count; keep only the spatial dims.
        bsize, imshp = imshp[0], list(imshp[2:])
        nkern, kshp = kshp[0], list(kshp[2:])
        kH, kW = kshp
        # Resolve the sentinel paddings now that the kernel shape is known.
        if padH == -1:
            padH = kH // 2
        elif padH == -2:
            padH = kH - 1
        elif padH < 0:
            raise ValueError("CorrMM: border_mode must be >= 0")
        if padW == -1:
            padW = kW // 2
        elif padW == -2:
            padW = kW - 1
        elif padW < 0:
            raise ValueError("CorrMM: border_mode must be >= 0")
        # top = (bottom + 2*pad - kernel) // stride + 1, per axis.
        out_shp0 = (imshp[0] + 2 * padH - kshp[0]) // dH + 1
        out_shp1 = (imshp[1] + 2 * padW - kshp[1]) // dW + 1
        out_shp = (out_shp0, out_shp1)
        return [(bsize, nkern) + out_shp]

    def c_code(self, node, nodename, inp, out_, sub):
        # Delegate to the shared template in BaseCorrMM with the
        # forward-direction roles of bottom/weights/top.
        bottom, weights = inp
        top, = out_
        direction = "forward"
        return super(CorrMM, self).c_code_helper(bottom, weights, top,
                                                 direction, sub)

    def grad(self, inp, grads):
        # Gradients are computed by the two companion Ops; the input shapes
        # are passed so they can be recovered when subsampling is ambiguous.
        bottom, weights = inp
        top, = grads
        d_bottom = CorrMM_gradInputs(self.border_mode,
                                     self.subsample)(weights, top,
                                                     bottom.shape[-2:])
        d_weights = CorrMM_gradWeights(self.border_mode,
                                       self.subsample)(bottom, top,
                                                       weights.shape[-2:])
        return d_bottom, d_weights
class CorrMM_gradWeights(BaseCorrMM):
    """
    Gradient wrt. filters for `CorrMM`.

    Notes
    -----
    You will not want to use this directly, but rely on
    Theano's automatic differentiation or graph optimization to
    use it as needed.
    """

    def __init__(self, border_mode="valid", subsample=(1, 1)):
        super(CorrMM_gradWeights, self).__init__(border_mode, subsample)

    def make_node(self, img, topgrad, shape=None):
        # img: forward-pass input images; topgrad: gradient wrt. the
        # forward-pass output. Both must be 4D.
        img = as_tensor_variable(img)
        topgrad = as_tensor_variable(topgrad)
        if img.type.ndim != 4:
            raise TypeError('img must be 4D tensor')
        if topgrad.type.ndim != 4:
            raise TypeError('topgrad must be 4D tensor')
        # With subsampling or half padding the kernel shape cannot be
        # inferred from img/topgrad alone, so it must be supplied.
        if self.subsample != (1, 1) or self.border_mode == "half":
            if shape is None:
                raise ValueError('shape must be given if subsample != (1, 1)'
                                 ' or border_mode == "half"')
            height_width = [shape[0], shape[1]]
        else:
            height_width = []
        # Output is the weight gradient: (num_filters, num_channels, kH, kW).
        broadcastable = [topgrad.type.broadcastable[1],
                         img.type.broadcastable[1],
                         False, False]
        dtype = img.type.dtype
        return Apply(self, [img, topgrad] + height_width,
                     [TensorType(dtype, broadcastable)()])

    def infer_shape(self, node, input_shape):
        # Resolve the string border modes to the sentinels -1 ('half') /
        # -2 ('full'), matching the C code.
        if self.border_mode == "half":
            padH = padW = -1
        elif self.border_mode == "full":
            padH = padW = -2
        elif isinstance(self.border_mode, tuple):
            padH, padW = self.border_mode
        else:
            assert self.border_mode == "valid"
            padH = padW = 0
        dH, dW = self.subsample
        imshp = input_shape[0]
        topshp = input_shape[1]
        # ssize = number of input channels, nkern = number of filters.
        ssize, imshp = imshp[1], list(imshp[2:])
        nkern, topshp = topshp[1], list(topshp[2:])
        # Optional extra inputs carrying the explicit kernel height/width.
        height_width = node.inputs[-2:]
        if ((dH != 1) or (padH == -1)):
            # vertical subsampling or half padding, kernel height is specified
            kH = height_width[0]
        elif padH == -2:
            # vertical full padding, we can infer the kernel height
            kH = 2 - imshp[0] + (topshp[0] - 1) * dH
        else:
            # explicit padding, we can infer the kernel height
            kH = imshp[0] + 2 * padH - (topshp[0] - 1) * dH
        if ((dW != 1) or (padW == -1)):
            kW = height_width[1]
        elif (padW == -2):
            kW = 2 - imshp[1] + (topshp[1] - 1) * dW
        else:
            kW = imshp[1] + 2 * padW - (topshp[1] - 1) * dW
        return [(nkern, ssize, kH, kW)]

    def c_code(self, node, nodename, inp, out_, sub):
        # The optional height/width inputs are only present when the kernel
        # shape cannot be inferred; default them to None otherwise.
        bottom, top = inp[:2]
        height, width = inp[2:] or (None, None)
        weights, = out_
        direction = "backprop weights"
        return super(CorrMM_gradWeights, self).c_code_helper(
            bottom, weights, top, direction, sub, height, width)

    def grad(self, inp, grads):
        bottom, top = inp[:2]
        weights, = grads
        d_bottom = CorrMM_gradInputs(self.border_mode,
                                     self.subsample)(weights, top,
                                                     bottom.shape[-2:])
        d_top = CorrMM(self.border_mode,
                       self.subsample)(bottom, weights)
        # The shape inputs (if present) carry no gradient.
        d_height_width = ((theano.gradient.DisconnectedType()(),) * 2
                          if len(inp) == 4 else ())
        return (d_bottom, d_top) + d_height_width

    def connection_pattern(self, node):
        if node.nin == 2:
            return [[1], [1]]
        else:
            return [[1], [1], [0], [0]]  # no connection to height, width
class CorrMM_gradInputs(BaseCorrMM):
    """
    Gradient wrt. inputs for `CorrMM`.

    Notes
    -----
    You will not want to use this directly, but rely on
    Theano's automatic differentiation or graph optimization to
    use it as needed.
    """

    def __init__(self, border_mode="valid", subsample=(1, 1)):
        super(CorrMM_gradInputs, self).__init__(border_mode, subsample)

    def make_node(self, kern, topgrad, shape=None):
        # kern: forward-pass filters; topgrad: gradient wrt. the forward
        # output. Both must be 4D.
        kern = as_tensor_variable(kern)
        topgrad = as_tensor_variable(topgrad)
        if kern.type.ndim != 4:
            raise TypeError('kern must be 4D tensor')
        if topgrad.type.ndim != 4:
            raise TypeError('topgrad must be 4D tensor')
        # With subsampling, the input image shape cannot be inferred from
        # topgrad alone, so it must be supplied explicitly.
        if self.subsample != (1, 1) and shape is None:
            raise ValueError('shape must be given if subsample != (1, 1)')
        height_width = ([shape[0], shape[1]]
                        if self.subsample != (1, 1) else [])
        # Output is the input-image gradient:
        # (batchsize, num_channels, rows, columns).
        broadcastable = [topgrad.type.broadcastable[0],
                         kern.type.broadcastable[1],
                         False, False]
        dtype = kern.type.dtype
        return Apply(self, [kern, topgrad] + height_width,
                     [TensorType(dtype, broadcastable)()])

    def infer_shape(self, node, input_shape):
        # Resolve the string border modes to the sentinels -1 ('half') /
        # -2 ('full'), matching the C code.
        if self.border_mode == "half":
            padH = padW = -1
        elif self.border_mode == "full":
            padH = padW = -2
        elif isinstance(self.border_mode, tuple):
            padH, padW = self.border_mode
        else:
            assert self.border_mode == "valid"
            padH = padW = 0
        dH, dW = self.subsample
        kshp = input_shape[0]
        topshp = input_shape[1]
        # ssize = number of input channels, bsize = batch size.
        ssize, kshp = kshp[1], list(kshp[2:])
        bsize, topshp = topshp[0], list(topshp[2:])
        # Optional extra inputs carrying the explicit image height/width.
        height_width = node.inputs[-2:]
        # Resolve the sentinel paddings from the (known) kernel shape.
        if padH == -1:
            padH = kshp[0] // 2
        elif padH == -2:
            padH = kshp[0] - 1
        elif padH < -2:
            raise ValueError('CorrMM_gradInputs: border_mode must be >= 0.')
        if padW == -1:
            padW = kshp[1] // 2
        elif padW == -2:
            padW = kshp[1] - 1
        elif padW < -2:
            raise ValueError('CorrMM_gradInputs: border_mode must be >= 0.')
        # bottom = (top - 1) * stride + kernel - 2*pad, unless subsampling
        # makes the size ambiguous, in which case the given shape is used.
        if dH != 1:
            out_shp0 = height_width[0]
        else:
            out_shp0 = (topshp[0] - 1) * dH + kshp[0] - 2 * padH
        if dW != 1:
            out_shp1 = height_width[1]
        else:
            out_shp1 = (topshp[1] - 1) * dW + kshp[1] - 2 * padW
        out_shp = (out_shp0, out_shp1)
        return [(bsize, ssize) + out_shp]

    def c_code(self, node, nodename, inp, out_, sub):
        # The optional height/width inputs are only present with
        # subsampling; default them to None otherwise.
        weights, top = inp[:2]
        height, width = inp[2:] or (None, None)
        bottom, = out_
        direction = "backprop inputs"
        return super(CorrMM_gradInputs, self).c_code_helper(
            bottom, weights, top, direction, sub, height, width)

    def grad(self, inp, grads):
        weights, top = inp[:2]
        bottom, = grads
        d_weights = CorrMM_gradWeights(self.border_mode,
                                       self.subsample)(bottom, top,
                                                       weights.shape[-2:])
        d_top = CorrMM(self.border_mode,
                       self.subsample)(bottom, weights)
        # The shape inputs (if present) carry no gradient.
        d_height_width = ((theano.gradient.DisconnectedType()(),) * 2
                          if len(inp) == 4 else ())
        return (d_weights, d_top) + d_height_width

    def connection_pattern(self, node):
        if node.nin == 2:
            return [[1], [1]]
        else:
            return [[1], [1], [0], [0]]  # no connection to height, width
theano/tensor/nnet/corr_gemm.c
0 → 100644
浏览文件 @
0c599015
// This uses a lot of code from Caffe (http://caffe.berkeleyvision.org/);
// sources are clearly marked. Below we reproduce the original license of
// the Caffe software.
/*
Copyright (c) 2014, The Regents of the University of California (Regents)
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
1. Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
// (borrowed from Caffe: https://github.com/BVLC/caffe/blob/master/src/caffe/util/im2col.cpp)
// Loops for fast unfold + copy
//
// NOTE: this file is a template consumed by Python %-formatting in
// corr.py: %(float_type)s is substituted with npy_float/npy_double and
// literal '%' must be written as '%%'.
//
// Unfolds the (channels, height, width) image data_im into the
// (channels * kernel_h * kernel_w, height_col * width_col) matrix
// data_col, so that correlation becomes a single matrix multiply.
// Out-of-bounds (padding) positions are written as 0.
void im2col(const %(float_type)s* data_im, const int channels,
    const int height, const int width, const int kernel_h, const int kernel_w,
    const int pad_h, const int pad_w,
    const int stride_h, const int stride_w,
    %(float_type)s* data_col) {
  // Output spatial extent: (input + 2*pad - kernel) / stride + 1.
  int height_col = (height + 2 * pad_h - kernel_h) / stride_h + 1;
  int width_col = (width + 2 * pad_w - kernel_w) / stride_w + 1;
  int channels_col = channels * kernel_h * kernel_w;
  // Each output row c corresponds to one (channel, kernel row, kernel col)
  // combination; decode the offsets from c.
  for (int c = 0; c < channels_col; ++c) {
    int w_offset = c %% kernel_w;
    int h_offset = (c / kernel_w) %% kernel_h;
    int c_im = c / kernel_h / kernel_w;
    for (int h = 0; h < height_col; ++h) {
      for (int w = 0; w < width_col; ++w) {
        // Source pixel in the (possibly padded) input image.
        int h_pad = h * stride_h - pad_h + h_offset;
        int w_pad = w * stride_w - pad_w + w_offset;
        if (h_pad >= 0 && h_pad < height && w_pad >= 0 && w_pad < width)
          data_col[(c * height_col + h) * width_col + w] =
            data_im[(c_im * height + h_pad) * width + w_pad];
        else
          // Padding region: implicit zeros.
          data_col[(c * height_col + h) * width_col + w] = 0.;
      }
    }
  }
}
// Unlike the Caffe and Theano GPU versions, the data_im array is set to zero
// before the col2im call rather than doing it here. So, the result is just
// accumulated into data_im.
//
// Inverse of im2col: folds the (channels * patch_h * patch_w,
// height_col * width_col) matrix data_col back into the image data_im,
// summing overlapping contributions (callers must pre-zero data_im).
void col2im(const %(float_type)s* data_col, const int channels,
    const int height, const int width, const int patch_h, const int patch_w,
    const int pad_h, const int pad_w,
    const int stride_h, const int stride_w,
    %(float_type)s* data_im) {
  int height_col = (height + 2 * pad_h - patch_h) / stride_h + 1;
  int width_col = (width + 2 * pad_w - patch_w) / stride_w + 1;
  // NOTE(review): num_kernels is computed but never used in this function.
  int num_kernels = channels * height * width;
  int channels_col = channels * patch_h * patch_w;
  // Each column-matrix row c corresponds to one (channel, patch row,
  // patch col) combination; decode the offsets from c.
  for (int c = 0; c < channels_col; ++c) {
    int w_offset = c %% patch_w;
    int h_offset = (c / patch_w) %% patch_h;
    int c_im = c / patch_h / patch_w;
    for (int h = 0; h < height_col; ++h) {
      for (int w = 0; w < width_col; ++w) {
        // Destination pixel in the (possibly padded) image.
        int h_pad = h * stride_h - pad_h + h_offset;
        int w_pad = w * stride_w - pad_w + w_offset;
        if (h_pad >= 0 && h_pad < height && w_pad >= 0 && w_pad < width)
          // Accumulate: overlapping patches sum into the same pixel.
          data_im[(c_im * height + h_pad) * width + w_pad] +=
            data_col[(c * height_col + h) * width_col + w];
      }
    }
  }
}
// Theano op code
// GPU version authors: Arjun Jain, Frederic Bastien, Jan Schlueter
// Reference code: https://github.com/BVLC/caffe/blob/master/src/caffe/layers/conv_layer.cu
// and https://github.com/torch/cunn/blob/master/SpatialConvolutionMM.cu
// CPU version author: Jesse Livezey
// CPU version adapted from GPU version
PyArrayObject
*
corrMM
(
PyArrayObject
*
bottom
,
PyArrayObject
*
weight
,
PyArrayObject
*
top
,
const
int
direction
,
const
int
dH
=
1
,
const
int
dW
=
1
,
const
int
padH
=
0
,
const
int
padW
=
0
)
{
if
(
PyArray_NDIM
(
bottom
)
!=
4
)
{
PyErr_SetString
(
PyExc_ValueError
,
"CorrMM requires bottom of 4D"
);
return
NULL
;
}
if
(
PyArray_TYPE
(
bottom
)
!=
%
(
float_typenum
)
s
)
{
PyErr_SetString
(
PyExc_ValueError
,
"CorrMM received bottom with wrong type."
);
return
NULL
;
}
if
(
PyArray_NDIM
(
weight
)
!=
4
)
{
PyErr_SetString
(
PyExc_ValueError
,
"CorrMM requires weight of 4D"
);
return
NULL
;
}
if
(
PyArray_TYPE
(
weight
)
!=
%
(
float_typenum
)
s
)
{
PyErr_SetString
(
PyExc_ValueError
,
"CorrMM received weight with wrong type."
);
return
NULL
;
}
if
(
PyArray_NDIM
(
top
)
!=
4
)
{
PyErr_SetString
(
PyExc_ValueError
,
"CorrMM requires top of 4D"
);
return
NULL
;
}
if
(
PyArray_TYPE
(
top
)
!=
%
(
float_typenum
)
s
)
{
PyErr_SetString
(
PyExc_ValueError
,
"CorrMM received top with wrong type."
);
return
NULL
;
}
// Ensure data is contiguous
bottom
=
PyArray_GETCONTIGUOUS
(
bottom
);
weight
=
PyArray_GETCONTIGUOUS
(
weight
);
top
=
PyArray_GETCONTIGUOUS
(
top
);
// Extract some shape information for later and check shape consistency
// bottom: (batchSize, nChannels, bottomHeight, bottomWidth)
const
int
batchSize
=
PyArray_DIMS
(
bottom
)[
0
];
const
int
nChannels
=
PyArray_DIMS
(
bottom
)[
1
];
const
int
bottomHeight
=
PyArray_DIMS
(
bottom
)[
2
];
const
int
bottomWidth
=
PyArray_DIMS
(
bottom
)[
3
];
// weights: (nFilters, nChannels, rows, columns)
const
int
nFilters
=
PyArray_DIMS
(
weight
)[
0
];
const
int
kH
=
PyArray_DIMS
(
weight
)[
2
];
const
int
kW
=
PyArray_DIMS
(
weight
)[
3
];
if
(
nChannels
!=
PyArray_DIMS
(
weight
)[
1
])
{
PyErr_SetString
(
PyExc_ValueError
,
"CorrMM images and kernel must have the same stack size
\n
"
);
return
NULL
;
}
// top: (batchSize, nFilters, topHeight, topWidth)
const
int
topHeight
=
(
bottomHeight
+
2
*
padH
-
kH
)
/
dH
+
1
;
const
int
topWidth
=
(
bottomWidth
+
2
*
padW
-
kW
)
/
dW
+
1
;
if
(
batchSize
!=
PyArray_DIMS
(
top
)[
0
]
||
nFilters
!=
PyArray_DIMS
(
top
)[
1
]
||
topHeight
!=
PyArray_DIMS
(
top
)[
2
]
||
topWidth
!=
PyArray_DIMS
(
top
)[
3
])
{
PyErr_Format
(
PyExc_ValueError
,
"CorrMM shape inconsistency:
\n
"
" bottom shape: %%d %%d %%d %%d
\n
"
" weight shape: %%d %%d %%d %%d
\n
"
" top shape: %%d %%d %%d %%d (expected %%d %%d %%d %%d)
\n
"
,
batchSize
,
nChannels
,
bottomHeight
,
bottomWidth
,
nFilters
,
nChannels
,
kH
,
kW
,
PyArray_DIMS
(
top
)[
0
],
PyArray_DIMS
(
top
)[
1
],
PyArray_DIMS
(
top
)[
2
],
PyArray_DIMS
(
top
)[
3
],
batchSize
,
nFilters
,
topHeight
,
topWidth
);
return
NULL
;
}
// Create temporary columns
npy_intp
col_dim
[
2
];
col_dim
[
0
]
=
(
npy_intp
)(
nChannels
*
kW
*
kH
);
col_dim
[
1
]
=
(
npy_intp
)(
topHeight
*
topWidth
);
PyArrayObject
*
col
=
(
PyArrayObject
*
)
PyArray_EMPTY
(
2
,
col_dim
,
PyArray_TYPE
(
top
),
0
);
if
(
NULL
==
col
)
{
PyErr_Format
(
PyExc_RuntimeError
,
"CorrMM failed to allocate working memory of %%d x %%d
\n
"
,
col_dim
[
0
],
col_dim
[
1
]);
return
NULL
;
}
// Define some useful variables
const
int
bottom_stride
=
PyArray_STRIDES
(
bottom
)[
0
]
/%
(
n_bytes
)
f
;
const
int
top_stride
=
PyArray_STRIDES
(
top
)[
0
]
/%
(
n_bytes
)
f
;
const
int
K_
=
col_dim
[
0
];
const
int
N_
=
col_dim
[
1
];
const
int
M_
=
nFilters
;
const
%
(
c_float_type
)
s
one
=
1
.
0
;
const
%
(
c_float_type
)
s
zero
=
0
.
0
;
char
NTrans
=
'N'
;
char
Trans
=
'T'
;
PyArrayObject
*
output
;
if
(
direction
==
0
)
{
// forward pass
output
=
top
;
// valid correlation: im2col, then gemm
// Iterate over batch
for
(
int
n
=
0
;
n
<
batchSize
;
n
++
)
{
// First, im2col
im2col
((
%
(
float_type
)
s
*
)
PyArray_DATA
(
bottom
)
+
n
*
bottom_stride
,
nChannels
,
bottomHeight
,
bottomWidth
,
kH
,
kW
,
padH
,
padW
,
dH
,
dW
,
(
%
(
float_type
)
s
*
)
PyArray_DATA
(
col
));
// Second, gemm
%
(
gemm
)
s
(
&
NTrans
,
&
NTrans
,
&
N_
,
&
M_
,
&
K_
,
&
one
,
(
%
(
float_type
)
s
*
)
PyArray_DATA
(
col
),
&
N_
,
(
%
(
float_type
)
s
*
)
PyArray_DATA
(
weight
),
&
K_
,
&
zero
,
(
%
(
float_type
)
s
*
)
PyArray_DATA
(
top
)
+
n
*
top_stride
,
&
N_
);
}
/*
// Original caffe code for comparison
// Note that this code was translated from the Theano GPU code,
// not the Caffe CPU code.
// https://github.com/BVLC/caffe/blob/master/src/caffe/layers/conv_layer.cu
// Note that this is for grouped convolution; we can ignore groups here,
// but the group-related offsets help explain what M_, N_ and K_ are
int weight_offset = M_ * K_;
int col_offset = K_ * N_;
int top_offset = M_ * N_;
for (int n = 0; n < num_; ++n) {
// First, im2col
im2col_gpu(bottom_data + bottom[i]->offset(n), channels_, height_,
width_, kernel_h_, kernel_w_, pad_h_, pad_w_, stride_h_, stride_w_,
col_data);
// Second, innerproduct with groups
for (int g = 0; g < group_; ++g) {
caffe_gpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, M_, N_, K_,
(Dtype)1., weight + weight_offset * g, col_data + col_offset * g,
(Dtype)0., top_data + (*top)[i]->offset(n) + top_offset * g);
== (see https://github.com/BVLC/caffe/blob/master/src/caffe/util/math_functions.cu#L16)
cublasSgemm(CUBLAS_OP_N, CUBLAS_OP_N,
N_, M_, K_,
1.,
col_data + col_offset * g, N_,
weight + weight_offset * g, K_,
0.,
top_data + (*top)[i]->offset(n) + top_offset * g, N_);
}
}
*/
}
else
if
(
direction
==
1
)
{
// backprop wrt. weights
output
=
weight
;
// valid convolution: im2col, then gemm
// Iterate over batch
for
(
int
n
=
0
;
n
<
batchSize
;
n
++
)
{
// First, im2col
im2col
((
%
(
float_type
)
s
*
)
PyArray_DATA
(
bottom
)
+
n
*
bottom_stride
,
nChannels
,
bottomHeight
,
bottomWidth
,
kH
,
kW
,
padH
,
padW
,
dH
,
dW
,
(
%
(
float_type
)
s
*
)
PyArray_DATA
(
col
));
// Second, gemm
// Note that we accumulate into weight. We do so by setting beta = 0
// for the first iteration and beta = 1 for subsequent ones. (This
// is faster than setting weight to all zeros before the loop.)
%
(
gemm
)
s
(
&
Trans
,
&
NTrans
,
&
K_
,
&
M_
,
&
N_
,
&
one
,
(
%
(
float_type
)
s
*
)
PyArray_DATA
(
col
),
&
N_
,
(
%
(
float_type
)
s
*
)
PyArray_DATA
(
top
)
+
n
*
top_stride
,
&
N_
,
(
n
==
0
)
?
&
zero
:
&
one
,
(
%
(
float_type
)
s
*
)
PyArray_DATA
(
weight
),
&
K_
);
}
/*
// Original caffe code for comparison
// Note that this code was translated from the Theano GPU code,
// not the Caffe CPU code.
// https://github.com/BVLC/caffe/blob/master/src/caffe/layers/conv_layer.cu
// Note that this is for grouped convolution; we can ignore groups
for (int n = 0; n < num_; ++n) {
// Since we saved memory in the forward pass by not storing all col
// data, we will need to recompute them.
im2col_gpu(bottom_data + (*bottom)[i]->offset(n), channels_, height_,
width_, kernel_h_, kernel_w_, pad_h_, pad_w_,
stride_h_, stride_w_, col_data);
// gradient w.r.t. weight. Note that we will accumulate diffs.
for (int g = 0; g < group_; ++g) {
caffe_gpu_gemm<Dtype>(CblasNoTrans, CblasTrans, M_, K_, N_,
(Dtype)1., top_diff + top[i]->offset(n) + top_offset * g,
col_data + col_offset * g, (Dtype)1.,
weight_diff + weight_offset * g);
== (see https://github.com/BVLC/caffe/blob/master/src/caffe/util/math_functions.cu#L16)
cublasSgemm(CUBLAS_OP_T, CUBLAS_OP_N, K_, M_, N_,
1.0,
col_data + col_offset * g, N_,
top_diff + top[i]->offset(n) + top_offset * g, N_,
1.0,
weight_diff + weight_offset * g, K_);
}
}
*/
}
else
if
(
direction
==
2
)
{
// backprop wrt. inputs
output
=
bottom
;
// bottom is set to zero here rather than inside of col2im
PyArray_FILLWBYTE
(
bottom
,
0
);
// full convolution: gemm, then col2im
// Iterate over batch
for
(
int
n
=
0
;
n
<
batchSize
;
n
++
)
{
// gemm into columns
%
(
gemm
)
s
(
&
NTrans
,
&
Trans
,
&
N_
,
&
K_
,
&
M_
,
&
one
,
(
%
(
float_type
)
s
*
)
PyArray_DATA
(
top
)
+
n
*
top_stride
,
&
N_
,
(
%
(
float_type
)
s
*
)
PyArray_DATA
(
weight
),
&
K_
,
&
zero
,
(
%
(
float_type
)
s
*
)
PyArray_DATA
(
col
),
&
N_
);
// col2im back to the data
col2im
((
%
(
float_type
)
s
*
)
PyArray_DATA
(
col
),
nChannels
,
bottomHeight
,
bottomWidth
,
kH
,
kW
,
padH
,
padW
,
dH
,
dW
,
(
%
(
float_type
)
s
*
)
PyArray_DATA
(
bottom
)
+
n
*
bottom_stride
);
}
/*
// Original caffe code for comparison
// Note that this code was translated from the Theano GPU code,
// not the Caffe CPU code.
// https://github.com/BVLC/caffe/blob/master/src/caffe/layers/conv_layer.cu
for (int n = 0; n < num_; ++n) {
// gradient w.r.t. bottom data, if necessary
if (propagate_down[i]) {
for (int g = 0; g < group_; ++g) {
caffe_gpu_gemm<Dtype>(CblasTrans, CblasNoTrans, K_, N_, M_,
(Dtype)1., weight + weight_offset * g,
top_diff + top[i]->offset(n) + top_offset * g,
(Dtype)0., col_diff + col_offset * g);
== (see https://github.com/BVLC/caffe/blob/master/src/caffe/util/math_functions.cu#L16)
cublasSgemm(CUBLAS_OP_N, CUBLAS_OP_T, N_, K_, M_,
1.,
top_diff + top[i]->offset(n) + top_offset * g, N_,
weight + weight_offset * g, K_,
0.,
col_diff + col_offset * g, N_);
}
// col2im back to the data
col2im_gpu(col_diff, channels_, height_, width_,
kernel_h_, kernel_w_, pad_h_, pad_w_, stride_h_, stride_w_,
bottom_diff + (*bottom)[i]->offset(n));
}
}
*/
}
// Free temporary columns
Py_DECREF
(
col
);
// decref from contiguous check
Py_DECREF
(
bottom
);
Py_DECREF
(
weight
);
Py_DECREF
(
top
);
// Note that we don't change the refcount of the output matrix here. Output
// (re)allocation and refcounting is done in BaseCorrMM.c_code_helper();
// in here output is just aliased to one of bottom, weights, or top.
return
output
;
}
theano/tensor/nnet/tests/test_corr.py
0 → 100644
浏览文件 @
0c599015
from
nose.plugins.skip
import
SkipTest
from
nose.plugins.attrib
import
attr
import
numpy
import
theano
import
theano.tensor
as
T
from
theano.tests
import
unittest_tools
as
utt
from
theano.tensor.nnet
import
corr
,
conv
from
theano.tensor.basic
import
_allclose
class TestCorr2D(utt.InferShapeTester):
    """Tests for the CPU CorrMM ops (2D correlation via im2col + GEMM).

    Each test compares the symbolic CorrMM output against a slow pure-NumPy
    reference correlation built in :meth:`validate`, and/or checks shape
    inference for the forward op and its two gradient ops.
    """
    # FAST_COMPILE uses the Python implementation path; force FAST_RUN so the
    # C CorrMM code is actually exercised.
    if theano.config.mode == "FAST_COMPILE":
        mode = theano.compile.get_mode("FAST_RUN")
    else:
        mode = None
    dtype = theano.config.floatX

    def setUp(self):
        super(TestCorr2D, self).setUp()
        self.input = T.tensor4('input', dtype=self.dtype)
        self.input.name = 'default_V'
        self.filters = T.tensor4('filters', dtype=self.dtype)
        self.filters.name = 'default_filters'
        # CorrMM needs a C++ compiler; the NumPy reference path needs SciPy.
        if not conv.imported_scipy_signal and theano.config.cxx == "":
            raise SkipTest("CorrMM tests need SciPy or a c++ compiler")

    def validate(self, image_shape, filter_shape,
                 border_mode='valid', subsample=(1, 1),
                 input=None, filters=None,
                 verify_grad=True, non_contiguous=False):
        """
        Compare CorrMM against a pure-NumPy reference correlation.

        :param image_shape: The constant shape info passed to corrMM.
        :param filter_shape: The constant shape info passed to corrMM.
        """
        # Shapes may be given as Python ints or as constant Theano tensors;
        # reduce everything to plain integer lists.
        N_image_shape = [T.get_scalar_constant_value(T.as_tensor_variable(x))
                         for x in image_shape]
        N_filter_shape = [T.get_scalar_constant_value(T.as_tensor_variable(x))
                          for x in filter_shape]

        if input is None:
            input = self.input
        if filters is None:
            filters = self.filters

        # THEANO IMPLEMENTATION
        # we create a symbolic function so that verify_grad can work
        def sym_CorrMM(input, filters):
            # define theano graph and function
            input.name = 'input'
            filters.name = 'filters'
            rval = corr.CorrMM(border_mode, subsample)(input, filters)
            rval.name = 'corr_output'
            return rval

        output = sym_CorrMM(input, filters)
        output.name = 'CorrMM()(%s,%s)' % (input.name, filters.name)
        theano_corr = theano.function([input, filters], output, mode=self.mode)

        # initialize input and compute result
        image_data = numpy.random.random(N_image_shape).astype(self.dtype)
        filter_data = numpy.random.random(N_filter_shape).astype(self.dtype)
        if non_contiguous:
            # Transpose, copy, transpose back: keeps values and shapes but
            # yields non-C-contiguous arrays, to exercise the op's handling
            # of strided inputs.
            image_data = numpy.transpose(image_data, axes=(0, 1, 3, 2))
            image_data = image_data.copy()
            image_data = numpy.transpose(image_data, axes=(0, 1, 3, 2))
            filter_data = numpy.transpose(filter_data, axes=(0, 1, 3, 2))
            filter_data = filter_data.copy()
            filter_data = numpy.transpose(filter_data, axes=(0, 1, 3, 2))
            assert not image_data.flags['CONTIGUOUS']
            assert not filter_data.flags['CONTIGUOUS']

        theano_output = theano_corr(image_data, filter_data)

        # REFERENCE IMPLEMENTATION
        # Testing correlation, not convolution. Reverse filters.
        filter_data_corr = numpy.array(filter_data[:, :, ::-1, ::-1],
                                       copy=True,
                                       order='C')
        orig_image_data = image_data
        img_shape2d = numpy.array(N_image_shape[-2:])
        fil_shape2d = numpy.array(N_filter_shape[-2:])
        subsample2d = numpy.array(subsample)
        # Translate border_mode into an explicit (padH, padW) pair.
        if border_mode == 'full':
            padHW = (fil_shape2d - 1)
        elif border_mode == 'valid':
            padHW = numpy.array([0, 0])
        elif border_mode == 'half':
            # NOTE(review): numpy.floor returns floats, later used as slice
            # bounds -- relies on NumPy accepting float indices here.
            padHW = numpy.floor(fil_shape2d / 2)
        elif isinstance(border_mode, tuple):
            padHW = numpy.array(border_mode)
        elif isinstance(border_mode, int):
            padHW = numpy.array([border_mode, border_mode])
        else:
            raise NotImplementedError('Unsupported border_mode {}'.format(
                                      border_mode))
        out_shape2d = numpy.floor((img_shape2d + 2 * (padHW) - fil_shape2d) /
                                  subsample2d) + 1
        out_shape = (N_image_shape[0], N_filter_shape[0]) + tuple(out_shape2d)
        ref_output = numpy.zeros(out_shape)

        # loop over output feature maps
        ref_output.fill(0)
        # Build a zero-padded copy of the image so the inner loops can slice
        # without bounds checks.
        image_data2 = numpy.zeros((N_image_shape[0], N_image_shape[1],
                                   N_image_shape[2] + 2 * padHW[0],
                                   N_image_shape[3] + 2 * padHW[1]))
        image_data2[:, :, padHW[0]:padHW[0] + N_image_shape[2],
                    padHW[1]:padHW[1] + N_image_shape[3]] = image_data
        image_data = image_data2
        N_image_shape = image_data.shape
        # Direct (slow) correlation: the doubly-flipped filter makes this a
        # correlation of the original filter with the padded image.
        for bb in range(N_image_shape[0]):
            for nn in range(N_filter_shape[0]):
                for im0 in range(N_image_shape[1]):
                    filter2d = filter_data_corr[nn, im0, :, :]
                    image2d = image_data[bb, im0, :, :]
                    for row in range(ref_output.shape[2]):
                        irow = row * subsample[0]  # image row
                        for col in range(ref_output.shape[3]):
                            icol = col * subsample[1]  # image col
                            ref_output[bb, nn, row, col] += (
                                image2d[irow:irow + N_filter_shape[2],
                                        icol:icol + N_filter_shape[3]] *
                                filter2d[::-1, ::-1]).sum()

        self.assertTrue(_allclose(theano_output, ref_output))

        # TEST GRADIENT
        if verify_grad:
            utt.verify_grad(sym_CorrMM, [orig_image_data, filter_data])

    @attr('slow')
    def test_basic(self):
        """
        Tests that basic correlations work for odd and even
        dimensions of image and filter shapes, as well as rectangular
        images and filters.
        """
        border_modes = ['valid', 'full', 'half', (1, 1), (2, 1),
                        (1, 2), (3, 3), 1]
        img_shapes = [(2, 2, 3, 3), (3, 2, 8, 8), (3, 2, 7, 5),
                      (3, 2, 7, 5), (3, 2, 8, 8), (3, 2, 7, 5)]
        fil_shapes = [(2, 2, 2, 2), (4, 2, 5, 5), (5, 2, 2, 3),
                      (5, 2, 3, 2), (4, 2, 5, 5), (5, 2, 2, 3)]
        for border_mode in border_modes:
            for img, fil in zip(img_shapes, fil_shapes):
                self.validate(img, fil, border_mode, verify_grad=False)

        # Very slow on with 'full' or 'half'
        self.validate((1, 10, 213, 129), (46, 10, 212, 1), 'valid',
                      verify_grad=False)

    def test_img_kernel_same_shape(self):
        # Image and kernel spatial shapes are equal; output is 1x1 in
        # 'valid' mode.
        self.validate((3, 2, 3, 3), (4, 2, 3, 3), 'full')
        self.validate((3, 2, 3, 3), (4, 2, 3, 3), 'valid')
        self.validate((3, 2, 3, 3), (4, 2, 3, 3), 'half')
        self.validate((3, 2, 3, 3), (4, 2, 3, 3), (1, 1))
        self.validate((3, 2, 3, 3), (4, 2, 3, 3), 1)

    @attr('slow')
    def test_subsample(self):
        """
        Tests correlation where subsampling != (1,1)
        """
        self.validate((3, 2, 7, 5), (5, 2, 2, 3), 'valid', subsample=(2, 2))
        self.validate((3, 2, 7, 5), (5, 2, 2, 3), 'valid', subsample=(2, 1))
        self.validate((1, 1, 6, 6), (1, 1, 3, 3), 'valid', subsample=(3, 3))
        self.validate((3, 2, 7, 5), (5, 2, 2, 3), 'full', subsample=(2, 2))
        self.validate((3, 2, 7, 5), (5, 2, 2, 3), 'full', subsample=(2, 1))
        self.validate((1, 1, 6, 6), (1, 1, 3, 3), 'full', subsample=(3, 3))
        self.validate((3, 2, 7, 5), (5, 2, 2, 3), 'half', subsample=(2, 2))
        self.validate((3, 2, 7, 5), (5, 2, 2, 3), 'half', subsample=(2, 1))
        self.validate((1, 1, 6, 6), (1, 1, 3, 3), 'half', subsample=(3, 3))
        self.validate((3, 2, 7, 5), (5, 2, 2, 3), (1, 1), subsample=(2, 2))
        self.validate((3, 2, 7, 5), (5, 2, 2, 3), (2, 1), subsample=(2, 1))
        self.validate((1, 1, 6, 6), (1, 1, 3, 3), (1, 2), subsample=(3, 3))
        self.validate((1, 1, 6, 6), (1, 1, 3, 3), 1, subsample=(3, 3))

    @attr('slow')
    def test_shape_Constant_tensor(self):
        """
        Tests correlation where the {image,filter}_shape is a Constant tensor.
        """
        as_t = T.as_tensor_variable
        border_modes = ['valid', 'full', 'half', (1, 1), (2, 1),
                        (1, 2), (3, 3), 1]
        for border_mode in border_modes:
            # Shapes given as scalars, lists and tuples of Constants, in
            # every combination for image and filter.
            self.validate((as_t(3), as_t(2), as_t(7), as_t(5)),
                          (5, 2, 2, 3), border_mode)
            self.validate(as_t([3, 2, 7, 5]), (5, 2, 2, 3), border_mode)
            self.validate(as_t((3, 2, 7, 5)), (5, 2, 2, 3), border_mode)
            self.validate((3, 2, 7, 5),
                          (as_t(5), as_t(2), as_t(2), as_t(3)), 'valid')
            self.validate((3, 2, 7, 5), as_t([5, 2, 2, 3]), border_mode)
            self.validate(as_t([3, 2, 7, 5]), as_t([5, 2, 2, 3]), border_mode)

    def test_invalid_filter_shape(self):
        """
        Tests scenario where filter_shape[1] != input_shape[1]
        """
        self.assertRaises(ValueError,
                          self.validate,
                          (3, 2, 8, 8), (4, 3, 5, 5),
                          'valid')

    def test_full_mode(self):
        """
        Tests basic correlation in full mode and case where filter
        is larger than the input image.
        """
        self.validate((3, 2, 5, 5), (4, 2, 8, 8), 'full')

        def f():
            # 'valid' with a filter larger than the image must fail.
            self.validate((3, 2, 5, 5), (4, 2, 8, 8), 'valid')
        self.assertRaises(Exception, f)

    def test_wrong_input(self):
        """
        Make sure errors are raised when image and kernel are not 4D tensors
        """
        self.assertRaises(Exception, self.validate, (3, 2, 8, 8),
                          (4, 2, 5, 5), 'valid', input=T.dmatrix())
        self.assertRaises(Exception, self.validate, (3, 2, 8, 8),
                          (4, 2, 5, 5), 'valid', filters=T.dvector())
        self.assertRaises(Exception, self.validate, (3, 2, 8, 8),
                          (4, 2, 5, 5), 'valid', input=T.dtensor3())

    @attr('slow')
    def test_infer_shape_forward(self):
        # Check that CorrMM's infer_shape matches the actually computed
        # output shape for many shape/mode/subsample combinations.
        def rand(*shape):
            r = numpy.asarray(numpy.random.rand(*shape), dtype='float64')
            return r * 2 - 1
        corrMM = corr.CorrMM

        adtens = T.dtensor4()
        bdtens = T.dtensor4()
        aivec_vals = [[4, 5, 6, 3], [6, 2, 8, 3], [3, 6, 7, 5],
                      [3, 6, 7, 5], [5, 2, 4, 3]]
        bivec_vals = [[7, 5, 3, 2], [4, 2, 5, 3], [5, 6, 3, 2],
                      [5, 6, 2, 3], [6, 2, 4, 3]]
        modes = ['valid', 'full', 'half', (1, 1), (2, 1), (1, 2), 1]
        subsamples = [(1, 1), (2, 1), (1, 2)]

        for aivec_val, bivec_val in zip(aivec_vals, bivec_vals):
            adtens_val = rand(*aivec_val)
            bdtens_val = rand(*bivec_val)
            for mode in modes:
                for subsample in subsamples:
                    # CorrMM
                    cdtens = corrMM(border_mode=mode,
                                    subsample=subsample)(adtens, bdtens)
                    self._compile_and_check([adtens, bdtens],
                                            [cdtens],
                                            [adtens_val, bdtens_val], corrMM,
                                            warn=False)

    @attr('slow')
    def test_infer_shape_gradW(self):
        # Shape inference for the gradient w.r.t. the weights: run the
        # forward op to get a real output, then feed it to
        # CorrMM_gradWeights.
        def rand(*shape):
            r = numpy.asarray(numpy.random.rand(*shape), dtype='float64')
            return r * 2 - 1
        corrMM = corr.CorrMM
        gradW = corr.CorrMM_gradWeights

        adtens = T.dtensor4()
        bdtens = T.dtensor4()
        aivec_vals = [[1, 5, 6, 3], [8, 2, 7, 3], [1, 6, 9, 4],
                      [9, 6, 8, 5], [9, 1, 6, 8]]
        bivec_vals = [[7, 5, 3, 1], [4, 2, 5, 3], [12, 6, 3, 2],
                      [5, 6, 1, 3], [11, 1, 3, 3]]
        modes = ['valid', 'full', 'half', (1, 1), (2, 1), (1, 2), 1]
        subsamples = [(1, 1), (2, 1), (1, 2)]

        for aivec_val, bivec_val in zip(aivec_vals, bivec_vals):
            adtens_val = rand(*aivec_val)
            bdtens_val = rand(*bivec_val)
            for mode in modes:
                for subsample in subsamples:
                    # CorrMM
                    cdtens = corrMM(border_mode=mode,
                                    subsample=subsample)(adtens, bdtens)
                    f = theano.function([adtens, bdtens], cdtens)
                    cdtens_val = f(adtens_val, bdtens_val)
                    # CorrMM_gradWeights
                    shape = (theano.shared(bivec_val[2]),
                             theano.shared(bivec_val[3]))
                    bdtens_g = gradW(border_mode=mode,
                                     subsample=subsample)(adtens,
                                                          cdtens,
                                                          shape=shape)
                    self._compile_and_check([adtens, cdtens],
                                            [bdtens_g],
                                            [adtens_val, cdtens_val], gradW,
                                            warn=False)

    @attr('slow')
    def test_infer_shape_gradI(self):
        # Shape inference for the gradient w.r.t. the inputs: run the
        # forward op to get a real output, then feed it to
        # CorrMM_gradInputs.
        def rand(*shape):
            r = numpy.asarray(numpy.random.rand(*shape), dtype='float64')
            return r * 2 - 1
        corrMM = corr.CorrMM
        gradI = corr.CorrMM_gradInputs

        adtens = T.dtensor4()
        bdtens = T.dtensor4()
        aivec_vals = [[1, 5, 6, 3], [8, 2, 7, 3], [1, 6, 9, 4],
                      [9, 6, 8, 5], [9, 1, 6, 8]]
        bivec_vals = [[7, 5, 3, 1], [4, 2, 5, 3], [12, 6, 3, 2],
                      [5, 6, 1, 3], [7, 1, 3, 4]]
        modes = ['valid', 'full', 'half', (1, 1), (2, 1), (1, 2), 1]
        subsamples = [(1, 1), (2, 1), (1, 2)]

        for aivec_val, bivec_val in zip(aivec_vals, bivec_vals):
            adtens_val = rand(*aivec_val)
            bdtens_val = rand(*bivec_val)
            for mode in modes:
                for subsample in subsamples:
                    # CorrMM
                    cdtens = corrMM(border_mode=mode,
                                    subsample=subsample)(adtens, bdtens)
                    f = theano.function([adtens, bdtens], cdtens)
                    cdtens_val = f(adtens_val, bdtens_val)
                    # CorrMM_gradInputs
                    shape = (theano.shared(aivec_val[2]),
                             theano.shared(aivec_val[3]))
                    adtens_g = gradI(border_mode=mode,
                                     subsample=subsample)(bdtens,
                                                          cdtens,
                                                          shape=shape)
                    self._compile_and_check([bdtens, cdtens],
                                            [adtens_g],
                                            [bdtens_val, cdtens_val], gradI,
                                            warn=False)

    def test_non_contiguous(self):
        # Same shape/mode combinations as elsewhere, but with
        # non-C-contiguous inputs (see validate's non_contiguous path).
        self.validate((2, 2, 3, 3), (2, 2, 2, 2), 'valid',
                      non_contiguous=True)
        self.validate((3, 2, 8, 8), (4, 2, 5, 5), 'valid',
                      non_contiguous=True)
        self.validate((3, 2, 7, 5), (5, 2, 2, 3), 'valid',
                      non_contiguous=True)
        self.validate((3, 2, 7, 5), (5, 2, 3, 2), 'valid',
                      non_contiguous=True)
        self.validate((3, 2, 8, 8), (4, 2, 5, 5), 'full',
                      non_contiguous=True)
        self.validate((3, 2, 7, 5), (5, 2, 2, 3), 'full',
                      non_contiguous=True)
        self.validate((3, 2, 8, 8), (4, 2, 5, 5), 'half',
                      non_contiguous=True)
        self.validate((3, 2, 7, 5), (5, 2, 2, 3), 'half',
                      non_contiguous=True)
        self.validate((3, 2, 8, 8), (4, 2, 5, 5), (1, 1),
                      non_contiguous=True)
        self.validate((3, 2, 7, 5), (5, 2, 2, 3), (1, 2),
                      non_contiguous=True)
        self.validate((3, 2, 7, 5), (5, 2, 2, 3), (2, 1),
                      non_contiguous=True)
        self.validate((3, 2, 7, 5), (5, 2, 2, 3), 2,
                      non_contiguous=True)
if __name__ == '__main__':
    # Run the shape-inference tests directly, without a test runner.
    t = TestCorr2D('setUp')
    t.setUp()
    # BUG FIX: the original called ``t.test_infer_shape()``, but no method of
    # that name exists on TestCorr2D (only test_infer_shape_forward,
    # test_infer_shape_gradW and test_infer_shape_gradI are defined), so
    # running this file directly raised AttributeError. Call the methods
    # that actually exist.
    t.test_infer_shape_forward()
    t.test_infer_shape_gradW()
    t.test_infer_shape_gradI()
编写
预览
Markdown
格式
0%
重试
或
添加新文件
添加附件
取消
您添加了
0
人
到此讨论。请谨慎行事。
请先完成此评论的编辑!
取消
请
注册
或者
登录
后发表评论