Skip to content
项目
群组
代码片段
帮助
当前项目
正在载入...
登录 / 注册
切换导航面板
P
pytensor
项目
项目
详情
活动
周期分析
仓库
仓库
文件
提交
分支
标签
贡献者
图表
比较
统计图
议题
0
议题
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
CI / CD
CI / CD
流水线
作业
日程
统计图
Wiki
Wiki
代码片段
代码片段
成员
成员
折叠边栏
关闭边栏
活动
图像
聊天
创建新问题
作业
提交
问题看板
Open sidebar
testgroup
pytensor
Commits
9592125c
提交
9592125c
authored
8月 25, 2017
作者:
Frédéric Bastien
提交者:
GitHub
8月 25, 2017
浏览文件
操作
浏览文件
下载
差异文件
Merge pull request #6286 from vikramnitin9/grouped_unshared
Implement Unshared Convolution
上级
c32b0db8
121f96d6
隐藏空白字符变更
内嵌
并排
正在显示
12 个修改的文件
包含
1382 行增加
和
450 行删除
+1382
-450
blas.py
theano/gpuarray/blas.py
+191
-84
corr_gemm.c
theano/gpuarray/c_code/corr_gemm.c
+171
-65
dnn.py
theano/gpuarray/dnn.py
+11
-0
opt.py
theano/gpuarray/opt.py
+44
-21
test_gemmcorr.py
theano/gpuarray/tests/test_gemmcorr.py
+57
-12
__init__.py
theano/tensor/nnet/__init__.py
+24
-8
abstract_conv.py
theano/tensor/nnet/abstract_conv.py
+359
-107
corr_gemm.c
theano/tensor/nnet/c_code/corr_gemm.c
+146
-56
corr.py
theano/tensor/nnet/corr.py
+199
-85
opt.py
theano/tensor/nnet/opt.py
+18
-11
test_abstract_conv.py
theano/tensor/nnet/tests/test_abstract_conv.py
+151
-0
test_corr.py
theano/tensor/nnet/tests/test_corr.py
+11
-1
没有找到文件。
theano/gpuarray/blas.py
浏览文件 @
9592125c
...
...
@@ -458,13 +458,15 @@ class BaseGpuCorrMM(CGpuKernelBase):
num_groups :
Divides the image, kernel and output tensors into num_groups
separate groups. Each which carry out convolutions separately (default : 1).
unshared
Perform unshared correlation (default: False)
"""
check_broadcast
=
False
__props__
=
(
'border_mode'
,
'subsample'
,
'filter_dilation'
,
'num_groups'
)
__props__
=
(
'border_mode'
,
'subsample'
,
'filter_dilation'
,
'num_groups'
,
'unshared'
)
_f16_ok
=
True
def
__init__
(
self
,
border_mode
=
"valid"
,
subsample
=
(
1
,
1
),
filter_dilation
=
(
1
,
1
),
num_groups
=
1
):
filter_dilation
=
(
1
,
1
),
num_groups
=
1
,
unshared
=
False
):
if
isinstance
(
border_mode
,
integer_types
):
border_mode
=
(
border_mode
,
border_mode
)
if
isinstance
(
border_mode
,
tuple
):
...
...
@@ -487,6 +489,7 @@ class BaseGpuCorrMM(CGpuKernelBase):
raise
ValueError
(
"Number of groups should be greater than 0"
)
self
.
num_groups
=
num_groups
CGpuKernelBase
.
__init__
(
self
,
[
'c_code/corr_gemm.c'
])
self
.
unshared
=
unshared
@property
def
pad
(
self
):
...
...
@@ -495,12 +498,13 @@ class BaseGpuCorrMM(CGpuKernelBase):
return
(
0
,
0
)
def
__str__
(
self
):
return
'
%
s{
%
s,
%
s,
%
s,
%
s}'
%
(
return
'
%
s{
%
s,
%
s,
%
s,
%
s
,
%
s
}'
%
(
self
.
__class__
.
__name__
,
self
.
border_mode
,
str
(
self
.
subsample
),
str
(
self
.
filter_dilation
),
str
(
self
.
num_groups
))
str
(
self
.
num_groups
),
str
(
self
.
unshared
))
def
__setstate__
(
self
,
d
):
self
.
__dict__
.
update
(
d
)
...
...
@@ -533,7 +537,7 @@ class BaseGpuCorrMM(CGpuKernelBase):
def
c_code_cache_version
(
self
):
# Raise this whenever modifying the C code (including the file).
return
(
1
0
,)
return
(
1
1
,)
def
c_code_helper
(
self
,
bottom
,
weights
,
top
,
direction
,
sub
,
height
=
None
,
width
=
None
):
"""
...
...
@@ -581,6 +585,7 @@ class BaseGpuCorrMM(CGpuKernelBase):
dH
,
dW
=
self
.
subsample
dilH
,
dilW
=
self
.
filter_dilation
numgroups
=
self
.
num_groups
unshared
=
int
(
self
.
unshared
)
if
self
.
border_mode
==
"half"
:
padH
=
padW
=
-
1
elif
self
.
border_mode
==
"full"
:
...
...
@@ -633,19 +638,24 @@ class BaseGpuCorrMM(CGpuKernelBase):
int padH =
%(padH)
s;
int padW =
%(padW)
s;
int numgroups =
%(numgroups)
s;
int unshared =
%(unshared)
s;
PyGpuArrayObject * bottom =
%(bottom)
s;
PyGpuArrayObject * weights =
%(weights)
s;
PyGpuArrayObject * top =
%(top)
s;
PyGpuArrayObject * out2 = NULL;
int wdim, odim;
wdim = unshared ? 6 : 4;
odim = 4; //Can be set to 6 later for unshared backprop wrt weights
// Obtain or infer kernel width and height
// (we need to know it early to be able to handle auto-padding)
size_t kH, kW, dil_kH, dil_kW;
if (direction != 1) {
// weight is an input variable, we can just read its shape
kH = PyGpuArray_DIMS(weights)[2];
kW = PyGpuArray_DIMS(weights)[
3
];
kH = PyGpuArray_DIMS(weights)[
wdim-
2];
kW = PyGpuArray_DIMS(weights)[
wdim-1
];
}
else {
if (
%(height)
s != -1) {
...
...
@@ -699,8 +709,10 @@ class BaseGpuCorrMM(CGpuKernelBase):
// Infer output shape and type
// The inferred shape can be negative.
long long out_dim[4];
size_t out_dim_size[4];
long long out_dim[6];
size_t out_dim_size[6];
out_dim[4] = out_dim[5] = 0; //Only used for unshared backprop wrt weights
out_dim_size[4] = out_dim_size[5] = 0; //Same
int out_typecode;
PyGpuContextObject *out_context;
switch(direction) {
...
...
@@ -709,71 +721,131 @@ class BaseGpuCorrMM(CGpuKernelBase):
// height and width: top = (bottom + 2*pad - ((weight-1)*dil + 1)) / sample + 1
out_dim[0] = PyGpuArray_DIMS(bottom)[0];
out_dim[1] = PyGpuArray_DIMS(weights)[0];
out_dim[2] = (PyGpuArray_DIMS(bottom)[2] + 2*padH - ((PyGpuArray_DIMS(weights)[2]-1)*dilH + 1)) / dH + 1;
out_dim[3] = (PyGpuArray_DIMS(bottom)[3] + 2*padW - ((PyGpuArray_DIMS(weights)[
3
]-1)*dilW + 1)) / dW + 1;
out_dim[2] = (PyGpuArray_DIMS(bottom)[2] + 2*padH - ((PyGpuArray_DIMS(weights)[
wdim-
2]-1)*dilH + 1)) / dH + 1;
out_dim[3] = (PyGpuArray_DIMS(bottom)[3] + 2*padW - ((PyGpuArray_DIMS(weights)[
wdim-1
]-1)*dilW + 1)) / dW + 1;
out_typecode = bottom->ga.typecode;
out_context = bottom->context;
if (out_dim[0] < 0 || out_dim[1] < 0 || out_dim[2] <= 0 || out_dim[3] <= 0)
{
PyErr_Format(PyExc_ValueError,
"GpuCorrMM: impossible output shape
\\
n"
" bottom shape:
%%
ld x
%%
ld x
%%
ld x
%%
ld
\\
n"
" weights shape:
%%
ld x
%%
ld x
%%
ld x
%%
ld
\\
n"
" top shape:
%%
ld x
%%
ld x
%%
ld x
%%
ld
\\
n",
PyGpuArray_DIMS(bottom)[0], PyGpuArray_DIMS(bottom)[1],
PyGpuArray_DIMS(bottom)[2], PyGpuArray_DIMS(bottom)[3],
PyGpuArray_DIMS(weights)[0], PyGpuArray_DIMS(weights)[1],
PyGpuArray_DIMS(weights)[2], PyGpuArray_DIMS(weights)[3],
out_dim[0], out_dim[1], out_dim[2], out_dim[3]);
%(fail)
s
if (unshared) {
PyErr_Format(PyExc_ValueError,
"GpuCorrMM: impossible output shape
\\
n"
" bottom shape:
%%
ld x
%%
ld x
%%
ld x
%%
ld
\\
n"
" weights shape:
%%
ld x
%%
ld x
%%
ld x
%%
ld x
%%
ld x
%%
ld
\\
n"
" top shape:
%%
ld x
%%
ld x
%%
ld x
%%
ld
\\
n",
PyGpuArray_DIMS(bottom)[0], PyGpuArray_DIMS(bottom)[1],
PyGpuArray_DIMS(bottom)[2], PyGpuArray_DIMS(bottom)[3],
PyGpuArray_DIMS(weights)[0], PyGpuArray_DIMS(weights)[1],
PyGpuArray_DIMS(weights)[2], PyGpuArray_DIMS(weights)[3],
PyGpuArray_DIMS(weights)[4], PyGpuArray_DIMS(weights)[5],
out_dim[0], out_dim[1], out_dim[2], out_dim[3]);
%(fail)
s
}
else {
PyErr_Format(PyExc_ValueError,
"GpuCorrMM: impossible output shape
\\
n"
" bottom shape:
%%
ld x
%%
ld x
%%
ld x
%%
ld
\\
n"
" weights shape:
%%
ld x
%%
ld x
%%
ld x
%%
ld
\\
n"
" top shape:
%%
ld x
%%
ld x
%%
ld x
%%
ld
\\
n",
PyGpuArray_DIMS(bottom)[0], PyGpuArray_DIMS(bottom)[1],
PyGpuArray_DIMS(bottom)[2], PyGpuArray_DIMS(bottom)[3],
PyGpuArray_DIMS(weights)[0], PyGpuArray_DIMS(weights)[1],
PyGpuArray_DIMS(weights)[2], PyGpuArray_DIMS(weights)[3],
out_dim[0], out_dim[1], out_dim[2], out_dim[3]);
%(fail)
s
}
}
break;
case 1: // backprop wrt. weights
// output is weights: (num_filters, num_channels, height, width)
// output is weights: (num_filters, num_channels, height, width) or
// (num_filters, top_height, top_width, num_channels, height, width) -> for unshared
// height and width: weights = (bottom + 2*pad - (top - 1) * sample - 1) / dil + 1
out_dim[0] = PyGpuArray_DIMS(top)[1];
out_dim[1] = PyGpuArray_DIMS(bottom)[1] / numgroups;
out_dim[2] = kH; // already inferred further above
out_dim[3] = kW; // how convenient
if (unshared){
odim = 6;
out_dim[1] = PyGpuArray_DIMS(top)[2];
out_dim[2] = PyGpuArray_DIMS(top)[3];
}
out_dim[wdim-3] = PyGpuArray_DIMS(bottom)[1] / numgroups;
out_dim[wdim-2] = kH; // already inferred further above
out_dim[wdim-1] = kW; // how convenient
out_typecode = top->ga.typecode;
out_context = top->context;
if (out_dim[0] < 0 || out_dim[1] < 0 || out_dim[2] <= 0 || out_dim[3] <= 0)
{
PyErr_Format(PyExc_ValueError,
"GpuCorrMM backprop wrt. weights: impossible output shape
\\
n"
" bottom shape:
%%
ld x
%%
ld x
%%
ld x
%%
ld
\\
n"
" weights shape:
%%
ld x
%%
ld x
%%
ld x
%%
ld
\\
n"
" top shape:
%%
ld x
%%
ld x
%%
ld x
%%
ld
\\
n",
PyGpuArray_DIMS(bottom)[0], PyGpuArray_DIMS(bottom)[1],
PyGpuArray_DIMS(bottom)[2], PyGpuArray_DIMS(bottom)[3],
out_dim[0], out_dim[1], out_dim[2], out_dim[3],
PyGpuArray_DIMS(top)[0], PyGpuArray_DIMS(top)[1],
PyGpuArray_DIMS(top)[2], PyGpuArray_DIMS(top)[3]);
%(fail)
s
if (unshared) {
if (out_dim[0] < 0 || out_dim[1] <= 0 || out_dim[2] <= 0 || out_dim[3] < 0
|| out_dim[4] <= 0 || out_dim[5] <= 0){
PyErr_Format(PyExc_ValueError,
"GpuCorrMM backprop wrt. weights: impossible output shape
\\
n"
" bottom shape:
%%
ld x
%%
ld x
%%
ld x
%%
ld
\\
n"
" weights shape:
%%
ld x
%%
ld x
%%
ld x
%%
ld x
%%
ld x
%%
ld
\\
n"
" top shape:
%%
ld x
%%
ld x
%%
ld x
%%
ld
\\
n",
PyGpuArray_DIMS(bottom)[0], PyGpuArray_DIMS(bottom)[1],
PyGpuArray_DIMS(bottom)[2], PyGpuArray_DIMS(bottom)[3],
out_dim[0], out_dim[1], out_dim[2], out_dim[3],
out_dim[4], out_dim[5],
PyGpuArray_DIMS(top)[0], PyGpuArray_DIMS(top)[1],
PyGpuArray_DIMS(top)[2], PyGpuArray_DIMS(top)[3]);
%(fail)
s
}
}
else {
if (out_dim[0] < 0 || out_dim[1] < 0 || out_dim[2] <= 0 || out_dim[3] <= 0)
{
PyErr_Format(PyExc_ValueError,
"GpuCorrMM backprop wrt. weights: impossible output shape
\\
n"
" bottom shape:
%%
ld x
%%
ld x
%%
ld x
%%
ld
\\
n"
" weights shape:
%%
ld x
%%
ld x
%%
ld x
%%
ld
\\
n"
" top shape:
%%
ld x
%%
ld x
%%
ld x
%%
ld
\\
n",
PyGpuArray_DIMS(bottom)[0], PyGpuArray_DIMS(bottom)[1],
PyGpuArray_DIMS(bottom)[2], PyGpuArray_DIMS(bottom)[3],
out_dim[0], out_dim[1], out_dim[2], out_dim[3],
PyGpuArray_DIMS(top)[0], PyGpuArray_DIMS(top)[1],
PyGpuArray_DIMS(top)[2], PyGpuArray_DIMS(top)[3]);
%(fail)
s
}
}
break;
case 2: // backprop wrt. inputs
// output is bottom: (batchsize, num_channels, height, width)
// height and width: bottom = (top - 1) * sample + (weights-1)*dil + 1 - 2*pad
out_dim[0] = PyGpuArray_DIMS(top)[0];
out_dim[1] = PyGpuArray_DIMS(weights)[
1
] * numgroups;
out_dim[2] = (
%(height)
s != -1) ?
%(height)
s : (PyGpuArray_DIMS(top)[2] - 1) * dH + (PyGpuArray_DIMS(weights)[2]-1)*dilH + 1 - 2*padH;
out_dim[3] = (
%(width)
s != -1) ?
%(width)
s : (PyGpuArray_DIMS(top)[3] - 1) * dW + (PyGpuArray_DIMS(weights)[
3
]-1)*dilW + 1 - 2*padW;
out_dim[1] = PyGpuArray_DIMS(weights)[
wdim-3
] * numgroups;
out_dim[2] = (
%(height)
s != -1) ?
%(height)
s : (PyGpuArray_DIMS(top)[2] - 1) * dH + (PyGpuArray_DIMS(weights)[
wdim-
2]-1)*dilH + 1 - 2*padH;
out_dim[3] = (
%(width)
s != -1) ?
%(width)
s : (PyGpuArray_DIMS(top)[3] - 1) * dW + (PyGpuArray_DIMS(weights)[
wdim-1
]-1)*dilW + 1 - 2*padW;
out_typecode = top->ga.typecode;
out_context = top->context;
if (out_dim[0] < 0 || out_dim[1] < 0 || out_dim[2] <= 0 || out_dim[3] <= 0)
{
PyErr_Format(PyExc_ValueError,
"GpuCorrMM backprop wrt. inputs: impossible output shape
\\
n"
" bottom shape:
%%
ld x
%%
ld x
%%
ld x
%%
ld
\\
n"
" weight shape:
%%
ld x
%%
ld x
%%
ld x
%%
ld
\\
n"
" top shape:
%%
ld x
%%
ld x
%%
ld x
%%
ld
\\
n",
out_dim[0], out_dim[1], out_dim[2], out_dim[3],
PyGpuArray_DIMS(weights)[0], PyGpuArray_DIMS(weights)[1],
PyGpuArray_DIMS(weights)[2], PyGpuArray_DIMS(weights)[3],
PyGpuArray_DIMS(top)[0], PyGpuArray_DIMS(top)[1],
PyGpuArray_DIMS(top)[2], PyGpuArray_DIMS(top)[3]);
%(fail)
s
if (unshared) {
if (out_dim[0] < 0 || out_dim[1] < 0 || out_dim[2] <= 0 || out_dim[3] <= 0)
{
PyErr_Format(PyExc_ValueError,
"GpuCorrMM backprop wrt. inputs: impossible output shape
\\
n"
" bottom shape:
%%
ld x
%%
ld x
%%
ld x
%%
ld
\\
n"
" weight shape:
%%
ld x
%%
ld x
%%
ld x
%%
ld x
%%
ld x
%%
ld
\\
n"
" top shape:
%%
ld x
%%
ld x
%%
ld x
%%
ld
\\
n",
out_dim[0], out_dim[1], out_dim[2], out_dim[3],
PyGpuArray_DIMS(weights)[0], PyGpuArray_DIMS(weights)[1],
PyGpuArray_DIMS(weights)[2], PyGpuArray_DIMS(weights)[3],
PyGpuArray_DIMS(weights)[4], PyGpuArray_DIMS(weights)[5],
PyGpuArray_DIMS(top)[0], PyGpuArray_DIMS(top)[1],
PyGpuArray_DIMS(top)[2], PyGpuArray_DIMS(top)[3]);
%(fail)
s
}
}
else {
if (out_dim[0] < 0 || out_dim[1] < 0 || out_dim[2] <= 0 || out_dim[3] <= 0)
{
PyErr_Format(PyExc_ValueError,
"GpuCorrMM backprop wrt. inputs: impossible output shape
\\
n"
" bottom shape:
%%
ld x
%%
ld x
%%
ld x
%%
ld
\\
n"
" weight shape:
%%
ld x
%%
ld x
%%
ld x
%%
ld
\\
n"
" top shape:
%%
ld x
%%
ld x
%%
ld x
%%
ld
\\
n",
out_dim[0], out_dim[1], out_dim[2], out_dim[3],
PyGpuArray_DIMS(weights)[0], PyGpuArray_DIMS(weights)[1],
PyGpuArray_DIMS(weights)[2], PyGpuArray_DIMS(weights)[3],
PyGpuArray_DIMS(top)[0], PyGpuArray_DIMS(top)[1],
PyGpuArray_DIMS(top)[2], PyGpuArray_DIMS(top)[3]);
%(fail)
s
}
}
break;
default:
...
...
@@ -786,12 +858,24 @@ class BaseGpuCorrMM(CGpuKernelBase):
out_dim_size[2] = (size_t)out_dim[2];
out_dim_size[3] = (size_t)out_dim[3];
if (odim == 6) {
out_dim_size[4] = (size_t)out_dim[4];
out_dim_size[5] = (size_t)out_dim[5];
}
// Prepare output array
if (theano_prep_output(&
%(out)
s,
4
, out_dim_size, out_typecode, GA_C_ORDER, out_context) != 0)
if (theano_prep_output(&
%(out)
s,
odim
, out_dim_size, out_typecode, GA_C_ORDER, out_context) != 0)
{
PyErr_Format(PyExc_RuntimeError,
"BaseGpuCorrMM: Failed to allocate output of
%%
lld x
%%
lld x
%%
lld x
%%
lld",
out_dim[0], out_dim[1], out_dim[2], out_dim[3]);
if (odim == 4) {
PyErr_Format(PyExc_RuntimeError,
"BaseGpuCorrMM: Failed to allocate output of
%%
lld x
%%
lld x
%%
lld x
%%
lld",
out_dim[0], out_dim[1], out_dim[2], out_dim[3]);
}
if (odim == 6) {
PyErr_Format(PyExc_RuntimeError,
"BaseGpuCorrMM: Failed to allocate output of
%%
lld x
%%
lld x
%%
lld x
%%
lld
%%
lld
%%
lld",
out_dim[0], out_dim[1], out_dim[2], out_dim[3], out_dim[4], out_dim[5]);
}
%(fail)
s
}
if (!GpuArray_IS_C_CONTIGUOUS(&
%(out)
s->ga)) {
...
...
@@ -800,7 +884,7 @@ class BaseGpuCorrMM(CGpuKernelBase):
}
// Call GPU code
out2 = corrMM(
%(bottom)
s,
%(weights)
s,
%(top)
s, direction, dH, dW, dilH, dilW, padH, padW, numgroups);
out2 = corrMM(
%(bottom)
s,
%(weights)
s,
%(top)
s, direction, dH, dW, dilH, dilW, padH, padW, numgroups
, unshared
);
if (out2==NULL){
%(fail)
s
}
...
...
@@ -840,6 +924,8 @@ class GpuCorrMM(BaseGpuCorrMM):
divided into.
should be an int
set to 1 to disable grouped convolution
unshared
Perform unshared correlation (default: False)
Notes
-----
...
...
@@ -859,9 +945,9 @@ class GpuCorrMM(BaseGpuCorrMM):
"""
def
__init__
(
self
,
border_mode
=
"valid"
,
subsample
=
(
1
,
1
),
filter_dilation
=
(
1
,
1
),
num_groups
=
1
):
filter_dilation
=
(
1
,
1
),
num_groups
=
1
,
unshared
=
False
):
super
(
GpuCorrMM
,
self
)
.
__init__
(
border_mode
,
subsample
,
filter_dilation
,
num_groups
)
filter_dilation
,
num_groups
,
unshared
)
def
make_node
(
self
,
img
,
kern
):
ctx_name
=
infer_context_name
(
img
,
kern
)
...
...
@@ -869,8 +955,12 @@ class GpuCorrMM(BaseGpuCorrMM):
kern
=
as_gpuarray_variable
(
kern
,
ctx_name
)
if
img
.
type
.
ndim
!=
4
:
raise
TypeError
(
'img must be 4D tensor'
)
if
kern
.
type
.
ndim
!=
4
:
raise
TypeError
(
'kern must be 4D tensor'
)
if
self
.
unshared
:
if
kern
.
type
.
ndim
!=
6
:
raise
TypeError
(
'kern must be 6D tensor'
)
else
:
if
kern
.
type
.
ndim
!=
4
:
raise
TypeError
(
'kern must be 4D tensor'
)
broadcastable
=
[
img
.
type
.
broadcastable
[
0
],
kern
.
type
.
broadcastable
[
0
],
False
,
False
]
...
...
@@ -891,12 +981,14 @@ class GpuCorrMM(BaseGpuCorrMM):
d_bottom
=
GpuCorrMM_gradInputs
(
self
.
border_mode
,
self
.
subsample
,
self
.
filter_dilation
,
self
.
num_groups
)(
self
.
num_groups
,
self
.
unshared
)(
weights
,
top
,
bottom
.
shape
[
-
2
:])
d_weights
=
GpuCorrMM_gradWeights
(
self
.
border_mode
,
self
.
subsample
,
self
.
filter_dilation
,
self
.
num_groups
)(
self
.
num_groups
,
self
.
unshared
)(
bottom
,
top
,
weights
.
shape
[
-
2
:])
return
d_bottom
,
d_weights
...
...
@@ -915,10 +1007,12 @@ class GpuCorrMM_gradWeights(BaseGpuCorrMM):
def
__init__
(
self
,
border_mode
=
"valid"
,
subsample
=
(
1
,
1
),
filter_dilation
=
(
1
,
1
),
num_groups
=
1
):
num_groups
=
1
,
unshared
=
False
):
super
(
GpuCorrMM_gradWeights
,
self
)
.
__init__
(
border_mode
,
subsample
,
filter_dilation
,
num_groups
)
filter_dilation
,
num_groups
,
unshared
)
def
make_node
(
self
,
img
,
topgrad
,
shape
=
None
):
ctx_name
=
infer_context_name
(
img
,
topgrad
)
...
...
@@ -938,8 +1032,12 @@ class GpuCorrMM_gradWeights(BaseGpuCorrMM):
assert
shape
[
0
]
.
ndim
==
0
assert
shape
[
1
]
.
ndim
==
0
broadcastable
=
[
topgrad
.
type
.
broadcastable
[
1
],
img
.
type
.
broadcastable
[
1
],
False
,
False
]
if
self
.
unshared
:
broadcastable
=
[
topgrad
.
type
.
broadcastable
[
0
],
False
,
False
,
img
.
type
.
broadcastable
[
1
],
False
,
False
]
else
:
broadcastable
=
[
topgrad
.
type
.
broadcastable
[
1
],
img
.
type
.
broadcastable
[
1
],
False
,
False
]
return
Apply
(
self
,
[
img
,
topgrad
]
+
height_width
,
[
GpuArrayType
(
dtype
=
img
.
dtype
,
context_name
=
ctx_name
,
broadcastable
=
broadcastable
)()])
...
...
@@ -958,11 +1056,12 @@ class GpuCorrMM_gradWeights(BaseGpuCorrMM):
d_bottom
=
GpuCorrMM_gradInputs
(
self
.
border_mode
,
self
.
subsample
,
self
.
filter_dilation
,
self
.
num_groups
)(
weights
,
top
,
bottom
.
shape
[
-
2
:])
self
.
num_groups
,
self
.
unshared
)(
weights
,
top
,
bottom
.
shape
[
-
2
:])
d_top
=
GpuCorrMM
(
self
.
border_mode
,
self
.
subsample
,
self
.
filter_dilation
,
self
.
num_groups
)(
bottom
,
weights
)
self
.
border_mode
,
self
.
subsample
,
self
.
filter_dilation
,
self
.
num_groups
,
self
.
unshared
)(
bottom
,
weights
)
d_height_width
=
(
theano
.
gradient
.
DisconnectedType
()(),
)
*
2
if
len
(
inp
)
==
4
else
()
...
...
@@ -989,16 +1088,22 @@ class GpuCorrMM_gradInputs(BaseGpuCorrMM):
def
__init__
(
self
,
border_mode
=
"valid"
,
subsample
=
(
1
,
1
),
filter_dilation
=
(
1
,
1
),
num_groups
=
1
):
num_groups
=
1
,
unshared
=
False
):
super
(
GpuCorrMM_gradInputs
,
self
)
.
__init__
(
border_mode
,
subsample
,
filter_dilation
,
num_groups
)
filter_dilation
,
num_groups
,
unshared
)
def
make_node
(
self
,
kern
,
topgrad
,
shape
=
None
):
ctx_name
=
infer_context_name
(
kern
,
topgrad
)
kern
=
as_gpuarray_variable
(
kern
,
ctx_name
)
topgrad
=
as_gpuarray_variable
(
topgrad
,
ctx_name
)
if
kern
.
type
.
ndim
!=
4
:
raise
TypeError
(
'kern must be 4D tensor'
)
if
self
.
unshared
:
if
kern
.
type
.
ndim
!=
6
:
raise
TypeError
(
'kern must be 6D tensor'
)
else
:
if
kern
.
type
.
ndim
!=
4
:
raise
TypeError
(
'kern must be 4D tensor'
)
if
topgrad
.
type
.
ndim
!=
4
:
raise
TypeError
(
'topgrad must be 4D tensor'
)
if
shape
is
None
:
...
...
@@ -1014,7 +1119,7 @@ class GpuCorrMM_gradInputs(BaseGpuCorrMM):
broadcastable
=
[
topgrad
.
type
.
broadcastable
[
0
],
False
,
False
,
False
]
else
:
broadcastable
=
[
topgrad
.
type
.
broadcastable
[
0
],
kern
.
type
.
broadcastable
[
1
],
broadcastable
=
[
topgrad
.
type
.
broadcastable
[
0
],
kern
.
type
.
broadcastable
[
-
3
],
False
,
False
]
return
Apply
(
self
,
[
kern
,
topgrad
]
+
height_width
,
[
GpuArrayType
(
dtype
=
topgrad
.
dtype
,
context_name
=
ctx_name
,
...
...
@@ -1034,13 +1139,15 @@ class GpuCorrMM_gradInputs(BaseGpuCorrMM):
d_weights
=
GpuCorrMM_gradWeights
(
self
.
border_mode
,
self
.
subsample
,
self
.
filter_dilation
,
self
.
num_groups
)(
bottom
,
top
,
weights
.
shape
[
-
2
:])
self
.
num_groups
,
self
.
unshared
)(
bottom
,
top
,
weights
.
shape
[
-
2
:])
d_top
=
GpuCorrMM
(
self
.
border_mode
,
self
.
subsample
,
self
.
filter_dilation
,
self
.
num_groups
)(
bottom
,
weights
)
self
.
num_groups
,
self
.
unshared
)(
bottom
,
weights
)
d_height_width
=
(
theano
.
gradient
.
DisconnectedType
()(),
)
*
2
if
len
(
inp
)
==
4
else
()
...
...
@@ -1682,7 +1789,7 @@ class GpuCorr3dMM_gradInputs(BaseGpuCorr3dMM):
broadcastable
=
[
topgrad
.
type
.
broadcastable
[
0
],
False
,
False
,
False
,
False
]
else
:
broadcastable
=
[
topgrad
.
type
.
broadcastable
[
0
],
kern
.
type
.
broadcastable
[
1
],
broadcastable
=
[
topgrad
.
type
.
broadcastable
[
0
],
kern
.
type
.
broadcastable
[
-
4
],
False
,
False
,
False
]
return
Apply
(
self
,
[
kern
,
topgrad
]
+
height_width_depth
,
[
GpuArrayType
(
dtype
=
topgrad
.
dtype
,
...
...
theano/gpuarray/c_code/corr_gemm.c
浏览文件 @
9592125c
...
...
@@ -349,7 +349,8 @@ PyGpuArrayObject* corrMM(PyGpuArrayObject *const bottom,
const
size_t
dilW
=
1
,
const
size_t
padH
=
0
,
const
size_t
padW
=
0
,
const
size_t
numgroups
=
1
)
const
size_t
numgroups
=
1
,
const
size_t
unshared
=
0
)
{
if
(
PyGpuArray_NDIM
(
bottom
)
!=
4
)
{
...
...
@@ -368,21 +369,35 @@ PyGpuArrayObject* corrMM(PyGpuArrayObject *const bottom,
return
NULL
;
}
if
(
PyGpuArray_NDIM
(
weight
)
!=
4
)
if
(
PyGpuArray_NDIM
(
weight
)
!=
(
unshared
?
6
:
4
)
)
{
PyErr_
SetString
(
PyExc_ValueError
,
"GpuCorrMM requires weight of 4D"
);
PyErr_
Format
(
PyExc_ValueError
,
"GpuCorrMM requires weight of %dD"
,
unshared
?
6
:
4
);
return
NULL
;
}
if
(
!
GpuArray_IS_C_CONTIGUOUS
(
&
weight
->
ga
))
{
PyErr_Format
(
PyExc_ValueError
,
"GpuCorrMM requires weight to be C-contiguous, "
"but strides are: %ld %ld %ld %ld
\n
"
,
PyGpuArray_STRIDES
(
weight
)[
0
],
PyGpuArray_STRIDES
(
weight
)[
1
],
PyGpuArray_STRIDES
(
weight
)[
2
],
PyGpuArray_STRIDES
(
weight
)[
3
]);
return
NULL
;
if
(
unshared
)
{
PyErr_Format
(
PyExc_ValueError
,
"GpuCorrMM requires weight to be C-contiguous, "
"but strides are: %ld %ld %ld %ld %ld %ld
\n
"
,
PyGpuArray_STRIDES
(
weight
)[
0
],
PyGpuArray_STRIDES
(
weight
)[
1
],
PyGpuArray_STRIDES
(
weight
)[
2
],
PyGpuArray_STRIDES
(
weight
)[
3
],
PyGpuArray_STRIDES
(
weight
)[
4
],
PyGpuArray_STRIDES
(
weight
)[
5
]);
return
NULL
;
}
else
{
PyErr_Format
(
PyExc_ValueError
,
"GpuCorrMM requires weight to be C-contiguous, "
"but strides are: %ld %ld %ld %ld
\n
"
,
PyGpuArray_STRIDES
(
weight
)[
0
],
PyGpuArray_STRIDES
(
weight
)[
1
],
PyGpuArray_STRIDES
(
weight
)[
2
],
PyGpuArray_STRIDES
(
weight
)[
3
]);
return
NULL
;
}
}
if
(
PyGpuArray_NDIM
(
top
)
!=
4
)
...
...
@@ -409,10 +424,12 @@ PyGpuArrayObject* corrMM(PyGpuArrayObject *const bottom,
const
size_t
bottomHeight
=
PyGpuArray_DIMS
(
bottom
)[
2
];
const
size_t
bottomWidth
=
PyGpuArray_DIMS
(
bottom
)[
3
];
// weights: (nFilters, nChannels, rows, columns)
// or (nFilters, out_rows, out_columns, nChannels, rows, columns) -> for unshared
const
size_t
nFilters
=
PyGpuArray_DIMS
(
weight
)[
0
];
const
size_t
kH
=
PyGpuArray_DIMS
(
weight
)[
2
];
const
size_t
kW
=
PyGpuArray_DIMS
(
weight
)[
3
];
if
(
nChannels
!=
(
PyGpuArray_DIMS
(
weight
)[
1
]
*
numgroups
))
{
const
size_t
kH
=
PyGpuArray_DIMS
(
weight
)[
unshared
?
4
:
2
];
const
size_t
kW
=
PyGpuArray_DIMS
(
weight
)[
unshared
?
5
:
3
];
if
(
nChannels
!=
PyGpuArray_DIMS
(
weight
)[
unshared
?
3
:
1
]
*
numgroups
)
{
PyErr_SetString
(
PyExc_ValueError
,
"GpuCorrMM images and kernel must have the same stack size
\n
"
);
return
NULL
;
...
...
@@ -435,21 +452,56 @@ PyGpuArrayObject* corrMM(PyGpuArrayObject *const bottom,
const
size_t
topHeight
=
_CONV_FLOORDIV_X
(
topHeightNoDH
,
dH
)
+
1
;
const
size_t
topWidth
=
_CONV_FLOORDIV_X
(
topWidthNoDW
,
dW
)
+
1
;
#undef _CONV_FLOORDIV
if
(
batchSize
!=
PyGpuArray_DIMS
(
top
)[
0
]
||
nFilters
!=
PyGpuArray_DIMS
(
top
)[
1
]
||
topHeight
!=
PyGpuArray_DIMS
(
top
)[
2
]
||
topWidth
!=
PyGpuArray_DIMS
(
top
)[
3
])
{
PyErr_Format
(
PyExc_ValueError
,
"GpuCorrMM shape inconsistency:
\n
"
" bottom shape: %ld %ld %ld %ld
\n
"
" weight shape: %ld %ld %ld %ld
\n
"
" top shape: %ld %ld %ld %ld (expected %ld %ld %ld %ld)
\n
"
,
batchSize
,
nChannels
,
bottomHeight
,
bottomWidth
,
nFilters
,
nChannels
/
numgroups
,
kH
,
kW
,
PyGpuArray_DIMS
(
top
)[
0
],
PyGpuArray_DIMS
(
top
)[
1
],
PyGpuArray_DIMS
(
top
)[
2
],
PyGpuArray_DIMS
(
top
)[
3
],
batchSize
,
nFilters
,
topHeight
,
topWidth
);
return
NULL
;
if
(
unshared
)
{
if
(
topHeight
!=
PyGpuArray_DIMS
(
weight
)[
1
]
||
topWidth
!=
PyGpuArray_DIMS
(
weight
)[
2
])
{
PyErr_Format
(
PyExc_ValueError
,
"GpuCorrMM regions in kernel must match output regions:
\n
"
" bottom shape: %ld %ld %ld %ld
\n
"
" weight shape: %ld %ld %ld %ld %ld %ld"
" (expected %ld %ld %ld %ld %ld %ld)
\n
"
" top shape(calculated): %ld %ld %ld %ld
\n
"
,
batchSize
,
nChannels
,
bottomHeight
,
bottomWidth
,
nFilters
,
PyGpuArray_DIMS
(
weight
)[
1
],
PyGpuArray_DIMS
(
weight
)[
2
],
nChannels
/
numgroups
,
kH
,
kW
,
nFilters
,
topHeight
,
topWidth
,
nChannels
/
numgroups
,
kH
,
kW
,
batchSize
,
nFilters
,
topHeight
,
topWidth
);
return
NULL
;
}
if
(
batchSize
!=
PyGpuArray_DIMS
(
top
)[
0
]
||
nFilters
!=
PyGpuArray_DIMS
(
top
)[
1
]
||
topHeight
!=
PyGpuArray_DIMS
(
top
)[
2
]
||
topWidth
!=
PyGpuArray_DIMS
(
top
)[
3
])
{
PyErr_Format
(
PyExc_ValueError
,
"GpuCorrMM shape inconsistency:
\n
"
" bottom shape: %ld %ld %ld %ld
\n
"
" weight shape: %ld %ld %ld %ld %ld %ld
\n
"
" top shape: %ld %ld %ld %ld (expected %ld %ld %ld %ld)
\n
"
,
batchSize
,
nChannels
,
bottomHeight
,
bottomWidth
,
nFilters
,
topHeight
,
topWidth
,
nChannels
/
numgroups
,
kH
,
kW
,
PyGpuArray_DIMS
(
top
)[
0
],
PyGpuArray_DIMS
(
top
)[
1
],
PyGpuArray_DIMS
(
top
)[
2
],
PyGpuArray_DIMS
(
top
)[
3
],
batchSize
,
nFilters
,
topHeight
,
topWidth
);
return
NULL
;
}
}
else
{
if
(
batchSize
!=
PyGpuArray_DIMS
(
top
)[
0
]
||
nFilters
!=
PyGpuArray_DIMS
(
top
)[
1
]
||
topHeight
!=
PyGpuArray_DIMS
(
top
)[
2
]
||
topWidth
!=
PyGpuArray_DIMS
(
top
)[
3
])
{
PyErr_Format
(
PyExc_ValueError
,
"GpuCorrMM shape inconsistency:
\n
"
" bottom shape: %ld %ld %ld %ld
\n
"
" weight shape: %ld %ld %ld %ld
\n
"
" top shape: %ld %ld %ld %ld (expected %ld %ld %ld %ld)
\n
"
,
batchSize
,
nChannels
,
bottomHeight
,
bottomWidth
,
nFilters
,
nChannels
/
numgroups
,
kH
,
kW
,
PyGpuArray_DIMS
(
top
)[
0
],
PyGpuArray_DIMS
(
top
)[
1
],
PyGpuArray_DIMS
(
top
)[
2
],
PyGpuArray_DIMS
(
top
)[
3
],
batchSize
,
nFilters
,
topHeight
,
topWidth
);
return
NULL
;
}
}
int
err
=
gpublas_setup
(
bottom
->
context
->
ctx
);
...
...
@@ -512,19 +564,37 @@ PyGpuArrayObject* corrMM(PyGpuArrayObject *const bottom,
return
NULL
;
}
// Second, gemm
for
(
size_t
g
=
0
;
g
<
numgroups
;
g
++
){
err
=
rgemm
(
cb_fortran
,
cb_no_trans
,
cb_no_trans
,
N_
,
M_
,
K_
,
1
,
&
col
->
ga
,
g
*
group_col_stride
,
N_
,
&
weight
->
ga
,
g
*
group_weight_stride
,
K_
,
0
,
&
top
->
ga
,
n
*
batch_top_stride
+
g
*
group_top_stride
,
N_
);
if
(
unshared
)
{
for
(
size_t
g
=
0
;
g
<
numgroups
;
++
g
)
{
for
(
size_t
reg
=
0
;
reg
<
N_
;
++
reg
){
err
=
rgemm
(
cb_fortran
,
cb_no_trans
,
cb_no_trans
,
1
,
M_
,
K_
,
1
,
&
col
->
ga
,
g
*
group_col_stride
+
reg
,
N_
,
&
weight
->
ga
,
g
*
group_weight_stride
+
reg
*
K_
,
K_
*
N_
,
0
,
&
top
->
ga
,
n
*
batch_top_stride
+
g
*
group_top_stride
+
reg
,
N_
);
if
(
err
!=
GA_NO_ERROR
)
{
PyErr_Format
(
PyExc_RuntimeError
,
"GpuCorrMM forward encountered an error running gemm: %d"
,
err
);
Py_DECREF
(
col
);
return
NULL
;
}
}
}
}
if
(
err
!=
GA_NO_ERROR
)
{
PyErr_Format
(
PyExc_RuntimeError
,
"GpuCorrMM forward encountered an error running gemm: %d"
,
err
);
Py_DECREF
(
col
);
return
NULL
;
else
{
for
(
size_t
g
=
0
;
g
<
numgroups
;
++
g
){
err
=
rgemm
(
cb_fortran
,
cb_no_trans
,
cb_no_trans
,
N_
,
M_
,
K_
,
1
,
&
col
->
ga
,
g
*
group_col_stride
,
N_
,
&
weight
->
ga
,
g
*
group_weight_stride
,
K_
,
0
,
&
top
->
ga
,
n
*
batch_top_stride
+
g
*
group_top_stride
,
N_
);
if
(
err
!=
GA_NO_ERROR
)
{
PyErr_Format
(
PyExc_RuntimeError
,
"GpuCorrMM forward encountered an error running gemm: %d"
,
err
);
Py_DECREF
(
col
);
return
NULL
;
}
}
}
}
}
...
...
@@ -557,19 +627,37 @@ PyGpuArrayObject* corrMM(PyGpuArrayObject *const bottom,
// Note that we accumulate into weight. We do so by setting beta = 0
// for the first iteration and beta = 1 for subsequent ones. (This
// is faster than setting weight to all zeros before the loop.)
for
(
size_t
g
=
0
;
g
<
numgroups
;
g
++
){
err
=
rgemm
(
cb_fortran
,
cb_trans
,
cb_no_trans
,
K_
,
M_
,
N_
,
1
,
&
col
->
ga
,
g
*
group_col_stride
,
N_
,
&
top
->
ga
,
n
*
batch_top_stride
+
g
*
group_top_stride
,
N_
,
(
n
==
0
)
?
0
:
1
,
&
weight
->
ga
,
g
*
group_weight_stride
,
K_
);
if
(
unshared
)
{
for
(
size_t
g
=
0
;
g
<
numgroups
;
++
g
)
{
for
(
size_t
reg
=
0
;
reg
<
N_
;
++
reg
){
err
=
rgemm
(
cb_fortran
,
cb_trans
,
cb_no_trans
,
K_
,
M_
,
1
,
1
,
&
col
->
ga
,
g
*
group_col_stride
+
reg
,
N_
,
&
top
->
ga
,
n
*
batch_top_stride
+
g
*
group_top_stride
+
reg
,
N_
,
(
n
==
0
)
?
0
:
1
,
&
weight
->
ga
,
g
*
group_weight_stride
+
reg
*
K_
,
K_
*
N_
);
if
(
err
!=
GA_NO_ERROR
)
{
PyErr_Format
(
PyExc_RuntimeError
,
"GpuCorrMM grad weights encountered an error running gemm: %d"
,
err
);
Py_DECREF
(
col
);
return
NULL
;
}
}
}
}
if
(
err
!=
GA_NO_ERROR
)
{
PyErr_Format
(
PyExc_RuntimeError
,
"GpuCorrMM grad weights encountered an error running gemm: %d"
,
err
);
Py_DECREF
(
col
);
return
NULL
;
else
{
for
(
size_t
g
=
0
;
g
<
numgroups
;
g
++
){
err
=
rgemm
(
cb_fortran
,
cb_trans
,
cb_no_trans
,
K_
,
M_
,
N_
,
1
,
&
col
->
ga
,
g
*
group_col_stride
,
N_
,
&
top
->
ga
,
n
*
batch_top_stride
+
g
*
group_top_stride
,
N_
,
(
n
==
0
)
?
0
:
1
,
&
weight
->
ga
,
g
*
group_weight_stride
,
K_
);
if
(
err
!=
GA_NO_ERROR
)
{
PyErr_Format
(
PyExc_RuntimeError
,
"GpuCorrMM grad weights encountered an error running gemm: %d"
,
err
);
Py_DECREF
(
col
);
return
NULL
;
}
}
}
}
}
...
...
@@ -590,19 +678,37 @@ PyGpuArrayObject* corrMM(PyGpuArrayObject *const bottom,
// Iterate over batch
for
(
size_t
n
=
0
;
n
<
batchSize
;
n
++
)
{
// gemm into columns
for
(
size_t
g
=
0
;
g
<
numgroups
;
g
++
){
err
=
rgemm
(
cb_fortran
,
cb_no_trans
,
cb_trans
,
N_
,
K_
,
M_
,
1
,
&
top
->
ga
,
n
*
batch_top_stride
+
g
*
group_top_stride
,
N_
,
&
weight
->
ga
,
g
*
group_weight_stride
,
K_
,
0
,
&
col
->
ga
,
g
*
group_col_stride
,
N_
);
if
(
unshared
)
{
for
(
size_t
g
=
0
;
g
<
numgroups
;
++
g
){
for
(
size_t
reg
=
0
;
reg
<
N_
;
++
reg
)
{
err
=
rgemm
(
cb_fortran
,
cb_no_trans
,
cb_trans
,
1
,
K_
,
M_
,
1
,
&
top
->
ga
,
n
*
batch_top_stride
+
g
*
group_top_stride
+
reg
,
N_
,
&
weight
->
ga
,
g
*
group_weight_stride
+
reg
*
K_
,
K_
*
N_
,
0
,
&
col
->
ga
,
g
*
group_col_stride
+
reg
,
N_
);
if
(
err
!=
GA_NO_ERROR
)
{
PyErr_Format
(
PyExc_RuntimeError
,
"GpuCorrMM grad inputs encountered an error running gemm: %d"
,
err
);
Py_DECREF
(
col
);
return
NULL
;
}
}
}
}
if
(
err
!=
GA_NO_ERROR
)
{
PyErr_Format
(
PyExc_RuntimeError
,
"GpuCorrMM grad inputs encountered an error running gemm: %d"
,
err
);
Py_DECREF
(
col
);
return
NULL
;
else
{
for
(
size_t
g
=
0
;
g
<
numgroups
;
++
g
){
err
=
rgemm
(
cb_fortran
,
cb_no_trans
,
cb_trans
,
N_
,
K_
,
M_
,
1
,
&
top
->
ga
,
n
*
batch_top_stride
+
g
*
group_top_stride
,
N_
,
&
weight
->
ga
,
g
*
group_weight_stride
,
K_
,
0
,
&
col
->
ga
,
g
*
group_col_stride
,
N_
);
if
(
err
!=
GA_NO_ERROR
)
{
PyErr_Format
(
PyExc_RuntimeError
,
"GpuCorrMM grad inputs encountered an error running gemm: %d"
,
err
);
Py_DECREF
(
col
);
return
NULL
;
}
}
}
// col2im back to the data
err
=
col2im
(
&
col
->
ga
,
nChannels
,
bottomHeight
,
bottomWidth
,
...
...
theano/gpuarray/dnn.py
浏览文件 @
9592125c
...
...
@@ -3035,6 +3035,9 @@ def local_abstractconv_cudnn_graph(op, context_name, inputs, outputs):
if
version
(
raises
=
False
)
<
6000
and
op
.
filter_dilation
!=
(
1
,
1
):
return
None
if
op
.
unshared
:
return
None
inp1
=
inputs
[
0
]
inp2
=
inputs
[
1
]
...
...
@@ -3129,6 +3132,8 @@ def local_abstractconv_cudnn(node):
ctx
=
infer_context_name
(
*
node
.
inputs
)
if
not
isinstance
(
node
.
inputs
[
0
]
.
type
,
GpuArrayType
):
return
if
node
.
op
.
unshared
:
return
None
if
isinstance
(
node
.
op
,
AbstractConv2d
):
return
local_abstractconv_cudnn_graph
(
node
.
op
,
ctx
,
node
.
inputs
,
node
.
outputs
)
elif
isinstance
(
node
.
op
,
AbstractConv3d
):
...
...
@@ -3143,6 +3148,8 @@ def local_abstractconv_cudnn_alt(node):
if
version
(
raises
=
False
)
<
6000
and
node
.
op
.
filter_dilation
!=
(
1
,
1
):
return
None
if
node
.
op
.
unshared
:
return
None
inp1
=
node
.
inputs
[
0
]
inp2
=
node
.
inputs
[
1
]
...
...
@@ -3349,6 +3356,8 @@ def local_abstractconv_gw_cudnn(node):
ctx
=
infer_context_name
(
*
node
.
inputs
)
if
not
isinstance
(
node
.
inputs
[
0
]
.
type
,
GpuArrayType
):
return
if
node
.
op
.
unshared
:
return
None
if
isinstance
(
node
.
op
,
AbstractConv2d_gradWeights
):
return
local_abstractconv_cudnn_graph
(
node
.
op
,
ctx
,
node
.
inputs
,
node
.
outputs
)
elif
isinstance
(
node
.
op
,
AbstractConv3d_gradWeights
):
...
...
@@ -3360,6 +3369,8 @@ def local_abstractconv_gi_cudnn(node):
ctx
=
infer_context_name
(
*
node
.
inputs
)
if
not
isinstance
(
node
.
inputs
[
0
]
.
type
,
GpuArrayType
):
return
if
node
.
op
.
unshared
:
return
None
if
isinstance
(
node
.
op
,
AbstractConv2d_gradInputs
):
return
local_abstractconv_cudnn_graph
(
node
.
op
,
ctx
,
node
.
inputs
,
node
.
outputs
)
elif
isinstance
(
node
.
op
,
AbstractConv3d_gradInputs
):
...
...
theano/gpuarray/opt.py
浏览文件 @
9592125c
...
...
@@ -1595,12 +1595,17 @@ def local_abstractconv_gemm(node):
border_mode
=
node
.
op
.
border_mode
subsample
=
node
.
op
.
subsample
filter_dilation
=
node
.
op
.
filter_dilation
num_groups
=
node
.
op
.
num_groups
unshared
=
node
.
op
.
unshared
if
((
border_mode
==
'full'
)
and
(
subsample
==
(
1
,
1
))
and
node
.
op
.
num_groups
==
1
):
flip
=
(
slice
(
None
),)
*
(
kern
.
ndim
-
2
)
+
\
(
slice
(
None
,
None
,
-
1
),)
*
2
kern_axes
=
(
1
,
0
)
+
tuple
(
i
for
i
in
range
(
2
,
kern
.
ndim
))
if
((
border_mode
==
'full'
)
and
(
subsample
==
(
1
,
1
))
and
num_groups
==
1
and
not
unshared
):
if
not
node
.
op
.
filter_flip
:
kern
=
kern
[
:,
:,
::
-
1
,
::
-
1
]
kern
=
kern
[
flip
]
# need to dimshuffle the kernel for full convolution
kern
=
kern
.
dimshuffle
(
1
,
0
,
2
,
3
)
kern
=
kern
.
dimshuffle
(
kern_axes
)
# call GpuCorrMM_gradInputs
rval
=
GpuCorrMM_gradInputs
(
'valid'
,
subsample
,
...
...
@@ -1609,13 +1614,14 @@ def local_abstractconv_gemm(node):
else
:
# need to flip the kernel if necessary
if
node
.
op
.
filter_flip
:
kern
=
kern
[
:,
:,
::
-
1
,
::
-
1
]
kern
=
kern
[
flip
]
# By default use GpuCorrMM
rval
=
GpuCorrMM
(
border_mode
,
subsample
,
filter_dilation
,
node
.
op
.
num_groups
)(
gpu_contiguous
(
img
),
gpu_contiguous
(
kern
))
num_groups
,
unshared
)(
gpu_contiguous
(
img
),
gpu_contiguous
(
kern
))
# call GpuCorrMM_gradWeights if good
# (the latter is faster if batchsize * kernelHeight * kernelWidth
...
...
@@ -1628,11 +1634,12 @@ def local_abstractconv_gemm(node):
(
node
.
op
.
kshp
is
not
None
)
and
(
None
not
in
node
.
op
.
kshp
)
and
border_mode
!=
"half"
and
node
.
op
.
num_groups
==
1
):
num_groups
==
1
and
not
unshared
):
# we know the kernel and output size
prod1
=
node
.
op
.
kshp
[
0
]
*
node
.
op
.
kshp
[
1
]
prod1
=
node
.
op
.
kshp
[
0
]
*
node
.
op
.
kshp
[
-
3
]
prod2
=
((
node
.
op
.
imshp
[
-
2
]
-
node
.
op
.
kshp
[
0
]
+
1
)
*
(
node
.
op
.
imshp
[
-
1
]
-
node
.
op
.
kshp
[
1
]
+
1
))
(
node
.
op
.
imshp
[
-
1
]
-
node
.
op
.
kshp
[
-
3
]
+
1
))
if
(
None
not
in
node
.
op
.
imshp
[:
1
]):
# we also know batchsize and input channels
prod1
*=
node
.
op
.
imshp
[
0
]
...
...
@@ -1666,13 +1673,19 @@ def local_abstractconv_gemm_def(node):
border_mode
=
node
.
op
.
border_mode
subsample
=
node
.
op
.
subsample
filter_dilation
=
node
.
op
.
filter_dilation
num_groups
=
node
.
op
.
num_groups
unshared
=
node
.
op
.
unshared
if
node
.
op
.
filter_flip
:
kern
=
kern
[:,
:,
::
-
1
,
::
-
1
]
flip
=
(
slice
(
None
),)
*
(
kern
.
ndim
-
2
)
+
\
(
slice
(
None
,
None
,
-
1
),)
*
2
kern
=
kern
[
flip
]
rval
=
GpuCorrMM
(
border_mode
,
subsample
,
filter_dilation
,
node
.
op
.
num_groups
)(
gpu_contiguous
(
img
),
gpu_contiguous
(
kern
))
num_groups
,
unshared
)(
gpu_contiguous
(
img
),
gpu_contiguous
(
kern
))
return
[
rval
]
...
...
@@ -1690,8 +1703,9 @@ def local_abstractconv_gemm_alt(node):
subsample
=
node
.
op
.
subsample
filter_dilation
=
node
.
op
.
filter_dilation
num_groups
=
node
.
op
.
num_groups
unshared
=
node
.
op
.
unshared
if
border_mode
==
'full'
and
subsample
==
(
1
,
1
)
and
num_groups
==
1
:
if
border_mode
==
'full'
and
subsample
==
(
1
,
1
)
and
num_groups
==
1
and
not
unshared
:
if
not
node
.
op
.
filter_flip
:
kern
=
kern
[:,
:,
::
-
1
,
::
-
1
]
...
...
@@ -1702,7 +1716,7 @@ def local_abstractconv_gemm_alt(node):
gpu_contiguous
(
kern
),
gpu_contiguous
(
img
))
elif
(
border_mode
==
'valid'
and
subsample
==
(
1
,
1
)
and
filter_dilation
==
(
1
,
1
)
and
num_groups
==
1
):
num_groups
==
1
and
not
unshared
):
if
node
.
op
.
filter_flip
:
kern
=
kern
[:,
:,
::
-
1
,
::
-
1
]
...
...
@@ -1896,10 +1910,13 @@ def local_abstractconv_gradweights_gemm(node):
rval
=
GpuCorrMM_gradWeights
(
border_mode
=
node
.
op
.
border_mode
,
subsample
=
node
.
op
.
subsample
,
filter_dilation
=
node
.
op
.
filter_dilation
,
num_groups
=
node
.
op
.
num_groups
)(
num_groups
=
node
.
op
.
num_groups
,
unshared
=
node
.
op
.
unshared
)(
gpu_contiguous
(
img
),
gpu_contiguous
(
topgrad
),
shape
)
flip
=
(
slice
(
None
),)
*
(
rval
.
ndim
-
2
)
+
\
(
slice
(
None
,
None
,
-
1
),)
*
2
if
node
.
op
.
filter_flip
:
rval
=
rval
[
:,
:,
::
-
1
,
::
-
1
]
rval
=
rval
[
flip
]
rval
=
tensor
.
patternbroadcast
(
rval
,
node
.
outputs
[
0
]
.
broadcastable
)
rval
=
as_gpuarray_variable
(
rval
,
context_name
=
ctx
)
return
[
rval
]
...
...
@@ -1918,9 +1935,10 @@ def local_abstractconv_gemm_gradweights_alt(node):
subsample
=
node
.
op
.
subsample
filter_dilation
=
node
.
op
.
filter_dilation
num_groups
=
node
.
op
.
num_groups
unshared
=
node
.
op
.
unshared
if
(
border_mode
==
'valid'
and
subsample
==
(
1
,
1
)
and
filter_dilation
==
(
1
,
1
)
and
num_groups
==
1
):
num_groups
==
1
and
not
unshared
):
rval
=
GpuCorrMM
(
border_mode
,
subsample
,
filter_dilation
)(
...
...
@@ -2001,12 +2019,15 @@ def local_abstractconv_gradinputs_gemm(node):
return
None
if
node
.
op
.
filter_flip
:
kern
=
kern
[:,
:,
::
-
1
,
::
-
1
]
flip
=
(
slice
(
None
),)
*
(
kern
.
ndim
-
2
)
+
\
(
slice
(
None
,
None
,
-
1
),)
*
2
kern
=
kern
[
flip
]
rval
=
GpuCorrMM_gradInputs
(
border_mode
=
node
.
op
.
border_mode
,
subsample
=
node
.
op
.
subsample
,
filter_dilation
=
node
.
op
.
filter_dilation
,
num_groups
=
node
.
op
.
num_groups
)(
num_groups
=
node
.
op
.
num_groups
,
unshared
=
node
.
op
.
unshared
)(
gpu_contiguous
(
kern
),
gpu_contiguous
(
topgrad
),
shape
)
return
[
rval
]
...
...
@@ -2023,8 +2044,9 @@ def local_abstractconv_gradinputs_gemm_alt(node):
subsample
=
node
.
op
.
subsample
filter_dilation
=
node
.
op
.
filter_dilation
num_groups
=
node
.
op
.
num_groups
unshared
=
node
.
op
.
unshared
if
border_mode
==
'valid'
and
subsample
==
(
1
,
1
)
and
num_groups
==
1
:
if
border_mode
==
'valid'
and
subsample
==
(
1
,
1
)
and
num_groups
==
1
and
not
unshared
:
if
not
node
.
op
.
filter_flip
:
kern
=
kern
[:,
:,
::
-
1
,
::
-
1
]
...
...
@@ -2117,8 +2139,9 @@ class ConvMetaOptimizer(LocalMetaOptimizer):
node
.
op
.
border_mode
,
node
.
op
.
subsample
,
node
.
op
.
filter_dilation
)
convdim
=
img
.
ndim
-
2
result
[
kshape
]
=
theano
.
tensor
.
as_tensor_variable
(
node
.
op
.
kshp
[
2
:])
result
[
kshape
]
=
theano
.
tensor
.
as_tensor_variable
(
node
.
op
.
kshp
[
-
convdim
:])
for
(
var
,
shape
)
in
zip
((
img
,
top
),
(
node
.
op
.
imshp
,
tshp
)):
result
[
var
]
=
theano
.
shared
(
np
.
random
.
random
(
shape
)
.
astype
(
var
.
dtype
),
...
...
theano/gpuarray/tests/test_gemmcorr.py
浏览文件 @
9592125c
...
...
@@ -11,7 +11,7 @@ from theano.tensor.nnet.corr import CorrMM, CorrMM_gradWeights, CorrMM_gradInput
from
..type
import
gpuarray_shared_constructor
from
..blas
import
GpuCorrMM
,
GpuCorrMM_gradWeights
,
GpuCorrMM_gradInputs
from
.config
import
mode_with_gpu
,
mode_without_gpu
,
ref_cast
from
theano.tensor.nnet.tests.test_abstract_conv
import
Grouped_conv_noOptim
from
theano.tensor.nnet.tests.test_abstract_conv
import
Grouped_conv_noOptim
,
TestUnsharedConv
class
TestCorrMM
(
unittest
.
TestCase
):
...
...
@@ -20,9 +20,13 @@ class TestCorrMM(unittest.TestCase):
border_mode
=
'valid'
,
filter_dilation
=
(
1
,
1
),
subsample
=
(
1
,
1
),
unshared
=
False
,
verify_grad
=
False
):
inputs_shape
=
[
inputs_shape
[
i
]
for
i
in
(
0
,
3
,
1
,
2
)]
filters_shape
=
[
filters_shape
[
i
]
for
i
in
(
0
,
3
,
1
,
2
)]
if
unshared
:
filters_shape
=
[
filters_shape
[
i
]
for
i
in
(
0
,
1
,
2
,
5
,
3
,
4
)]
else
:
filters_shape
=
[
filters_shape
[
i
]
for
i
in
(
0
,
3
,
1
,
2
)]
inputs_val
=
np
.
random
.
random
(
inputs_shape
)
.
astype
(
config
.
floatX
)
filters_val
=
np
.
random
.
random
(
filters_shape
)
.
astype
(
config
.
floatX
)
...
...
@@ -32,13 +36,15 @@ class TestCorrMM(unittest.TestCase):
conv_ref
=
CorrMM
(
border_mode
=
border_mode
,
filter_dilation
=
filter_dilation
,
subsample
=
subsample
)(
ref_cast
(
inputs
),
ref_cast
(
filters
))
subsample
=
subsample
,
unshared
=
unshared
)(
ref_cast
(
inputs
),
ref_cast
(
filters
))
f_ref
=
theano
.
function
([],
conv_ref
,
mode
=
mode_without_gpu
)
conv
=
GpuCorrMM
(
border_mode
=
border_mode
,
filter_dilation
=
filter_dilation
,
subsample
=
subsample
)(
inputs
,
filters
)
subsample
=
subsample
,
unshared
=
unshared
)(
inputs
,
filters
)
f
=
theano
.
function
([],
conv
,
mode
=
mode_with_gpu
)
res_ref
=
f_ref
()
...
...
@@ -48,7 +54,8 @@ class TestCorrMM(unittest.TestCase):
if
verify_grad
:
utt
.
verify_grad
(
GpuCorrMM
(
border_mode
=
border_mode
,
filter_dilation
=
filter_dilation
,
subsample
=
subsample
),
subsample
=
subsample
,
unshared
=
unshared
),
[
inputs_val
,
filters_val
],
mode
=
mode_with_gpu
)
def
test_valid
(
self
):
...
...
@@ -57,12 +64,6 @@ class TestCorrMM(unittest.TestCase):
self
.
run_conv_valid
(
inputs_shape
=
(
16
,
20
,
12
,
1
),
filters_shape
=
(
10
,
6
,
12
,
1
),
subsample
=
(
2
,
2
))
self
.
run_conv_valid
(
inputs_shape
=
(
16
,
20
,
12
,
1
),
filters_shape
=
(
10
,
6
,
12
,
1
),
subsample
=
(
2
,
2
))
self
.
run_conv_valid
(
inputs_shape
=
(
16
,
20
,
12
,
1
),
filters_shape
=
(
10
,
6
,
12
,
1
),
subsample
=
(
3
,
3
))
self
.
run_conv_valid
(
inputs_shape
=
(
16
,
20
,
12
,
1
),
filters_shape
=
(
10
,
6
,
12
,
1
),
subsample
=
(
3
,
3
))
...
...
@@ -117,6 +118,41 @@ class TestCorrMM(unittest.TestCase):
border_mode
=
border_mode
,
verify_grad
=
True
)
def test_unshared(self):
    """
    Compare GpuCorrMM against the CPU reference for unshared convolution.

    Every case is forwarded to ``self.run_conv_valid`` with
    ``unshared=True`` and the same input shape. The cases vary the
    filter shape together with either ``subsample`` or ``border_mode``;
    options that are not listed for a case are left at the defaults of
    ``run_conv_valid``.

    NOTE(review): shapes appear to be written channels-last and reordered
    inside ``run_conv_valid`` before use -- confirm against that helper.
    """
    inputs_shape = (16, 20, 12, 1)

    # (filters_shape, extra keyword arguments) in execution order.
    cases = (
        ((10, 15, 1, 6, 12, 1), {}),
        ((10, 8, 1, 6, 12, 1), {'subsample': (2, 2)}),
        ((10, 5, 1, 6, 12, 1), {'subsample': (3, 3)}),
        ((10, 5, 1, 6, 12, 1), {'subsample': (3, 2)}),
        ((10, 15, 1, 6, 12, 1), {'subsample': (1, 2)}),
        ((10, 15, 1, 6, 12, 1), {'border_mode': 'valid'}),
        ((10, 21, 13, 6, 12, 1), {'border_mode': 'half'}),
        ((10, 25, 23, 6, 12, 1), {'border_mode': 'full'}),
        ((10, 15, 1, 6, 12, 1), {'border_mode': (0, 0)}),
        ((10, 17, 5, 6, 12, 1), {'border_mode': (1, 2)}),
        ((10, 21, 5, 6, 12, 1), {'border_mode': (3, 2)}),
    )

    for filters_shape, extra in cases:
        self.run_conv_valid(inputs_shape=inputs_shape,
                            filters_shape=filters_shape,
                            unshared=True,
                            **extra)
def
run_gradweight
(
self
,
inputs_shape
,
filters_shape
,
dCdH_shape
,
subsample
=
(
1
,
1
)):
inputs_shape
=
[
inputs_shape
[
i
]
for
i
in
(
0
,
3
,
1
,
2
)]
...
...
@@ -227,3 +263,12 @@ class TestGroupGpuCorr2d(Grouped_conv_noOptim):
conv_op
=
GpuCorrMM
conv_gradw_op
=
GpuCorrMM_gradWeights
conv_gradi_op
=
GpuCorrMM_gradInputs
flip_filter
=
True
is_dnn
=
False
class TestUnsharedGpuCorr2d(TestUnsharedConv):
    """
    GPU variant of the unshared-convolution test suite.

    Inherits the test cases from ``TestUnsharedConv`` and only overrides
    the class attributes below, pointing them at the GPU compilation mode
    and the GpuCorrMM family of ops.

    NOTE(review): how the base class consumes these attributes is not
    visible here -- confirm against ``TestUnsharedConv``.
    """
    # Compilation mode that includes the GPU optimizations.
    mode = mode_with_gpu
    # Forward op, gradient w.r.t. weights, gradient w.r.t. inputs.
    conv2d_op = GpuCorrMM
    conv2d_gradw_op = GpuCorrMM_gradWeights
    conv2d_gradi_op = GpuCorrMM_gradInputs
theano/tensor/nnet/__init__.py
浏览文件 @
9592125c
...
...
@@ -37,7 +37,7 @@ from .abstract_conv import separable_conv2d
def
conv2d
(
input
,
filters
,
input_shape
=
None
,
filter_shape
=
None
,
border_mode
=
'valid'
,
subsample
=
(
1
,
1
),
filter_flip
=
True
,
image_shape
=
None
,
filter_dilation
=
(
1
,
1
),
num_groups
=
1
,
**
kwargs
):
image_shape
=
None
,
filter_dilation
=
(
1
,
1
),
num_groups
=
1
,
unshared
=
False
,
**
kwargs
):
"""
This function will build the symbolic graph for convolving a mini-batch of a
stack of 2D inputs with a set of 2D filters. The implementation is modelled
...
...
@@ -51,18 +51,22 @@ def conv2d(input, filters, input_shape=None, filter_shape=None,
(batch size, input channels, input rows, input columns).
See the optional parameter ``input_shape``.
filters: symbolic 4D tensor
filters: symbolic 4D or 6D tensor
Set of filters used in CNN layer of shape
(output channels, input channels, filter rows, filter columns).
(output channels, input channels, filter rows, filter columns)
for normal convolution and
(output channels, output rows, output columns, input channels,
filter rows, filter columns)
for unshared convolution.
See the optional parameter ``filter_shape``.
input_shape: None, tuple/list of len 4 of int or Constant variable
input_shape: None, tuple/list of len 4 or 6 of int or Constant variable
The shape of the input parameter.
Optional, possibly used to choose an optimal implementation.
You can give ``None`` for any element of the list to specify that this
element is not known at compile time.
filter_shape: None, tuple/list of len 4 of int or Constant variable
filter_shape: None, tuple/list of len 4 or 6 of int or Constant variable
The shape of the filters parameter.
Optional, possibly used to choose an optimal implementation.
You can give ``None`` for any element of the list to specify that this
...
...
@@ -105,6 +109,11 @@ def conv2d(input, filters, input_shape=None, filter_shape=None,
Divides the image, kernel and output tensors into num_groups
separate groups, each of which carries out its convolution separately.
unshared: bool
If true, then unshared or 'locally connected' convolution will be
performed. A different filter will be used for each region of the
input.
kwargs: Any other keyword arguments are accepted for backwards
compatibility, but will be ignored.
...
...
@@ -154,12 +163,12 @@ def conv2d(input, filters, input_shape=None, filter_shape=None,
return
abstract_conv2d
(
input
,
filters
,
input_shape
,
filter_shape
,
border_mode
,
subsample
,
filter_flip
,
filter_dilation
,
num_groups
)
filter_dilation
,
num_groups
,
unshared
)
def
conv2d_transpose
(
input
,
filters
,
output_shape
,
filter_shape
=
None
,
border_mode
=
'valid'
,
input_dilation
=
(
1
,
1
),
filter_flip
=
True
,
filter_dilation
=
(
1
,
1
),
num_groups
=
1
):
filter_flip
=
True
,
filter_dilation
=
(
1
,
1
),
num_groups
=
1
,
unshared
=
False
):
"""
This function will build the symbolic graph for applying a transposed
convolution over a mini-batch of a stack of 2D inputs with a set of 2D
...
...
@@ -215,6 +224,12 @@ def conv2d_transpose(input, filters, output_shape, filter_shape=None,
Divides the image, kernel and output tensors into num_groups
separate groups. Each which carry out convolutions separately
unshared: bool
If true, then unshared or 'locally connected' convolution will be
performed. A different filter will be used for each region of the
input.
Grouped unshared convolution is supported.
Returns
-------
Symbolic 4D tensor
...
...
@@ -242,4 +257,5 @@ def conv2d_transpose(input, filters, output_shape, filter_shape=None,
subsample
=
input_dilation
,
filter_flip
=
filter_flip
,
filter_dilation
=
filter_dilation
,
num_groups
=
num_groups
)
num_groups
=
num_groups
,
unshared
=
unshared
)
theano/tensor/nnet/abstract_conv.py
浏览文件 @
9592125c
...
...
@@ -44,9 +44,14 @@ def get_conv_output_shape(image_shape, kernel_shape,
to: batch size, number of input channels, height and width (and
possibly depth) of the image. None where undefined.
kernel_shape: tuple of int (symbolic or numeric) corresponding to the
kernel shape. Its four (or five) elements must correspond respectively
to: number of output channels, number of input channels, height and
width (and possibly depth) of the kernel. None where undefined.
kernel shape. For a normal convolution, its four (for 2D convolution)
or five (for 3D convolution) elements must correspond respectively to :
number of output channels, number of input channels, height and width
(and possibly depth) of the kernel.
For an unshared 2D convolution, its six elements must correspond
respectively to: number of output channels, height and width of the
output, number of input channels, height and width of the kernel.
None where undefined.
border_mode: string, int (symbolic or numeric) or tuple of int (symbolic
or numeric). If it is a string, it must be 'valid', 'half' or 'full'.
If it is a tuple, its two (or three) elements respectively correspond
...
...
@@ -56,6 +61,8 @@ def get_conv_output_shape(image_shape, kernel_shape,
possibly depth) axis.
filter_dilation: tuple of int (symbolic or numeric). Its two or three
elements correspond respectively to the dilation on height and width axis.
Note - The shape of the convolution output does not depend on the 'unshared'
or the 'num_groups' parameters.
Returns
-------
...
...
@@ -65,7 +72,10 @@ def get_conv_output_shape(image_shape, kernel_shape,
"""
bsize
,
imshp
=
image_shape
[
0
],
image_shape
[
2
:]
nkern
,
kshp
=
kernel_shape
[
0
],
kernel_shape
[
2
:]
convdim
=
len
(
image_shape
)
-
2
nkern
,
kshp
=
kernel_shape
[
0
],
kernel_shape
[
-
convdim
:]
if
filter_dilation
is
None
:
filter_dilation
=
np
.
ones
(
len
(
subsample
),
dtype
=
'int'
)
...
...
@@ -139,7 +149,7 @@ def get_conv_shape_1axis(image_shape, kernel_shape, border_mode,
def
get_conv_gradweights_shape
(
image_shape
,
top_shape
,
border_mode
,
subsample
,
filter_dilation
=
None
,
num_groups
=
1
):
num_groups
=
1
,
unshared
=
False
):
"""
This function tries to compute the kernel shape of convolution gradWeights.
...
...
@@ -169,6 +179,8 @@ def get_conv_gradweights_shape(image_shape, top_shape,
width axis.
num_groups: An int which specifies the number of separate groups to
be divided into.
unshared: Boolean value. If true, unshared convolution will be performed,
where a different filter is applied to each area of the input.
Returns
-------
...
...
@@ -194,7 +206,10 @@ def get_conv_gradweights_shape(image_shape, top_shape,
out_shp
=
tuple
(
get_conv_gradweights_shape_1axis
(
imshp
[
i
],
topshp
[
i
],
border_mode
,
subsample
[
i
],
filter_dilation
[
i
])
for
i
in
range
(
len
(
subsample
)))
return
(
nchan
,
nkern
)
+
out_shp
if
unshared
:
return
(
nchan
,)
+
top_shape
[
2
:]
+
(
nkern
,)
+
out_shp
else
:
return
(
nchan
,
nkern
)
+
out_shp
def
get_conv_gradweights_shape_1axis
(
image_shape
,
top_shape
,
border_mode
,
...
...
@@ -280,6 +295,8 @@ def get_conv_gradinputs_shape(kernel_shape, top_shape,
width axis.
num_groups: An int which specifies the number of separate groups to
be divided into.
Note - The shape of the convolution output does not depend on the 'unshared'
parameter.
Returns
-------
...
...
@@ -289,7 +306,9 @@ def get_conv_gradinputs_shape(kernel_shape, top_shape,
"""
bsize
,
topshp
=
top_shape
[
0
],
top_shape
[
2
:]
nkern
,
kshp
=
kernel_shape
[
1
],
kernel_shape
[
2
:]
convdim
=
len
(
top_shape
)
-
2
nkern
,
kshp
=
kernel_shape
[
1
],
kernel_shape
[
-
convdim
:]
if
filter_dilation
is
None
:
filter_dilation
=
np
.
ones
(
len
(
subsample
),
dtype
=
'int'
)
...
...
@@ -522,7 +541,8 @@ def conv2d(input,
subsample
=
(
1
,
1
),
filter_flip
=
True
,
filter_dilation
=
(
1
,
1
),
num_groups
=
1
):
num_groups
=
1
,
unshared
=
False
):
"""This function will build the symbolic graph for convolving a mini-batch of a
stack of 2D inputs with a set of 2D filters. The implementation is modelled
after Convolutional Neural Networks (CNN).
...
...
@@ -538,7 +558,8 @@ def conv2d(input,
subsample
=
subsample
,
filter_flip
=
filter_flip
,
filter_dilation
=
filter_dilation
,
num_groups
=
num_groups
)
num_groups
=
num_groups
,
unshared
=
unshared
)
return
conv_op
(
input
,
filters
)
...
...
@@ -878,7 +899,6 @@ def conv3d(input,
version until it is released.
"""
input
=
as_tensor_variable
(
input
)
filters
=
as_tensor_variable
(
filters
)
conv_op
=
AbstractConv3d
(
imshp
=
input_shape
,
...
...
@@ -899,7 +919,8 @@ def conv2d_grad_wrt_inputs(output_grad,
subsample
=
(
1
,
1
),
filter_flip
=
True
,
filter_dilation
=
(
1
,
1
),
num_groups
=
1
):
num_groups
=
1
,
unshared
=
False
):
"""Compute conv output gradient w.r.t its inputs
This function builds the symbolic graph for getting the
...
...
@@ -916,10 +937,14 @@ def conv2d_grad_wrt_inputs(output_grad,
will be upsampled or the output gradient of the convolution
whose gradient will be taken with respect to the input of the
convolution.
filters : symbolic 4D tensor
set of filters used in CNN layer of shape (output channels,
input channels, filter rows, filter columns). See the
optional parameter ``filter_shape``.
filters: symbolic 4D or 6D tensor
Set of filters used in CNN layer of shape
(output channels, input channels, filter rows, filter columns)
for normal convolution and
(output channels, output rows, output columns, input channels,
filter rows, filter columns)
for unshared convolution.
See the optional parameter ``filter_shape``.
input_shape : [None/int/Constant] * 2 + [Tensor/int/Constant] * 2
The shape of the input (upsampled) parameter.
A tuple/list of len 4, with the first two dimensions
...
...
@@ -928,8 +953,9 @@ def conv2d_grad_wrt_inputs(output_grad,
Not Optional, since given the output_grad shape
and the subsample values, multiple input_shape may be
plausible.
filter_shape : None or [None/int/Constant] * 4
The shape of the filters parameter. None or a tuple/list of len 4.
filter_shape : None or [None/int/Constant] * (4 or 6)
The shape of the filters parameter. None or a tuple/list of len 4 or a
tuple/list of len 6 (for unshared convolution)
Optional, possibly used to choose an optimal implementation.
You can give ``None`` for any element of the list to specify that
this element is not known at compile time.
...
...
@@ -975,6 +1001,10 @@ def conv2d_grad_wrt_inputs(output_grad,
num_groups : int
Divides the image, kernel and output tensors into num_groups
separate groups. Each which carry out convolutions separately
unshared: bool
If true, then unshared or 'locally connected' convolution will be
performed. A different filter will be used for each region of the
input.
Returns
-------
...
...
@@ -1009,7 +1039,14 @@ def conv2d_grad_wrt_inputs(output_grad,
# checking the type of filter_shape
if
filter_shape
is
not
None
:
for
dim
in
[
0
,
1
,
2
,
3
]:
if
unshared
:
expected_dim
=
6
else
:
expected_dim
=
4
assert
len
(
filter_shape
)
==
expected_dim
for
dim
in
range
(
expected_dim
):
assert
isinstance
(
filter_shape
[
dim
],
(
theano
.
tensor
.
TensorConstant
,
integer_types
,
type
(
None
)))
...
...
@@ -1026,7 +1063,8 @@ def conv2d_grad_wrt_inputs(output_grad,
subsample
=
subsample
,
filter_flip
=
filter_flip
,
filter_dilation
=
filter_dilation
,
num_groups
=
num_groups
)
num_groups
=
num_groups
,
unshared
=
unshared
)
return
grad_input_op
(
filters
,
output_grad
,
input_shape
[
-
2
:])
...
...
@@ -1179,7 +1217,8 @@ def conv2d_grad_wrt_weights(input,
subsample
=
(
1
,
1
),
filter_flip
=
True
,
filter_dilation
=
(
1
,
1
),
num_groups
=
1
):
num_groups
=
1
,
unshared
=
False
):
"""Compute conv output gradient w.r.t its weights
This function will build the symbolic graph for getting the
...
...
@@ -1195,10 +1234,10 @@ def conv2d_grad_wrt_weights(input,
mini-batch of feature map stacks, of shape (batch size, input
channels, input rows, input columns). This is the gradient of
the output of convolution.
filter_shape : [None/int/Constant] *
2
+ [Tensor/int/Constant] * 2
The shape of the filter parameter. A tuple/list of len 4
, with the
first two dimensions being None or int or Constant and the last two
dimensions being Tensor or int or Constant.
filter_shape : [None/int/Constant] * (2 or 4) + [Tensor/int/Constant] * 2
The shape of the filter parameter. A tuple/list of len 4, or of len 6
for unshared convolution, with the first two (four for unshared)
dimensions being None or int or Constant and the last two
dimensions being Tensor or int or Constant.
Not Optional, since given the output_grad shape and
the input_shape, multiple filter_shape may be plausible.
input_shape : None or [None/int/Constant] * 4
...
...
@@ -1247,13 +1286,19 @@ def conv2d_grad_wrt_weights(input,
num_groups : int
Divides the image, kernel and output tensors into num_groups
separate groups. Each which carry out convolutions separately
unshared: bool
If true, then unshared or 'locally connected' convolution will be
performed. A different filter will be used for each region of the
input.
Returns
-------
symbolic 4D tensor
symbolic 4D tensor or 6D tensor
set of feature maps generated by convolutional layer. Tensor
is of shape (batch size, output channels, output rows, output
columns)
columns) for normal convolution and
(output channels, output rows, output columns, input channels,
filter rows, filter columns) for unshared convolution
Notes
-----
...
...
@@ -1274,7 +1319,11 @@ def conv2d_grad_wrt_weights(input,
for
dim
in
[
0
,
1
]:
assert
isinstance
(
filter_shape
[
dim
],
(
theano
.
tensor
.
TensorConstant
,
integer_types
,
type
(
None
)))
for
dim
in
[
2
,
3
]:
if
unshared
:
for
dim
in
[
2
,
3
]:
assert
isinstance
(
filter_shape
[
dim
],
(
theano
.
tensor
.
TensorConstant
,
integer_types
,
type
(
None
)))
for
dim
in
[
-
2
,
-
1
]:
assert
isinstance
(
filter_shape
[
dim
],
(
theano
.
tensor
.
TensorVariable
,
theano
.
tensor
.
TensorConstant
,
integer_types
))
...
...
@@ -1288,7 +1337,7 @@ def conv2d_grad_wrt_weights(input,
# setting the last two dimensions of filter_shape to None, if
# the type of these dimensions is TensorVariable.
numerical_filter_shape
=
list
(
filter_shape
)
for
dim
in
[
2
,
3
]:
for
dim
in
[
-
2
,
-
1
]:
if
isinstance
(
filter_shape
[
dim
],
theano
.
tensor
.
TensorVariable
):
numerical_filter_shape
[
dim
]
=
None
...
...
@@ -1298,7 +1347,8 @@ def conv2d_grad_wrt_weights(input,
subsample
=
subsample
,
filter_flip
=
filter_flip
,
filter_dilation
=
filter_dilation
,
num_groups
=
num_groups
)
num_groups
=
num_groups
,
unshared
=
unshared
)
return
gradWeight_op
(
input
,
output_grad
,
filter_shape
[
-
2
:])
...
...
@@ -1631,7 +1681,8 @@ class BaseAbstractConv(Op):
element is not known at compile time.
imshp is defined w.r.t the forward conv.
kshp: None, tuple/list of len ``(2 + convdim)`` of int or Constant variable
kshp: None, tuple/list of len ``(2 + convdim)`` or ``(2 + 2 * convdim)``
(for unshared) of int or Constant variable
The shape of the filters parameter.
Optional, possibly used to choose an optimal implementation.
You can give ``None`` for any element of the list to specify that this
...
...
@@ -1670,14 +1721,24 @@ class BaseAbstractConv(Op):
filter_dilation: tuple of len ``convdim``
Factor by which to subsample (stride) the input.
Also called dilation factor.
num_groups : int
Divides the image, kernel and output tensors into num_groups
separate groups. Each which carry out convolutions separately
unshared: bool
If true, then unshared or 'locally connected' convolution will be
performed. A different filter will be used for each region of the
input.
"""
check_broadcast
=
False
__props__
=
(
'convdim'
,
'border_mode'
,
'subsample'
,
'filter_flip'
,
'imshp'
,
'kshp'
,
'filter_dilation'
,
'num_groups'
)
'imshp'
,
'kshp'
,
'filter_dilation'
,
'num_groups'
,
'unshared'
)
def
__init__
(
self
,
convdim
,
imshp
=
None
,
kshp
=
None
,
border_mode
=
"valid"
,
subsample
=
None
,
filter_flip
=
True
,
filter_dilation
=
None
,
num_groups
=
1
):
subsample
=
None
,
filter_flip
=
True
,
filter_dilation
=
None
,
num_groups
=
1
,
unshared
=
False
):
self
.
convdim
=
convdim
if
convdim
not
in
(
2
,
3
):
...
...
@@ -1718,7 +1779,10 @@ class BaseAbstractConv(Op):
ValueError
(
"imshp should be None or a tuple of "
"constant int values"
),
sys
.
exc_info
()[
2
])
self
.
kshp
=
tuple
(
kshp
)
if
kshp
else
(
None
,)
*
(
2
+
convdim
)
if
kshp
:
self
.
kshp
=
tuple
(
kshp
)
else
:
self
.
kshp
=
(
None
,)
*
((
2
+
2
*
convdim
)
if
unshared
else
(
2
+
convdim
))
for
kshp_i
in
self
.
kshp
:
if
kshp_i
is
not
None
:
# Components of kshp should be constant or ints
...
...
@@ -1742,6 +1806,10 @@ class BaseAbstractConv(Op):
if
num_groups
<
1
:
raise
ValueError
(
"num_groups must have value greater than zero"
)
self
.
num_groups
=
num_groups
if
unshared
and
self
.
convdim
!=
2
:
raise
NotImplementedError
(
'Unshared convolution not implemented for
%
dD'
%
self
.
convdim
)
self
.
unshared
=
unshared
def
do_constant_folding
(
self
,
node
):
# Disable constant folding since there is no implementation.
...
...
@@ -1768,11 +1836,10 @@ class BaseAbstractConv(Op):
raise
NotImplementedError
(
'flops not implemented for convdim={}'
,
self
.
convdim
)
def
conv
(
self
,
img
,
kern
,
mode
=
"valid"
,
dilation
=
1
,
num_groups
=
1
):
def
conv
(
self
,
img
,
kern
,
mode
=
"valid"
,
dilation
=
1
,
num_groups
=
1
,
unshared
=
False
,
direction
=
"forward"
):
"""
Basic slow Python 2D or 3D convolution for DebugMode
"""
if
not
imported_scipy_signal
:
raise
NotImplementedError
(
"AbstractConv perform requires the python package"
...
...
@@ -1787,18 +1854,29 @@ class BaseAbstractConv(Op):
raise
ValueError
(
'invalid dilation {}, expected {} values'
.
format
(
dilation
,
self
.
convdim
))
if
unshared
and
direction
==
"backprop weights"
:
if
mode
!=
"valid"
:
raise
ValueError
(
'conv mode for unshared backprop wrt weights must be "valid"'
)
# To allow the same format for the call to 'unshared2d' for all three directions,
# the out_shape is shuffled here.
# We do a transpose in the 'perform' function to bring it to the required shape
out_shape
=
(
img
.
shape
[
0
],
kern
.
shape
[
0
],
kern
.
shape
[
2
],
kern
.
shape
[
3
],
img
.
shape
[
2
]
-
kern
.
shape
[
2
]
+
1
,
img
.
shape
[
3
]
-
kern
.
shape
[
3
]
+
1
)
else
:
out_shape
=
get_conv_output_shape
(
img
.
shape
,
kern
.
shape
,
mode
,
[
1
]
*
self
.
convdim
,
dilation
)
out_shape
=
get_conv_output_shape
(
img
.
shape
,
kern
.
shape
,
mode
,
[
1
]
*
self
.
convdim
,
dilation
)
out
=
np
.
zeros
(
out_shape
,
dtype
=
img
.
dtype
)
dil_kern_shp
=
kern
.
shape
[:
-
self
.
convdim
]
+
tuple
(
(
kern
.
shape
[
-
self
.
convdim
+
i
]
-
1
)
*
dilation
[
i
]
+
1
for
i
in
range
(
self
.
convdim
))
dilated_kern
=
np
.
zeros
(
dil_kern_shp
,
dtype
=
kern
.
dtype
)
dilated_kern
[(
slice
(
None
),
slice
(
None
))
+
dilated_kern
[(
slice
(
None
),)
*
(
dilated_kern
.
ndim
-
self
.
convdim
)
+
tuple
(
slice
(
None
,
None
,
dilation
[
i
])
for
i
in
range
(
self
.
convdim
))
]
=
kern
out
=
np
.
zeros
(
out_shape
,
dtype
=
img
.
dtype
)
if
img
.
shape
[
1
]
%
self
.
num_groups
!=
0
:
raise
ValueError
(
...
...
@@ -1823,11 +1901,19 @@ class BaseAbstractConv(Op):
for
g
in
xrange
(
self
.
num_groups
):
for
n
in
xrange
(
output_channel_offset
):
for
im0
in
xrange
(
input_channel_offset
):
# some cast generates a warning here
out
[
b
,
g
*
output_channel_offset
+
n
,
...
]
+=
_convolve2d
(
img
[
b
,
g
*
input_channel_offset
+
im0
,
...
],
dilated_kern
[
g
*
output_channel_offset
+
n
,
im0
,
...
],
1
,
val
,
bval
,
0
)
if
unshared
:
out
[
b
,
g
*
output_channel_offset
+
n
,
...
]
+=
self
.
unshared2d
(
img
[
b
,
g
*
input_channel_offset
+
im0
,
...
],
dilated_kern
[
g
*
output_channel_offset
+
n
,
im0
,
...
],
out_shape
[
2
:],
direction
)
else
:
# some cast generates a warning here
out
[
b
,
g
*
output_channel_offset
+
n
,
...
]
+=
_convolve2d
(
img
[
b
,
g
*
input_channel_offset
+
im0
,
...
],
dilated_kern
[
g
*
output_channel_offset
+
n
,
im0
,
...
],
1
,
val
,
bval
,
0
)
elif
self
.
convdim
==
3
:
if
unshared
:
raise
NotImplementedError
(
'Unshared 3D convolution is not implemented'
)
for
b
in
xrange
(
img
.
shape
[
0
]):
for
g
in
xrange
(
self
.
num_groups
):
for
n
in
xrange
(
output_channel_offset
):
...
...
@@ -1839,6 +1925,35 @@ class BaseAbstractConv(Op):
raise
NotImplementedError
(
'only 2D and 3D convolution are implemented'
)
return
out
def unshared2d(self, inp, kern, out_shape, direction="forward"):
    """
    Slow, pure-Python reference implementation of 2D unshared convolution.

    "Unshared" means that every output position ``(row, col)`` has its own
    filter instead of one filter being slid over the whole input.

    Parameters
    ----------
    inp
        2D array. For ``"forward"`` and ``"backprop weights"`` a window is
        sliced out of it at each position; for ``"backprop inputs"`` it is
        read element by element.
    kern
        For ``"forward"`` and ``"backprop inputs"``: array indexed as
        ``kern[row, col, :, :]`` whose last two axes are the filter window
        (the window is flipped along both axes before use).
        For ``"backprop weights"``: array indexed as ``kern[row, col]`` and
        used as a multiplier for the corresponding input window.
    out_shape
        Shape of the array to allocate and return.
    direction
        One of ``"forward"``, ``"backprop weights"`` or
        ``"backprop inputs"``.

    Returns
    -------
    numpy.ndarray
        Array of shape ``out_shape`` with the dtype of ``inp``.

    Raises
    ------
    NotImplementedError
        If ``self.convdim`` is not 2.
    ValueError
        If ``direction`` is not one of the three supported values.
    """
    if self.convdim != 2:
        raise NotImplementedError(
            'Unshared convolution not implemented for %dD' % self.convdim)

    result = np.zeros(out_shape, dtype=inp.dtype)

    if direction == "forward":
        # One scalar per output position: window of the input times the
        # (flipped) filter that belongs to that position.
        for r in range(out_shape[0]):
            for c in range(out_shape[1]):
                window = inp[r:r + kern.shape[2], c:c + kern.shape[3]]
                flipped = kern[r, c, ::-1, ::-1]
                result[r, c] = np.sum(np.multiply(window, flipped))
        return result

    if direction == "backprop weights":
        # One filter-sized patch per output position: the input window
        # scaled by the value found at that position in ``kern``.
        for r in range(out_shape[0]):
            for c in range(out_shape[1]):
                scale = kern[r, c]
                window = inp[r:r + out_shape[2], c:c + out_shape[3]]
                result[r, c, ...] = scale * window
        return result

    if direction == "backprop inputs":
        # Scatter-add: every position spreads its value, weighted by its
        # own flipped filter, over the window it covers in the result.
        for r in range(kern.shape[0]):
            for c in range(kern.shape[1]):
                contribution = inp[r, c] * kern[r, c, ::-1, ::-1]
                result[r:r + kern.shape[2],
                       c:c + kern.shape[3]] += contribution
        return result

    raise ValueError(
        "unshared2d: invalid value '{}' for 'direction'".format(direction))
class
AbstractConv
(
BaseAbstractConv
):
""" Abstract Op for the forward convolution.
...
...
@@ -1854,14 +1969,16 @@ class AbstractConv(BaseAbstractConv):
subsample
=
None
,
filter_flip
=
True
,
filter_dilation
=
None
,
num_groups
=
1
):
num_groups
=
1
,
unshared
=
False
):
super
(
AbstractConv
,
self
)
.
__init__
(
convdim
=
convdim
,
imshp
=
imshp
,
kshp
=
kshp
,
border_mode
=
border_mode
,
subsample
=
subsample
,
filter_flip
=
filter_flip
,
filter_dilation
=
filter_dilation
,
num_groups
=
num_groups
)
num_groups
=
num_groups
,
unshared
=
unshared
)
def
make_node
(
self
,
img
,
kern
):
# Make sure both inputs are Variables with the same Type
...
...
@@ -1875,8 +1992,14 @@ class AbstractConv(BaseAbstractConv):
if
img
.
type
.
ndim
!=
2
+
self
.
convdim
:
raise
TypeError
(
'img must be
%
dD tensor'
%
(
2
+
self
.
convdim
))
if
kern
.
type
.
ndim
!=
2
+
self
.
convdim
:
raise
TypeError
(
'kern must be
%
dD tensor'
%
(
2
+
self
.
convdim
))
if
self
.
unshared
:
if
kern
.
type
.
ndim
!=
2
+
2
*
self
.
convdim
:
raise
TypeError
(
'kern must be
%
dD tensor for unshared convolution'
%
(
2
+
2
*
self
.
convdim
))
else
:
if
kern
.
type
.
ndim
!=
2
+
self
.
convdim
:
raise
TypeError
(
'kern must be
%
dD tensor'
%
(
2
+
self
.
convdim
))
img
=
assert_shape
(
img
,
self
.
imshp
,
'AbstractConv shape mismatch: shape of '
...
...
@@ -1894,8 +2017,12 @@ class AbstractConv(BaseAbstractConv):
img
,
kern
=
inp
img
=
np
.
asarray
(
img
)
kern
=
np
.
asarray
(
kern
)
dil_kernshp
=
tuple
((
kern
.
shape
[
2
+
i
]
-
1
)
*
self
.
filter_dilation
[
i
]
+
1
dil_kernshp
=
tuple
((
kern
.
shape
[
-
self
.
convdim
+
i
]
-
1
)
*
self
.
filter_dilation
[
i
]
+
1
for
i
in
range
(
self
.
convdim
))
if
self
.
unshared
and
self
.
convdim
!=
2
:
raise
NotImplementedError
(
'Unshared convolution not implemented for
%
dD'
%
self
.
convdim
)
o
,
=
out_
mode
=
self
.
border_mode
...
...
@@ -1922,8 +2049,34 @@ class AbstractConv(BaseAbstractConv):
for
i
in
range
(
self
.
convdim
))]
=
img
img
=
new_img
if
not
self
.
filter_flip
:
kern
=
kern
[(
slice
(
None
),
slice
(
None
))
+
(
slice
(
None
,
None
,
-
1
),)
*
self
.
convdim
]
conv_out
=
self
.
conv
(
img
,
kern
,
mode
=
"valid"
,
dilation
=
self
.
filter_dilation
,
num_groups
=
self
.
num_groups
)
kern
=
kern
[(
slice
(
None
),)
*
(
kern
.
ndim
-
self
.
convdim
)
+
(
slice
(
None
,
None
,
-
1
),)
*
self
.
convdim
]
if
self
.
unshared
:
out_shape
=
get_conv_output_shape
(
img
.
shape
,
kern
.
shape
,
mode
,
self
.
subsample
,
self
.
filter_dilation
)
if
kern
.
shape
[
1
:
1
+
self
.
convdim
]
!=
out_shape
[
2
:
2
+
self
.
convdim
]:
raise
ValueError
(
'Kernel shape {} does not match '
'computed output size {}'
.
format
(
kern
.
shape
[
1
:
1
+
self
.
convdim
],
out_shape
[
2
:
2
+
self
.
convdim
]))
if
any
(
self
.
subsample
[
i
]
>
1
for
i
in
range
(
self
.
convdim
)):
# Expand regions in kernel to correct for subsampling
out_shape
=
get_conv_output_shape
(
img
.
shape
,
kern
.
shape
,
mode
,
(
1
,)
*
self
.
convdim
,
self
.
filter_dilation
)
exp_kern_shp
=
kern
.
shape
[:
1
]
+
out_shape
[
2
:
2
+
self
.
convdim
]
+
\
kern
.
shape
[
1
+
self
.
convdim
:]
exp_kern
=
np
.
zeros
(
exp_kern_shp
,
dtype
=
kern
.
dtype
)
exp_kern
[(
slice
(
None
),)
+
tuple
(
slice
(
None
,
None
,
self
.
subsample
[
i
])
for
i
in
range
(
self
.
convdim
))
+
(
slice
(
None
),)
*
(
self
.
convdim
+
1
)]
=
kern
kern
=
exp_kern
# from (nFilters, out_rows, out_cols, nChannels, kH, kW)
# to (nFilters, nChannels, out_rows, out_cols, kH, kW)
axes_order
=
(
0
,
1
+
self
.
convdim
,)
+
tuple
(
range
(
1
,
1
+
self
.
convdim
))
+
\
tuple
(
range
(
2
+
self
.
convdim
,
kern
.
ndim
))
kern
=
kern
.
transpose
(
axes_order
)
conv_out
=
self
.
conv
(
img
,
kern
,
mode
=
"valid"
,
dilation
=
self
.
filter_dilation
,
num_groups
=
self
.
num_groups
,
unshared
=
self
.
unshared
)
conv_out
=
conv_out
[(
slice
(
None
),
slice
(
None
))
+
tuple
(
slice
(
None
,
None
,
self
.
subsample
[
i
])
for
i
in
range
(
self
.
convdim
))]
...
...
@@ -1934,6 +2087,8 @@ class AbstractConv(BaseAbstractConv):
if
self
.
num_groups
>
1
:
raise
NotImplementedError
(
'Rop not implemented for grouped convolutions'
)
if
self
.
unshared
:
raise
NotImplementedError
(
'Rop not implemented for unshared convolution'
)
rval
=
None
if
eval_points
[
0
]
is
not
None
:
rval
=
self
.
make_node
(
eval_points
[
0
],
inputs
[
1
])
.
outputs
[
0
]
...
...
@@ -1953,8 +2108,12 @@ class AbstractConv(BaseAbstractConv):
imshp
=
[
imshp
[
i
]
if
self
.
imshp
[
i
]
is
None
else
self
.
imshp
[
i
]
for
i
in
range
(
2
+
self
.
convdim
)]
if
self
.
kshp
is
not
None
:
kshp
=
[
kshp
[
i
]
if
self
.
kshp
[
i
]
is
None
else
self
.
kshp
[
i
]
for
i
in
range
(
2
+
self
.
convdim
)]
if
self
.
unshared
:
kshp
=
[
kshp
[
i
]
if
self
.
kshp
[
i
]
is
None
else
self
.
kshp
[
i
]
for
i
in
range
(
2
+
2
*
self
.
convdim
)]
else
:
kshp
=
[
kshp
[
i
]
if
self
.
kshp
[
i
]
is
None
else
self
.
kshp
[
i
]
for
i
in
range
(
2
+
self
.
convdim
)]
res
=
get_conv_output_shape
(
imshp
,
kshp
,
self
.
border_mode
,
self
.
subsample
,
self
.
filter_dilation
)
return
[
res
]
...
...
@@ -1973,14 +2132,16 @@ class AbstractConv2d(AbstractConv):
subsample
=
(
1
,
1
),
filter_flip
=
True
,
filter_dilation
=
(
1
,
1
),
num_groups
=
1
):
num_groups
=
1
,
unshared
=
False
):
super
(
AbstractConv2d
,
self
)
.
__init__
(
convdim
=
2
,
imshp
=
imshp
,
kshp
=
kshp
,
border_mode
=
border_mode
,
subsample
=
subsample
,
filter_flip
=
filter_flip
,
filter_dilation
=
filter_dilation
,
num_groups
=
num_groups
)
num_groups
=
num_groups
,
unshared
=
unshared
)
def
grad
(
self
,
inp
,
grads
):
bottom
,
weights
=
inp
...
...
@@ -1991,14 +2152,16 @@ class AbstractConv2d(AbstractConv):
self
.
subsample
,
self
.
filter_flip
,
self
.
filter_dilation
,
num_groups
=
self
.
num_groups
)(
num_groups
=
self
.
num_groups
,
unshared
=
self
.
unshared
)(
weights
,
top
,
bottom
.
shape
[
-
2
:],
add_assert_shape
=
False
)
d_weights
=
AbstractConv2d_gradWeights
(
self
.
imshp
,
self
.
kshp
,
self
.
border_mode
,
self
.
subsample
,
self
.
filter_flip
,
self
.
filter_dilation
,
num_groups
=
self
.
num_groups
)(
num_groups
=
self
.
num_groups
,
unshared
=
self
.
unshared
)(
bottom
,
top
,
weights
.
shape
[
-
2
:],
add_assert_shape
=
False
)
...
...
@@ -2085,14 +2248,16 @@ class AbstractConv_gradWeights(BaseAbstractConv):
subsample
=
None
,
filter_flip
=
True
,
filter_dilation
=
None
,
num_groups
=
1
):
num_groups
=
1
,
unshared
=
False
):
super
(
AbstractConv_gradWeights
,
self
)
.
__init__
(
convdim
=
convdim
,
imshp
=
imshp
,
kshp
=
kshp
,
border_mode
=
border_mode
,
subsample
=
subsample
,
filter_flip
=
filter_flip
,
filter_dilation
=
filter_dilation
,
num_groups
=
num_groups
)
num_groups
=
num_groups
,
unshared
=
unshared
)
# Update shape/height_width
def
make_node
(
self
,
img
,
topgrad
,
shape
,
add_assert_shape
=
True
):
...
...
@@ -2115,8 +2280,12 @@ class AbstractConv_gradWeights(BaseAbstractConv):
'image does not match given imshp.'
)
shape
=
as_tensor_variable
(
shape
)
broadcastable
=
[
topgrad
.
broadcastable
[
1
],
img
.
broadcastable
[
1
]]
+
([
False
]
*
self
.
convdim
)
if
self
.
unshared
:
broadcastable
=
[
topgrad
.
broadcastable
[
1
]]
+
([
False
]
*
self
.
convdim
)
+
\
[
img
.
broadcastable
[
1
]]
+
([
False
]
*
self
.
convdim
)
else
:
broadcastable
=
[
topgrad
.
broadcastable
[
1
],
img
.
broadcastable
[
1
]]
+
([
False
]
*
self
.
convdim
)
output
=
img
.
type
.
clone
(
broadcastable
=
broadcastable
)()
return
Apply
(
self
,
[
img
,
topgrad
,
shape
],
[
output
])
...
...
@@ -2134,6 +2303,9 @@ class AbstractConv_gradWeights(BaseAbstractConv):
'invalid border_mode {}, which must be either '
'"valid", "full", "half", an integer or a tuple of'
' integers'
.
format
(
mode
))
if
self
.
unshared
and
self
.
convdim
!=
2
:
raise
NotImplementedError
(
'Unshared convolution not implemented for
%
dD'
%
self
.
convdim
)
dil_shape
=
tuple
((
shape
[
i
]
-
1
)
*
self
.
filter_dilation
[
i
]
+
1
for
i
in
range
(
self
.
convdim
))
...
...
@@ -2166,9 +2338,7 @@ class AbstractConv_gradWeights(BaseAbstractConv):
topgrad
=
new_topgrad
axes_order
=
(
1
,
0
)
+
tuple
(
range
(
2
,
self
.
convdim
+
2
))
flip_filters
=
((
slice
(
None
),
slice
(
None
))
+
(
slice
(
None
,
None
,
-
1
),)
*
self
.
convdim
)
topgrad
=
topgrad
.
transpose
(
axes_order
)[
flip_filters
]
topgrad
=
topgrad
.
transpose
(
axes_order
)
img
=
img
.
transpose
(
axes_order
)
def
correct_for_groups
(
mat
):
...
...
@@ -2182,15 +2352,36 @@ class AbstractConv_gradWeights(BaseAbstractConv):
if
self
.
num_groups
>
1
:
img
=
correct_for_groups
(
img
)
kern
=
self
.
conv
(
img
,
topgrad
,
mode
=
"valid"
,
num_groups
=
self
.
num_groups
)
if
self
.
unshared
:
flip_kern
=
((
slice
(
None
),)
*
(
2
+
self
.
convdim
)
+
(
slice
(
None
,
None
,
-
1
),)
*
self
.
convdim
)
kern
=
self
.
conv
(
img
,
topgrad
,
mode
=
"valid"
,
num_groups
=
self
.
num_groups
,
unshared
=
True
,
direction
=
"backprop weights"
)
if
any
(
self
.
subsample
[
i
]
>
1
for
i
in
range
(
self
.
convdim
)):
sub_slice
=
(
slice
(
None
),)
*
2
+
\
tuple
(
slice
(
None
,
None
,
self
.
subsample
[
i
])
for
i
in
range
(
0
,
self
.
convdim
))
+
\
(
slice
(
None
),)
*
self
.
convdim
kern
=
kern
[
sub_slice
]
# from (nChannels, nFilters, out_rows, out_cols, kH, kW)
# to (nFilters, out_rows, out_cols, nChannels, kH, kW)
kern_axes
=
(
1
,)
+
tuple
(
range
(
2
,
self
.
convdim
+
2
))
+
(
0
,)
+
\
tuple
(
range
(
self
.
convdim
+
2
,
kern
.
ndim
))
else
:
flip_topgrad
=
flip_kern
=
((
slice
(
None
),
slice
(
None
))
+
(
slice
(
None
,
None
,
-
1
),)
*
self
.
convdim
)
topgrad
=
topgrad
[
flip_topgrad
]
kern
=
self
.
conv
(
img
,
topgrad
,
mode
=
"valid"
,
num_groups
=
self
.
num_groups
)
kern_axes
=
(
1
,
0
)
+
tuple
(
range
(
2
,
self
.
convdim
+
2
))
kern
=
kern
.
transpose
(
kern_axes
)
if
any
(
self
.
filter_dilation
[
i
]
>
1
for
i
in
range
(
self
.
convdim
)):
kern
=
kern
[(
slice
(
None
),
slice
(
None
)
)
+
kern
=
kern
[(
slice
(
None
),
)
*
(
kern
.
ndim
-
self
.
convdim
)
+
tuple
(
slice
(
None
,
None
,
self
.
filter_dilation
[
i
])
for
i
in
range
(
self
.
convdim
))]
if
self
.
filter_flip
:
kern
=
kern
.
transpose
(
axes_order
)[
flip_filters
]
else
:
kern
=
kern
.
transpose
(
axes_order
)
kern
=
kern
[
flip_kern
]
o
[
0
]
=
node
.
outputs
[
0
]
.
type
.
filter
(
kern
)
def
connection_pattern
(
self
,
node
):
...
...
@@ -2203,15 +2394,24 @@ class AbstractConv_gradWeights(BaseAbstractConv):
# from the shapes of inputs.
imshp
=
input_shapes
[
0
]
topshp
=
input_shapes
[
1
]
kshp
=
self
.
kshp
[:]
if
self
.
kshp
is
not
None
else
[
None
]
*
(
2
+
self
.
convdim
)
if
self
.
num_groups
>
1
:
fallback_kshp
=
([
topshp
[
1
],
imshp
[
1
]
//
self
.
num_groups
]
+
if
self
.
kshp
:
kshp
=
self
.
kshp
else
:
if
self
.
unshared
:
kshp
=
[
None
]
*
(
2
+
2
*
self
.
convdim
)
else
:
kshp
=
[
None
]
*
(
2
+
self
.
convdim
)
if
self
.
unshared
:
fallback_kshp
=
([
topshp
[
1
],
topshp
[
2
],
topshp
[
3
],
imshp
[
1
]
//
self
.
num_groups
]
+
[
node
.
inputs
[
2
][
i
]
for
i
in
range
(
self
.
convdim
)])
kshp
=
[
fallback_kshp
[
i
]
if
kshp
[
i
]
is
None
else
kshp
[
i
]
for
i
in
range
(
2
+
2
*
self
.
convdim
)]
else
:
fallback_kshp
=
([
topshp
[
1
],
imshp
[
1
]]
+
fallback_kshp
=
([
topshp
[
1
],
imshp
[
1
]
//
self
.
num_groups
]
+
[
node
.
inputs
[
2
][
i
]
for
i
in
range
(
self
.
convdim
)])
kshp
=
[
fallback_kshp
[
i
]
if
kshp
[
i
]
is
None
else
kshp
[
i
]
for
i
in
range
(
2
+
self
.
convdim
)]
kshp
=
[
fallback_kshp
[
i
]
if
kshp
[
i
]
is
None
else
kshp
[
i
]
for
i
in
range
(
2
+
self
.
convdim
)]
return
[
kshp
]
...
...
@@ -2232,14 +2432,16 @@ class AbstractConv2d_gradWeights(AbstractConv_gradWeights):
subsample
=
(
1
,
1
),
filter_flip
=
True
,
filter_dilation
=
(
1
,
1
),
num_groups
=
1
):
num_groups
=
1
,
unshared
=
False
):
super
(
AbstractConv2d_gradWeights
,
self
)
.
__init__
(
convdim
=
2
,
imshp
=
imshp
,
kshp
=
kshp
,
border_mode
=
border_mode
,
subsample
=
subsample
,
filter_flip
=
filter_flip
,
filter_dilation
=
filter_dilation
,
num_groups
=
num_groups
)
num_groups
=
num_groups
,
unshared
=
unshared
)
def
grad
(
self
,
inp
,
grads
):
bottom
,
top
=
inp
[:
2
]
...
...
@@ -2249,16 +2451,18 @@ class AbstractConv2d_gradWeights(AbstractConv_gradWeights):
self
.
subsample
,
self
.
filter_flip
,
self
.
filter_dilation
,
self
.
num_groups
)(
weights
,
top
,
bottom
.
shape
[
-
2
:])
self
.
num_groups
,
self
.
unshared
)(
weights
,
top
,
bottom
.
shape
[
-
2
:])
d_top
=
AbstractConv2d
(
self
.
imshp
,
self
.
kshp
,
self
.
border_mode
,
self
.
subsample
,
self
.
filter_flip
,
self
.
filter_dilation
,
self
.
num_groups
)(
bottom
,
weights
)
self
.
num_groups
,
self
.
unshared
)(
bottom
,
weights
)
# Make sure that the broadcastable pattern of the inputs is used
# for the gradients, even if the grad opts are not able to infer
# that the dimensions are broadcastable.
...
...
@@ -2350,14 +2554,16 @@ class AbstractConv_gradInputs(BaseAbstractConv):
subsample
=
None
,
filter_flip
=
True
,
filter_dilation
=
None
,
num_groups
=
1
):
num_groups
=
1
,
unshared
=
False
):
super
(
AbstractConv_gradInputs
,
self
)
.
__init__
(
convdim
=
convdim
,
imshp
=
imshp
,
kshp
=
kshp
,
border_mode
=
border_mode
,
subsample
=
subsample
,
filter_flip
=
filter_flip
,
filter_dilation
=
filter_dilation
,
num_groups
=
num_groups
)
num_groups
=
num_groups
,
unshared
=
unshared
)
# Update shape/height_width
def
make_node
(
self
,
kern
,
topgrad
,
shape
,
add_assert_shape
=
True
):
...
...
@@ -2370,10 +2576,19 @@ class AbstractConv_gradInputs(BaseAbstractConv):
broadcastable
=
topgrad
.
broadcastable
)
topgrad
=
gtype
.
filter_variable
(
topgrad
)
if
kern
.
type
.
ndim
!=
2
+
self
.
convdim
:
raise
TypeError
(
'kern must be
%
dD tensor'
%
(
2
+
self
.
convdim
))
if
self
.
unshared
:
if
self
.
convdim
!=
2
:
raise
NotImplementedError
(
'Unshared convolution not implemented for
%
dD'
%
self
.
convdim
)
elif
kern
.
type
.
ndim
!=
2
+
2
*
self
.
convdim
:
raise
TypeError
(
'kern must be
%
dD tensor for unshared convolution'
%
(
2
+
2
*
self
.
convdim
))
else
:
if
kern
.
type
.
ndim
!=
2
+
self
.
convdim
:
raise
TypeError
(
'kern must be
%
dD tensor'
%
(
2
+
self
.
convdim
))
if
topgrad
.
type
.
ndim
!=
2
+
self
.
convdim
:
raise
TypeError
(
'topgrad must be
%
dD tensor'
%
(
2
+
self
.
convdim
))
raise
TypeError
(
'topgrad must be
%
dD tensor'
%
(
2
+
self
.
convdim
))
if
add_assert_shape
:
kern
=
assert_shape
(
kern
,
self
.
kshp
,
...
...
@@ -2386,7 +2601,7 @@ class AbstractConv_gradInputs(BaseAbstractConv):
False
]
+
([
False
]
*
self
.
convdim
)
else
:
broadcastable
=
[
topgrad
.
type
.
broadcastable
[
0
],
kern
.
type
.
broadcastable
[
1
]]
+
([
False
]
*
self
.
convdim
)
kern
.
type
.
broadcastable
[
-
self
.
convdim
-
1
]]
+
([
False
]
*
self
.
convdim
)
output
=
kern
.
type
.
clone
(
broadcastable
=
broadcastable
)()
return
Apply
(
self
,
[
kern
,
topgrad
,
shape
],
[
output
])
...
...
@@ -2403,9 +2618,12 @@ class AbstractConv_gradInputs(BaseAbstractConv):
'invalid border_mode {}, which must be either '
'"valid", "full", "half", an integer or a tuple of'
' integers'
.
format
(
mode
))
if
self
.
unshared
and
self
.
convdim
!=
2
:
raise
NotImplementedError
(
'Unshared convolution not implemented for
%
dD'
%
self
.
convdim
)
imshp
=
self
.
imshp
[:]
if
self
.
imshp
is
not
None
else
[
None
]
*
(
2
+
self
.
convdim
)
fallback_imshp
=
([
topgrad
.
shape
[
0
],
kern
.
shape
[
1
]]
+
fallback_imshp
=
([
topgrad
.
shape
[
0
],
kern
.
shape
[
-
self
.
convdim
-
1
]]
+
[
shape
[
i
]
for
i
in
range
(
self
.
convdim
)])
imshp
=
[
fallback_imshp
[
i
]
if
imshp
[
i
]
is
None
else
imshp
[
i
]
for
i
in
range
(
2
+
self
.
convdim
)]
...
...
@@ -2419,8 +2637,9 @@ class AbstractConv_gradInputs(BaseAbstractConv):
'has shape {}'
.
format
(
tuple
(
expected_topgrad_shape
),
tuple
(
topgrad
.
shape
)))
dil_kernshp
=
tuple
((
kern
.
shape
[
i
+
2
]
-
1
)
*
self
.
filter_dilation
[
i
]
+
1
dil_kernshp
=
tuple
((
kern
.
shape
[
-
self
.
convdim
+
i
]
-
1
)
*
self
.
filter_dilation
[
i
]
+
1
for
i
in
range
(
self
.
convdim
))
pad
=
(
0
,)
*
self
.
convdim
if
mode
==
"full"
:
pad
=
tuple
(
dil_kernshp
[
i
]
-
1
for
i
in
range
(
self
.
convdim
))
...
...
@@ -2438,25 +2657,54 @@ class AbstractConv_gradInputs(BaseAbstractConv):
for
i
in
range
(
self
.
convdim
))]
=
topgrad
topgrad
=
new_topgrad
axes_order
=
(
1
,
0
)
+
tuple
(
range
(
2
,
self
.
convdim
+
2
))
flip_filters
=
((
slice
(
None
),
slice
(
None
))
+
(
slice
(
None
,
None
,
-
1
),)
*
self
.
convdim
)
if
self
.
unshared
:
# Expand regions in kernel to correct for subsampling
exp_kern_shp
=
kern
.
shape
[:
1
]
+
topgrad
.
shape
[
2
:]
+
kern
.
shape
[
1
+
self
.
convdim
:]
exp_kern
=
np
.
zeros
(
exp_kern_shp
,
dtype
=
kern
.
dtype
)
exp_kern
[(
slice
(
None
),)
+
tuple
(
slice
(
None
,
None
,
self
.
subsample
[
i
])
for
i
in
range
(
self
.
convdim
))
+
(
slice
(
None
),)
*
(
self
.
convdim
+
1
)]
=
kern
kern
=
exp_kern
def
correct_for_groups
(
mat
):
mshp0
=
mat
.
shape
[
0
]
//
self
.
num_groups
mshp1
=
mat
.
shape
[
1
]
*
self
.
num_groups
mshp1
=
mat
.
shape
[
-
self
.
convdim
-
1
]
*
self
.
num_groups
mat
=
mat
.
reshape
((
self
.
num_groups
,
mshp0
)
+
mat
.
shape
[
1
:])
mat
=
mat
.
transpose
((
1
,
0
,
2
)
+
tuple
(
range
(
3
,
3
+
self
.
convdim
)))
mat
=
mat
.
reshape
((
mshp0
,
mshp1
)
+
mat
.
shape
[
-
self
.
convdim
:])
if
self
.
unshared
:
# for 2D -> (1, 2, 3, 0, 4, 5, 6)
mat
=
mat
.
transpose
(
tuple
(
range
(
1
,
2
+
self
.
convdim
))
+
(
0
,)
+
tuple
(
range
(
2
+
self
.
convdim
,
mat
.
ndim
)))
mat
=
mat
.
reshape
((
mshp0
,)
+
mat
.
shape
[
1
:
1
+
self
.
convdim
]
+
(
mshp1
,)
+
mat
.
shape
[
-
self
.
convdim
:])
else
:
mat
=
mat
.
transpose
((
1
,
0
,
2
)
+
tuple
(
range
(
3
,
3
+
self
.
convdim
)))
mat
=
mat
.
reshape
((
mshp0
,
mshp1
)
+
mat
.
shape
[
-
self
.
convdim
:])
return
mat
kern
=
correct_for_groups
(
kern
)
kern
=
kern
.
transpose
(
axes_order
)
if
self
.
filter_flip
:
topgrad
=
topgrad
[
flip_filters
]
img
=
self
.
conv
(
topgrad
,
kern
,
mode
=
"full"
,
dilation
=
self
.
filter_dilation
,
num_groups
=
self
.
num_groups
)
if
self
.
filter_flip
:
img
=
img
[
flip_filters
]
if
self
.
unshared
:
# from (nFilters, out_rows, out_cols, nChannels, kH, kW)
# to (nChannels, nFilters, out_rows, out_cols, kH, kW)
axes_order
=
(
1
+
self
.
convdim
,
0
,)
+
tuple
(
range
(
1
,
1
+
self
.
convdim
))
+
\
tuple
(
range
(
2
+
self
.
convdim
,
kern
.
ndim
))
kern
=
kern
.
transpose
(
axes_order
)
if
not
self
.
filter_flip
:
kern
=
kern
[(
slice
(
None
),)
*
(
kern
.
ndim
-
self
.
convdim
)
+
(
slice
(
None
,
None
,
-
1
),)
*
self
.
convdim
]
img
=
self
.
conv
(
topgrad
,
kern
,
mode
=
"full"
,
dilation
=
self
.
filter_dilation
,
num_groups
=
self
.
num_groups
,
unshared
=
True
,
direction
=
"backprop inputs"
)
else
:
axes_order
=
(
1
,
0
)
+
tuple
(
range
(
2
,
2
+
self
.
convdim
))
kern
=
kern
.
transpose
(
axes_order
)
flip_filters
=
((
slice
(
None
),
slice
(
None
))
+
(
slice
(
None
,
None
,
-
1
),)
*
self
.
convdim
)
if
self
.
filter_flip
:
topgrad
=
topgrad
[
flip_filters
]
img
=
self
.
conv
(
topgrad
,
kern
,
mode
=
"full"
,
dilation
=
self
.
filter_dilation
,
num_groups
=
self
.
num_groups
)
if
self
.
filter_flip
:
img
=
img
[
flip_filters
]
if
any
(
p
>
0
for
p
in
pad
):
img
=
img
[(
slice
(
None
),
slice
(
None
))
+
tuple
(
slice
(
pad
[
i
],
img
.
shape
[
i
+
2
]
-
pad
[
i
])
...
...
@@ -2475,10 +2723,10 @@ class AbstractConv_gradInputs(BaseAbstractConv):
topshp
=
input_shapes
[
1
]
imshp
=
self
.
imshp
[:]
if
self
.
imshp
is
not
None
else
[
None
]
*
(
2
+
self
.
convdim
)
if
self
.
num_groups
>
1
:
fallback_imshp
=
([
topshp
[
0
],
kshp
[
1
]
*
self
.
num_groups
]
+
fallback_imshp
=
([
topshp
[
0
],
kshp
[
-
self
.
convdim
-
1
]
*
self
.
num_groups
]
+
[
node
.
inputs
[
2
][
i
]
for
i
in
range
(
self
.
convdim
)])
else
:
fallback_imshp
=
([
topshp
[
0
],
kshp
[
1
]]
+
fallback_imshp
=
([
topshp
[
0
],
kshp
[
-
self
.
convdim
-
1
]]
+
[
node
.
inputs
[
2
][
i
]
for
i
in
range
(
self
.
convdim
)])
imshp
=
[
fallback_imshp
[
i
]
if
imshp
[
i
]
is
None
else
imshp
[
i
]
for
i
in
range
(
2
+
self
.
convdim
)]
...
...
@@ -2503,14 +2751,16 @@ class AbstractConv2d_gradInputs(AbstractConv_gradInputs):
subsample
=
(
1
,
1
),
filter_flip
=
True
,
filter_dilation
=
(
1
,
1
),
num_groups
=
1
):
num_groups
=
1
,
unshared
=
False
):
super
(
AbstractConv2d_gradInputs
,
self
)
.
__init__
(
convdim
=
2
,
imshp
=
imshp
,
kshp
=
kshp
,
border_mode
=
border_mode
,
subsample
=
subsample
,
filter_flip
=
filter_flip
,
filter_dilation
=
filter_dilation
,
num_groups
=
num_groups
)
num_groups
=
num_groups
,
unshared
=
unshared
)
def
grad
(
self
,
inp
,
grads
):
weights
,
top
=
inp
[:
2
]
...
...
@@ -2520,7 +2770,8 @@ class AbstractConv2d_gradInputs(AbstractConv_gradInputs):
self
.
subsample
,
self
.
filter_flip
,
self
.
filter_dilation
,
self
.
num_groups
)(
self
.
num_groups
,
self
.
unshared
)(
bottom
,
top
,
weights
.
shape
[
-
2
:])
d_top
=
AbstractConv2d
(
self
.
imshp
,
self
.
kshp
,
...
...
@@ -2528,7 +2779,8 @@ class AbstractConv2d_gradInputs(AbstractConv_gradInputs):
self
.
subsample
,
self
.
filter_flip
,
self
.
filter_dilation
,
self
.
num_groups
)(
bottom
,
weights
)
self
.
num_groups
,
self
.
unshared
)(
bottom
,
weights
)
# Make sure that the broadcastable pattern of the inputs is used
# for the gradients, even if the grad opts are not able to infer
# that the dimensions are broadcastable.
...
...
theano/tensor/nnet/c_code/corr_gemm.c
浏览文件 @
9592125c
...
...
@@ -107,7 +107,8 @@ PyArrayObject* corrMM(PyArrayObject* bottom,
const
int
dilW
=
1
,
const
int
padH
=
0
,
const
int
padW
=
0
,
const
int
numgroups
=
1
)
const
int
numgroups
=
1
,
const
int
unshared
=
0
)
{
if
(
PyArray_NDIM
(
bottom
)
!=
4
)
{
...
...
@@ -120,9 +121,9 @@ PyArrayObject* corrMM(PyArrayObject* bottom,
return
NULL
;
}
if
(
PyArray_NDIM
(
weight
)
!=
4
)
if
(
PyArray_NDIM
(
weight
)
!=
(
unshared
?
6
:
4
)
)
{
PyErr_
SetString
(
PyExc_ValueError
,
"CorrMM requires weight of 4D"
);
PyErr_
Format
(
PyExc_ValueError
,
"CorrMM requires weight of %%dD"
,
unshared
?
6
:
4
);
return
NULL
;
}
if
(
PyArray_TYPE
(
weight
)
!=
%
(
float_typenum
)
s
)
...
...
@@ -152,11 +153,12 @@ PyArrayObject* corrMM(PyArrayObject* bottom,
const
int
nChannels
=
PyArray_DIMS
(
bottom
)[
1
];
const
int
bottomHeight
=
PyArray_DIMS
(
bottom
)[
2
];
const
int
bottomWidth
=
PyArray_DIMS
(
bottom
)[
3
];
// weights: (nFilters, nChannels, rows, columns)
// normal weights: (nFilters, nChannels, rows, columns)
// unshared weights: (nFilters, topHeight, topWidth, nChannels, rows, columns)
const
int
nFilters
=
PyArray_DIMS
(
weight
)[
0
];
const
int
kH
=
PyArray_DIMS
(
weight
)[
2
];
const
int
kW
=
PyArray_DIMS
(
weight
)[
3
];
if
(
nChannels
!=
(
PyArray_DIMS
(
weight
)[
1
]
*
numgroups
)
)
{
const
int
kH
=
PyArray_DIMS
(
weight
)[
unshared
?
4
:
2
];
const
int
kW
=
PyArray_DIMS
(
weight
)[
unshared
?
5
:
3
];
if
(
nChannels
!=
PyArray_DIMS
(
weight
)[
unshared
?
3
:
1
]
*
numgroups
)
{
PyErr_SetString
(
PyExc_ValueError
,
"CorrMM images and kernel must have the same stack size
\n
"
);
return
NULL
;
...
...
@@ -179,22 +181,57 @@ PyArrayObject* corrMM(PyArrayObject* bottom,
const
int
topHeight
=
_CONV_FLOORDIV_X
(
topHeightNoDH
,
dH
)
+
1
;
const
int
topWidth
=
_CONV_FLOORDIV_X
(
topWidthNoDW
,
dW
)
+
1
;
#undef _CONV_FLOORDIV
if
(
batchSize
!=
PyArray_DIMS
(
top
)[
0
]
||
nFilters
!=
PyArray_DIMS
(
top
)[
1
]
||
topHeight
!=
PyArray_DIMS
(
top
)[
2
]
||
topWidth
!=
PyArray_DIMS
(
top
)[
3
])
{
PyErr_Format
(
PyExc_ValueError
,
"CorrMM shape inconsistency:
\n
"
" bottom shape: %%d %%d %%d %%d
\n
"
" weight shape: %%d %%d %%d %%d
\n
"
" top shape: %%ld %%ld %%ld %%ld (expected %%d %%d %%d %%d)
\n
"
,
batchSize
,
nChannels
,
bottomHeight
,
bottomWidth
,
nFilters
,
nChannels
/
numgroups
,
kH
,
kW
,
PyArray_DIMS
(
top
)[
0
],
PyArray_DIMS
(
top
)[
1
],
PyArray_DIMS
(
top
)[
2
],
PyArray_DIMS
(
top
)[
3
],
batchSize
,
nFilters
,
topHeight
,
topWidth
);
return
NULL
;
if
(
unshared
)
{
if
(
topHeight
!=
PyArray_DIMS
(
weight
)[
1
]
||
topWidth
!=
PyArray_DIMS
(
weight
)[
2
])
{
PyErr_Format
(
PyExc_ValueError
,
"CorrMM regions in kernel must match output regions:
\n
"
" bottom shape: %%d %%d %%d %%d
\n
"
" weight shape: %%d %%ld %%ld %%d %%d %%d"
" (expected %%d %%d %%d %%d %%d %%d)
\n
"
" top shape(calculated): %%d %%d %%d %%d
\n
"
,
batchSize
,
nChannels
,
bottomHeight
,
bottomWidth
,
nFilters
,
PyArray_DIMS
(
weight
)[
1
],
PyArray_DIMS
(
weight
)[
2
],
nChannels
/
numgroups
,
kH
,
kW
,
nFilters
,
topHeight
,
topWidth
,
nChannels
/
numgroups
,
kH
,
kW
,
batchSize
,
nFilters
,
topHeight
,
topWidth
);
return
NULL
;
}
if
(
batchSize
!=
PyArray_DIMS
(
top
)[
0
]
||
nFilters
!=
PyArray_DIMS
(
top
)[
1
]
||
topHeight
!=
PyArray_DIMS
(
top
)[
2
]
||
topWidth
!=
PyArray_DIMS
(
top
)[
3
])
{
PyErr_Format
(
PyExc_ValueError
,
"CorrMM shape inconsistency:
\n
"
" bottom shape: %%d %%d %%d %%d
\n
"
" weight shape: %%d %%d %%d %%d %%d %%d
\n
"
" top shape: %%ld %%ld %%ld %%ld (expected %%d %%d %%d %%d)
\n
"
,
batchSize
,
nChannels
,
bottomHeight
,
bottomWidth
,
nFilters
,
topHeight
,
topWidth
,
nChannels
/
numgroups
,
kH
,
kW
,
PyArray_DIMS
(
top
)[
0
],
PyArray_DIMS
(
top
)[
1
],
PyArray_DIMS
(
top
)[
2
],
PyArray_DIMS
(
top
)[
3
],
batchSize
,
nFilters
,
topHeight
,
topWidth
);
return
NULL
;
}
}
else
{
if
(
batchSize
!=
PyArray_DIMS
(
top
)[
0
]
||
nFilters
!=
PyArray_DIMS
(
top
)[
1
]
||
topHeight
!=
PyArray_DIMS
(
top
)[
2
]
||
topWidth
!=
PyArray_DIMS
(
top
)[
3
])
{
PyErr_Format
(
PyExc_ValueError
,
"CorrMM shape inconsistency:
\n
"
" bottom shape: %%d %%d %%d %%d
\n
"
" weight shape: %%d %%d %%d %%d
\n
"
" top shape: %%ld %%ld %%ld %%ld (expected %%d %%d %%d %%d)
\n
"
,
batchSize
,
nChannels
,
bottomHeight
,
bottomWidth
,
nFilters
,
nChannels
/
numgroups
,
kH
,
kW
,
PyArray_DIMS
(
top
)[
0
],
PyArray_DIMS
(
top
)[
1
],
PyArray_DIMS
(
top
)[
2
],
PyArray_DIMS
(
top
)[
3
],
batchSize
,
nFilters
,
topHeight
,
topWidth
);
return
NULL
;
}
}
// Create temporary columns
int
max_threads
=
%
(
omp_get_max_threads
)
s
;
...
...
@@ -230,8 +267,10 @@ PyArrayObject* corrMM(PyArrayObject* bottom,
const
int
group_col_stride
=
(
K_
*
N_
);
const
int
group_weight_stride
=
(
PyArray_STRIDES
(
weight
)[
0
]
*
nFilters
/
numgroups
)
/%
(
n_bytes
)
f
;
const
int
M_
=
nFilters
/
numgroups
;
const
int
one_int
=
1
;
const
%
(
c_float_type
)
s
one
=
1
.
0
;
const
%
(
c_float_type
)
s
zero
=
0
.
0
;
const
int
ldw
=
(
K_
*
N_
);
char
NTrans
=
'N'
;
char
Trans
=
'T'
;
PyArrayObject
*
output
;
...
...
@@ -266,15 +305,30 @@ PyArrayObject* corrMM(PyArrayObject* bottom,
im2col
((
%
(
float_type
)
s
*
)
PyArray_DATA
(
bottom
)
+
n
*
batch_bottom_stride
,
nChannels
,
bottomHeight
,
bottomWidth
,
kH
,
kW
,
dilH
,
dilW
,
padH
,
padW
,
dH
,
dW
,
(
%
(
float_type
)
s
*
)
PyArray_DATA
(
col
)
+
tid
*
col_stride
);
for
(
int
g
=
0
;
g
<
numgroups
;
++
g
){
// Second, gemm
%
(
gemm
)
s
(
&
NTrans
,
&
NTrans
,
&
N_
,
&
M_
,
&
K_
,
&
one
,
(
%
(
float_type
)
s
*
)
PyArray_DATA
(
col
)
+
tid
*
col_stride
+
g
*
group_col_stride
,
&
N_
,
(
%
(
float_type
)
s
*
)
PyArray_DATA
(
weight
)
+
g
*
group_weight_stride
,
&
K_
,
&
zero
,
(
%
(
float_type
)
s
*
)
PyArray_DATA
(
top
)
+
n
*
batch_top_stride
+
g
*
group_top_stride
,
&
N_
);
// Second, gemm
if
(
unshared
)
{
for
(
int
g
=
0
;
g
<
numgroups
;
++
g
)
{
for
(
int
reg
=
0
;
reg
<
N_
;
++
reg
)
{
%
(
gemv
)
s
(
&
Trans
,
&
K_
,
&
M_
,
&
one
,
(
%
(
float_type
)
s
*
)
PyArray_DATA
(
weight
)
+
g
*
group_weight_stride
+
reg
*
K_
,
&
ldw
,
(
%
(
float_type
)
s
*
)
PyArray_DATA
(
col
)
+
tid
*
col_stride
+
g
*
group_col_stride
+
reg
,
&
N_
,
&
zero
,
(
%
(
float_type
)
s
*
)
PyArray_DATA
(
top
)
+
n
*
batch_top_stride
+
g
*
group_top_stride
+
reg
,
&
N_
);
}
}
}
else
{
for
(
int
g
=
0
;
g
<
numgroups
;
++
g
){
// Second, gemm
%
(
gemm
)
s
(
&
NTrans
,
&
NTrans
,
&
N_
,
&
M_
,
&
K_
,
&
one
,
(
%
(
float_type
)
s
*
)
PyArray_DATA
(
col
)
+
tid
*
col_stride
+
g
*
group_col_stride
,
&
N_
,
(
%
(
float_type
)
s
*
)
PyArray_DATA
(
weight
)
+
g
*
group_weight_stride
,
&
K_
,
&
zero
,
(
%
(
float_type
)
s
*
)
PyArray_DATA
(
top
)
+
n
*
batch_top_stride
+
g
*
group_top_stride
,
&
N_
);
}
}
}
// Restore to previous blas threads
...
...
@@ -316,7 +370,10 @@ PyArrayObject* corrMM(PyArrayObject* bottom,
output
=
weight
;
npy_intp
weight_dim
[
2
];
weight_dim
[
0
]
=
(
npy_intp
)
max_threads
;
weight_dim
[
1
]
=
(
npy_intp
)(
M_
*
K_
*
numgroups
);
if
(
unshared
)
weight_dim
[
1
]
=
(
npy_intp
)(
M_
*
N_
*
K_
*
numgroups
);
else
weight_dim
[
1
]
=
(
npy_intp
)(
M_
*
K_
*
numgroups
);
PyArrayObject
*
local_weight
=
(
PyArrayObject
*
)
PyArray_ZEROS
(
2
,
weight_dim
,
PyArray_TYPE
(
weight
),
0
);
...
...
@@ -341,26 +398,46 @@ PyArrayObject* corrMM(PyArrayObject* bottom,
im2col
((
%
(
float_type
)
s
*
)
PyArray_DATA
(
bottom
)
+
n
*
batch_bottom_stride
,
nChannels
,
bottomHeight
,
bottomWidth
,
kH
,
kW
,
dilH
,
dilW
,
padH
,
padW
,
dH
,
dW
,
(
%
(
float_type
)
s
*
)
PyArray_DATA
(
col
)
+
tid
*
col_stride
);
for
(
int
g
=
0
;
g
<
numgroups
;
++
g
){
// Second, gemm
// Note that we accumulate into weight. We do so by setting beta = 0
// for the first iteration and beta = 1 for subsequent ones. (This
// is faster than setting weight to all zeros before the loop.)
%
(
gemm
)
s
(
&
Trans
,
&
NTrans
,
&
K_
,
&
M_
,
&
N_
,
&
one
,
(
%
(
float_type
)
s
*
)
PyArray_DATA
(
col
)
+
tid
*
col_stride
+
g
*
group_col_stride
,
&
N_
,
(
%
(
float_type
)
s
*
)
PyArray_DATA
(
top
)
+
g
*
group_top_stride
+
n
*
batch_top_stride
,
&
N_
,
(
n
==
0
)
?
&
zero
:
&
one
,
(
%
(
float_type
)
s
*
)
PyArray_DATA
(
local_weight
)
+
g
*
group_weight_stride
+
tid
*
weight_dim
[
1
],
&
K_
);
// Second, gemm
// Note that we accumulate into weight. We do so by setting beta = 0
// for the first iteration and beta = 1 for subsequent ones. (This
// is faster than setting weight to all zeros before the loop.)
if
(
unshared
)
{
for
(
int
g
=
0
;
g
<
numgroups
;
++
g
)
{
for
(
int
reg
=
0
;
reg
<
N_
;
++
reg
)
{
%
(
gemm
)
s
(
&
Trans
,
&
NTrans
,
&
K_
,
&
M_
,
&
one_int
,
&
one
,
(
%
(
float_type
)
s
*
)
PyArray_DATA
(
col
)
+
tid
*
col_stride
+
g
*
group_col_stride
+
reg
,
&
N_
,
(
%
(
float_type
)
s
*
)
PyArray_DATA
(
top
)
+
g
*
group_top_stride
+
n
*
batch_top_stride
+
reg
,
&
N_
,
(
n
==
0
)
?
&
zero
:
&
one
,
(
%
(
float_type
)
s
*
)
PyArray_DATA
(
local_weight
)
+
g
*
group_weight_stride
+
reg
*
K_
+
tid
*
weight_dim
[
1
],
&
ldw
);
}
}
}
else
{
for
(
int
g
=
0
;
g
<
numgroups
;
++
g
){
// Second, gemm
// Note that we accumulate into weight. We do so by setting beta = 0
// for the first iteration and beta = 1 for subsequent ones. (This
// is faster than setting weight to all zeros before the loop.)
%
(
gemm
)
s
(
&
Trans
,
&
NTrans
,
&
K_
,
&
M_
,
&
N_
,
&
one
,
(
%
(
float_type
)
s
*
)
PyArray_DATA
(
col
)
+
tid
*
col_stride
+
g
*
group_col_stride
,
&
N_
,
(
%
(
float_type
)
s
*
)
PyArray_DATA
(
top
)
+
g
*
group_top_stride
+
n
*
batch_top_stride
,
&
N_
,
(
n
==
0
)
?
&
zero
:
&
one
,
(
%
(
float_type
)
s
*
)
PyArray_DATA
(
local_weight
)
+
g
*
group_weight_stride
+
tid
*
weight_dim
[
1
],
&
K_
);
}
}
}
// Restore to previous blas threads
%
(
blas_set_num_threads
)
s
(
blas_threads_saved
);
//aggregate weights
memset
((
%
(
float_type
)
s
*
)
PyArray_DATA
(
weight
),
0
,
M_
*
K_
*
sizeof
(
%
(
float_type
)
s
));
memset
((
%
(
float_type
)
s
*
)
PyArray_DATA
(
weight
),
0
,
weight_dim
[
1
]
*
sizeof
(
%
(
float_type
)
s
));
/*
* Put index "j" into outer loop to get the
* correct result when openmp is used.
...
...
@@ -416,15 +493,29 @@ PyArrayObject* corrMM(PyArrayObject* bottom,
%
(
omp_flags
)
s
for
(
int
n
=
0
;
n
<
batchSize
;
++
n
)
{
int
tid
=
%
(
omp_get_thread_num
)
s
;
for
(
int
g
=
0
;
g
<
numgroups
;
++
g
){
// gemm into columns
%
(
gemm
)
s
(
&
NTrans
,
&
Trans
,
&
N_
,
&
K_
,
&
M_
,
&
one
,
(
%
(
float_type
)
s
*
)
PyArray_DATA
(
top
)
+
g
*
group_top_stride
+
n
*
batch_top_stride
,
&
N_
,
(
%
(
float_type
)
s
*
)
PyArray_DATA
(
weight
)
+
g
*
group_weight_stride
,
&
K_
,
&
zero
,
(
%
(
float_type
)
s
*
)
PyArray_DATA
(
col
)
+
tid
*
col_stride
+
g
*
group_col_stride
,
&
N_
);
if
(
unshared
)
{
for
(
int
g
=
0
;
g
<
numgroups
;
++
g
){
for
(
int
reg
=
0
;
reg
<
N_
;
++
reg
){
%
(
gemm
)
s
(
&
NTrans
,
&
Trans
,
&
one_int
,
&
K_
,
&
M_
,
&
one
,
(
%
(
float_type
)
s
*
)
PyArray_DATA
(
top
)
+
g
*
group_top_stride
+
n
*
batch_top_stride
+
reg
,
&
N_
,
(
%
(
float_type
)
s
*
)
PyArray_DATA
(
weight
)
+
g
*
group_weight_stride
+
reg
*
K_
,
&
ldw
,
&
zero
,
(
%
(
float_type
)
s
*
)
PyArray_DATA
(
col
)
+
tid
*
col_stride
+
g
*
group_col_stride
+
reg
,
&
N_
);
}
}
}
else
{
for
(
int
g
=
0
;
g
<
numgroups
;
++
g
)
{
%
(
gemm
)
s
(
&
NTrans
,
&
Trans
,
&
N_
,
&
K_
,
&
M_
,
&
one
,
(
%
(
float_type
)
s
*
)
PyArray_DATA
(
top
)
+
g
*
group_top_stride
+
n
*
batch_top_stride
,
&
N_
,
(
%
(
float_type
)
s
*
)
PyArray_DATA
(
weight
)
+
g
*
group_weight_stride
,
&
K_
,
&
zero
,
(
%
(
float_type
)
s
*
)
PyArray_DATA
(
col
)
+
tid
*
col_stride
+
g
*
group_col_stride
,
&
N_
);
}
}
// col2im back to the data
col2im
((
%
(
float_type
)
s
*
)
PyArray_DATA
(
col
)
+
tid
*
col_stride
,
nChannels
,
bottomHeight
,
bottomWidth
,
...
...
@@ -474,4 +565,3 @@ PyArrayObject* corrMM(PyArrayObject* bottom,
// in here output is just aliased to one of bottom, weights, or top.
return
output
;
}
theano/tensor/nnet/corr.py
浏览文件 @
9592125c
...
...
@@ -8,7 +8,7 @@ import theano
from
theano
import
Apply
from
theano
import
gof
from
theano.gof
import
ParamsType
,
EnumList
from
theano.scalar
import
int64
from
theano.scalar
import
int64
,
int8
from
theano.tensor
import
as_tensor_variable
,
TensorType
from
theano.tensor.nnet.abstract_conv
import
get_conv_output_shape
from
theano.tensor
import
blas_headers
...
...
@@ -42,9 +42,11 @@ class BaseCorrMM(gof.OpenMPOp):
Perform dilated correlation (default: (1,1))
num_groups
Perform grouped convolutions (default: 1)
unshared
Perform unshared correlation (default: False)
"""
check_broadcast
=
False
__props__
=
(
'border_mode'
,
'subsample'
,
'filter_dilation'
,
'num_groups'
)
__props__
=
(
'border_mode'
,
'subsample'
,
'filter_dilation'
,
'num_groups'
,
'unshared'
)
_direction
=
None
...
...
@@ -54,10 +56,10 @@ class BaseCorrMM(gof.OpenMPOp):
dH
=
int64
,
dW
=
int64
,
dilH
=
int64
,
dilW
=
int64
,
padH
=
int64
,
padW
=
int64
,
num_groups
=
int64
)
num_groups
=
int64
,
unshared
=
int8
)
def
__init__
(
self
,
border_mode
=
"valid"
,
subsample
=
(
1
,
1
),
filter_dilation
=
(
1
,
1
),
num_groups
=
1
,
openmp
=
None
):
filter_dilation
=
(
1
,
1
),
num_groups
=
1
,
unshared
=
False
,
openmp
=
None
):
super
(
BaseCorrMM
,
self
)
.
__init__
(
openmp
=
openmp
)
if
isinstance
(
border_mode
,
integer_types
):
if
border_mode
<
0
:
...
...
@@ -85,6 +87,7 @@ class BaseCorrMM(gof.OpenMPOp):
raise
ValueError
(
"filter_dilation must have two elements"
)
self
.
subsample
=
tuple
(
subsample
)
self
.
filter_dilation
=
tuple
(
filter_dilation
)
self
.
unshared
=
unshared
if
not
theano
.
config
.
blas
.
ldflags
:
# Theano will use a NumPy C implementation of [sd]gemm_ instead.
...
...
@@ -130,12 +133,13 @@ class BaseCorrMM(gof.OpenMPOp):
padW
=
property
(
lambda
self
:
self
.
pad
[
1
])
def
__str__
(
self
):
return
'
%
s{
%
s,
%
s,
%
s,
%
s}'
%
(
return
'
%
s{
%
s,
%
s,
%
s,
%
s
%
s
}'
%
(
self
.
__class__
.
__name__
,
self
.
border_mode
,
str
(
self
.
subsample
),
str
(
self
.
filter_dilation
),
str
(
self
.
num_groups
))
str
(
self
.
num_groups
),
str
(
self
.
unshared
))
@staticmethod
def
as_common_dtype
(
in1
,
in2
):
...
...
@@ -179,7 +183,7 @@ class BaseCorrMM(gof.OpenMPOp):
def
c_code_cache_version
(
self
):
# raise this whenever modifying any of the support_code_files
return
(
7
,
self
.
openmp
,
blas_header_version
())
return
(
9
,
self
.
openmp
,
blas_header_version
())
def
c_support_code_apply
(
self
,
node
,
nodename
):
# REMEMBER TO RAISE c_code_cache_version when changing any of
...
...
@@ -189,12 +193,14 @@ class BaseCorrMM(gof.OpenMPOp):
assert
dtype
in
(
'float32'
,
'float64'
)
if
dtype
==
'float32'
:
sub
[
'gemm'
]
=
'sgemm_'
sub
[
'gemv'
]
=
'sgemv_'
sub
[
'float_type'
]
=
'npy_float'
sub
[
'float_typenum'
]
=
'NPY_FLOAT'
sub
[
'n_bytes'
]
=
4
sub
[
'c_float_type'
]
=
'float'
else
:
sub
[
'gemm'
]
=
'dgemm_'
sub
[
'gemv'
]
=
'dgemv_'
sub
[
'float_type'
]
=
'npy_double'
sub
[
'float_typenum'
]
=
'NPY_DOUBLE'
sub
[
'n_bytes'
]
=
8
...
...
@@ -287,6 +293,7 @@ class BaseCorrMM(gof.OpenMPOp):
int padH =
%(params)
s->padH;
int padW =
%(params)
s->padW;
int numgroups =
%(params)
s->num_groups;
int unshared =
%(params)
s->unshared;
PyArrayObject * bottom =
%(bottom)
s;
PyArrayObject * weights =
%(weights)
s;
...
...
@@ -310,13 +317,17 @@ class BaseCorrMM(gof.OpenMPOp):
break;
}
int wdim, odim;
wdim = unshared ? 6 : 4;
odim = 4; //Can be set to 6 later for unshared backprop wrt weights
// Obtain or infer kernel width and height
// (we need to know it early to be able to handle auto-padding)
int kH, kW, dil_kH, dil_kW;
if (direction != 1) {
// weight is an input variable, we can just read its shape
kH = PyArray_DIMS(weights)[2];
kW = PyArray_DIMS(weights)[
3
];
kH = PyArray_DIMS(weights)[
wdim-
2];
kW = PyArray_DIMS(weights)[
wdim-1
];
}
else {
if (
%(height)
s != -1) {
...
...
@@ -370,28 +381,45 @@ class BaseCorrMM(gof.OpenMPOp):
}
// Infer output shape
npy_intp out_dim[4];
npy_intp out_dim[6];
out_dim[4] = out_dim[5] = 0; //Only used for unshared backprop wrt weights
switch(direction) {
case 0: // forward pass
// output is top: (batchsize, num_filters, height, width)
// height and width: top = (bottom + 2*pad - ((weight-1)*dil + 1)) / sample + 1
out_dim[0] = (npy_intp)PyArray_DIMS(bottom)[0];
out_dim[1] = (npy_intp)PyArray_DIMS(weights)[0];
out_dim[2] = (npy_intp)((PyArray_DIMS(bottom)[2] + 2*padH - ((PyArray_DIMS(weights)[2]-1)*dilH + 1)) / dH + 1);
out_dim[3] = (npy_intp)((PyArray_DIMS(bottom)[3] + 2*padW - ((PyArray_DIMS(weights)[
3
]-1)*dilW + 1)) / dW + 1);
out_dim[2] = (npy_intp)((PyArray_DIMS(bottom)[2] + 2*padH - ((PyArray_DIMS(weights)[
wdim-
2]-1)*dilH + 1)) / dH + 1);
out_dim[3] = (npy_intp)((PyArray_DIMS(bottom)[3] + 2*padW - ((PyArray_DIMS(weights)[
wdim-1
]-1)*dilW + 1)) / dW + 1);
if (out_dim[0] < 0 || out_dim[1] < 0 || out_dim[2] <= 0 || out_dim[3] <= 0)
{
PyErr_Format(PyExc_ValueError,
"CorrMM: impossible output shape
\\
n"
" bottom shape:
%%
ld x
%%
ld x
%%
ld x
%%
ld
\\
n"
" weights shape:
%%
ld x
%%
ld x
%%
ld x
%%
ld
\\
n"
" top shape:
%%
ld x
%%
ld x
%%
ld x
%%
ld
\\
n",
(long int)PyArray_DIMS(bottom)[0], (long int)PyArray_DIMS(bottom)[1],
(long int)PyArray_DIMS(bottom)[2], (long int)PyArray_DIMS(bottom)[3],
(long int)PyArray_DIMS(weights)[0], (long int)PyArray_DIMS(weights)[1],
(long int)PyArray_DIMS(weights)[2], (long int)PyArray_DIMS(weights)[3],
(long int)out_dim[0], (long int)out_dim[1], (long int)out_dim[2],
(long int)out_dim[3]);
if (unshared) {
PyErr_Format(PyExc_ValueError,
"CorrMM: impossible output shape
\\
n"
" bottom shape:
%%
ld x
%%
ld x
%%
ld x
%%
ld
\\
n"
" weights shape:
%%
ld x
%%
ld x
%%
ld x
%%
ld x
%%
ld x
%%
ld
\\
n"
" top shape:
%%
ld x
%%
ld x
%%
ld x
%%
ld
\\
n",
(long int)PyArray_DIMS(bottom)[0], (long int)PyArray_DIMS(bottom)[1],
(long int)PyArray_DIMS(bottom)[2], (long int)PyArray_DIMS(bottom)[3],
(long int)PyArray_DIMS(weights)[0], (long int)PyArray_DIMS(weights)[1],
(long int)PyArray_DIMS(weights)[2], (long int)PyArray_DIMS(weights)[3],
(long int)PyArray_DIMS(weights)[4], (long int)PyArray_DIMS(weights)[5],
(long int)out_dim[0], (long int)out_dim[1], (long int)out_dim[2],
(long int)out_dim[3]);
}
else {
PyErr_Format(PyExc_ValueError,
"CorrMM: impossible output shape
\\
n"
" bottom shape:
%%
ld x
%%
ld x
%%
ld x
%%
ld
\\
n"
" weights shape:
%%
ld x
%%
ld x
%%
ld x
%%
ld
\\
n"
" top shape:
%%
ld x
%%
ld x
%%
ld x
%%
ld
\\
n",
(long int)PyArray_DIMS(bottom)[0], (long int)PyArray_DIMS(bottom)[1],
(long int)PyArray_DIMS(bottom)[2], (long int)PyArray_DIMS(bottom)[3],
(long int)PyArray_DIMS(weights)[0], (long int)PyArray_DIMS(weights)[1],
(long int)PyArray_DIMS(weights)[2], (long int)PyArray_DIMS(weights)[3],
(long int)out_dim[0], (long int)out_dim[1], (long int)out_dim[2],
(long int)out_dim[3]);
}
%(fail)
s
}
break;
...
...
@@ -399,46 +427,90 @@ class BaseCorrMM(gof.OpenMPOp):
// output is weights: (num_filters, num_channels, height, width)
// height and width: weights = (bottom + 2*pad - (top - 1) * sample - 1) / dil + 1
out_dim[0] = (npy_intp)PyArray_DIMS(top)[1];
out_dim[1] = (npy_intp)PyArray_DIMS(bottom)[1] / numgroups;
out_dim[2] = (npy_intp)kH; // already inferred further above
out_dim[3] = (npy_intp)kW; // how convenient
if (out_dim[0] < 0 || out_dim[1] < 0 || out_dim[2] <= 0 || out_dim[3] <= 0)
{
PyErr_Format(PyExc_ValueError,
"CorrMM backprop wrt. weights: impossible output shape
\\
n"
" bottom shape:
%%
ld x
%%
ld x
%%
ld x
%%
ld
\\
n"
" weights shape:
%%
ld x
%%
ld x
%%
ld x
%%
ld
\\
n"
" top shape:
%%
ld x
%%
ld x
%%
ld x
%%
ld
\\
n",
(long int)PyArray_DIMS(bottom)[0], (long int)PyArray_DIMS(bottom)[1],
(long int)PyArray_DIMS(bottom)[2], (long int)PyArray_DIMS(bottom)[3],
(long int)out_dim[0], (long int)out_dim[1], (long int)out_dim[2],
(long int)out_dim[3],
(long int)PyArray_DIMS(top)[0], (long int)PyArray_DIMS(top)[1],
(long int)PyArray_DIMS(top)[2], (long int)PyArray_DIMS(top)[3]);
%(fail)
s
if (unshared){
odim = 6;
out_dim[1] = (npy_intp)PyArray_DIMS(top)[2];
out_dim[2] = (npy_intp)PyArray_DIMS(top)[3];
}
out_dim[wdim-3] = (npy_intp)PyArray_DIMS(bottom)[1] / numgroups;
out_dim[wdim-2] = (npy_intp)kH; // already inferred further above
out_dim[wdim-1] = (npy_intp)kW; // how convenient
if (unshared) {
if (out_dim[0] < 0 || out_dim[1] <= 0 || out_dim[2] <= 0 || out_dim[3] < 0
|| out_dim[4] <= 0 || out_dim[5] <= 0){
PyErr_Format(PyExc_ValueError,
"CorrMM backprop wrt. weights: impossible output shape
\\
n"
" bottom shape:
%%
ld x
%%
ld x
%%
ld x
%%
ld
\\
n"
" weights shape:
%%
ld x
%%
ld x
%%
ld x
%%
ld x
%%
ld x
%%
ld
\\
n"
" top shape:
%%
ld x
%%
ld x
%%
ld x
%%
ld
\\
n",
(long int)PyArray_DIMS(bottom)[0], (long int)PyArray_DIMS(bottom)[1],
(long int)PyArray_DIMS(bottom)[2], (long int)PyArray_DIMS(bottom)[3],
(long int)out_dim[0], (long int)out_dim[1], (long int)out_dim[2],
(long int)out_dim[3], (long int)out_dim[4], (long int)out_dim[5],
(long int)PyArray_DIMS(top)[0], (long int)PyArray_DIMS(top)[1],
(long int)PyArray_DIMS(top)[2], (long int)PyArray_DIMS(top)[3]);
%(fail)
s
}
}
else {
if (out_dim[0] < 0 || out_dim[1] < 0 || out_dim[2] <= 0 || out_dim[3] <= 0)
{
PyErr_Format(PyExc_ValueError,
"CorrMM backprop wrt. weights: impossible output shape
\\
n"
" bottom shape:
%%
ld x
%%
ld x
%%
ld x
%%
ld
\\
n"
" weights shape:
%%
ld x
%%
ld x
%%
ld x
%%
ld
\\
n"
" top shape:
%%
ld x
%%
ld x
%%
ld x
%%
ld
\\
n",
(long int)PyArray_DIMS(bottom)[0], (long int)PyArray_DIMS(bottom)[1],
(long int)PyArray_DIMS(bottom)[2], (long int)PyArray_DIMS(bottom)[3],
(long int)out_dim[0], (long int)out_dim[1], (long int)out_dim[2],
(long int)out_dim[3],
(long int)PyArray_DIMS(top)[0], (long int)PyArray_DIMS(top)[1],
(long int)PyArray_DIMS(top)[2], (long int)PyArray_DIMS(top)[3]);
%(fail)
s
}
}
break;
case 2: // backprop wrt. inputs
// output is bottom: (batchsize, num_channels, height, width)
// height and width: bottom = (top - 1) * sample + (weights-1)*dil + 1 - 2*pad
out_dim[0] = (npy_intp)PyArray_DIMS(top)[0];
out_dim[1] = (npy_intp)PyArray_DIMS(weights)[1] * numgroups;
out_dim[2] = (npy_intp)((
%(height)
s != -1) ?
%(height)
s : (PyArray_DIMS(top)[2] - 1) * dH + (PyArray_DIMS(weights)[2]-1)*dilH + 1 - 2*padH);
out_dim[3] = (npy_intp)((
%(width)
s != -1) ?
%(width)
s : (PyArray_DIMS(top)[3] - 1) * dW + (PyArray_DIMS(weights)[3]-1)*dilW + 1 - 2*padW);
if (out_dim[0] < 0 || out_dim[1] < 0 || out_dim[2] <= 0 || out_dim[3] <= 0)
{
PyErr_Format(PyExc_ValueError,
"CorrMM backprop wrt. inputs: impossible output shape
\\
n"
" bottom shape:
%%
ld x
%%
ld x
%%
ld x
%%
ld
\\
n"
" weights shape:
%%
ld x
%%
ld x
%%
ld x
%%
ld
\\
n"
" top shape:
%%
ld x
%%
ld x
%%
ld x
%%
ld
\\
n",
(long int)out_dim[0], (long int)out_dim[1], (long int)out_dim[2],
(long int)out_dim[3],
(long int)PyArray_DIMS(weights)[0], (long int)PyArray_DIMS(weights)[1],
(long int)PyArray_DIMS(weights)[2], (long int)PyArray_DIMS(weights)[3],
(long int)PyArray_DIMS(top)[0], (long int)PyArray_DIMS(top)[1],
(long int)PyArray_DIMS(top)[2], (long int)PyArray_DIMS(top)[3]);
%(fail)
s
out_dim[1] = (npy_intp)PyArray_DIMS(weights)[wdim-3] * numgroups;
out_dim[2] = (npy_intp)((
%(height)
s != -1) ?
%(height)
s : (PyArray_DIMS(top)[2] - 1) * dH + (PyArray_DIMS(weights)[wdim-2]-1)*dilH + 1 - 2*padH);
out_dim[3] = (npy_intp)((
%(width)
s != -1) ?
%(width)
s : (PyArray_DIMS(top)[3] - 1) * dW + (PyArray_DIMS(weights)[wdim-1]-1)*dilW + 1 - 2*padW);
if (unshared) {
if (out_dim[0] < 0 || out_dim[1] < 0 || out_dim[2] <= 0 || out_dim[3] <= 0)
{
PyErr_Format(PyExc_ValueError,
"CorrMM backprop wrt. inputs: impossible output shape
\\
n"
" bottom shape:
%%
ld x
%%
ld x
%%
ld x
%%
ld
\\
n"
" weights shape:
%%
ld x
%%
ld x
%%
ld x
%%
ld x
%%
ld x
%%
ld
\\
n"
" top shape:
%%
ld x
%%
ld x
%%
ld x
%%
ld
\\
n",
(long int)out_dim[0], (long int)out_dim[1], (long int)out_dim[2],
(long int)out_dim[3],
(long int)PyArray_DIMS(weights)[0], (long int)PyArray_DIMS(weights)[1],
(long int)PyArray_DIMS(weights)[2], (long int)PyArray_DIMS(weights)[3],
(long int)PyArray_DIMS(weights)[4], (long int)PyArray_DIMS(weights)[5],
(long int)PyArray_DIMS(top)[0], (long int)PyArray_DIMS(top)[1],
(long int)PyArray_DIMS(top)[2], (long int)PyArray_DIMS(top)[3]);
%(fail)
s
}
}
else {
if (out_dim[0] < 0 || out_dim[1] < 0 || out_dim[2] <= 0 || out_dim[3] <= 0)
{
PyErr_Format(PyExc_ValueError,
"CorrMM backprop wrt. inputs: impossible output shape
\\
n"
" bottom shape:
%%
ld x
%%
ld x
%%
ld x
%%
ld
\\
n"
" weights shape:
%%
ld x
%%
ld x
%%
ld x
%%
ld
\\
n"
" top shape:
%%
ld x
%%
ld x
%%
ld x
%%
ld
\\
n",
(long int)out_dim[0], (long int)out_dim[1], (long int)out_dim[2],
(long int)out_dim[3],
(long int)PyArray_DIMS(weights)[0], (long int)PyArray_DIMS(weights)[1],
(long int)PyArray_DIMS(weights)[2], (long int)PyArray_DIMS(weights)[3],
(long int)PyArray_DIMS(top)[0], (long int)PyArray_DIMS(top)[1],
(long int)PyArray_DIMS(top)[2], (long int)PyArray_DIMS(top)[3]);
%(fail)
s
}
}
break;
default:
...
...
@@ -448,13 +520,19 @@ class BaseCorrMM(gof.OpenMPOp):
// Prepare output array
int typenum;
if ( !(*out
&& PyArray_NDIM(*out)==4
int failure;
failure = !(*out
&& PyArray_NDIM(*out)==odim
&& PyArray_IS_C_CONTIGUOUS(*out)
&& PyArray_DIMS(*out)[0]==out_dim[0]
&& PyArray_DIMS(*out)[1]==out_dim[1]
&& PyArray_DIMS(*out)[2]==out_dim[2]
&& PyArray_DIMS(*out)[3]==out_dim[3]))
&& PyArray_DIMS(*out)[3]==out_dim[3]);
if (odim == 6){
failure = failure || !(PyArray_DIMS(*out)[4]==out_dim[4]
&& PyArray_DIMS(*out)[5]==out_dim[5]);
}
if ( failure )
{
Py_XDECREF(*out);
if (direction != 1) {
...
...
@@ -464,21 +542,29 @@ class BaseCorrMM(gof.OpenMPOp):
typenum = PyArray_TYPE(bottom);
}
//Change to PyArray_ZEROS which is faster than PyArray_EMPTY.
*out = (PyArrayObject*)PyArray_ZEROS(
4
,
*out = (PyArrayObject*)PyArray_ZEROS(
odim
,
out_dim,
typenum,
0);
if (NULL == *out)
{
PyErr_Format(PyExc_RuntimeError,
"BaseCorrMM: Failed to allocate output of
%%
lld x
%%
lld x
%%
lld x
%%
lld",
(long long)out_dim[0], (long long)out_dim[1], (long long)out_dim[2], (long long)out_dim[3]);
if (odim == 4) {
PyErr_Format(PyExc_RuntimeError,
"BaseCorrMM: Failed to allocate output of
%%
lld x
%%
lld x
%%
lld x
%%
lld",
(long long)out_dim[0], (long long)out_dim[1], (long long)out_dim[2], (long long)out_dim[3]);
}
if (odim == 6) {
PyErr_Format(PyExc_RuntimeError,
"BaseCorrMM: Failed to allocate output of
%%
lld x
%%
lld x
%%
lld x
%%
lld
%%
lld
%%
lld",
(long long)out_dim[0], (long long)out_dim[1], (long long)out_dim[2], (long long)out_dim[3],
(long long)out_dim[4], (long long)out_dim[5]);
}
%(fail)
s
}
}
// Call corrMM code
out2 = corrMM(
%(bottom)
s,
%(weights)
s,
%(top)
s, direction, dH, dW, dilH, dilW, padH, padW, numgroups
);
out2 = corrMM(
%(bottom)
s,
%(weights)
s,
%(top)
s, direction, dH, dW, dilH, dilW, padH, padW, numgroups
, unshared
);
if (out2==NULL){
%(fail)
s
}
...
...
@@ -514,6 +600,13 @@ class CorrMM(BaseCorrMM):
The filter dilation operation applied to each input image.
Should be a tuple with 2 elements.
Set to `(1, 1)` to disable filter dilation.
num_groups
Divides the image, kernel and output tensors into num_groups
separate groups. Each which carry out convolutions separately.
Should be an integer.
unshared
Boolean value. If true, then a different filter will be applied to
each region of the input image.
"""
...
...
@@ -525,8 +618,12 @@ class CorrMM(BaseCorrMM):
img
,
kern
=
self
.
as_common_dtype
(
img
,
kern
)
if
img
.
type
.
ndim
!=
4
:
raise
TypeError
(
'img must be 4D tensor'
)
if
kern
.
type
.
ndim
!=
4
:
raise
TypeError
(
'kern must be 4D tensor'
)
if
self
.
unshared
is
True
:
if
kern
.
type
.
ndim
!=
6
:
raise
TypeError
(
'kern must be 6D tensor'
)
else
:
if
kern
.
type
.
ndim
!=
4
:
raise
TypeError
(
'kern must be 4D tensor'
)
broadcastable
=
[
img
.
type
.
broadcastable
[
0
],
kern
.
type
.
broadcastable
[
0
],
False
,
False
]
...
...
@@ -555,13 +652,15 @@ class CorrMM(BaseCorrMM):
d_bottom
=
CorrMM_gradInputs
(
self
.
border_mode
,
self
.
subsample
,
self
.
filter_dilation
,
self
.
num_groups
)(
weights
,
top
,
bottom
.
shape
[
-
2
:])
self
.
num_groups
,
self
.
unshared
)(
weights
,
top
,
bottom
.
shape
[
-
2
:])
d_weights
=
CorrMM_gradWeights
(
self
.
border_mode
,
self
.
subsample
,
self
.
filter_dilation
,
self
.
num_groups
)(
bottom
,
top
,
weights
.
shape
[
-
2
:])
self
.
num_groups
,
self
.
unshared
)(
bottom
,
top
,
weights
.
shape
[
-
2
:])
return
d_bottom
,
d_weights
...
...
@@ -595,8 +694,12 @@ class CorrMM_gradWeights(BaseCorrMM):
else
:
height_width
=
[
as_tensor_variable
(
shape
[
0
])
.
astype
(
'int64'
),
as_tensor_variable
(
shape
[
1
])
.
astype
(
'int64'
)]
broadcastable
=
[
topgrad
.
type
.
broadcastable
[
1
],
img
.
type
.
broadcastable
[
1
],
False
,
False
]
if
self
.
unshared
is
True
:
broadcastable
=
[
topgrad
.
type
.
broadcastable
[
0
],
False
,
False
,
img
.
type
.
broadcastable
[
1
],
False
,
False
]
else
:
broadcastable
=
[
topgrad
.
type
.
broadcastable
[
0
],
img
.
type
.
broadcastable
[
1
],
False
,
False
]
dtype
=
img
.
type
.
dtype
return
Apply
(
self
,
[
img
,
topgrad
]
+
height_width
,
[
TensorType
(
dtype
,
broadcastable
)()])
...
...
@@ -633,7 +736,10 @@ class CorrMM_gradWeights(BaseCorrMM):
kW
=
2
-
imshp
[
1
]
+
(
topshp
[
1
]
-
1
)
*
dW
else
:
kW
=
imshp
[
1
]
+
2
*
padW
-
(
topshp
[
1
]
-
1
)
*
dW
return
[(
nkern
,
ssize
,
kH
,
kW
)]
if
self
.
unshared
is
True
:
return
[(
nkern
,
topshp
[
0
],
topshp
[
1
],
ssize
,
kH
,
kW
)]
else
:
return
[(
nkern
,
ssize
,
kH
,
kW
)]
def
c_code
(
self
,
node
,
nodename
,
inp
,
out_
,
sub
):
bottom
,
top
=
inp
[:
2
]
...
...
@@ -649,12 +755,14 @@ class CorrMM_gradWeights(BaseCorrMM):
d_bottom
=
CorrMM_gradInputs
(
self
.
border_mode
,
self
.
subsample
,
self
.
filter_dilation
,
self
.
num_groups
)(
weights
,
top
,
bottom
.
shape
[
-
2
:])
self
.
num_groups
,
self
.
unshared
)(
weights
,
top
,
bottom
.
shape
[
-
2
:])
d_top
=
CorrMM
(
self
.
border_mode
,
self
.
subsample
,
self
.
filter_dilation
,
self
.
num_groups
)(
bottom
,
weights
)
self
.
num_groups
,
self
.
unshared
)(
bottom
,
weights
)
d_height_width
=
((
theano
.
gradient
.
DisconnectedType
()(),)
*
2
if
len
(
inp
)
==
4
else
())
return
(
d_bottom
,
d_top
)
+
d_height_width
...
...
@@ -684,8 +792,12 @@ class CorrMM_gradInputs(BaseCorrMM):
kern
=
as_tensor_variable
(
kern
)
topgrad
=
as_tensor_variable
(
topgrad
)
kern
,
topgrad
=
self
.
as_common_dtype
(
kern
,
topgrad
)
if
kern
.
type
.
ndim
!=
4
:
raise
TypeError
(
'kern must be 4D tensor'
)
if
self
.
unshared
is
True
:
if
kern
.
type
.
ndim
!=
6
:
raise
TypeError
(
'kern must be 6D tensor'
)
else
:
if
kern
.
type
.
ndim
!=
4
:
raise
TypeError
(
'kern must be 4D tensor'
)
if
topgrad
.
type
.
ndim
!=
4
:
raise
TypeError
(
'topgrad must be 4D tensor'
)
if
shape
is
None
:
...
...
@@ -700,7 +812,7 @@ class CorrMM_gradInputs(BaseCorrMM):
broadcastable
=
[
topgrad
.
type
.
broadcastable
[
0
],
False
,
False
,
False
]
else
:
broadcastable
=
[
topgrad
.
type
.
broadcastable
[
0
],
kern
.
type
.
broadcastable
[
1
],
broadcastable
=
[
topgrad
.
type
.
broadcastable
[
0
],
kern
.
type
.
broadcastable
[
-
3
],
False
,
False
]
dtype
=
kern
.
type
.
dtype
return
Apply
(
self
,
[
kern
,
topgrad
]
+
height_width
,
...
...
@@ -719,7 +831,7 @@ class CorrMM_gradInputs(BaseCorrMM):
dH
,
dW
=
self
.
subsample
kshp
=
input_shape
[
0
]
topshp
=
input_shape
[
1
]
ssize
,
kshp
=
kshp
[
1
],
list
(
kshp
[
2
:])
ssize
,
kshp
=
kshp
[
-
3
],
list
(
kshp
[
-
2
:])
ssize
=
ssize
*
self
.
num_groups
bsize
,
topshp
=
topshp
[
0
],
list
(
topshp
[
2
:])
height_width
=
node
.
inputs
[
-
2
:]
...
...
@@ -762,13 +874,15 @@ class CorrMM_gradInputs(BaseCorrMM):
d_weights
=
CorrMM_gradWeights
(
self
.
border_mode
,
self
.
subsample
,
self
.
filter_dilation
,
self
.
num_groups
)(
bottom
,
top
,
weights
.
shape
[
-
2
:])
self
.
num_groups
,
self
.
unshared
)(
bottom
,
top
,
weights
.
shape
[
-
2
:])
d_top
=
CorrMM
(
self
.
border_mode
,
self
.
subsample
,
self
.
filter_dilation
,
self
.
num_groups
)(
bottom
,
weights
)
self
.
num_groups
,
self
.
unshared
)(
bottom
,
weights
)
d_height_width
=
((
theano
.
gradient
.
DisconnectedType
()(),)
*
2
if
len
(
inp
)
==
4
else
())
return
(
d_weights
,
d_top
)
+
d_height_width
...
...
theano/tensor/nnet/opt.py
浏览文件 @
9592125c
...
...
@@ -82,12 +82,14 @@ def local_abstractconv_gemm(node):
# need to flip the kernel if necessary
if
node
.
op
.
filter_flip
:
kern
=
kern
[:,
:,
::
-
1
,
::
-
1
]
flip
=
(
slice
(
None
),)
*
(
kern
.
ndim
-
2
)
+
\
(
slice
(
None
,
None
,
-
1
),)
*
2
kern
=
kern
[
flip
]
rval
=
CorrMM
(
border_mode
=
node
.
op
.
border_mode
,
subsample
=
node
.
op
.
subsample
,
filter_dilation
=
node
.
op
.
filter_dilation
,
num_groups
=
node
.
op
.
num_groups
)(
img
,
kern
)
num_groups
=
node
.
op
.
num_groups
,
unshared
=
node
.
op
.
unshared
)(
img
,
kern
)
copy_stack_trace
(
node
.
outputs
[
0
],
rval
)
return
[
rval
]
...
...
@@ -134,12 +136,15 @@ def local_abstractconv_gradweight_gemm(node):
rval
=
CorrMM_gradWeights
(
border_mode
=
node
.
op
.
border_mode
,
subsample
=
node
.
op
.
subsample
,
filter_dilation
=
node
.
op
.
filter_dilation
,
num_groups
=
node
.
op
.
num_groups
)(
img
,
topgrad
,
shape
)
num_groups
=
node
.
op
.
num_groups
,
unshared
=
node
.
op
.
unshared
)(
img
,
topgrad
,
shape
)
copy_stack_trace
(
node
.
outputs
[
0
],
rval
)
# need to flip the kernel if necessary
if
node
.
op
.
filter_flip
:
rval
=
rval
[:,
:,
::
-
1
,
::
-
1
]
flip
=
(
slice
(
None
),)
*
(
rval
.
ndim
-
2
)
+
\
(
slice
(
None
,
None
,
-
1
),)
*
2
rval
=
rval
[
flip
]
rval
=
theano
.
tensor
.
patternbroadcast
(
rval
,
node
.
outputs
[
0
]
.
broadcastable
)
copy_stack_trace
(
node
.
outputs
[
0
],
rval
)
...
...
@@ -189,12 +194,14 @@ def local_abstractconv_gradinputs_gemm(node):
# need to flip the kernel if necessary
if
node
.
op
.
filter_flip
:
kern
=
kern
[:,
:,
::
-
1
,
::
-
1
]
flip
=
(
slice
(
None
),)
*
(
kern
.
ndim
-
2
)
+
\
(
slice
(
None
,
None
,
-
1
),)
*
2
kern
=
kern
[
flip
]
rval
=
CorrMM_gradInputs
(
border_mode
=
node
.
op
.
border_mode
,
subsample
=
node
.
op
.
subsample
,
filter_dilation
=
node
.
op
.
filter_dilation
,
num_groups
=
node
.
op
.
num_groups
)(
kern
,
topgrad
,
shape
)
num_groups
=
node
.
op
.
num_groups
,
unshared
=
node
.
op
.
unshared
)(
kern
,
topgrad
,
shape
)
copy_stack_trace
(
node
.
outputs
[
0
],
rval
)
return
[
rval
]
...
...
@@ -242,7 +249,7 @@ def local_conv2d_cpu(node):
if
not
node
.
op
.
filter_flip
:
# Not tested yet
return
None
if
node
.
op
.
num_groups
>
1
:
if
node
.
op
.
num_groups
>
1
or
node
.
op
.
unshared
:
return
None
rval
=
conv2d
(
img
,
kern
,
...
...
@@ -270,7 +277,7 @@ def local_conv2d_gradweight_cpu(node):
if
not
node
.
op
.
filter_flip
:
# Not tested yet
return
if
node
.
op
.
num_groups
>
1
:
if
node
.
op
.
num_groups
>
1
or
node
.
op
.
unshared
:
return
None
if
node
.
op
.
border_mode
==
'valid'
and
\
...
...
@@ -370,7 +377,7 @@ def local_conv2d_gradinputs_cpu(node):
if
not
node
.
op
.
filter_flip
:
# Not tested yet
return
None
if
node
.
op
.
num_groups
>
1
:
if
node
.
op
.
num_groups
>
1
or
node
.
op
.
unshared
:
return
None
# Conv 3d implementation, needed when subsample > 2
...
...
theano/tensor/nnet/tests/test_abstract_conv.py
浏览文件 @
9592125c
...
...
@@ -1744,3 +1744,154 @@ class Separable_conv(unittest.TestCase):
fun
=
theano
.
function
([
x_sym
,
dfilter_sym
,
pfilter_sym
],
sep_op
,
mode
=
'FAST_RUN'
)
top
=
fun
(
x
[:,
:,
:
3
,
:
3
,
:
3
],
depthwise_filter
,
pointwise_filter
)
utt
.
assert_allclose
(
top
,
precomp_output
)
class
TestUnsharedConv
(
unittest
.
TestCase
):
conv2d
=
theano
.
tensor
.
nnet
.
abstract_conv
.
AbstractConv2d
conv2d_gradw
=
theano
.
tensor
.
nnet
.
abstract_conv
.
AbstractConv2d_gradWeights
conv2d_gradi
=
theano
.
tensor
.
nnet
.
abstract_conv
.
AbstractConv2d_gradInputs
conv2d_op
=
theano
.
tensor
.
nnet
.
abstract_conv
.
AbstractConv2d
conv2d_gradw_op
=
theano
.
tensor
.
nnet
.
abstract_conv
.
AbstractConv2d_gradWeights
conv2d_gradi_op
=
theano
.
tensor
.
nnet
.
abstract_conv
.
AbstractConv2d_gradInputs
mode
=
theano
.
compile
.
mode
.
Mode
(
optimizer
=
'None'
)
def
setUp
(
self
):
self
.
img_shape
=
[(
2
,
2
,
4
,
4
),
(
3
,
2
,
4
,
2
),
(
3
,
3
,
5
,
3
),
(
3
,
4
,
4
,
4
)]
self
.
kern_shape
=
[(
2
,
2
,
2
,
2
,
3
,
3
),
(
2
,
4
,
2
,
2
,
4
,
2
),
(
3
,
2
,
1
,
1
,
3
,
3
),
(
4
,
3
,
3
,
2
,
4
,
2
)]
self
.
topgrad_shape
=
[(
2
,
2
,
2
,
2
),
(
3
,
2
,
4
,
2
),
(
3
,
3
,
2
,
1
),
(
3
,
4
,
3
,
3
)]
self
.
border_mode
=
[
'valid'
,
'full'
,
'valid'
,
'full'
]
self
.
subsample
=
[(
1
,
1
),
(
2
,
2
),
(
2
,
1
),
(
3
,
2
)]
self
.
filter_dilation
=
(
1
,
1
)
self
.
num_groups
=
[
1
,
1
,
3
,
2
]
# self.verify_flags = np.random.choice([True, False], 4, [0.5, 0.5])
# Above line can be used instead if speed is a concern
self
.
verify_flags
=
[
True
]
*
4
self
.
ref_mode
=
'FAST_RUN'
if
theano
.
config
.
cxx
==
""
:
raise
SkipTest
(
"CorrMM needs cxx"
)
def
test_fwd
(
self
):
tensor6
=
theano
.
tensor
.
TensorType
(
theano
.
config
.
floatX
,
(
False
,)
*
6
)
img_sym
=
theano
.
tensor
.
tensor4
(
'img'
)
kern_sym
=
tensor6
(
'kern'
)
ref_kern_sym
=
theano
.
tensor
.
tensor4
(
'ref_kern'
)
for
imshp
,
kshp
,
mode
,
sub
,
groups
,
verify
in
zip
(
self
.
img_shape
,
self
.
kern_shape
,
self
.
border_mode
,
self
.
subsample
,
self
.
num_groups
,
self
.
verify_flags
):
img
=
np
.
random
.
random
(
imshp
)
.
astype
(
theano
.
config
.
floatX
)
kern
=
np
.
random
.
random
(
kshp
)
.
astype
(
theano
.
config
.
floatX
)
unshared_conv_op
=
self
.
conv2d
(
border_mode
=
mode
,
subsample
=
sub
,
filter_dilation
=
self
.
filter_dilation
,
num_groups
=
groups
,
unshared
=
True
)
unshared_out_sym
=
unshared_conv_op
(
img_sym
,
kern_sym
)
unshared_func
=
theano
.
function
([
img_sym
,
kern_sym
],
unshared_out_sym
,
mode
=
self
.
mode
)
assert
any
([
isinstance
(
node
.
op
,
self
.
conv2d_op
)
for
node
in
unshared_func
.
maker
.
fgraph
.
toposort
()])
unshared_output
=
unshared_func
(
img
,
kern
)
single_kshp
=
kshp
[:
1
]
+
kshp
[
3
:]
ref_conv_op
=
self
.
conv2d
(
border_mode
=
mode
,
subsample
=
sub
,
filter_dilation
=
self
.
filter_dilation
,
num_groups
=
groups
,
unshared
=
False
)
ref_out_sym
=
ref_conv_op
(
img_sym
,
ref_kern_sym
)
ref_func
=
theano
.
function
([
img_sym
,
ref_kern_sym
],
ref_out_sym
,
mode
=
self
.
mode
)
for
i
in
range
(
0
,
kshp
[
1
]):
for
j
in
range
(
0
,
kshp
[
2
]):
single_kern
=
kern
[:,
i
,
j
,
...
]
.
reshape
(
single_kshp
)
ref_val
=
ref_func
(
img
,
single_kern
)
utt
.
assert_allclose
(
ref_val
[:,
:,
i
,
j
],
unshared_output
[:,
:,
i
,
j
])
if
verify
:
utt
.
verify_grad
(
unshared_conv_op
,
[
img
,
kern
],
mode
=
self
.
mode
,
eps
=
1
)
def
test_gradweight
(
self
):
img_sym
=
theano
.
tensor
.
tensor4
(
'img'
)
top_sym
=
theano
.
tensor
.
tensor4
(
'top'
)
for
imshp
,
kshp
,
topshp
,
mode
,
sub
,
groups
,
verify
in
zip
(
self
.
img_shape
,
self
.
kern_shape
,
self
.
topgrad_shape
,
self
.
border_mode
,
self
.
subsample
,
self
.
num_groups
,
self
.
verify_flags
):
img
=
np
.
random
.
random
(
imshp
)
.
astype
(
theano
.
config
.
floatX
)
top
=
np
.
random
.
random
(
topshp
)
.
astype
(
theano
.
config
.
floatX
)
unshared_conv_op
=
self
.
conv2d_gradw
(
border_mode
=
mode
,
subsample
=
sub
,
filter_dilation
=
self
.
filter_dilation
,
num_groups
=
groups
,
unshared
=
True
)
unshared_out_sym
=
unshared_conv_op
(
img_sym
,
top_sym
,
tensor
.
as_tensor_variable
(
kshp
[
-
2
:]))
unshared_func
=
theano
.
function
([
img_sym
,
top_sym
],
unshared_out_sym
,
mode
=
self
.
mode
)
assert
any
([
isinstance
(
node
.
op
,
self
.
conv2d_gradw_op
)
for
node
in
unshared_func
.
maker
.
fgraph
.
toposort
()])
unshared_output
=
unshared_func
(
img
,
top
)
single_kshp
=
kshp
[:
1
]
+
kshp
[
3
:]
ref_conv_op
=
self
.
conv2d_gradw
(
border_mode
=
mode
,
subsample
=
sub
,
filter_dilation
=
self
.
filter_dilation
,
num_groups
=
groups
,
unshared
=
False
)
ref_out_sym
=
ref_conv_op
(
img_sym
,
top_sym
,
tensor
.
as_tensor_variable
(
single_kshp
[
-
2
:]))
ref_func
=
theano
.
function
([
img_sym
,
top_sym
],
ref_out_sym
,
mode
=
self
.
mode
)
for
i
in
range
(
0
,
topshp
[
2
]):
for
j
in
range
(
0
,
topshp
[
3
]):
top_single
=
np
.
zeros_like
(
top
)
top_single
[:,
:,
i
,
j
]
=
top
[:,
:,
i
,
j
]
ref_output
=
ref_func
(
img
,
top_single
)
utt
.
assert_allclose
(
unshared_output
[:,
i
,
j
,
...
],
ref_output
)
def
conv_gradweight
(
inputs_val
,
output_val
):
return
unshared_conv_op
(
inputs_val
,
output_val
,
tensor
.
as_tensor_variable
(
kshp
[
-
2
:]))
if
verify
:
utt
.
verify_grad
(
conv_gradweight
,
[
img
,
top
],
mode
=
self
.
mode
,
eps
=
1
)
def
test_gradinput
(
self
):
tensor6
=
theano
.
tensor
.
TensorType
(
theano
.
config
.
floatX
,
(
False
,)
*
6
)
kern_sym
=
tensor6
(
'kern'
)
top_sym
=
theano
.
tensor
.
tensor4
(
'top'
)
ref_kern_sym
=
theano
.
tensor
.
tensor4
(
'ref_kern'
)
for
imshp
,
kshp
,
topshp
,
mode
,
sub
,
groups
,
verify
in
zip
(
self
.
img_shape
,
self
.
kern_shape
,
self
.
topgrad_shape
,
self
.
border_mode
,
self
.
subsample
,
self
.
num_groups
,
self
.
verify_flags
):
single_kshp
=
kshp
[:
1
]
+
kshp
[
3
:]
kern
=
np
.
random
.
random
(
kshp
)
.
astype
(
theano
.
config
.
floatX
)
top
=
np
.
random
.
random
(
topshp
)
.
astype
(
theano
.
config
.
floatX
)
unshared_conv_op
=
self
.
conv2d_gradi
(
border_mode
=
mode
,
subsample
=
sub
,
filter_dilation
=
self
.
filter_dilation
,
num_groups
=
groups
,
unshared
=
True
)
unshared_out_sym
=
unshared_conv_op
(
kern_sym
,
top_sym
,
tensor
.
as_tensor_variable
(
imshp
[
-
2
:]))
unshared_func
=
theano
.
function
([
kern_sym
,
top_sym
],
unshared_out_sym
,
mode
=
self
.
mode
)
assert
any
([
isinstance
(
node
.
op
,
self
.
conv2d_gradi_op
)
for
node
in
unshared_func
.
maker
.
fgraph
.
toposort
()])
unshared_output
=
unshared_func
(
kern
,
top
)
ref_conv_op
=
self
.
conv2d_gradi
(
border_mode
=
mode
,
subsample
=
sub
,
filter_dilation
=
self
.
filter_dilation
,
num_groups
=
groups
,
unshared
=
False
)
ref_out_sym
=
ref_conv_op
(
ref_kern_sym
,
top_sym
,
tensor
.
as_tensor_variable
(
imshp
[
-
2
:]))
ref_func
=
theano
.
function
([
ref_kern_sym
,
top_sym
],
ref_out_sym
,
mode
=
self
.
mode
)
ref_output
=
np
.
zeros
(
imshp
)
for
i
in
range
(
0
,
topshp
[
2
]):
for
j
in
range
(
0
,
topshp
[
3
]):
single_kern
=
kern
[:,
i
,
j
,
...
]
.
reshape
(
single_kshp
)
top_single
=
np
.
zeros_like
(
top
)
top_single
[:,
:,
i
,
j
]
=
top
[:,
:,
i
,
j
]
ref_output
+=
ref_func
(
single_kern
,
top_single
)
utt
.
assert_allclose
(
ref_output
,
unshared_output
)
def
conv_gradinputs
(
filters_val
,
output_val
):
return
unshared_conv_op
(
filters_val
,
output_val
,
tensor
.
as_tensor_variable
(
imshp
[
-
2
:]))
if
verify
:
utt
.
verify_grad
(
conv_gradinputs
,
[
kern
,
top
],
mode
=
self
.
mode
,
eps
=
1
)
theano/tensor/nnet/tests/test_corr.py
浏览文件 @
9592125c
...
...
@@ -10,7 +10,7 @@ import theano
import
theano.tensor
as
T
from
theano.tests
import
unittest_tools
as
utt
from
theano.tensor.nnet
import
corr
,
conv
from
theano.tensor.nnet.tests.test_abstract_conv
import
Grouped_conv_noOptim
from
theano.tensor.nnet.tests.test_abstract_conv
import
Grouped_conv_noOptim
,
TestUnsharedConv
class
TestCorr2D
(
utt
.
InferShapeTester
):
...
...
@@ -452,6 +452,16 @@ class TestGroupCorr2d(Grouped_conv_noOptim):
utt
.
assert_allclose
(
gconv_output
,
conv_output
)
class
TestUnsharedCorr2d
(
TestUnsharedConv
):
if
theano
.
config
.
mode
==
"FAST_COMPILE"
:
mode
=
theano
.
compile
.
get_mode
(
"FAST_RUN"
)
.
excluding
(
'gpuarray'
)
else
:
mode
=
None
conv2d_op
=
corr
.
CorrMM
conv2d_gradw_op
=
corr
.
CorrMM_gradWeights
conv2d_gradi_op
=
corr
.
CorrMM_gradInputs
if
__name__
==
'__main__'
:
t
=
TestCorr2D
(
'setUp'
)
...
...
编写
预览
Markdown
格式
0%
重试
或
添加新文件
添加附件
取消
您添加了
0
人
到此讨论。请谨慎行事。
请先完成此评论的编辑!
取消
请
注册
或者
登录
后发表评论