Skip to content
项目
群组
代码片段
帮助
当前项目
正在载入...
登录 / 注册
切换导航面板
P
pytensor
项目
项目
详情
活动
周期分析
仓库
仓库
文件
提交
分支
标签
贡献者
图表
比较
统计图
议题
0
议题
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
CI / CD
CI / CD
流水线
作业
日程
统计图
Wiki
Wiki
代码片段
代码片段
成员
成员
折叠边栏
关闭边栏
活动
图像
聊天
创建新问题
作业
提交
问题看板
Open sidebar
testgroup
pytensor
Commits
9592125c
提交
9592125c
authored
8月 25, 2017
作者:
Frédéric Bastien
提交者:
GitHub
8月 25, 2017
浏览文件
操作
浏览文件
下载
差异文件
Merge pull request #6286 from vikramnitin9/grouped_unshared
Implement Unshared Convolution
上级
c32b0db8
121f96d6
隐藏空白字符变更
内嵌
并排
正在显示
12 个修改的文件
包含
1382 行增加
和
450 行删除
+1382
-450
blas.py
theano/gpuarray/blas.py
+191
-84
corr_gemm.c
theano/gpuarray/c_code/corr_gemm.c
+171
-65
dnn.py
theano/gpuarray/dnn.py
+11
-0
opt.py
theano/gpuarray/opt.py
+44
-21
test_gemmcorr.py
theano/gpuarray/tests/test_gemmcorr.py
+57
-12
__init__.py
theano/tensor/nnet/__init__.py
+24
-8
abstract_conv.py
theano/tensor/nnet/abstract_conv.py
+359
-107
corr_gemm.c
theano/tensor/nnet/c_code/corr_gemm.c
+146
-56
corr.py
theano/tensor/nnet/corr.py
+199
-85
opt.py
theano/tensor/nnet/opt.py
+18
-11
test_abstract_conv.py
theano/tensor/nnet/tests/test_abstract_conv.py
+151
-0
test_corr.py
theano/tensor/nnet/tests/test_corr.py
+11
-1
没有找到文件。
theano/gpuarray/blas.py
浏览文件 @
9592125c
...
@@ -458,13 +458,15 @@ class BaseGpuCorrMM(CGpuKernelBase):
...
@@ -458,13 +458,15 @@ class BaseGpuCorrMM(CGpuKernelBase):
num_groups :
num_groups :
Divides the image, kernel and output tensors into num_groups
Divides the image, kernel and output tensors into num_groups
separate groups. Each which carry out convolutions separately (default : 1).
separate groups. Each which carry out convolutions separately (default : 1).
unshared
Perform unshared correlation (default: False)
"""
"""
check_broadcast
=
False
check_broadcast
=
False
__props__
=
(
'border_mode'
,
'subsample'
,
'filter_dilation'
,
'num_groups'
)
__props__
=
(
'border_mode'
,
'subsample'
,
'filter_dilation'
,
'num_groups'
,
'unshared'
)
_f16_ok
=
True
_f16_ok
=
True
def
__init__
(
self
,
border_mode
=
"valid"
,
subsample
=
(
1
,
1
),
def
__init__
(
self
,
border_mode
=
"valid"
,
subsample
=
(
1
,
1
),
filter_dilation
=
(
1
,
1
),
num_groups
=
1
):
filter_dilation
=
(
1
,
1
),
num_groups
=
1
,
unshared
=
False
):
if
isinstance
(
border_mode
,
integer_types
):
if
isinstance
(
border_mode
,
integer_types
):
border_mode
=
(
border_mode
,
border_mode
)
border_mode
=
(
border_mode
,
border_mode
)
if
isinstance
(
border_mode
,
tuple
):
if
isinstance
(
border_mode
,
tuple
):
...
@@ -487,6 +489,7 @@ class BaseGpuCorrMM(CGpuKernelBase):
...
@@ -487,6 +489,7 @@ class BaseGpuCorrMM(CGpuKernelBase):
raise
ValueError
(
"Number of groups should be greater than 0"
)
raise
ValueError
(
"Number of groups should be greater than 0"
)
self
.
num_groups
=
num_groups
self
.
num_groups
=
num_groups
CGpuKernelBase
.
__init__
(
self
,
[
'c_code/corr_gemm.c'
])
CGpuKernelBase
.
__init__
(
self
,
[
'c_code/corr_gemm.c'
])
self
.
unshared
=
unshared
@property
@property
def
pad
(
self
):
def
pad
(
self
):
...
@@ -495,12 +498,13 @@ class BaseGpuCorrMM(CGpuKernelBase):
...
@@ -495,12 +498,13 @@ class BaseGpuCorrMM(CGpuKernelBase):
return
(
0
,
0
)
return
(
0
,
0
)
def
__str__
(
self
):
def
__str__
(
self
):
return
'
%
s{
%
s,
%
s,
%
s,
%
s}'
%
(
return
'
%
s{
%
s,
%
s,
%
s,
%
s
,
%
s
}'
%
(
self
.
__class__
.
__name__
,
self
.
__class__
.
__name__
,
self
.
border_mode
,
self
.
border_mode
,
str
(
self
.
subsample
),
str
(
self
.
subsample
),
str
(
self
.
filter_dilation
),
str
(
self
.
filter_dilation
),
str
(
self
.
num_groups
))
str
(
self
.
num_groups
),
str
(
self
.
unshared
))
def
__setstate__
(
self
,
d
):
def
__setstate__
(
self
,
d
):
self
.
__dict__
.
update
(
d
)
self
.
__dict__
.
update
(
d
)
...
@@ -533,7 +537,7 @@ class BaseGpuCorrMM(CGpuKernelBase):
...
@@ -533,7 +537,7 @@ class BaseGpuCorrMM(CGpuKernelBase):
def
c_code_cache_version
(
self
):
def
c_code_cache_version
(
self
):
# Raise this whenever modifying the C code (including the file).
# Raise this whenever modifying the C code (including the file).
return
(
1
0
,)
return
(
1
1
,)
def
c_code_helper
(
self
,
bottom
,
weights
,
top
,
direction
,
sub
,
height
=
None
,
width
=
None
):
def
c_code_helper
(
self
,
bottom
,
weights
,
top
,
direction
,
sub
,
height
=
None
,
width
=
None
):
"""
"""
...
@@ -581,6 +585,7 @@ class BaseGpuCorrMM(CGpuKernelBase):
...
@@ -581,6 +585,7 @@ class BaseGpuCorrMM(CGpuKernelBase):
dH
,
dW
=
self
.
subsample
dH
,
dW
=
self
.
subsample
dilH
,
dilW
=
self
.
filter_dilation
dilH
,
dilW
=
self
.
filter_dilation
numgroups
=
self
.
num_groups
numgroups
=
self
.
num_groups
unshared
=
int
(
self
.
unshared
)
if
self
.
border_mode
==
"half"
:
if
self
.
border_mode
==
"half"
:
padH
=
padW
=
-
1
padH
=
padW
=
-
1
elif
self
.
border_mode
==
"full"
:
elif
self
.
border_mode
==
"full"
:
...
@@ -633,19 +638,24 @@ class BaseGpuCorrMM(CGpuKernelBase):
...
@@ -633,19 +638,24 @@ class BaseGpuCorrMM(CGpuKernelBase):
int padH =
%(padH)
s;
int padH =
%(padH)
s;
int padW =
%(padW)
s;
int padW =
%(padW)
s;
int numgroups =
%(numgroups)
s;
int numgroups =
%(numgroups)
s;
int unshared =
%(unshared)
s;
PyGpuArrayObject * bottom =
%(bottom)
s;
PyGpuArrayObject * bottom =
%(bottom)
s;
PyGpuArrayObject * weights =
%(weights)
s;
PyGpuArrayObject * weights =
%(weights)
s;
PyGpuArrayObject * top =
%(top)
s;
PyGpuArrayObject * top =
%(top)
s;
PyGpuArrayObject * out2 = NULL;
PyGpuArrayObject * out2 = NULL;
int wdim, odim;
wdim = unshared ? 6 : 4;
odim = 4; //Can be set to 6 later for unshared backprop wrt weights
// Obtain or infer kernel width and height
// Obtain or infer kernel width and height
// (we need to know it early to be able to handle auto-padding)
// (we need to know it early to be able to handle auto-padding)
size_t kH, kW, dil_kH, dil_kW;
size_t kH, kW, dil_kH, dil_kW;
if (direction != 1) {
if (direction != 1) {
// weight is an input variable, we can just read its shape
// weight is an input variable, we can just read its shape
kH = PyGpuArray_DIMS(weights)[2];
kH = PyGpuArray_DIMS(weights)[
wdim-
2];
kW = PyGpuArray_DIMS(weights)[
3
];
kW = PyGpuArray_DIMS(weights)[
wdim-1
];
}
}
else {
else {
if (
%(height)
s != -1) {
if (
%(height)
s != -1) {
...
@@ -699,8 +709,10 @@ class BaseGpuCorrMM(CGpuKernelBase):
...
@@ -699,8 +709,10 @@ class BaseGpuCorrMM(CGpuKernelBase):
// Infer output shape and type
// Infer output shape and type
// The inferred shape can be negative.
// The inferred shape can be negative.
long long out_dim[4];
long long out_dim[6];
size_t out_dim_size[4];
size_t out_dim_size[6];
out_dim[4] = out_dim[5] = 0; //Only used for unshared backprop wrt weights
out_dim_size[4] = out_dim_size[5] = 0; //Same
int out_typecode;
int out_typecode;
PyGpuContextObject *out_context;
PyGpuContextObject *out_context;
switch(direction) {
switch(direction) {
...
@@ -709,71 +721,131 @@ class BaseGpuCorrMM(CGpuKernelBase):
...
@@ -709,71 +721,131 @@ class BaseGpuCorrMM(CGpuKernelBase):
// height and width: top = (bottom + 2*pad - ((weight-1)*dil + 1)) / sample + 1
// height and width: top = (bottom + 2*pad - ((weight-1)*dil + 1)) / sample + 1
out_dim[0] = PyGpuArray_DIMS(bottom)[0];
out_dim[0] = PyGpuArray_DIMS(bottom)[0];
out_dim[1] = PyGpuArray_DIMS(weights)[0];
out_dim[1] = PyGpuArray_DIMS(weights)[0];
out_dim[2] = (PyGpuArray_DIMS(bottom)[2] + 2*padH - ((PyGpuArray_DIMS(weights)[2]-1)*dilH + 1)) / dH + 1;
out_dim[2] = (PyGpuArray_DIMS(bottom)[2] + 2*padH - ((PyGpuArray_DIMS(weights)[
wdim-
2]-1)*dilH + 1)) / dH + 1;
out_dim[3] = (PyGpuArray_DIMS(bottom)[3] + 2*padW - ((PyGpuArray_DIMS(weights)[
3
]-1)*dilW + 1)) / dW + 1;
out_dim[3] = (PyGpuArray_DIMS(bottom)[3] + 2*padW - ((PyGpuArray_DIMS(weights)[
wdim-1
]-1)*dilW + 1)) / dW + 1;
out_typecode = bottom->ga.typecode;
out_typecode = bottom->ga.typecode;
out_context = bottom->context;
out_context = bottom->context;
if (out_dim[0] < 0 || out_dim[1] < 0 || out_dim[2] <= 0 || out_dim[3] <= 0)
if (out_dim[0] < 0 || out_dim[1] < 0 || out_dim[2] <= 0 || out_dim[3] <= 0)
{
{
PyErr_Format(PyExc_ValueError,
if (unshared) {
"GpuCorrMM: impossible output shape
\\
n"
PyErr_Format(PyExc_ValueError,
" bottom shape:
%%
ld x
%%
ld x
%%
ld x
%%
ld
\\
n"
"GpuCorrMM: impossible output shape
\\
n"
" weights shape:
%%
ld x
%%
ld x
%%
ld x
%%
ld
\\
n"
" bottom shape:
%%
ld x
%%
ld x
%%
ld x
%%
ld
\\
n"
" top shape:
%%
ld x
%%
ld x
%%
ld x
%%
ld
\\
n",
" weights shape:
%%
ld x
%%
ld x
%%
ld x
%%
ld x
%%
ld x
%%
ld
\\
n"
PyGpuArray_DIMS(bottom)[0], PyGpuArray_DIMS(bottom)[1],
" top shape:
%%
ld x
%%
ld x
%%
ld x
%%
ld
\\
n",
PyGpuArray_DIMS(bottom)[2], PyGpuArray_DIMS(bottom)[3],
PyGpuArray_DIMS(bottom)[0], PyGpuArray_DIMS(bottom)[1],
PyGpuArray_DIMS(weights)[0], PyGpuArray_DIMS(weights)[1],
PyGpuArray_DIMS(bottom)[2], PyGpuArray_DIMS(bottom)[3],
PyGpuArray_DIMS(weights)[2], PyGpuArray_DIMS(weights)[3],
PyGpuArray_DIMS(weights)[0], PyGpuArray_DIMS(weights)[1],
out_dim[0], out_dim[1], out_dim[2], out_dim[3]);
PyGpuArray_DIMS(weights)[2], PyGpuArray_DIMS(weights)[3],
%(fail)
s
PyGpuArray_DIMS(weights)[4], PyGpuArray_DIMS(weights)[5],
out_dim[0], out_dim[1], out_dim[2], out_dim[3]);
%(fail)
s
}
else {
PyErr_Format(PyExc_ValueError,
"GpuCorrMM: impossible output shape
\\
n"
" bottom shape:
%%
ld x
%%
ld x
%%
ld x
%%
ld
\\
n"
" weights shape:
%%
ld x
%%
ld x
%%
ld x
%%
ld
\\
n"
" top shape:
%%
ld x
%%
ld x
%%
ld x
%%
ld
\\
n",
PyGpuArray_DIMS(bottom)[0], PyGpuArray_DIMS(bottom)[1],
PyGpuArray_DIMS(bottom)[2], PyGpuArray_DIMS(bottom)[3],
PyGpuArray_DIMS(weights)[0], PyGpuArray_DIMS(weights)[1],
PyGpuArray_DIMS(weights)[2], PyGpuArray_DIMS(weights)[3],
out_dim[0], out_dim[1], out_dim[2], out_dim[3]);
%(fail)
s
}
}
}
break;
break;
case 1: // backprop wrt. weights
case 1: // backprop wrt. weights
// output is weights: (num_filters, num_channels, height, width)
// output is weights: (num_filters, num_channels, height, width) or
// (num_filters, top_height, top_width, num_channels, height, width) -> for unshared
// height and width: weights = (bottom + 2*pad - (top - 1) * sample - 1) / dil + 1
// height and width: weights = (bottom + 2*pad - (top - 1) * sample - 1) / dil + 1
out_dim[0] = PyGpuArray_DIMS(top)[1];
out_dim[0] = PyGpuArray_DIMS(top)[1];
out_dim[1] = PyGpuArray_DIMS(bottom)[1] / numgroups;
if (unshared){
out_dim[2] = kH; // already inferred further above
odim = 6;
out_dim[3] = kW; // how convenient
out_dim[1] = PyGpuArray_DIMS(top)[2];
out_dim[2] = PyGpuArray_DIMS(top)[3];
}
out_dim[wdim-3] = PyGpuArray_DIMS(bottom)[1] / numgroups;
out_dim[wdim-2] = kH; // already inferred further above
out_dim[wdim-1] = kW; // how convenient
out_typecode = top->ga.typecode;
out_typecode = top->ga.typecode;
out_context = top->context;
out_context = top->context;
if (out_dim[0] < 0 || out_dim[1] < 0 || out_dim[2] <= 0 || out_dim[3] <= 0)
if (unshared) {
{
if (out_dim[0] < 0 || out_dim[1] <= 0 || out_dim[2] <= 0 || out_dim[3] < 0
PyErr_Format(PyExc_ValueError,
|| out_dim[4] <= 0 || out_dim[5] <= 0){
"GpuCorrMM backprop wrt. weights: impossible output shape
\\
n"
PyErr_Format(PyExc_ValueError,
" bottom shape:
%%
ld x
%%
ld x
%%
ld x
%%
ld
\\
n"
"GpuCorrMM backprop wrt. weights: impossible output shape
\\
n"
" weights shape:
%%
ld x
%%
ld x
%%
ld x
%%
ld
\\
n"
" bottom shape:
%%
ld x
%%
ld x
%%
ld x
%%
ld
\\
n"
" top shape:
%%
ld x
%%
ld x
%%
ld x
%%
ld
\\
n",
" weights shape:
%%
ld x
%%
ld x
%%
ld x
%%
ld x
%%
ld x
%%
ld
\\
n"
PyGpuArray_DIMS(bottom)[0], PyGpuArray_DIMS(bottom)[1],
" top shape:
%%
ld x
%%
ld x
%%
ld x
%%
ld
\\
n",
PyGpuArray_DIMS(bottom)[2], PyGpuArray_DIMS(bottom)[3],
PyGpuArray_DIMS(bottom)[0], PyGpuArray_DIMS(bottom)[1],
out_dim[0], out_dim[1], out_dim[2], out_dim[3],
PyGpuArray_DIMS(bottom)[2], PyGpuArray_DIMS(bottom)[3],
PyGpuArray_DIMS(top)[0], PyGpuArray_DIMS(top)[1],
out_dim[0], out_dim[1], out_dim[2], out_dim[3],
PyGpuArray_DIMS(top)[2], PyGpuArray_DIMS(top)[3]);
out_dim[4], out_dim[5],
%(fail)
s
PyGpuArray_DIMS(top)[0], PyGpuArray_DIMS(top)[1],
PyGpuArray_DIMS(top)[2], PyGpuArray_DIMS(top)[3]);
%(fail)
s
}
}
else {
if (out_dim[0] < 0 || out_dim[1] < 0 || out_dim[2] <= 0 || out_dim[3] <= 0)
{
PyErr_Format(PyExc_ValueError,
"GpuCorrMM backprop wrt. weights: impossible output shape
\\
n"
" bottom shape:
%%
ld x
%%
ld x
%%
ld x
%%
ld
\\
n"
" weights shape:
%%
ld x
%%
ld x
%%
ld x
%%
ld
\\
n"
" top shape:
%%
ld x
%%
ld x
%%
ld x
%%
ld
\\
n",
PyGpuArray_DIMS(bottom)[0], PyGpuArray_DIMS(bottom)[1],
PyGpuArray_DIMS(bottom)[2], PyGpuArray_DIMS(bottom)[3],
out_dim[0], out_dim[1], out_dim[2], out_dim[3],
PyGpuArray_DIMS(top)[0], PyGpuArray_DIMS(top)[1],
PyGpuArray_DIMS(top)[2], PyGpuArray_DIMS(top)[3]);
%(fail)
s
}
}
}
break;
break;
case 2: // backprop wrt. inputs
case 2: // backprop wrt. inputs
// output is bottom: (batchsize, num_channels, height, width)
// output is bottom: (batchsize, num_channels, height, width)
// height and width: bottom = (top - 1) * sample + (weights-1)*dil + 1 - 2*pad
// height and width: bottom = (top - 1) * sample + (weights-1)*dil + 1 - 2*pad
out_dim[0] = PyGpuArray_DIMS(top)[0];
out_dim[0] = PyGpuArray_DIMS(top)[0];
out_dim[1] = PyGpuArray_DIMS(weights)[
1
] * numgroups;
out_dim[1] = PyGpuArray_DIMS(weights)[
wdim-3
] * numgroups;
out_dim[2] = (
%(height)
s != -1) ?
%(height)
s : (PyGpuArray_DIMS(top)[2] - 1) * dH + (PyGpuArray_DIMS(weights)[2]-1)*dilH + 1 - 2*padH;
out_dim[2] = (
%(height)
s != -1) ?
%(height)
s : (PyGpuArray_DIMS(top)[2] - 1) * dH + (PyGpuArray_DIMS(weights)[
wdim-
2]-1)*dilH + 1 - 2*padH;
out_dim[3] = (
%(width)
s != -1) ?
%(width)
s : (PyGpuArray_DIMS(top)[3] - 1) * dW + (PyGpuArray_DIMS(weights)[
3
]-1)*dilW + 1 - 2*padW;
out_dim[3] = (
%(width)
s != -1) ?
%(width)
s : (PyGpuArray_DIMS(top)[3] - 1) * dW + (PyGpuArray_DIMS(weights)[
wdim-1
]-1)*dilW + 1 - 2*padW;
out_typecode = top->ga.typecode;
out_typecode = top->ga.typecode;
out_context = top->context;
out_context = top->context;
if (out_dim[0] < 0 || out_dim[1] < 0 || out_dim[2] <= 0 || out_dim[3] <= 0)
if (unshared) {
{
if (out_dim[0] < 0 || out_dim[1] < 0 || out_dim[2] <= 0 || out_dim[3] <= 0)
PyErr_Format(PyExc_ValueError,
{
"GpuCorrMM backprop wrt. inputs: impossible output shape
\\
n"
PyErr_Format(PyExc_ValueError,
" bottom shape:
%%
ld x
%%
ld x
%%
ld x
%%
ld
\\
n"
"GpuCorrMM backprop wrt. inputs: impossible output shape
\\
n"
" weight shape:
%%
ld x
%%
ld x
%%
ld x
%%
ld
\\
n"
" bottom shape:
%%
ld x
%%
ld x
%%
ld x
%%
ld
\\
n"
" top shape:
%%
ld x
%%
ld x
%%
ld x
%%
ld
\\
n",
" weight shape:
%%
ld x
%%
ld x
%%
ld x
%%
ld x
%%
ld x
%%
ld
\\
n"
out_dim[0], out_dim[1], out_dim[2], out_dim[3],
" top shape:
%%
ld x
%%
ld x
%%
ld x
%%
ld
\\
n",
PyGpuArray_DIMS(weights)[0], PyGpuArray_DIMS(weights)[1],
out_dim[0], out_dim[1], out_dim[2], out_dim[3],
PyGpuArray_DIMS(weights)[2], PyGpuArray_DIMS(weights)[3],
PyGpuArray_DIMS(weights)[0], PyGpuArray_DIMS(weights)[1],
PyGpuArray_DIMS(top)[0], PyGpuArray_DIMS(top)[1],
PyGpuArray_DIMS(weights)[2], PyGpuArray_DIMS(weights)[3],
PyGpuArray_DIMS(top)[2], PyGpuArray_DIMS(top)[3]);
PyGpuArray_DIMS(weights)[4], PyGpuArray_DIMS(weights)[5],
%(fail)
s
PyGpuArray_DIMS(top)[0], PyGpuArray_DIMS(top)[1],
PyGpuArray_DIMS(top)[2], PyGpuArray_DIMS(top)[3]);
%(fail)
s
}
}
else {
if (out_dim[0] < 0 || out_dim[1] < 0 || out_dim[2] <= 0 || out_dim[3] <= 0)
{
PyErr_Format(PyExc_ValueError,
"GpuCorrMM backprop wrt. inputs: impossible output shape
\\
n"
" bottom shape:
%%
ld x
%%
ld x
%%
ld x
%%
ld
\\
n"
" weight shape:
%%
ld x
%%
ld x
%%
ld x
%%
ld
\\
n"
" top shape:
%%
ld x
%%
ld x
%%
ld x
%%
ld
\\
n",
out_dim[0], out_dim[1], out_dim[2], out_dim[3],
PyGpuArray_DIMS(weights)[0], PyGpuArray_DIMS(weights)[1],
PyGpuArray_DIMS(weights)[2], PyGpuArray_DIMS(weights)[3],
PyGpuArray_DIMS(top)[0], PyGpuArray_DIMS(top)[1],
PyGpuArray_DIMS(top)[2], PyGpuArray_DIMS(top)[3]);
%(fail)
s
}
}
}
break;
break;
default:
default:
...
@@ -786,12 +858,24 @@ class BaseGpuCorrMM(CGpuKernelBase):
...
@@ -786,12 +858,24 @@ class BaseGpuCorrMM(CGpuKernelBase):
out_dim_size[2] = (size_t)out_dim[2];
out_dim_size[2] = (size_t)out_dim[2];
out_dim_size[3] = (size_t)out_dim[3];
out_dim_size[3] = (size_t)out_dim[3];
if (odim == 6) {
out_dim_size[4] = (size_t)out_dim[4];
out_dim_size[5] = (size_t)out_dim[5];
}
// Prepare output array
// Prepare output array
if (theano_prep_output(&
%(out)
s,
4
, out_dim_size, out_typecode, GA_C_ORDER, out_context) != 0)
if (theano_prep_output(&
%(out)
s,
odim
, out_dim_size, out_typecode, GA_C_ORDER, out_context) != 0)
{
{
PyErr_Format(PyExc_RuntimeError,
if (odim == 4) {
"BaseGpuCorrMM: Failed to allocate output of
%%
lld x
%%
lld x
%%
lld x
%%
lld",
PyErr_Format(PyExc_RuntimeError,
out_dim[0], out_dim[1], out_dim[2], out_dim[3]);
"BaseGpuCorrMM: Failed to allocate output of
%%
lld x
%%
lld x
%%
lld x
%%
lld",
out_dim[0], out_dim[1], out_dim[2], out_dim[3]);
}
if (odim == 6) {
PyErr_Format(PyExc_RuntimeError,
"BaseGpuCorrMM: Failed to allocate output of
%%
lld x
%%
lld x
%%
lld x
%%
lld
%%
lld
%%
lld",
out_dim[0], out_dim[1], out_dim[2], out_dim[3], out_dim[4], out_dim[5]);
}
%(fail)
s
%(fail)
s
}
}
if (!GpuArray_IS_C_CONTIGUOUS(&
%(out)
s->ga)) {
if (!GpuArray_IS_C_CONTIGUOUS(&
%(out)
s->ga)) {
...
@@ -800,7 +884,7 @@ class BaseGpuCorrMM(CGpuKernelBase):
...
@@ -800,7 +884,7 @@ class BaseGpuCorrMM(CGpuKernelBase):
}
}
// Call GPU code
// Call GPU code
out2 = corrMM(
%(bottom)
s,
%(weights)
s,
%(top)
s, direction, dH, dW, dilH, dilW, padH, padW, numgroups);
out2 = corrMM(
%(bottom)
s,
%(weights)
s,
%(top)
s, direction, dH, dW, dilH, dilW, padH, padW, numgroups
, unshared
);
if (out2==NULL){
if (out2==NULL){
%(fail)
s
%(fail)
s
}
}
...
@@ -840,6 +924,8 @@ class GpuCorrMM(BaseGpuCorrMM):
...
@@ -840,6 +924,8 @@ class GpuCorrMM(BaseGpuCorrMM):
divided into.
divided into.
should be an int
should be an int
set to 1 to disable grouped convolution
set to 1 to disable grouped convolution
unshared
Perform unshared correlation (default: False)
Notes
Notes
-----
-----
...
@@ -859,9 +945,9 @@ class GpuCorrMM(BaseGpuCorrMM):
...
@@ -859,9 +945,9 @@ class GpuCorrMM(BaseGpuCorrMM):
"""
"""
def
__init__
(
self
,
border_mode
=
"valid"
,
def
__init__
(
self
,
border_mode
=
"valid"
,
subsample
=
(
1
,
1
),
subsample
=
(
1
,
1
),
filter_dilation
=
(
1
,
1
),
num_groups
=
1
):
filter_dilation
=
(
1
,
1
),
num_groups
=
1
,
unshared
=
False
):
super
(
GpuCorrMM
,
self
)
.
__init__
(
border_mode
,
subsample
,
super
(
GpuCorrMM
,
self
)
.
__init__
(
border_mode
,
subsample
,
filter_dilation
,
num_groups
)
filter_dilation
,
num_groups
,
unshared
)
def
make_node
(
self
,
img
,
kern
):
def
make_node
(
self
,
img
,
kern
):
ctx_name
=
infer_context_name
(
img
,
kern
)
ctx_name
=
infer_context_name
(
img
,
kern
)
...
@@ -869,8 +955,12 @@ class GpuCorrMM(BaseGpuCorrMM):
...
@@ -869,8 +955,12 @@ class GpuCorrMM(BaseGpuCorrMM):
kern
=
as_gpuarray_variable
(
kern
,
ctx_name
)
kern
=
as_gpuarray_variable
(
kern
,
ctx_name
)
if
img
.
type
.
ndim
!=
4
:
if
img
.
type
.
ndim
!=
4
:
raise
TypeError
(
'img must be 4D tensor'
)
raise
TypeError
(
'img must be 4D tensor'
)
if
kern
.
type
.
ndim
!=
4
:
if
self
.
unshared
:
raise
TypeError
(
'kern must be 4D tensor'
)
if
kern
.
type
.
ndim
!=
6
:
raise
TypeError
(
'kern must be 6D tensor'
)
else
:
if
kern
.
type
.
ndim
!=
4
:
raise
TypeError
(
'kern must be 4D tensor'
)
broadcastable
=
[
img
.
type
.
broadcastable
[
0
],
kern
.
type
.
broadcastable
[
0
],
broadcastable
=
[
img
.
type
.
broadcastable
[
0
],
kern
.
type
.
broadcastable
[
0
],
False
,
False
]
False
,
False
]
...
@@ -891,12 +981,14 @@ class GpuCorrMM(BaseGpuCorrMM):
...
@@ -891,12 +981,14 @@ class GpuCorrMM(BaseGpuCorrMM):
d_bottom
=
GpuCorrMM_gradInputs
(
self
.
border_mode
,
d_bottom
=
GpuCorrMM_gradInputs
(
self
.
border_mode
,
self
.
subsample
,
self
.
subsample
,
self
.
filter_dilation
,
self
.
filter_dilation
,
self
.
num_groups
)(
self
.
num_groups
,
self
.
unshared
)(
weights
,
top
,
bottom
.
shape
[
-
2
:])
weights
,
top
,
bottom
.
shape
[
-
2
:])
d_weights
=
GpuCorrMM_gradWeights
(
self
.
border_mode
,
d_weights
=
GpuCorrMM_gradWeights
(
self
.
border_mode
,
self
.
subsample
,
self
.
subsample
,
self
.
filter_dilation
,
self
.
filter_dilation
,
self
.
num_groups
)(
self
.
num_groups
,
self
.
unshared
)(
bottom
,
top
,
weights
.
shape
[
-
2
:])
bottom
,
top
,
weights
.
shape
[
-
2
:])
return
d_bottom
,
d_weights
return
d_bottom
,
d_weights
...
@@ -915,10 +1007,12 @@ class GpuCorrMM_gradWeights(BaseGpuCorrMM):
...
@@ -915,10 +1007,12 @@ class GpuCorrMM_gradWeights(BaseGpuCorrMM):
def
__init__
(
self
,
border_mode
=
"valid"
,
def
__init__
(
self
,
border_mode
=
"valid"
,
subsample
=
(
1
,
1
),
subsample
=
(
1
,
1
),
filter_dilation
=
(
1
,
1
),
filter_dilation
=
(
1
,
1
),
num_groups
=
1
):
num_groups
=
1
,
unshared
=
False
):
super
(
GpuCorrMM_gradWeights
,
self
)
.
__init__
(
border_mode
,
super
(
GpuCorrMM_gradWeights
,
self
)
.
__init__
(
border_mode
,
subsample
,
subsample
,
filter_dilation
,
num_groups
)
filter_dilation
,
num_groups
,
unshared
)
def
make_node
(
self
,
img
,
topgrad
,
shape
=
None
):
def
make_node
(
self
,
img
,
topgrad
,
shape
=
None
):
ctx_name
=
infer_context_name
(
img
,
topgrad
)
ctx_name
=
infer_context_name
(
img
,
topgrad
)
...
@@ -938,8 +1032,12 @@ class GpuCorrMM_gradWeights(BaseGpuCorrMM):
...
@@ -938,8 +1032,12 @@ class GpuCorrMM_gradWeights(BaseGpuCorrMM):
assert
shape
[
0
]
.
ndim
==
0
assert
shape
[
0
]
.
ndim
==
0
assert
shape
[
1
]
.
ndim
==
0
assert
shape
[
1
]
.
ndim
==
0
broadcastable
=
[
topgrad
.
type
.
broadcastable
[
1
],
img
.
type
.
broadcastable
[
1
],
if
self
.
unshared
:
False
,
False
]
broadcastable
=
[
topgrad
.
type
.
broadcastable
[
0
],
False
,
False
,
img
.
type
.
broadcastable
[
1
],
False
,
False
]
else
:
broadcastable
=
[
topgrad
.
type
.
broadcastable
[
1
],
img
.
type
.
broadcastable
[
1
],
False
,
False
]
return
Apply
(
self
,
[
img
,
topgrad
]
+
height_width
,
[
GpuArrayType
(
dtype
=
img
.
dtype
,
return
Apply
(
self
,
[
img
,
topgrad
]
+
height_width
,
[
GpuArrayType
(
dtype
=
img
.
dtype
,
context_name
=
ctx_name
,
context_name
=
ctx_name
,
broadcastable
=
broadcastable
)()])
broadcastable
=
broadcastable
)()])
...
@@ -958,11 +1056,12 @@ class GpuCorrMM_gradWeights(BaseGpuCorrMM):
...
@@ -958,11 +1056,12 @@ class GpuCorrMM_gradWeights(BaseGpuCorrMM):
d_bottom
=
GpuCorrMM_gradInputs
(
self
.
border_mode
,
d_bottom
=
GpuCorrMM_gradInputs
(
self
.
border_mode
,
self
.
subsample
,
self
.
subsample
,
self
.
filter_dilation
,
self
.
filter_dilation
,
self
.
num_groups
)(
weights
,
self
.
num_groups
,
top
,
self
.
unshared
)(
weights
,
bottom
.
shape
[
-
2
:])
top
,
bottom
.
shape
[
-
2
:])
d_top
=
GpuCorrMM
(
d_top
=
GpuCorrMM
(
self
.
border_mode
,
self
.
subsample
,
self
.
filter_dilation
,
self
.
num_groups
)(
bottom
,
weights
)
self
.
border_mode
,
self
.
subsample
,
self
.
filter_dilation
,
self
.
num_groups
,
self
.
unshared
)(
bottom
,
weights
)
d_height_width
=
(
d_height_width
=
(
theano
.
gradient
.
DisconnectedType
()(),
theano
.
gradient
.
DisconnectedType
()(),
)
*
2
if
len
(
inp
)
==
4
else
()
)
*
2
if
len
(
inp
)
==
4
else
()
...
@@ -989,16 +1088,22 @@ class GpuCorrMM_gradInputs(BaseGpuCorrMM):
...
@@ -989,16 +1088,22 @@ class GpuCorrMM_gradInputs(BaseGpuCorrMM):
def
__init__
(
self
,
border_mode
=
"valid"
,
def
__init__
(
self
,
border_mode
=
"valid"
,
subsample
=
(
1
,
1
),
subsample
=
(
1
,
1
),
filter_dilation
=
(
1
,
1
),
filter_dilation
=
(
1
,
1
),
num_groups
=
1
):
num_groups
=
1
,
unshared
=
False
):
super
(
GpuCorrMM_gradInputs
,
self
)
.
__init__
(
border_mode
,
subsample
,
super
(
GpuCorrMM_gradInputs
,
self
)
.
__init__
(
border_mode
,
subsample
,
filter_dilation
,
num_groups
)
filter_dilation
,
num_groups
,
unshared
)
def
make_node
(
self
,
kern
,
topgrad
,
shape
=
None
):
def
make_node
(
self
,
kern
,
topgrad
,
shape
=
None
):
ctx_name
=
infer_context_name
(
kern
,
topgrad
)
ctx_name
=
infer_context_name
(
kern
,
topgrad
)
kern
=
as_gpuarray_variable
(
kern
,
ctx_name
)
kern
=
as_gpuarray_variable
(
kern
,
ctx_name
)
topgrad
=
as_gpuarray_variable
(
topgrad
,
ctx_name
)
topgrad
=
as_gpuarray_variable
(
topgrad
,
ctx_name
)
if
kern
.
type
.
ndim
!=
4
:
if
self
.
unshared
:
raise
TypeError
(
'kern must be 4D tensor'
)
if
kern
.
type
.
ndim
!=
6
:
raise
TypeError
(
'kern must be 6D tensor'
)
else
:
if
kern
.
type
.
ndim
!=
4
:
raise
TypeError
(
'kern must be 4D tensor'
)
if
topgrad
.
type
.
ndim
!=
4
:
if
topgrad
.
type
.
ndim
!=
4
:
raise
TypeError
(
'topgrad must be 4D tensor'
)
raise
TypeError
(
'topgrad must be 4D tensor'
)
if
shape
is
None
:
if
shape
is
None
:
...
@@ -1014,7 +1119,7 @@ class GpuCorrMM_gradInputs(BaseGpuCorrMM):
...
@@ -1014,7 +1119,7 @@ class GpuCorrMM_gradInputs(BaseGpuCorrMM):
broadcastable
=
[
topgrad
.
type
.
broadcastable
[
0
],
False
,
broadcastable
=
[
topgrad
.
type
.
broadcastable
[
0
],
False
,
False
,
False
]
False
,
False
]
else
:
else
:
broadcastable
=
[
topgrad
.
type
.
broadcastable
[
0
],
kern
.
type
.
broadcastable
[
1
],
broadcastable
=
[
topgrad
.
type
.
broadcastable
[
0
],
kern
.
type
.
broadcastable
[
-
3
],
False
,
False
]
False
,
False
]
return
Apply
(
self
,
[
kern
,
topgrad
]
+
height_width
,
[
GpuArrayType
(
dtype
=
topgrad
.
dtype
,
return
Apply
(
self
,
[
kern
,
topgrad
]
+
height_width
,
[
GpuArrayType
(
dtype
=
topgrad
.
dtype
,
context_name
=
ctx_name
,
context_name
=
ctx_name
,
...
@@ -1034,13 +1139,15 @@ class GpuCorrMM_gradInputs(BaseGpuCorrMM):
...
@@ -1034,13 +1139,15 @@ class GpuCorrMM_gradInputs(BaseGpuCorrMM):
d_weights
=
GpuCorrMM_gradWeights
(
self
.
border_mode
,
d_weights
=
GpuCorrMM_gradWeights
(
self
.
border_mode
,
self
.
subsample
,
self
.
subsample
,
self
.
filter_dilation
,
self
.
filter_dilation
,
self
.
num_groups
)(
bottom
,
self
.
num_groups
,
top
,
self
.
unshared
)(
bottom
,
weights
.
shape
[
-
2
:])
top
,
weights
.
shape
[
-
2
:])
d_top
=
GpuCorrMM
(
self
.
border_mode
,
d_top
=
GpuCorrMM
(
self
.
border_mode
,
self
.
subsample
,
self
.
subsample
,
self
.
filter_dilation
,
self
.
filter_dilation
,
self
.
num_groups
)(
bottom
,
weights
)
self
.
num_groups
,
self
.
unshared
)(
bottom
,
weights
)
d_height_width
=
(
d_height_width
=
(
theano
.
gradient
.
DisconnectedType
()(),
theano
.
gradient
.
DisconnectedType
()(),
)
*
2
if
len
(
inp
)
==
4
else
()
)
*
2
if
len
(
inp
)
==
4
else
()
...
@@ -1682,7 +1789,7 @@ class GpuCorr3dMM_gradInputs(BaseGpuCorr3dMM):
...
@@ -1682,7 +1789,7 @@ class GpuCorr3dMM_gradInputs(BaseGpuCorr3dMM):
broadcastable
=
[
topgrad
.
type
.
broadcastable
[
0
],
False
,
broadcastable
=
[
topgrad
.
type
.
broadcastable
[
0
],
False
,
False
,
False
,
False
]
False
,
False
,
False
]
else
:
else
:
broadcastable
=
[
topgrad
.
type
.
broadcastable
[
0
],
kern
.
type
.
broadcastable
[
1
],
broadcastable
=
[
topgrad
.
type
.
broadcastable
[
0
],
kern
.
type
.
broadcastable
[
-
4
],
False
,
False
,
False
]
False
,
False
,
False
]
return
Apply
(
self
,
[
kern
,
topgrad
]
+
height_width_depth
,
return
Apply
(
self
,
[
kern
,
topgrad
]
+
height_width_depth
,
[
GpuArrayType
(
dtype
=
topgrad
.
dtype
,
[
GpuArrayType
(
dtype
=
topgrad
.
dtype
,
...
...
theano/gpuarray/c_code/corr_gemm.c
浏览文件 @
9592125c
...
@@ -349,7 +349,8 @@ PyGpuArrayObject* corrMM(PyGpuArrayObject *const bottom,
...
@@ -349,7 +349,8 @@ PyGpuArrayObject* corrMM(PyGpuArrayObject *const bottom,
const
size_t
dilW
=
1
,
const
size_t
dilW
=
1
,
const
size_t
padH
=
0
,
const
size_t
padH
=
0
,
const
size_t
padW
=
0
,
const
size_t
padW
=
0
,
const
size_t
numgroups
=
1
)
const
size_t
numgroups
=
1
,
const
size_t
unshared
=
0
)
{
{
if
(
PyGpuArray_NDIM
(
bottom
)
!=
4
)
if
(
PyGpuArray_NDIM
(
bottom
)
!=
4
)
{
{
...
@@ -368,21 +369,35 @@ PyGpuArrayObject* corrMM(PyGpuArrayObject *const bottom,
...
@@ -368,21 +369,35 @@ PyGpuArrayObject* corrMM(PyGpuArrayObject *const bottom,
return
NULL
;
return
NULL
;
}
}
if
(
PyGpuArray_NDIM
(
weight
)
!=
4
)
if
(
PyGpuArray_NDIM
(
weight
)
!=
(
unshared
?
6
:
4
)
)
{
{
PyErr_
SetString
(
PyExc_ValueError
,
"GpuCorrMM requires weight of 4D"
);
PyErr_
Format
(
PyExc_ValueError
,
"GpuCorrMM requires weight of %dD"
,
unshared
?
6
:
4
);
return
NULL
;
return
NULL
;
}
}
if
(
!
GpuArray_IS_C_CONTIGUOUS
(
&
weight
->
ga
))
if
(
!
GpuArray_IS_C_CONTIGUOUS
(
&
weight
->
ga
))
{
{
PyErr_Format
(
PyExc_ValueError
,
if
(
unshared
)
{
"GpuCorrMM requires weight to be C-contiguous, "
PyErr_Format
(
PyExc_ValueError
,
"but strides are: %ld %ld %ld %ld
\n
"
,
"GpuCorrMM requires weight to be C-contiguous, "
PyGpuArray_STRIDES
(
weight
)[
0
],
"but strides are: %ld %ld %ld %ld %ld %ld
\n
"
,
PyGpuArray_STRIDES
(
weight
)[
1
],
PyGpuArray_STRIDES
(
weight
)[
0
],
PyGpuArray_STRIDES
(
weight
)[
2
],
PyGpuArray_STRIDES
(
weight
)[
1
],
PyGpuArray_STRIDES
(
weight
)[
3
]);
PyGpuArray_STRIDES
(
weight
)[
2
],
return
NULL
;
PyGpuArray_STRIDES
(
weight
)[
3
],
PyGpuArray_STRIDES
(
weight
)[
4
],
PyGpuArray_STRIDES
(
weight
)[
5
]);
return
NULL
;
}
else
{
PyErr_Format
(
PyExc_ValueError
,
"GpuCorrMM requires weight to be C-contiguous, "
"but strides are: %ld %ld %ld %ld
\n
"
,
PyGpuArray_STRIDES
(
weight
)[
0
],
PyGpuArray_STRIDES
(
weight
)[
1
],
PyGpuArray_STRIDES
(
weight
)[
2
],
PyGpuArray_STRIDES
(
weight
)[
3
]);
return
NULL
;
}
}
}
if
(
PyGpuArray_NDIM
(
top
)
!=
4
)
if
(
PyGpuArray_NDIM
(
top
)
!=
4
)
...
@@ -409,10 +424,12 @@ PyGpuArrayObject* corrMM(PyGpuArrayObject *const bottom,
...
@@ -409,10 +424,12 @@ PyGpuArrayObject* corrMM(PyGpuArrayObject *const bottom,
const
size_t
bottomHeight
=
PyGpuArray_DIMS
(
bottom
)[
2
];
const
size_t
bottomHeight
=
PyGpuArray_DIMS
(
bottom
)[
2
];
const
size_t
bottomWidth
=
PyGpuArray_DIMS
(
bottom
)[
3
];
const
size_t
bottomWidth
=
PyGpuArray_DIMS
(
bottom
)[
3
];
// weights: (nFilters, nChannels, rows, columns)
// weights: (nFilters, nChannels, rows, columns)
// or (nFilters, out_rows, out_columns, nChannels, rows, columns) -> for unshared
const
size_t
nFilters
=
PyGpuArray_DIMS
(
weight
)[
0
];
const
size_t
nFilters
=
PyGpuArray_DIMS
(
weight
)[
0
];
const
size_t
kH
=
PyGpuArray_DIMS
(
weight
)[
2
];
const
size_t
kW
=
PyGpuArray_DIMS
(
weight
)[
3
];
const
size_t
kH
=
PyGpuArray_DIMS
(
weight
)[
unshared
?
4
:
2
];
if
(
nChannels
!=
(
PyGpuArray_DIMS
(
weight
)[
1
]
*
numgroups
))
{
const
size_t
kW
=
PyGpuArray_DIMS
(
weight
)[
unshared
?
5
:
3
];
if
(
nChannels
!=
PyGpuArray_DIMS
(
weight
)[
unshared
?
3
:
1
]
*
numgroups
)
{
PyErr_SetString
(
PyExc_ValueError
,
PyErr_SetString
(
PyExc_ValueError
,
"GpuCorrMM images and kernel must have the same stack size
\n
"
);
"GpuCorrMM images and kernel must have the same stack size
\n
"
);
return
NULL
;
return
NULL
;
...
@@ -435,21 +452,56 @@ PyGpuArrayObject* corrMM(PyGpuArrayObject *const bottom,
...
@@ -435,21 +452,56 @@ PyGpuArrayObject* corrMM(PyGpuArrayObject *const bottom,
const
size_t
topHeight
=
_CONV_FLOORDIV_X
(
topHeightNoDH
,
dH
)
+
1
;
const
size_t
topHeight
=
_CONV_FLOORDIV_X
(
topHeightNoDH
,
dH
)
+
1
;
const
size_t
topWidth
=
_CONV_FLOORDIV_X
(
topWidthNoDW
,
dW
)
+
1
;
const
size_t
topWidth
=
_CONV_FLOORDIV_X
(
topWidthNoDW
,
dW
)
+
1
;
#undef _CONV_FLOORDIV
#undef _CONV_FLOORDIV
if
(
batchSize
!=
PyGpuArray_DIMS
(
top
)[
0
]
||
if
(
unshared
)
{
nFilters
!=
PyGpuArray_DIMS
(
top
)[
1
]
||
if
(
topHeight
!=
PyGpuArray_DIMS
(
weight
)[
1
]
||
topHeight
!=
PyGpuArray_DIMS
(
top
)[
2
]
||
topWidth
!=
PyGpuArray_DIMS
(
weight
)[
2
])
{
topWidth
!=
PyGpuArray_DIMS
(
top
)[
3
])
{
PyErr_Format
(
PyExc_ValueError
,
PyErr_Format
(
PyExc_ValueError
,
"GpuCorrMM regions in kernel must match output regions:
\n
"
"GpuCorrMM shape inconsistency:
\n
"
" bottom shape: %ld %ld %ld %ld
\n
"
" bottom shape: %ld %ld %ld %ld
\n
"
" weight shape: %ld %ld %ld %ld %ld %ld"
" weight shape: %ld %ld %ld %ld
\n
"
" (expected %ld %ld %ld %ld %ld %ld)
\n
"
" top shape: %ld %ld %ld %ld (expected %ld %ld %ld %ld)
\n
"
,
" top shape(calculated): %ld %ld %ld %ld
\n
"
,
batchSize
,
nChannels
,
bottomHeight
,
bottomWidth
,
batchSize
,
nChannels
,
bottomHeight
,
bottomWidth
,
nFilters
,
nChannels
/
numgroups
,
kH
,
kW
,
nFilters
,
PyGpuArray_DIMS
(
weight
)[
1
],
PyGpuArray_DIMS
(
top
)[
0
],
PyGpuArray_DIMS
(
top
)[
1
],
PyGpuArray_DIMS
(
weight
)[
2
],
nChannels
/
numgroups
,
kH
,
kW
,
PyGpuArray_DIMS
(
top
)[
2
],
PyGpuArray_DIMS
(
top
)[
3
],
nFilters
,
topHeight
,
topWidth
,
nChannels
/
numgroups
,
kH
,
kW
,
batchSize
,
nFilters
,
topHeight
,
topWidth
);
batchSize
,
nFilters
,
topHeight
,
topWidth
);
return
NULL
;
return
NULL
;
}
if
(
batchSize
!=
PyGpuArray_DIMS
(
top
)[
0
]
||
nFilters
!=
PyGpuArray_DIMS
(
top
)[
1
]
||
topHeight
!=
PyGpuArray_DIMS
(
top
)[
2
]
||
topWidth
!=
PyGpuArray_DIMS
(
top
)[
3
])
{
PyErr_Format
(
PyExc_ValueError
,
"GpuCorrMM shape inconsistency:
\n
"
" bottom shape: %ld %ld %ld %ld
\n
"
" weight shape: %ld %ld %ld %ld %ld %ld
\n
"
" top shape: %ld %ld %ld %ld (expected %ld %ld %ld %ld)
\n
"
,
batchSize
,
nChannels
,
bottomHeight
,
bottomWidth
,
nFilters
,
topHeight
,
topWidth
,
nChannels
/
numgroups
,
kH
,
kW
,
PyGpuArray_DIMS
(
top
)[
0
],
PyGpuArray_DIMS
(
top
)[
1
],
PyGpuArray_DIMS
(
top
)[
2
],
PyGpuArray_DIMS
(
top
)[
3
],
batchSize
,
nFilters
,
topHeight
,
topWidth
);
return
NULL
;
}
}
else
{
if
(
batchSize
!=
PyGpuArray_DIMS
(
top
)[
0
]
||
nFilters
!=
PyGpuArray_DIMS
(
top
)[
1
]
||
topHeight
!=
PyGpuArray_DIMS
(
top
)[
2
]
||
topWidth
!=
PyGpuArray_DIMS
(
top
)[
3
])
{
PyErr_Format
(
PyExc_ValueError
,
"GpuCorrMM shape inconsistency:
\n
"
" bottom shape: %ld %ld %ld %ld
\n
"
" weight shape: %ld %ld %ld %ld
\n
"
" top shape: %ld %ld %ld %ld (expected %ld %ld %ld %ld)
\n
"
,
batchSize
,
nChannels
,
bottomHeight
,
bottomWidth
,
nFilters
,
nChannels
/
numgroups
,
kH
,
kW
,
PyGpuArray_DIMS
(
top
)[
0
],
PyGpuArray_DIMS
(
top
)[
1
],
PyGpuArray_DIMS
(
top
)[
2
],
PyGpuArray_DIMS
(
top
)[
3
],
batchSize
,
nFilters
,
topHeight
,
topWidth
);
return
NULL
;
}
}
}
int
err
=
gpublas_setup
(
bottom
->
context
->
ctx
);
int
err
=
gpublas_setup
(
bottom
->
context
->
ctx
);
...
@@ -512,19 +564,37 @@ PyGpuArrayObject* corrMM(PyGpuArrayObject *const bottom,
...
@@ -512,19 +564,37 @@ PyGpuArrayObject* corrMM(PyGpuArrayObject *const bottom,
return
NULL
;
return
NULL
;
}
}
// Second, gemm
// Second, gemm
for
(
size_t
g
=
0
;
g
<
numgroups
;
g
++
){
if
(
unshared
)
{
err
=
rgemm
(
cb_fortran
,
cb_no_trans
,
cb_no_trans
,
for
(
size_t
g
=
0
;
g
<
numgroups
;
++
g
)
{
N_
,
M_
,
K_
,
1
,
for
(
size_t
reg
=
0
;
reg
<
N_
;
++
reg
){
&
col
->
ga
,
g
*
group_col_stride
,
N_
,
err
=
rgemm
(
cb_fortran
,
cb_no_trans
,
cb_no_trans
,
&
weight
->
ga
,
g
*
group_weight_stride
,
K_
,
1
,
M_
,
K_
,
1
,
0
,
&
col
->
ga
,
g
*
group_col_stride
+
reg
,
N_
,
&
top
->
ga
,
n
*
batch_top_stride
+
g
*
group_top_stride
,
N_
);
&
weight
->
ga
,
g
*
group_weight_stride
+
reg
*
K_
,
K_
*
N_
,
0
,
&
top
->
ga
,
n
*
batch_top_stride
+
g
*
group_top_stride
+
reg
,
N_
);
if
(
err
!=
GA_NO_ERROR
)
{
PyErr_Format
(
PyExc_RuntimeError
,
"GpuCorrMM forward encountered an error running gemm: %d"
,
err
);
Py_DECREF
(
col
);
return
NULL
;
}
}
}
}
}
if
(
err
!=
GA_NO_ERROR
)
{
else
{
PyErr_Format
(
PyExc_RuntimeError
,
for
(
size_t
g
=
0
;
g
<
numgroups
;
++
g
){
"GpuCorrMM forward encountered an error running gemm: %d"
,
err
);
err
=
rgemm
(
cb_fortran
,
cb_no_trans
,
cb_no_trans
,
Py_DECREF
(
col
);
N_
,
M_
,
K_
,
1
,
return
NULL
;
&
col
->
ga
,
g
*
group_col_stride
,
N_
,
&
weight
->
ga
,
g
*
group_weight_stride
,
K_
,
0
,
&
top
->
ga
,
n
*
batch_top_stride
+
g
*
group_top_stride
,
N_
);
if
(
err
!=
GA_NO_ERROR
)
{
PyErr_Format
(
PyExc_RuntimeError
,
"GpuCorrMM forward encountered an error running gemm: %d"
,
err
);
Py_DECREF
(
col
);
return
NULL
;
}
}
}
}
}
}
}
}
...
@@ -557,19 +627,37 @@ PyGpuArrayObject* corrMM(PyGpuArrayObject *const bottom,
...
@@ -557,19 +627,37 @@ PyGpuArrayObject* corrMM(PyGpuArrayObject *const bottom,
// Note that we accumulate into weight. We do so by setting beta = 0
// Note that we accumulate into weight. We do so by setting beta = 0
// for the first iteration and beta = 1 for subsequent ones. (This
// for the first iteration and beta = 1 for subsequent ones. (This
// is faster than setting weight to all zeros before the loop.)
// is faster than setting weight to all zeros before the loop.)
for
(
size_t
g
=
0
;
g
<
numgroups
;
g
++
){
if
(
unshared
)
{
err
=
rgemm
(
cb_fortran
,
cb_trans
,
cb_no_trans
,
for
(
size_t
g
=
0
;
g
<
numgroups
;
++
g
)
{
K_
,
M_
,
N_
,
1
,
for
(
size_t
reg
=
0
;
reg
<
N_
;
++
reg
){
&
col
->
ga
,
g
*
group_col_stride
,
N_
,
err
=
rgemm
(
cb_fortran
,
cb_trans
,
cb_no_trans
,
&
top
->
ga
,
n
*
batch_top_stride
+
g
*
group_top_stride
,
N_
,
K_
,
M_
,
1
,
1
,
(
n
==
0
)
?
0
:
1
,
&
col
->
ga
,
g
*
group_col_stride
+
reg
,
N_
,
&
weight
->
ga
,
g
*
group_weight_stride
,
K_
);
&
top
->
ga
,
n
*
batch_top_stride
+
g
*
group_top_stride
+
reg
,
N_
,
(
n
==
0
)
?
0
:
1
,
&
weight
->
ga
,
g
*
group_weight_stride
+
reg
*
K_
,
K_
*
N_
);
if
(
err
!=
GA_NO_ERROR
)
{
PyErr_Format
(
PyExc_RuntimeError
,
"GpuCorrMM grad weights encountered an error running gemm: %d"
,
err
);
Py_DECREF
(
col
);
return
NULL
;
}
}
}
}
}
if
(
err
!=
GA_NO_ERROR
)
{
else
{
PyErr_Format
(
PyExc_RuntimeError
,
for
(
size_t
g
=
0
;
g
<
numgroups
;
g
++
){
"GpuCorrMM grad weights encountered an error running gemm: %d"
,
err
);
err
=
rgemm
(
cb_fortran
,
cb_trans
,
cb_no_trans
,
Py_DECREF
(
col
);
K_
,
M_
,
N_
,
1
,
return
NULL
;
&
col
->
ga
,
g
*
group_col_stride
,
N_
,
&
top
->
ga
,
n
*
batch_top_stride
+
g
*
group_top_stride
,
N_
,
(
n
==
0
)
?
0
:
1
,
&
weight
->
ga
,
g
*
group_weight_stride
,
K_
);
if
(
err
!=
GA_NO_ERROR
)
{
PyErr_Format
(
PyExc_RuntimeError
,
"GpuCorrMM grad weights encountered an error running gemm: %d"
,
err
);
Py_DECREF
(
col
);
return
NULL
;
}
}
}
}
}
}
}
}
...
@@ -590,19 +678,37 @@ PyGpuArrayObject* corrMM(PyGpuArrayObject *const bottom,
...
@@ -590,19 +678,37 @@ PyGpuArrayObject* corrMM(PyGpuArrayObject *const bottom,
// Iterate over batch
// Iterate over batch
for
(
size_t
n
=
0
;
n
<
batchSize
;
n
++
)
{
for
(
size_t
n
=
0
;
n
<
batchSize
;
n
++
)
{
// gemm into columns
// gemm into columns
for
(
size_t
g
=
0
;
g
<
numgroups
;
g
++
){
if
(
unshared
)
{
err
=
rgemm
(
cb_fortran
,
cb_no_trans
,
cb_trans
,
for
(
size_t
g
=
0
;
g
<
numgroups
;
++
g
){
N_
,
K_
,
M_
,
1
,
for
(
size_t
reg
=
0
;
reg
<
N_
;
++
reg
)
{
&
top
->
ga
,
n
*
batch_top_stride
+
g
*
group_top_stride
,
N_
,
err
=
rgemm
(
cb_fortran
,
cb_no_trans
,
cb_trans
,
&
weight
->
ga
,
g
*
group_weight_stride
,
K_
,
1
,
K_
,
M_
,
1
,
0
,
&
top
->
ga
,
n
*
batch_top_stride
+
g
*
group_top_stride
+
reg
,
N_
,
&
col
->
ga
,
g
*
group_col_stride
,
N_
);
&
weight
->
ga
,
g
*
group_weight_stride
+
reg
*
K_
,
K_
*
N_
,
0
,
&
col
->
ga
,
g
*
group_col_stride
+
reg
,
N_
);
if
(
err
!=
GA_NO_ERROR
)
{
PyErr_Format
(
PyExc_RuntimeError
,
"GpuCorrMM grad inputs encountered an error running gemm: %d"
,
err
);
Py_DECREF
(
col
);
return
NULL
;
}
}
}
}
}
if
(
err
!=
GA_NO_ERROR
)
{
else
{
PyErr_Format
(
PyExc_RuntimeError
,
for
(
size_t
g
=
0
;
g
<
numgroups
;
++
g
){
"GpuCorrMM grad inputs encountered an error running gemm: %d"
,
err
);
err
=
rgemm
(
cb_fortran
,
cb_no_trans
,
cb_trans
,
Py_DECREF
(
col
);
N_
,
K_
,
M_
,
1
,
return
NULL
;
&
top
->
ga
,
n
*
batch_top_stride
+
g
*
group_top_stride
,
N_
,
&
weight
->
ga
,
g
*
group_weight_stride
,
K_
,
0
,
&
col
->
ga
,
g
*
group_col_stride
,
N_
);
if
(
err
!=
GA_NO_ERROR
)
{
PyErr_Format
(
PyExc_RuntimeError
,
"GpuCorrMM grad inputs encountered an error running gemm: %d"
,
err
);
Py_DECREF
(
col
);
return
NULL
;
}
}
}
}
// col2im back to the data
// col2im back to the data
err
=
col2im
(
&
col
->
ga
,
nChannels
,
bottomHeight
,
bottomWidth
,
err
=
col2im
(
&
col
->
ga
,
nChannels
,
bottomHeight
,
bottomWidth
,
...
...
theano/gpuarray/dnn.py
浏览文件 @
9592125c
...
@@ -3035,6 +3035,9 @@ def local_abstractconv_cudnn_graph(op, context_name, inputs, outputs):
...
@@ -3035,6 +3035,9 @@ def local_abstractconv_cudnn_graph(op, context_name, inputs, outputs):
if
version
(
raises
=
False
)
<
6000
and
op
.
filter_dilation
!=
(
1
,
1
):
if
version
(
raises
=
False
)
<
6000
and
op
.
filter_dilation
!=
(
1
,
1
):
return
None
return
None
if
op
.
unshared
:
return
None
inp1
=
inputs
[
0
]
inp1
=
inputs
[
0
]
inp2
=
inputs
[
1
]
inp2
=
inputs
[
1
]
...
@@ -3129,6 +3132,8 @@ def local_abstractconv_cudnn(node):
...
@@ -3129,6 +3132,8 @@ def local_abstractconv_cudnn(node):
ctx
=
infer_context_name
(
*
node
.
inputs
)
ctx
=
infer_context_name
(
*
node
.
inputs
)
if
not
isinstance
(
node
.
inputs
[
0
]
.
type
,
GpuArrayType
):
if
not
isinstance
(
node
.
inputs
[
0
]
.
type
,
GpuArrayType
):
return
return
if
node
.
op
.
unshared
:
return
None
if
isinstance
(
node
.
op
,
AbstractConv2d
):
if
isinstance
(
node
.
op
,
AbstractConv2d
):
return
local_abstractconv_cudnn_graph
(
node
.
op
,
ctx
,
node
.
inputs
,
node
.
outputs
)
return
local_abstractconv_cudnn_graph
(
node
.
op
,
ctx
,
node
.
inputs
,
node
.
outputs
)
elif
isinstance
(
node
.
op
,
AbstractConv3d
):
elif
isinstance
(
node
.
op
,
AbstractConv3d
):
...
@@ -3143,6 +3148,8 @@ def local_abstractconv_cudnn_alt(node):
...
@@ -3143,6 +3148,8 @@ def local_abstractconv_cudnn_alt(node):
if
version
(
raises
=
False
)
<
6000
and
node
.
op
.
filter_dilation
!=
(
1
,
1
):
if
version
(
raises
=
False
)
<
6000
and
node
.
op
.
filter_dilation
!=
(
1
,
1
):
return
None
return
None
if
node
.
op
.
unshared
:
return
None
inp1
=
node
.
inputs
[
0
]
inp1
=
node
.
inputs
[
0
]
inp2
=
node
.
inputs
[
1
]
inp2
=
node
.
inputs
[
1
]
...
@@ -3349,6 +3356,8 @@ def local_abstractconv_gw_cudnn(node):
...
@@ -3349,6 +3356,8 @@ def local_abstractconv_gw_cudnn(node):
ctx
=
infer_context_name
(
*
node
.
inputs
)
ctx
=
infer_context_name
(
*
node
.
inputs
)
if
not
isinstance
(
node
.
inputs
[
0
]
.
type
,
GpuArrayType
):
if
not
isinstance
(
node
.
inputs
[
0
]
.
type
,
GpuArrayType
):
return
return
if
node
.
op
.
unshared
:
return
None
if
isinstance
(
node
.
op
,
AbstractConv2d_gradWeights
):
if
isinstance
(
node
.
op
,
AbstractConv2d_gradWeights
):
return
local_abstractconv_cudnn_graph
(
node
.
op
,
ctx
,
node
.
inputs
,
node
.
outputs
)
return
local_abstractconv_cudnn_graph
(
node
.
op
,
ctx
,
node
.
inputs
,
node
.
outputs
)
elif
isinstance
(
node
.
op
,
AbstractConv3d_gradWeights
):
elif
isinstance
(
node
.
op
,
AbstractConv3d_gradWeights
):
...
@@ -3360,6 +3369,8 @@ def local_abstractconv_gi_cudnn(node):
...
@@ -3360,6 +3369,8 @@ def local_abstractconv_gi_cudnn(node):
ctx
=
infer_context_name
(
*
node
.
inputs
)
ctx
=
infer_context_name
(
*
node
.
inputs
)
if
not
isinstance
(
node
.
inputs
[
0
]
.
type
,
GpuArrayType
):
if
not
isinstance
(
node
.
inputs
[
0
]
.
type
,
GpuArrayType
):
return
return
if
node
.
op
.
unshared
:
return
None
if
isinstance
(
node
.
op
,
AbstractConv2d_gradInputs
):
if
isinstance
(
node
.
op
,
AbstractConv2d_gradInputs
):
return
local_abstractconv_cudnn_graph
(
node
.
op
,
ctx
,
node
.
inputs
,
node
.
outputs
)
return
local_abstractconv_cudnn_graph
(
node
.
op
,
ctx
,
node
.
inputs
,
node
.
outputs
)
elif
isinstance
(
node
.
op
,
AbstractConv3d_gradInputs
):
elif
isinstance
(
node
.
op
,
AbstractConv3d_gradInputs
):
...
...
theano/gpuarray/opt.py
浏览文件 @
9592125c
...
@@ -1595,12 +1595,17 @@ def local_abstractconv_gemm(node):
...
@@ -1595,12 +1595,17 @@ def local_abstractconv_gemm(node):
border_mode
=
node
.
op
.
border_mode
border_mode
=
node
.
op
.
border_mode
subsample
=
node
.
op
.
subsample
subsample
=
node
.
op
.
subsample
filter_dilation
=
node
.
op
.
filter_dilation
filter_dilation
=
node
.
op
.
filter_dilation
num_groups
=
node
.
op
.
num_groups
unshared
=
node
.
op
.
unshared
if
((
border_mode
==
'full'
)
and
(
subsample
==
(
1
,
1
))
and
node
.
op
.
num_groups
==
1
):
flip
=
(
slice
(
None
),)
*
(
kern
.
ndim
-
2
)
+
\
(
slice
(
None
,
None
,
-
1
),)
*
2
kern_axes
=
(
1
,
0
)
+
tuple
(
i
for
i
in
range
(
2
,
kern
.
ndim
))
if
((
border_mode
==
'full'
)
and
(
subsample
==
(
1
,
1
))
and
num_groups
==
1
and
not
unshared
):
if
not
node
.
op
.
filter_flip
:
if
not
node
.
op
.
filter_flip
:
kern
=
kern
[
:,
:,
::
-
1
,
::
-
1
]
kern
=
kern
[
flip
]
# need to dimshuffle the kernel for full convolution
# need to dimshuffle the kernel for full convolution
kern
=
kern
.
dimshuffle
(
1
,
0
,
2
,
3
)
kern
=
kern
.
dimshuffle
(
kern_axes
)
# call GpuCorrMM_gradInputs
# call GpuCorrMM_gradInputs
rval
=
GpuCorrMM_gradInputs
(
'valid'
,
rval
=
GpuCorrMM_gradInputs
(
'valid'
,
subsample
,
subsample
,
...
@@ -1609,13 +1614,14 @@ def local_abstractconv_gemm(node):
...
@@ -1609,13 +1614,14 @@ def local_abstractconv_gemm(node):
else
:
else
:
# need to flip the kernel if necessary
# need to flip the kernel if necessary
if
node
.
op
.
filter_flip
:
if
node
.
op
.
filter_flip
:
kern
=
kern
[
:,
:,
::
-
1
,
::
-
1
]
kern
=
kern
[
flip
]
# By default use GpuCorrMM
# By default use GpuCorrMM
rval
=
GpuCorrMM
(
border_mode
,
rval
=
GpuCorrMM
(
border_mode
,
subsample
,
subsample
,
filter_dilation
,
filter_dilation
,
node
.
op
.
num_groups
)(
gpu_contiguous
(
img
),
num_groups
,
gpu_contiguous
(
kern
))
unshared
)(
gpu_contiguous
(
img
),
gpu_contiguous
(
kern
))
# call GpuCorrMM_gradWeights if good
# call GpuCorrMM_gradWeights if good
# (the latter is faster if batchsize * kernelHeight * kernelWidth
# (the latter is faster if batchsize * kernelHeight * kernelWidth
...
@@ -1628,11 +1634,12 @@ def local_abstractconv_gemm(node):
...
@@ -1628,11 +1634,12 @@ def local_abstractconv_gemm(node):
(
node
.
op
.
kshp
is
not
None
)
and
(
node
.
op
.
kshp
is
not
None
)
and
(
None
not
in
node
.
op
.
kshp
)
and
(
None
not
in
node
.
op
.
kshp
)
and
border_mode
!=
"half"
and
border_mode
!=
"half"
and
node
.
op
.
num_groups
==
1
):
num_groups
==
1
and
not
unshared
):
# we know the kernel and output size
# we know the kernel and output size
prod1
=
node
.
op
.
kshp
[
0
]
*
node
.
op
.
kshp
[
1
]
prod1
=
node
.
op
.
kshp
[
0
]
*
node
.
op
.
kshp
[
-
3
]
prod2
=
((
node
.
op
.
imshp
[
-
2
]
-
node
.
op
.
kshp
[
0
]
+
1
)
*
prod2
=
((
node
.
op
.
imshp
[
-
2
]
-
node
.
op
.
kshp
[
0
]
+
1
)
*
(
node
.
op
.
imshp
[
-
1
]
-
node
.
op
.
kshp
[
1
]
+
1
))
(
node
.
op
.
imshp
[
-
1
]
-
node
.
op
.
kshp
[
-
3
]
+
1
))
if
(
None
not
in
node
.
op
.
imshp
[:
1
]):
if
(
None
not
in
node
.
op
.
imshp
[:
1
]):
# we also know batchsize and input channels
# we also know batchsize and input channels
prod1
*=
node
.
op
.
imshp
[
0
]
prod1
*=
node
.
op
.
imshp
[
0
]
...
@@ -1666,13 +1673,19 @@ def local_abstractconv_gemm_def(node):
...
@@ -1666,13 +1673,19 @@ def local_abstractconv_gemm_def(node):
border_mode
=
node
.
op
.
border_mode
border_mode
=
node
.
op
.
border_mode
subsample
=
node
.
op
.
subsample
subsample
=
node
.
op
.
subsample
filter_dilation
=
node
.
op
.
filter_dilation
filter_dilation
=
node
.
op
.
filter_dilation
num_groups
=
node
.
op
.
num_groups
unshared
=
node
.
op
.
unshared
if
node
.
op
.
filter_flip
:
if
node
.
op
.
filter_flip
:
kern
=
kern
[:,
:,
::
-
1
,
::
-
1
]
flip
=
(
slice
(
None
),)
*
(
kern
.
ndim
-
2
)
+
\
(
slice
(
None
,
None
,
-
1
),)
*
2
kern
=
kern
[
flip
]
rval
=
GpuCorrMM
(
border_mode
,
rval
=
GpuCorrMM
(
border_mode
,
subsample
,
subsample
,
filter_dilation
,
filter_dilation
,
node
.
op
.
num_groups
)(
gpu_contiguous
(
img
),
num_groups
,
gpu_contiguous
(
kern
))
unshared
)(
gpu_contiguous
(
img
),
gpu_contiguous
(
kern
))
return
[
rval
]
return
[
rval
]
...
@@ -1690,8 +1703,9 @@ def local_abstractconv_gemm_alt(node):
...
@@ -1690,8 +1703,9 @@ def local_abstractconv_gemm_alt(node):
subsample
=
node
.
op
.
subsample
subsample
=
node
.
op
.
subsample
filter_dilation
=
node
.
op
.
filter_dilation
filter_dilation
=
node
.
op
.
filter_dilation
num_groups
=
node
.
op
.
num_groups
num_groups
=
node
.
op
.
num_groups
unshared
=
node
.
op
.
unshared
if
border_mode
==
'full'
and
subsample
==
(
1
,
1
)
and
num_groups
==
1
:
if
border_mode
==
'full'
and
subsample
==
(
1
,
1
)
and
num_groups
==
1
and
not
unshared
:
if
not
node
.
op
.
filter_flip
:
if
not
node
.
op
.
filter_flip
:
kern
=
kern
[:,
:,
::
-
1
,
::
-
1
]
kern
=
kern
[:,
:,
::
-
1
,
::
-
1
]
...
@@ -1702,7 +1716,7 @@ def local_abstractconv_gemm_alt(node):
...
@@ -1702,7 +1716,7 @@ def local_abstractconv_gemm_alt(node):
gpu_contiguous
(
kern
),
gpu_contiguous
(
img
))
gpu_contiguous
(
kern
),
gpu_contiguous
(
img
))
elif
(
border_mode
==
'valid'
and
subsample
==
(
1
,
1
)
and
filter_dilation
==
(
1
,
1
)
and
elif
(
border_mode
==
'valid'
and
subsample
==
(
1
,
1
)
and
filter_dilation
==
(
1
,
1
)
and
num_groups
==
1
):
num_groups
==
1
and
not
unshared
):
if
node
.
op
.
filter_flip
:
if
node
.
op
.
filter_flip
:
kern
=
kern
[:,
:,
::
-
1
,
::
-
1
]
kern
=
kern
[:,
:,
::
-
1
,
::
-
1
]
...
@@ -1896,10 +1910,13 @@ def local_abstractconv_gradweights_gemm(node):
...
@@ -1896,10 +1910,13 @@ def local_abstractconv_gradweights_gemm(node):
rval
=
GpuCorrMM_gradWeights
(
border_mode
=
node
.
op
.
border_mode
,
rval
=
GpuCorrMM_gradWeights
(
border_mode
=
node
.
op
.
border_mode
,
subsample
=
node
.
op
.
subsample
,
subsample
=
node
.
op
.
subsample
,
filter_dilation
=
node
.
op
.
filter_dilation
,
filter_dilation
=
node
.
op
.
filter_dilation
,
num_groups
=
node
.
op
.
num_groups
)(
num_groups
=
node
.
op
.
num_groups
,
unshared
=
node
.
op
.
unshared
)(
gpu_contiguous
(
img
),
gpu_contiguous
(
topgrad
),
shape
)
gpu_contiguous
(
img
),
gpu_contiguous
(
topgrad
),
shape
)
flip
=
(
slice
(
None
),)
*
(
rval
.
ndim
-
2
)
+
\
(
slice
(
None
,
None
,
-
1
),)
*
2
if
node
.
op
.
filter_flip
:
if
node
.
op
.
filter_flip
:
rval
=
rval
[
:,
:,
::
-
1
,
::
-
1
]
rval
=
rval
[
flip
]
rval
=
tensor
.
patternbroadcast
(
rval
,
node
.
outputs
[
0
]
.
broadcastable
)
rval
=
tensor
.
patternbroadcast
(
rval
,
node
.
outputs
[
0
]
.
broadcastable
)
rval
=
as_gpuarray_variable
(
rval
,
context_name
=
ctx
)
rval
=
as_gpuarray_variable
(
rval
,
context_name
=
ctx
)
return
[
rval
]
return
[
rval
]
...
@@ -1918,9 +1935,10 @@ def local_abstractconv_gemm_gradweights_alt(node):
...
@@ -1918,9 +1935,10 @@ def local_abstractconv_gemm_gradweights_alt(node):
subsample
=
node
.
op
.
subsample
subsample
=
node
.
op
.
subsample
filter_dilation
=
node
.
op
.
filter_dilation
filter_dilation
=
node
.
op
.
filter_dilation
num_groups
=
node
.
op
.
num_groups
num_groups
=
node
.
op
.
num_groups
unshared
=
node
.
op
.
unshared
if
(
border_mode
==
'valid'
and
subsample
==
(
1
,
1
)
and
filter_dilation
==
(
1
,
1
)
and
if
(
border_mode
==
'valid'
and
subsample
==
(
1
,
1
)
and
filter_dilation
==
(
1
,
1
)
and
num_groups
==
1
):
num_groups
==
1
and
not
unshared
):
rval
=
GpuCorrMM
(
border_mode
,
rval
=
GpuCorrMM
(
border_mode
,
subsample
,
subsample
,
filter_dilation
)(
filter_dilation
)(
...
@@ -2001,12 +2019,15 @@ def local_abstractconv_gradinputs_gemm(node):
...
@@ -2001,12 +2019,15 @@ def local_abstractconv_gradinputs_gemm(node):
return
None
return
None
if
node
.
op
.
filter_flip
:
if
node
.
op
.
filter_flip
:
kern
=
kern
[:,
:,
::
-
1
,
::
-
1
]
flip
=
(
slice
(
None
),)
*
(
kern
.
ndim
-
2
)
+
\
(
slice
(
None
,
None
,
-
1
),)
*
2
kern
=
kern
[
flip
]
rval
=
GpuCorrMM_gradInputs
(
border_mode
=
node
.
op
.
border_mode
,
rval
=
GpuCorrMM_gradInputs
(
border_mode
=
node
.
op
.
border_mode
,
subsample
=
node
.
op
.
subsample
,
subsample
=
node
.
op
.
subsample
,
filter_dilation
=
node
.
op
.
filter_dilation
,
filter_dilation
=
node
.
op
.
filter_dilation
,
num_groups
=
node
.
op
.
num_groups
)(
num_groups
=
node
.
op
.
num_groups
,
unshared
=
node
.
op
.
unshared
)(
gpu_contiguous
(
kern
),
gpu_contiguous
(
topgrad
),
shape
)
gpu_contiguous
(
kern
),
gpu_contiguous
(
topgrad
),
shape
)
return
[
rval
]
return
[
rval
]
...
@@ -2023,8 +2044,9 @@ def local_abstractconv_gradinputs_gemm_alt(node):
...
@@ -2023,8 +2044,9 @@ def local_abstractconv_gradinputs_gemm_alt(node):
subsample
=
node
.
op
.
subsample
subsample
=
node
.
op
.
subsample
filter_dilation
=
node
.
op
.
filter_dilation
filter_dilation
=
node
.
op
.
filter_dilation
num_groups
=
node
.
op
.
num_groups
num_groups
=
node
.
op
.
num_groups
unshared
=
node
.
op
.
unshared
if
border_mode
==
'valid'
and
subsample
==
(
1
,
1
)
and
num_groups
==
1
:
if
border_mode
==
'valid'
and
subsample
==
(
1
,
1
)
and
num_groups
==
1
and
not
unshared
:
if
not
node
.
op
.
filter_flip
:
if
not
node
.
op
.
filter_flip
:
kern
=
kern
[:,
:,
::
-
1
,
::
-
1
]
kern
=
kern
[:,
:,
::
-
1
,
::
-
1
]
...
@@ -2117,8 +2139,9 @@ class ConvMetaOptimizer(LocalMetaOptimizer):
...
@@ -2117,8 +2139,9 @@ class ConvMetaOptimizer(LocalMetaOptimizer):
node
.
op
.
border_mode
,
node
.
op
.
border_mode
,
node
.
op
.
subsample
,
node
.
op
.
subsample
,
node
.
op
.
filter_dilation
)
node
.
op
.
filter_dilation
)
convdim
=
img
.
ndim
-
2
result
[
kshape
]
=
theano
.
tensor
.
as_tensor_variable
(
node
.
op
.
kshp
[
2
:])
result
[
kshape
]
=
theano
.
tensor
.
as_tensor_variable
(
node
.
op
.
kshp
[
-
convdim
:])
for
(
var
,
shape
)
in
zip
((
img
,
top
),
(
node
.
op
.
imshp
,
tshp
)):
for
(
var
,
shape
)
in
zip
((
img
,
top
),
(
node
.
op
.
imshp
,
tshp
)):
result
[
var
]
=
theano
.
shared
(
np
.
random
.
random
(
shape
)
.
astype
(
var
.
dtype
),
result
[
var
]
=
theano
.
shared
(
np
.
random
.
random
(
shape
)
.
astype
(
var
.
dtype
),
...
...
theano/gpuarray/tests/test_gemmcorr.py
浏览文件 @
9592125c
...
@@ -11,7 +11,7 @@ from theano.tensor.nnet.corr import CorrMM, CorrMM_gradWeights, CorrMM_gradInput
...
@@ -11,7 +11,7 @@ from theano.tensor.nnet.corr import CorrMM, CorrMM_gradWeights, CorrMM_gradInput
from
..type
import
gpuarray_shared_constructor
from
..type
import
gpuarray_shared_constructor
from
..blas
import
GpuCorrMM
,
GpuCorrMM_gradWeights
,
GpuCorrMM_gradInputs
from
..blas
import
GpuCorrMM
,
GpuCorrMM_gradWeights
,
GpuCorrMM_gradInputs
from
.config
import
mode_with_gpu
,
mode_without_gpu
,
ref_cast
from
.config
import
mode_with_gpu
,
mode_without_gpu
,
ref_cast
from
theano.tensor.nnet.tests.test_abstract_conv
import
Grouped_conv_noOptim
from
theano.tensor.nnet.tests.test_abstract_conv
import
Grouped_conv_noOptim
,
TestUnsharedConv
class
TestCorrMM
(
unittest
.
TestCase
):
class
TestCorrMM
(
unittest
.
TestCase
):
...
@@ -20,9 +20,13 @@ class TestCorrMM(unittest.TestCase):
...
@@ -20,9 +20,13 @@ class TestCorrMM(unittest.TestCase):
border_mode
=
'valid'
,
border_mode
=
'valid'
,
filter_dilation
=
(
1
,
1
),
filter_dilation
=
(
1
,
1
),
subsample
=
(
1
,
1
),
subsample
=
(
1
,
1
),
unshared
=
False
,
verify_grad
=
False
):
verify_grad
=
False
):
inputs_shape
=
[
inputs_shape
[
i
]
for
i
in
(
0
,
3
,
1
,
2
)]
inputs_shape
=
[
inputs_shape
[
i
]
for
i
in
(
0
,
3
,
1
,
2
)]
filters_shape
=
[
filters_shape
[
i
]
for
i
in
(
0
,
3
,
1
,
2
)]
if
unshared
:
filters_shape
=
[
filters_shape
[
i
]
for
i
in
(
0
,
1
,
2
,
5
,
3
,
4
)]
else
:
filters_shape
=
[
filters_shape
[
i
]
for
i
in
(
0
,
3
,
1
,
2
)]
inputs_val
=
np
.
random
.
random
(
inputs_shape
)
.
astype
(
config
.
floatX
)
inputs_val
=
np
.
random
.
random
(
inputs_shape
)
.
astype
(
config
.
floatX
)
filters_val
=
np
.
random
.
random
(
filters_shape
)
.
astype
(
config
.
floatX
)
filters_val
=
np
.
random
.
random
(
filters_shape
)
.
astype
(
config
.
floatX
)
...
@@ -32,13 +36,15 @@ class TestCorrMM(unittest.TestCase):
...
@@ -32,13 +36,15 @@ class TestCorrMM(unittest.TestCase):
conv_ref
=
CorrMM
(
border_mode
=
border_mode
,
conv_ref
=
CorrMM
(
border_mode
=
border_mode
,
filter_dilation
=
filter_dilation
,
filter_dilation
=
filter_dilation
,
subsample
=
subsample
)(
ref_cast
(
inputs
),
subsample
=
subsample
,
ref_cast
(
filters
))
unshared
=
unshared
)(
ref_cast
(
inputs
),
ref_cast
(
filters
))
f_ref
=
theano
.
function
([],
conv_ref
,
mode
=
mode_without_gpu
)
f_ref
=
theano
.
function
([],
conv_ref
,
mode
=
mode_without_gpu
)
conv
=
GpuCorrMM
(
border_mode
=
border_mode
,
conv
=
GpuCorrMM
(
border_mode
=
border_mode
,
filter_dilation
=
filter_dilation
,
filter_dilation
=
filter_dilation
,
subsample
=
subsample
)(
inputs
,
filters
)
subsample
=
subsample
,
unshared
=
unshared
)(
inputs
,
filters
)
f
=
theano
.
function
([],
conv
,
mode
=
mode_with_gpu
)
f
=
theano
.
function
([],
conv
,
mode
=
mode_with_gpu
)
res_ref
=
f_ref
()
res_ref
=
f_ref
()
...
@@ -48,7 +54,8 @@ class TestCorrMM(unittest.TestCase):
...
@@ -48,7 +54,8 @@ class TestCorrMM(unittest.TestCase):
if
verify_grad
:
if
verify_grad
:
utt
.
verify_grad
(
GpuCorrMM
(
border_mode
=
border_mode
,
utt
.
verify_grad
(
GpuCorrMM
(
border_mode
=
border_mode
,
filter_dilation
=
filter_dilation
,
filter_dilation
=
filter_dilation
,
subsample
=
subsample
),
subsample
=
subsample
,
unshared
=
unshared
),
[
inputs_val
,
filters_val
],
mode
=
mode_with_gpu
)
[
inputs_val
,
filters_val
],
mode
=
mode_with_gpu
)
def
test_valid
(
self
):
def
test_valid
(
self
):
...
@@ -57,12 +64,6 @@ class TestCorrMM(unittest.TestCase):
...
@@ -57,12 +64,6 @@ class TestCorrMM(unittest.TestCase):
self
.
run_conv_valid
(
inputs_shape
=
(
16
,
20
,
12
,
1
),
self
.
run_conv_valid
(
inputs_shape
=
(
16
,
20
,
12
,
1
),
filters_shape
=
(
10
,
6
,
12
,
1
),
filters_shape
=
(
10
,
6
,
12
,
1
),
subsample
=
(
2
,
2
))
subsample
=
(
2
,
2
))
self
.
run_conv_valid
(
inputs_shape
=
(
16
,
20
,
12
,
1
),
filters_shape
=
(
10
,
6
,
12
,
1
),
subsample
=
(
2
,
2
))
self
.
run_conv_valid
(
inputs_shape
=
(
16
,
20
,
12
,
1
),
filters_shape
=
(
10
,
6
,
12
,
1
),
subsample
=
(
3
,
3
))
self
.
run_conv_valid
(
inputs_shape
=
(
16
,
20
,
12
,
1
),
self
.
run_conv_valid
(
inputs_shape
=
(
16
,
20
,
12
,
1
),
filters_shape
=
(
10
,
6
,
12
,
1
),
filters_shape
=
(
10
,
6
,
12
,
1
),
subsample
=
(
3
,
3
))
subsample
=
(
3
,
3
))
...
@@ -117,6 +118,41 @@ class TestCorrMM(unittest.TestCase):
...
@@ -117,6 +118,41 @@ class TestCorrMM(unittest.TestCase):
border_mode
=
border_mode
,
border_mode
=
border_mode
,
verify_grad
=
True
)
verify_grad
=
True
)
def
test_unshared
(
self
):
self
.
run_conv_valid
(
inputs_shape
=
(
16
,
20
,
12
,
1
),
filters_shape
=
(
10
,
15
,
1
,
6
,
12
,
1
),
unshared
=
True
)
self
.
run_conv_valid
(
inputs_shape
=
(
16
,
20
,
12
,
1
),
filters_shape
=
(
10
,
8
,
1
,
6
,
12
,
1
),
subsample
=
(
2
,
2
),
unshared
=
True
)
self
.
run_conv_valid
(
inputs_shape
=
(
16
,
20
,
12
,
1
),
filters_shape
=
(
10
,
5
,
1
,
6
,
12
,
1
),
subsample
=
(
3
,
3
),
unshared
=
True
)
self
.
run_conv_valid
(
inputs_shape
=
(
16
,
20
,
12
,
1
),
filters_shape
=
(
10
,
5
,
1
,
6
,
12
,
1
),
subsample
=
(
3
,
2
),
unshared
=
True
)
self
.
run_conv_valid
(
inputs_shape
=
(
16
,
20
,
12
,
1
),
filters_shape
=
(
10
,
15
,
1
,
6
,
12
,
1
),
subsample
=
(
1
,
2
),
unshared
=
True
)
self
.
run_conv_valid
(
inputs_shape
=
(
16
,
20
,
12
,
1
),
filters_shape
=
(
10
,
15
,
1
,
6
,
12
,
1
),
border_mode
=
'valid'
,
unshared
=
True
)
self
.
run_conv_valid
(
inputs_shape
=
(
16
,
20
,
12
,
1
),
filters_shape
=
(
10
,
21
,
13
,
6
,
12
,
1
),
border_mode
=
'half'
,
unshared
=
True
)
self
.
run_conv_valid
(
inputs_shape
=
(
16
,
20
,
12
,
1
),
filters_shape
=
(
10
,
25
,
23
,
6
,
12
,
1
),
border_mode
=
'full'
,
unshared
=
True
)
self
.
run_conv_valid
(
inputs_shape
=
(
16
,
20
,
12
,
1
),
filters_shape
=
(
10
,
15
,
1
,
6
,
12
,
1
),
border_mode
=
(
0
,
0
),
unshared
=
True
)
self
.
run_conv_valid
(
inputs_shape
=
(
16
,
20
,
12
,
1
),
filters_shape
=
(
10
,
17
,
5
,
6
,
12
,
1
),
border_mode
=
(
1
,
2
),
unshared
=
True
)
self
.
run_conv_valid
(
inputs_shape
=
(
16
,
20
,
12
,
1
),
filters_shape
=
(
10
,
21
,
5
,
6
,
12
,
1
),
border_mode
=
(
3
,
2
),
unshared
=
True
)
def
run_gradweight
(
self
,
inputs_shape
,
filters_shape
,
dCdH_shape
,
def
run_gradweight
(
self
,
inputs_shape
,
filters_shape
,
dCdH_shape
,
subsample
=
(
1
,
1
)):
subsample
=
(
1
,
1
)):
inputs_shape
=
[
inputs_shape
[
i
]
for
i
in
(
0
,
3
,
1
,
2
)]
inputs_shape
=
[
inputs_shape
[
i
]
for
i
in
(
0
,
3
,
1
,
2
)]
...
@@ -227,3 +263,12 @@ class TestGroupGpuCorr2d(Grouped_conv_noOptim):
...
@@ -227,3 +263,12 @@ class TestGroupGpuCorr2d(Grouped_conv_noOptim):
conv_op
=
GpuCorrMM
conv_op
=
GpuCorrMM
conv_gradw_op
=
GpuCorrMM_gradWeights
conv_gradw_op
=
GpuCorrMM_gradWeights
conv_gradi_op
=
GpuCorrMM_gradInputs
conv_gradi_op
=
GpuCorrMM_gradInputs
flip_filter
=
True
is_dnn
=
False
class
TestUnsharedGpuCorr2d
(
TestUnsharedConv
):
mode
=
mode_with_gpu
conv2d_op
=
GpuCorrMM
conv2d_gradw_op
=
GpuCorrMM_gradWeights
conv2d_gradi_op
=
GpuCorrMM_gradInputs
theano/tensor/nnet/__init__.py
浏览文件 @
9592125c
...
@@ -37,7 +37,7 @@ from .abstract_conv import separable_conv2d
...
@@ -37,7 +37,7 @@ from .abstract_conv import separable_conv2d
def
conv2d
(
input
,
filters
,
input_shape
=
None
,
filter_shape
=
None
,
def
conv2d
(
input
,
filters
,
input_shape
=
None
,
filter_shape
=
None
,
border_mode
=
'valid'
,
subsample
=
(
1
,
1
),
filter_flip
=
True
,
border_mode
=
'valid'
,
subsample
=
(
1
,
1
),
filter_flip
=
True
,
image_shape
=
None
,
filter_dilation
=
(
1
,
1
),
num_groups
=
1
,
**
kwargs
):
image_shape
=
None
,
filter_dilation
=
(
1
,
1
),
num_groups
=
1
,
unshared
=
False
,
**
kwargs
):
"""
"""
This function will build the symbolic graph for convolving a mini-batch of a
This function will build the symbolic graph for convolving a mini-batch of a
stack of 2D inputs with a set of 2D filters. The implementation is modelled
stack of 2D inputs with a set of 2D filters. The implementation is modelled
...
@@ -51,18 +51,22 @@ def conv2d(input, filters, input_shape=None, filter_shape=None,
...
@@ -51,18 +51,22 @@ def conv2d(input, filters, input_shape=None, filter_shape=None,
(batch size, input channels, input rows, input columns).
(batch size, input channels, input rows, input columns).
See the optional parameter ``input_shape``.
See the optional parameter ``input_shape``.
filters: symbolic 4D tensor
filters: symbolic 4D
or 6D
tensor
Set of filters used in CNN layer of shape
Set of filters used in CNN layer of shape
(output channels, input channels, filter rows, filter columns).
(output channels, input channels, filter rows, filter columns)
for normal convolution and
(output channels, output rows, output columns, input channels,
filter rows, filter columns)
for unshared convolution.
See the optional parameter ``filter_shape``.
See the optional parameter ``filter_shape``.
input_shape: None, tuple/list of len 4 of int or Constant variable
input_shape: None, tuple/list of len 4 o
r 6 o
f int or Constant variable
The shape of the input parameter.
The shape of the input parameter.
Optional, possibly used to choose an optimal implementation.
Optional, possibly used to choose an optimal implementation.
You can give ``None`` for any element of the list to specify that this
You can give ``None`` for any element of the list to specify that this
element is not known at compile time.
element is not known at compile time.
filter_shape: None, tuple/list of len 4 of int or Constant variable
filter_shape: None, tuple/list of len 4 o
r 6 o
f int or Constant variable
The shape of the filters parameter.
The shape of the filters parameter.
Optional, possibly used to choose an optimal implementation.
Optional, possibly used to choose an optimal implementation.
You can give ``None`` for any element of the list to specify that this
You can give ``None`` for any element of the list to specify that this
...
@@ -105,6 +109,11 @@ def conv2d(input, filters, input_shape=None, filter_shape=None,
...
@@ -105,6 +109,11 @@ def conv2d(input, filters, input_shape=None, filter_shape=None,
Divides the image, kernel and output tensors into num_groups
Divides the image, kernel and output tensors into num_groups
separate groups, each of which carries out its convolution separately
separate groups, each of which carries out its convolution separately
unshared: bool
If true, then unshared or 'locally connected' convolution will be
performed. A different filter will be used for each region of the
input.
kwargs: Any other keyword arguments are accepted for backwards
kwargs: Any other keyword arguments are accepted for backwards
compatibility, but will be ignored.
compatibility, but will be ignored.
...
@@ -154,12 +163,12 @@ def conv2d(input, filters, input_shape=None, filter_shape=None,
...
@@ -154,12 +163,12 @@ def conv2d(input, filters, input_shape=None, filter_shape=None,
return
abstract_conv2d
(
input
,
filters
,
input_shape
,
filter_shape
,
return
abstract_conv2d
(
input
,
filters
,
input_shape
,
filter_shape
,
border_mode
,
subsample
,
filter_flip
,
border_mode
,
subsample
,
filter_flip
,
filter_dilation
,
num_groups
)
filter_dilation
,
num_groups
,
unshared
)
def
conv2d_transpose
(
input
,
filters
,
output_shape
,
filter_shape
=
None
,
def
conv2d_transpose
(
input
,
filters
,
output_shape
,
filter_shape
=
None
,
border_mode
=
'valid'
,
input_dilation
=
(
1
,
1
),
border_mode
=
'valid'
,
input_dilation
=
(
1
,
1
),
filter_flip
=
True
,
filter_dilation
=
(
1
,
1
),
num_groups
=
1
):
filter_flip
=
True
,
filter_dilation
=
(
1
,
1
),
num_groups
=
1
,
unshared
=
False
):
"""
"""
This function will build the symbolic graph for applying a transposed
This function will build the symbolic graph for applying a transposed
convolution over a mini-batch of a stack of 2D inputs with a set of 2D
convolution over a mini-batch of a stack of 2D inputs with a set of 2D
...
@@ -215,6 +224,12 @@ def conv2d_transpose(input, filters, output_shape, filter_shape=None,
...
@@ -215,6 +224,12 @@ def conv2d_transpose(input, filters, output_shape, filter_shape=None,
Divides the image, kernel and output tensors into num_groups
Divides the image, kernel and output tensors into num_groups
separate groups, each of which carries out its convolution separately
separate groups, each of which carries out its convolution separately
unshared: bool
If true, then unshared or 'locally connected' convolution will be
performed. A different filter will be used for each region of the
input.
Grouped unshared convolution is supported.
Returns
Returns
-------
-------
Symbolic 4D tensor
Symbolic 4D tensor
...
@@ -242,4 +257,5 @@ def conv2d_transpose(input, filters, output_shape, filter_shape=None,
...
@@ -242,4 +257,5 @@ def conv2d_transpose(input, filters, output_shape, filter_shape=None,
subsample
=
input_dilation
,
subsample
=
input_dilation
,
filter_flip
=
filter_flip
,
filter_flip
=
filter_flip
,
filter_dilation
=
filter_dilation
,
filter_dilation
=
filter_dilation
,
num_groups
=
num_groups
)
num_groups
=
num_groups
,
unshared
=
unshared
)
theano/tensor/nnet/abstract_conv.py
浏览文件 @
9592125c
...
@@ -44,9 +44,14 @@ def get_conv_output_shape(image_shape, kernel_shape,
...
@@ -44,9 +44,14 @@ def get_conv_output_shape(image_shape, kernel_shape,
to: batch size, number of input channels, height and width (and
to: batch size, number of input channels, height and width (and
possibly depth) of the image. None where undefined.
possibly depth) of the image. None where undefined.
kernel_shape: tuple of int (symbolic or numeric) corresponding to the
kernel_shape: tuple of int (symbolic or numeric) corresponding to the
kernel shape. Its four (or five) elements must correspond respectively
kernel shape. For a normal convolution, its four (for 2D convolution)
to: number of output channels, number of input channels, height and
or five (for 3D convolution) elements must correspond respectively to :
width (and possibly depth) of the kernel. None where undefined.
number of output channels, number of input channels, height and width
(and possibly depth) of the kernel.
For an unshared 2D convolution, its six elements must correspond to:
number of output channels, height and width of the output, number of
input channels, height and width of the kernel.
None where undefined.
border_mode: string, int (symbolic or numeric) or tuple of int (symbolic
border_mode: string, int (symbolic or numeric) or tuple of int (symbolic
or numeric). If it is a string, it must be 'valid', 'half' or 'full'.
or numeric). If it is a string, it must be 'valid', 'half' or 'full'.
If it is a tuple, its two (or three) elements respectively correspond
If it is a tuple, its two (or three) elements respectively correspond
...
@@ -56,6 +61,8 @@ def get_conv_output_shape(image_shape, kernel_shape,
...
@@ -56,6 +61,8 @@ def get_conv_output_shape(image_shape, kernel_shape,
possibly depth) axis.
possibly depth) axis.
filter_dilation: tuple of int (symbolic or numeric). Its two or three
filter_dilation: tuple of int (symbolic or numeric). Its two or three
elements correspond respectively to the dilation on height and width axis.
elements correspond respectively to the dilation on height and width axis.
Note - The shape of the convolution output does not depend on the 'unshared'
or the 'num_groups' parameters.
Returns
Returns
-------
-------
...
@@ -65,7 +72,10 @@ def get_conv_output_shape(image_shape, kernel_shape,
...
@@ -65,7 +72,10 @@ def get_conv_output_shape(image_shape, kernel_shape,
"""
"""
bsize
,
imshp
=
image_shape
[
0
],
image_shape
[
2
:]
bsize
,
imshp
=
image_shape
[
0
],
image_shape
[
2
:]
nkern
,
kshp
=
kernel_shape
[
0
],
kernel_shape
[
2
:]
convdim
=
len
(
image_shape
)
-
2
nkern
,
kshp
=
kernel_shape
[
0
],
kernel_shape
[
-
convdim
:]
if
filter_dilation
is
None
:
if
filter_dilation
is
None
:
filter_dilation
=
np
.
ones
(
len
(
subsample
),
dtype
=
'int'
)
filter_dilation
=
np
.
ones
(
len
(
subsample
),
dtype
=
'int'
)
...
@@ -139,7 +149,7 @@ def get_conv_shape_1axis(image_shape, kernel_shape, border_mode,
...
@@ -139,7 +149,7 @@ def get_conv_shape_1axis(image_shape, kernel_shape, border_mode,
def
get_conv_gradweights_shape
(
image_shape
,
top_shape
,
def
get_conv_gradweights_shape
(
image_shape
,
top_shape
,
border_mode
,
subsample
,
border_mode
,
subsample
,
filter_dilation
=
None
,
filter_dilation
=
None
,
num_groups
=
1
):
num_groups
=
1
,
unshared
=
False
):
"""
"""
This function tries to compute the kernel shape of convolution gradWeights.
This function tries to compute the kernel shape of convolution gradWeights.
...
@@ -169,6 +179,8 @@ def get_conv_gradweights_shape(image_shape, top_shape,
...
@@ -169,6 +179,8 @@ def get_conv_gradweights_shape(image_shape, top_shape,
width axis.
width axis.
num_groups: An int which specifies the number of separate groups to
num_groups: An int which specifies the number of separate groups to
be divided into.
be divided into.
unshared: Boolean value. If true, unshared convolution will be performed,
where a different filter is applied to each area of the input.
Returns
Returns
-------
-------
...
@@ -194,7 +206,10 @@ def get_conv_gradweights_shape(image_shape, top_shape,
...
@@ -194,7 +206,10 @@ def get_conv_gradweights_shape(image_shape, top_shape,
out_shp
=
tuple
(
get_conv_gradweights_shape_1axis
(
out_shp
=
tuple
(
get_conv_gradweights_shape_1axis
(
imshp
[
i
],
topshp
[
i
],
border_mode
,
imshp
[
i
],
topshp
[
i
],
border_mode
,
subsample
[
i
],
filter_dilation
[
i
])
for
i
in
range
(
len
(
subsample
)))
subsample
[
i
],
filter_dilation
[
i
])
for
i
in
range
(
len
(
subsample
)))
return
(
nchan
,
nkern
)
+
out_shp
if
unshared
:
return
(
nchan
,)
+
top_shape
[
2
:]
+
(
nkern
,)
+
out_shp
else
:
return
(
nchan
,
nkern
)
+
out_shp
def
get_conv_gradweights_shape_1axis
(
image_shape
,
top_shape
,
border_mode
,
def
get_conv_gradweights_shape_1axis
(
image_shape
,
top_shape
,
border_mode
,
...
@@ -280,6 +295,8 @@ def get_conv_gradinputs_shape(kernel_shape, top_shape,
...
@@ -280,6 +295,8 @@ def get_conv_gradinputs_shape(kernel_shape, top_shape,
width axis.
width axis.
num_groups: An int which specifies the number of separate groups to
num_groups: An int which specifies the number of separate groups to
be divided into.
be divided into.
Note - The shape of the convolution output does not depend on the 'unshared'
parameter.
Returns
Returns
-------
-------
...
@@ -289,7 +306,9 @@ def get_conv_gradinputs_shape(kernel_shape, top_shape,
...
@@ -289,7 +306,9 @@ def get_conv_gradinputs_shape(kernel_shape, top_shape,
"""
"""
bsize
,
topshp
=
top_shape
[
0
],
top_shape
[
2
:]
bsize
,
topshp
=
top_shape
[
0
],
top_shape
[
2
:]
nkern
,
kshp
=
kernel_shape
[
1
],
kernel_shape
[
2
:]
convdim
=
len
(
top_shape
)
-
2
nkern
,
kshp
=
kernel_shape
[
1
],
kernel_shape
[
-
convdim
:]
if
filter_dilation
is
None
:
if
filter_dilation
is
None
:
filter_dilation
=
np
.
ones
(
len
(
subsample
),
dtype
=
'int'
)
filter_dilation
=
np
.
ones
(
len
(
subsample
),
dtype
=
'int'
)
...
@@ -522,7 +541,8 @@ def conv2d(input,
...
@@ -522,7 +541,8 @@ def conv2d(input,
subsample
=
(
1
,
1
),
subsample
=
(
1
,
1
),
filter_flip
=
True
,
filter_flip
=
True
,
filter_dilation
=
(
1
,
1
),
filter_dilation
=
(
1
,
1
),
num_groups
=
1
):
num_groups
=
1
,
unshared
=
False
):
"""This function will build the symbolic graph for convolving a mini-batch of a
"""This function will build the symbolic graph for convolving a mini-batch of a
stack of 2D inputs with a set of 2D filters. The implementation is modelled
stack of 2D inputs with a set of 2D filters. The implementation is modelled
after Convolutional Neural Networks (CNN).
after Convolutional Neural Networks (CNN).
...
@@ -538,7 +558,8 @@ def conv2d(input,
...
@@ -538,7 +558,8 @@ def conv2d(input,
subsample
=
subsample
,
subsample
=
subsample
,
filter_flip
=
filter_flip
,
filter_flip
=
filter_flip
,
filter_dilation
=
filter_dilation
,
filter_dilation
=
filter_dilation
,
num_groups
=
num_groups
)
num_groups
=
num_groups
,
unshared
=
unshared
)
return
conv_op
(
input
,
filters
)
return
conv_op
(
input
,
filters
)
...
@@ -878,7 +899,6 @@ def conv3d(input,
...
@@ -878,7 +899,6 @@ def conv3d(input,
version until it is released.
version until it is released.
"""
"""
input
=
as_tensor_variable
(
input
)
input
=
as_tensor_variable
(
input
)
filters
=
as_tensor_variable
(
filters
)
filters
=
as_tensor_variable
(
filters
)
conv_op
=
AbstractConv3d
(
imshp
=
input_shape
,
conv_op
=
AbstractConv3d
(
imshp
=
input_shape
,
...
@@ -899,7 +919,8 @@ def conv2d_grad_wrt_inputs(output_grad,
...
@@ -899,7 +919,8 @@ def conv2d_grad_wrt_inputs(output_grad,
subsample
=
(
1
,
1
),
subsample
=
(
1
,
1
),
filter_flip
=
True
,
filter_flip
=
True
,
filter_dilation
=
(
1
,
1
),
filter_dilation
=
(
1
,
1
),
num_groups
=
1
):
num_groups
=
1
,
unshared
=
False
):
"""Compute conv output gradient w.r.t its inputs
"""Compute conv output gradient w.r.t its inputs
This function builds the symbolic graph for getting the
This function builds the symbolic graph for getting the
...
@@ -916,10 +937,14 @@ def conv2d_grad_wrt_inputs(output_grad,
...
@@ -916,10 +937,14 @@ def conv2d_grad_wrt_inputs(output_grad,
will be upsampled or the output gradient of the convolution
will be upsampled or the output gradient of the convolution
whose gradient will be taken with respect to the input of the
whose gradient will be taken with respect to the input of the
convolution.
convolution.
filters : symbolic 4D tensor
filters: symbolic 4D or 6D tensor
set of filters used in CNN layer of shape (output channels,
Set of filters used in CNN layer of shape
input channels, filter rows, filter columns). See the
(output channels, input channels, filter rows, filter columns)
optional parameter ``filter_shape``.
for normal convolution and
(output channels, output rows, output columns, input channels,
filter rows, filter columns)
for unshared convolution.
See the optional parameter ``filter_shape``.
input_shape : [None/int/Constant] * 2 + [Tensor/int/Constant] * 2
input_shape : [None/int/Constant] * 2 + [Tensor/int/Constant] * 2
The shape of the input (upsampled) parameter.
The shape of the input (upsampled) parameter.
A tuple/list of len 4, with the first two dimensions
A tuple/list of len 4, with the first two dimensions
...
@@ -928,8 +953,9 @@ def conv2d_grad_wrt_inputs(output_grad,
...
@@ -928,8 +953,9 @@ def conv2d_grad_wrt_inputs(output_grad,
Not Optional, since given the output_grad shape
Not Optional, since given the output_grad shape
and the subsample values, multiple input_shape may be
and the subsample values, multiple input_shape may be
plausible.
plausible.
filter_shape : None or [None/int/Constant] * 4
filter_shape : None or [None/int/Constant] * (4 or 6)
The shape of the filters parameter. None or a tuple/list of len 4.
The shape of the filters parameter. None or a tuple/list of len 4 or a
tuple/list of len 6 (for unshared convolution)
Optional, possibly used to choose an optimal implementation.
Optional, possibly used to choose an optimal implementation.
You can give ``None`` for any element of the list to specify that
You can give ``None`` for any element of the list to specify that
this element is not known at compile time.
this element is not known at compile time.
...
@@ -975,6 +1001,10 @@ def conv2d_grad_wrt_inputs(output_grad,
...
@@ -975,6 +1001,10 @@ def conv2d_grad_wrt_inputs(output_grad,
num_groups : int
num_groups : int
Divides the image, kernel and output tensors into num_groups
Divides the image, kernel and output tensors into num_groups
separate groups, each of which carries out its convolution separately
separate groups, each of which carries out its convolution separately
unshared: bool
If true, then unshared or 'locally connected' convolution will be
performed. A different filter will be used for each region of the
input.
Returns
Returns
-------
-------
...
@@ -1009,7 +1039,14 @@ def conv2d_grad_wrt_inputs(output_grad,
...
@@ -1009,7 +1039,14 @@ def conv2d_grad_wrt_inputs(output_grad,
# checking the type of filter_shape
# checking the type of filter_shape
if
filter_shape
is
not
None
:
if
filter_shape
is
not
None
:
for
dim
in
[
0
,
1
,
2
,
3
]:
if
unshared
:
expected_dim
=
6
else
:
expected_dim
=
4
assert
len
(
filter_shape
)
==
expected_dim
for
dim
in
range
(
expected_dim
):
assert
isinstance
(
filter_shape
[
dim
],
(
theano
.
tensor
.
TensorConstant
,
assert
isinstance
(
filter_shape
[
dim
],
(
theano
.
tensor
.
TensorConstant
,
integer_types
,
type
(
None
)))
integer_types
,
type
(
None
)))
...
@@ -1026,7 +1063,8 @@ def conv2d_grad_wrt_inputs(output_grad,
...
@@ -1026,7 +1063,8 @@ def conv2d_grad_wrt_inputs(output_grad,
subsample
=
subsample
,
subsample
=
subsample
,
filter_flip
=
filter_flip
,
filter_flip
=
filter_flip
,
filter_dilation
=
filter_dilation
,
filter_dilation
=
filter_dilation
,
num_groups
=
num_groups
)
num_groups
=
num_groups
,
unshared
=
unshared
)
return
grad_input_op
(
filters
,
output_grad
,
input_shape
[
-
2
:])
return
grad_input_op
(
filters
,
output_grad
,
input_shape
[
-
2
:])
...
@@ -1179,7 +1217,8 @@ def conv2d_grad_wrt_weights(input,
...
@@ -1179,7 +1217,8 @@ def conv2d_grad_wrt_weights(input,
subsample
=
(
1
,
1
),
subsample
=
(
1
,
1
),
filter_flip
=
True
,
filter_flip
=
True
,
filter_dilation
=
(
1
,
1
),
filter_dilation
=
(
1
,
1
),
num_groups
=
1
):
num_groups
=
1
,
unshared
=
False
):
"""Compute conv output gradient w.r.t its weights
"""Compute conv output gradient w.r.t its weights
This function will build the symbolic graph for getting the
This function will build the symbolic graph for getting the
...
@@ -1195,10 +1234,10 @@ def conv2d_grad_wrt_weights(input,
...
@@ -1195,10 +1234,10 @@ def conv2d_grad_wrt_weights(input,
mini-batch of feature map stacks, of shape (batch size, input
mini-batch of feature map stacks, of shape (batch size, input
channels, input rows, input columns). This is the gradient of
channels, input rows, input columns). This is the gradient of
the output of convolution.
the output of convolution.
filter_shape : [None/int/Constant] *
2
+ [Tensor/int/Constant] * 2
filter_shape : [None/int/Constant] *
(2 or 4)
+ [Tensor/int/Constant] * 2
The shape of the filter parameter. A tuple/list of len 4
, with the
The shape of the filter parameter. A tuple/list of len 4
or 6
first two dimensions being None or int or Constant and the last two
(for unshared), with the first two dimensions being None or int or
dimensions being Tensor or int or Constant.
Constant and the last two
dimensions being Tensor or int or Constant.
Not Optional, since given the output_grad shape and
Not Optional, since given the output_grad shape and
the input_shape, multiple filter_shape may be plausible.
the input_shape, multiple filter_shape may be plausible.
input_shape : None or [None/int/Constant] * 4
input_shape : None or [None/int/Constant] * 4
...
@@ -1247,13 +1286,19 @@ def conv2d_grad_wrt_weights(input,
...
@@ -1247,13 +1286,19 @@ def conv2d_grad_wrt_weights(input,
num_groups : int
num_groups : int
Divides the image, kernel and output tensors into num_groups
Divides the image, kernel and output tensors into num_groups
separate groups, each of which carries out its convolution separately
separate groups, each of which carries out its convolution separately
unshared: bool
If true, then unshared or 'locally connected' convolution will be
performed. A different filter will be used for each region of the
input.
Returns
Returns
-------
-------
symbolic 4D tensor
symbolic 4D tensor
or 6D tensor
set of feature maps generated by convolutional layer. Tensor
set of feature maps generated by convolutional layer. Tensor
is of shape (batch size, output channels, output rows, output
is of shape (batch size, output channels, output rows, output
columns)
columns) for normal convolution and
(output channels, output rows, output columns, input channels,
filter rows, filter columns) for unshared convolution
Notes
Notes
-----
-----
...
@@ -1274,7 +1319,11 @@ def conv2d_grad_wrt_weights(input,
...
@@ -1274,7 +1319,11 @@ def conv2d_grad_wrt_weights(input,
for
dim
in
[
0
,
1
]:
for
dim
in
[
0
,
1
]:
assert
isinstance
(
filter_shape
[
dim
],
(
theano
.
tensor
.
TensorConstant
,
assert
isinstance
(
filter_shape
[
dim
],
(
theano
.
tensor
.
TensorConstant
,
integer_types
,
type
(
None
)))
integer_types
,
type
(
None
)))
for
dim
in
[
2
,
3
]:
if
unshared
:
for
dim
in
[
2
,
3
]:
assert
isinstance
(
filter_shape
[
dim
],
(
theano
.
tensor
.
TensorConstant
,
integer_types
,
type
(
None
)))
for
dim
in
[
-
2
,
-
1
]:
assert
isinstance
(
filter_shape
[
dim
],
(
theano
.
tensor
.
TensorVariable
,
assert
isinstance
(
filter_shape
[
dim
],
(
theano
.
tensor
.
TensorVariable
,
theano
.
tensor
.
TensorConstant
,
theano
.
tensor
.
TensorConstant
,
integer_types
))
integer_types
))
...
@@ -1288,7 +1337,7 @@ def conv2d_grad_wrt_weights(input,
...
@@ -1288,7 +1337,7 @@ def conv2d_grad_wrt_weights(input,
# setting the last two dimensions of filter_shape to None, if
# setting the last two dimensions of filter_shape to None, if
# the type of these dimensions is TensorVariable.
# the type of these dimensions is TensorVariable.
numerical_filter_shape
=
list
(
filter_shape
)
numerical_filter_shape
=
list
(
filter_shape
)
for
dim
in
[
2
,
3
]:
for
dim
in
[
-
2
,
-
1
]:
if
isinstance
(
filter_shape
[
dim
],
theano
.
tensor
.
TensorVariable
):
if
isinstance
(
filter_shape
[
dim
],
theano
.
tensor
.
TensorVariable
):
numerical_filter_shape
[
dim
]
=
None
numerical_filter_shape
[
dim
]
=
None
...
@@ -1298,7 +1347,8 @@ def conv2d_grad_wrt_weights(input,
...
@@ -1298,7 +1347,8 @@ def conv2d_grad_wrt_weights(input,
subsample
=
subsample
,
subsample
=
subsample
,
filter_flip
=
filter_flip
,
filter_flip
=
filter_flip
,
filter_dilation
=
filter_dilation
,
filter_dilation
=
filter_dilation
,
num_groups
=
num_groups
)
num_groups
=
num_groups
,
unshared
=
unshared
)
return
gradWeight_op
(
input
,
output_grad
,
filter_shape
[
-
2
:])
return
gradWeight_op
(
input
,
output_grad
,
filter_shape
[
-
2
:])
...
@@ -1631,7 +1681,8 @@ class BaseAbstractConv(Op):
...
@@ -1631,7 +1681,8 @@ class BaseAbstractConv(Op):
element is not known at compile time.
element is not known at compile time.
imshp is defined w.r.t the forward conv.
imshp is defined w.r.t the forward conv.
kshp: None, tuple/list of len ``(2 + convdim)`` of int or Constant variable
kshp: None, tuple/list of len ``(2 + convdim)`` or ``(2 + 2 * convdim)``
(for unshared) of int or Constant variable
The shape of the filters parameter.
The shape of the filters parameter.
Optional, possibly used to choose an optimal implementation.
Optional, possibly used to choose an optimal implementation.
You can give ``None`` for any element of the list to specify that this
You can give ``None`` for any element of the list to specify that this
...
@@ -1670,14 +1721,24 @@ class BaseAbstractConv(Op):
...
@@ -1670,14 +1721,24 @@ class BaseAbstractConv(Op):
filter_dilation: tuple of len ``convdim``
filter_dilation: tuple of len ``convdim``
Factor by which to dilate the filters.
Factor by which to dilate the filters.
Also called dilation factor.
Also called dilation factor.
num_groups : int
Divides the image, kernel and output tensors into num_groups
separate groups, each of which carries out its convolution separately
unshared: bool
If true, then unshared or 'locally connected' convolution will be
performed. A different filter will be used for each region of the
input.
"""
"""
check_broadcast
=
False
check_broadcast
=
False
__props__
=
(
'convdim'
,
'border_mode'
,
'subsample'
,
'filter_flip'
,
__props__
=
(
'convdim'
,
'border_mode'
,
'subsample'
,
'filter_flip'
,
'imshp'
,
'kshp'
,
'filter_dilation'
,
'num_groups'
)
'imshp'
,
'kshp'
,
'filter_dilation'
,
'num_groups'
,
'unshared'
)
def
__init__
(
self
,
convdim
,
def
__init__
(
self
,
convdim
,
imshp
=
None
,
kshp
=
None
,
border_mode
=
"valid"
,
imshp
=
None
,
kshp
=
None
,
border_mode
=
"valid"
,
subsample
=
None
,
filter_flip
=
True
,
filter_dilation
=
None
,
num_groups
=
1
):
subsample
=
None
,
filter_flip
=
True
,
filter_dilation
=
None
,
num_groups
=
1
,
unshared
=
False
):
self
.
convdim
=
convdim
self
.
convdim
=
convdim
if
convdim
not
in
(
2
,
3
):
if
convdim
not
in
(
2
,
3
):
...
@@ -1718,7 +1779,10 @@ class BaseAbstractConv(Op):
...
@@ -1718,7 +1779,10 @@ class BaseAbstractConv(Op):
ValueError
(
"imshp should be None or a tuple of "
ValueError
(
"imshp should be None or a tuple of "
"constant int values"
),
"constant int values"
),
sys
.
exc_info
()[
2
])
sys
.
exc_info
()[
2
])
self
.
kshp
=
tuple
(
kshp
)
if
kshp
else
(
None
,)
*
(
2
+
convdim
)
if
kshp
:
self
.
kshp
=
tuple
(
kshp
)
else
:
self
.
kshp
=
(
None
,)
*
((
2
+
2
*
convdim
)
if
unshared
else
(
2
+
convdim
))
for
kshp_i
in
self
.
kshp
:
for
kshp_i
in
self
.
kshp
:
if
kshp_i
is
not
None
:
if
kshp_i
is
not
None
:
# Components of kshp should be constant or ints
# Components of kshp should be constant or ints
...
@@ -1742,6 +1806,10 @@ class BaseAbstractConv(Op):
...
@@ -1742,6 +1806,10 @@ class BaseAbstractConv(Op):
if
num_groups
<
1
:
if
num_groups
<
1
:
raise
ValueError
(
"num_groups must have value greater than zero"
)
raise
ValueError
(
"num_groups must have value greater than zero"
)
self
.
num_groups
=
num_groups
self
.
num_groups
=
num_groups
if
unshared
and
self
.
convdim
!=
2
:
raise
NotImplementedError
(
'Unshared convolution not implemented for
%
dD'
%
self
.
convdim
)
self
.
unshared
=
unshared
def
do_constant_folding
(
self
,
node
):
def
do_constant_folding
(
self
,
node
):
# Disable constant folding since there is no implementation.
# Disable constant folding since there is no implementation.
...
@@ -1768,11 +1836,10 @@ class BaseAbstractConv(Op):
...
@@ -1768,11 +1836,10 @@ class BaseAbstractConv(Op):
raise
NotImplementedError
(
raise
NotImplementedError
(
'flops not implemented for convdim={}'
,
self
.
convdim
)
'flops not implemented for convdim={}'
,
self
.
convdim
)
def
conv
(
self
,
img
,
kern
,
mode
=
"valid"
,
dilation
=
1
,
num_groups
=
1
):
def
conv
(
self
,
img
,
kern
,
mode
=
"valid"
,
dilation
=
1
,
num_groups
=
1
,
unshared
=
False
,
direction
=
"forward"
):
"""
"""
Basic slow Python 2D or 3D convolution for DebugMode
Basic slow Python 2D or 3D convolution for DebugMode
"""
"""
if
not
imported_scipy_signal
:
if
not
imported_scipy_signal
:
raise
NotImplementedError
(
raise
NotImplementedError
(
"AbstractConv perform requires the python package"
"AbstractConv perform requires the python package"
...
@@ -1787,18 +1854,29 @@ class BaseAbstractConv(Op):
...
@@ -1787,18 +1854,29 @@ class BaseAbstractConv(Op):
raise
ValueError
(
raise
ValueError
(
'invalid dilation {}, expected {} values'
.
format
(
dilation
,
'invalid dilation {}, expected {} values'
.
format
(
dilation
,
self
.
convdim
))
self
.
convdim
))
if
unshared
and
direction
==
"backprop weights"
:
if
mode
!=
"valid"
:
raise
ValueError
(
'conv mode for unshared backprop wrt weights must be "valid"'
)
# To allow the same format for the call to 'unshared2d' for all three directions,
# the out_shape is shuffled here.
# We do a transpose in the 'perform' function to bring it to the required shape
out_shape
=
(
img
.
shape
[
0
],
kern
.
shape
[
0
],
kern
.
shape
[
2
],
kern
.
shape
[
3
],
img
.
shape
[
2
]
-
kern
.
shape
[
2
]
+
1
,
img
.
shape
[
3
]
-
kern
.
shape
[
3
]
+
1
)
else
:
out_shape
=
get_conv_output_shape
(
img
.
shape
,
kern
.
shape
,
mode
,
[
1
]
*
self
.
convdim
,
dilation
)
out_shape
=
get_conv_output_shape
(
img
.
shape
,
kern
.
shape
,
mode
,
[
1
]
*
self
.
convdim
,
dilation
)
out
=
np
.
zeros
(
out_shape
,
dtype
=
img
.
dtype
)
dil_kern_shp
=
kern
.
shape
[:
-
self
.
convdim
]
+
tuple
(
dil_kern_shp
=
kern
.
shape
[:
-
self
.
convdim
]
+
tuple
(
(
kern
.
shape
[
-
self
.
convdim
+
i
]
-
1
)
*
dilation
[
i
]
+
1
(
kern
.
shape
[
-
self
.
convdim
+
i
]
-
1
)
*
dilation
[
i
]
+
1
for
i
in
range
(
self
.
convdim
))
for
i
in
range
(
self
.
convdim
))
dilated_kern
=
np
.
zeros
(
dil_kern_shp
,
dtype
=
kern
.
dtype
)
dilated_kern
=
np
.
zeros
(
dil_kern_shp
,
dtype
=
kern
.
dtype
)
dilated_kern
[(
slice
(
None
),
slice
(
None
))
+
dilated_kern
[(
slice
(
None
),)
*
(
dilated_kern
.
ndim
-
self
.
convdim
)
+
tuple
(
slice
(
None
,
None
,
dilation
[
i
])
for
i
in
range
(
self
.
convdim
))
tuple
(
slice
(
None
,
None
,
dilation
[
i
])
for
i
in
range
(
self
.
convdim
))
]
=
kern
]
=
kern
out
=
np
.
zeros
(
out_shape
,
dtype
=
img
.
dtype
)
if
img
.
shape
[
1
]
%
self
.
num_groups
!=
0
:
if
img
.
shape
[
1
]
%
self
.
num_groups
!=
0
:
raise
ValueError
(
raise
ValueError
(
...
@@ -1823,11 +1901,19 @@ class BaseAbstractConv(Op):
...
@@ -1823,11 +1901,19 @@ class BaseAbstractConv(Op):
for
g
in
xrange
(
self
.
num_groups
):
for
g
in
xrange
(
self
.
num_groups
):
for
n
in
xrange
(
output_channel_offset
):
for
n
in
xrange
(
output_channel_offset
):
for
im0
in
xrange
(
input_channel_offset
):
for
im0
in
xrange
(
input_channel_offset
):
# some cast generates a warning here
if
unshared
:
out
[
b
,
g
*
output_channel_offset
+
n
,
...
]
+=
_convolve2d
(
img
[
b
,
g
*
input_channel_offset
+
im0
,
...
],
out
[
b
,
g
*
output_channel_offset
+
n
,
...
]
+=
self
.
unshared2d
(
img
[
b
,
g
*
input_channel_offset
+
im0
,
...
],
dilated_kern
[
g
*
output_channel_offset
+
n
,
dilated_kern
[
g
*
output_channel_offset
+
n
,
im0
,
...
],
im0
,
...
],
1
,
val
,
bval
,
0
)
out_shape
[
2
:],
direction
)
else
:
# some cast generates a warning here
out
[
b
,
g
*
output_channel_offset
+
n
,
...
]
+=
_convolve2d
(
img
[
b
,
g
*
input_channel_offset
+
im0
,
...
],
dilated_kern
[
g
*
output_channel_offset
+
n
,
im0
,
...
],
1
,
val
,
bval
,
0
)
elif
self
.
convdim
==
3
:
elif
self
.
convdim
==
3
:
if
unshared
:
raise
NotImplementedError
(
'Unshared 3D convolution is not implemented'
)
for
b
in
xrange
(
img
.
shape
[
0
]):
for
b
in
xrange
(
img
.
shape
[
0
]):
for
g
in
xrange
(
self
.
num_groups
):
for
g
in
xrange
(
self
.
num_groups
):
for
n
in
xrange
(
output_channel_offset
):
for
n
in
xrange
(
output_channel_offset
):
...
@@ -1839,6 +1925,35 @@ class BaseAbstractConv(Op):
...
@@ -1839,6 +1925,35 @@ class BaseAbstractConv(Op):
raise
NotImplementedError
(
'only 2D and 3D convolution are implemented'
)
raise
NotImplementedError
(
'only 2D and 3D convolution are implemented'
)
return
out
return
out
def unshared2d(self, inp, kern, out_shape, direction="forward"):
    """
    Slow pure-Python reference for 2D unshared (locally connected)
    convolution on a single 2D input plane.

    Parameters
    ----------
    inp
        2D array. It is the plane being convolved for ``"forward"`` and
        ``"backprop weights"``; for ``"backprop inputs"`` it is read one
        scalar ``inp[row, col]`` per position.
    kern
        For ``"forward"`` and ``"backprop inputs"``: an array indexed as
        ``kern[row, col, k_row, k_col]``, i.e. one 2D filter per
        position; each filter is flipped along both of its axes before
        use. For ``"backprop weights"``: an array indexed as
        ``kern[row, col]`` that scales the matching window of ``inp``.
    out_shape
        Shape of the returned array. ``"backprop weights"`` also reads
        ``out_shape[2]`` and ``out_shape[3]`` as the window size.
    direction
        One of ``"forward"``, ``"backprop weights"`` or
        ``"backprop inputs"``.

    Returns
    -------
    Array of shape ``out_shape`` with the dtype of ``inp``.

    Raises
    ------
    NotImplementedError
        If ``self.convdim`` is not 2.
    ValueError
        If ``direction`` is not one of the three supported values.
    """
    if self.convdim != 2:
        raise NotImplementedError(
            'Unshared convolution not implemented for %dD' % self.convdim)

    # Allocated before the direction is inspected, like the original.
    result = np.zeros(out_shape, dtype=inp.dtype)

    if direction == "forward":
        # Every output position has its own filter: dot the flipped
        # filter with the input window anchored at that position.
        for r in xrange(out_shape[0]):
            for c in xrange(out_shape[1]):
                window = inp[r:r + kern.shape[2], c:c + kern.shape[3]]
                flipped = kern[r, c, ::-1, ::-1]
                result[r, c] = np.sum(np.multiply(window, flipped))
        return result

    if direction == "backprop weights":
        # Each position's weight block is the input window scaled by the
        # value of ``kern`` at that position.
        for r in xrange(out_shape[0]):
            for c in xrange(out_shape[1]):
                window = inp[r:r + out_shape[2], c:c + out_shape[3]]
                result[r, c, ...] = kern[r, c] * window
        return result

    if direction == "backprop inputs":
        # Scatter-add: each position of ``inp`` contributes its value
        # times the flipped per-position filter onto the overlapping
        # region of the result.
        for r in xrange(kern.shape[0]):
            for c in xrange(kern.shape[1]):
                contribution = inp[r, c] * kern[r, c, ::-1, ::-1]
                result[r:r + kern.shape[2],
                       c:c + kern.shape[3]] += contribution
        return result

    raise ValueError(
        "unshared2d: invalid value '{}' for 'direction'".format(direction))
class
AbstractConv
(
BaseAbstractConv
):
class
AbstractConv
(
BaseAbstractConv
):
""" Abstract Op for the forward convolution.
""" Abstract Op for the forward convolution.
...
@@ -1854,14 +1969,16 @@ class AbstractConv(BaseAbstractConv):
...
@@ -1854,14 +1969,16 @@ class AbstractConv(BaseAbstractConv):
subsample
=
None
,
subsample
=
None
,
filter_flip
=
True
,
filter_flip
=
True
,
filter_dilation
=
None
,
filter_dilation
=
None
,
num_groups
=
1
):
num_groups
=
1
,
unshared
=
False
):
super
(
AbstractConv
,
self
)
.
__init__
(
convdim
=
convdim
,
super
(
AbstractConv
,
self
)
.
__init__
(
convdim
=
convdim
,
imshp
=
imshp
,
kshp
=
kshp
,
imshp
=
imshp
,
kshp
=
kshp
,
border_mode
=
border_mode
,
border_mode
=
border_mode
,
subsample
=
subsample
,
subsample
=
subsample
,
filter_flip
=
filter_flip
,
filter_flip
=
filter_flip
,
filter_dilation
=
filter_dilation
,
filter_dilation
=
filter_dilation
,
num_groups
=
num_groups
)
num_groups
=
num_groups
,
unshared
=
unshared
)
def
make_node
(
self
,
img
,
kern
):
def
make_node
(
self
,
img
,
kern
):
# Make sure both inputs are Variables with the same Type
# Make sure both inputs are Variables with the same Type
...
@@ -1875,8 +1992,14 @@ class AbstractConv(BaseAbstractConv):
...
@@ -1875,8 +1992,14 @@ class AbstractConv(BaseAbstractConv):
if
img
.
type
.
ndim
!=
2
+
self
.
convdim
:
if
img
.
type
.
ndim
!=
2
+
self
.
convdim
:
raise
TypeError
(
'img must be
%
dD tensor'
%
(
2
+
self
.
convdim
))
raise
TypeError
(
'img must be
%
dD tensor'
%
(
2
+
self
.
convdim
))
if
kern
.
type
.
ndim
!=
2
+
self
.
convdim
:
raise
TypeError
(
'kern must be
%
dD tensor'
%
(
2
+
self
.
convdim
))
if
self
.
unshared
:
if
kern
.
type
.
ndim
!=
2
+
2
*
self
.
convdim
:
raise
TypeError
(
'kern must be
%
dD tensor for unshared convolution'
%
(
2
+
2
*
self
.
convdim
))
else
:
if
kern
.
type
.
ndim
!=
2
+
self
.
convdim
:
raise
TypeError
(
'kern must be
%
dD tensor'
%
(
2
+
self
.
convdim
))
img
=
assert_shape
(
img
,
self
.
imshp
,
img
=
assert_shape
(
img
,
self
.
imshp
,
'AbstractConv shape mismatch: shape of '
'AbstractConv shape mismatch: shape of '
...
@@ -1894,8 +2017,12 @@ class AbstractConv(BaseAbstractConv):
...
@@ -1894,8 +2017,12 @@ class AbstractConv(BaseAbstractConv):
img
,
kern
=
inp
img
,
kern
=
inp
img
=
np
.
asarray
(
img
)
img
=
np
.
asarray
(
img
)
kern
=
np
.
asarray
(
kern
)
kern
=
np
.
asarray
(
kern
)
dil_kernshp
=
tuple
((
kern
.
shape
[
2
+
i
]
-
1
)
*
self
.
filter_dilation
[
i
]
+
1
dil_kernshp
=
tuple
((
kern
.
shape
[
-
self
.
convdim
+
i
]
-
1
)
*
self
.
filter_dilation
[
i
]
+
1
for
i
in
range
(
self
.
convdim
))
for
i
in
range
(
self
.
convdim
))
if
self
.
unshared
and
self
.
convdim
!=
2
:
raise
NotImplementedError
(
'Unshared convolution not implemented for
%
dD'
%
self
.
convdim
)
o
,
=
out_
o
,
=
out_
mode
=
self
.
border_mode
mode
=
self
.
border_mode
...
@@ -1922,8 +2049,34 @@ class AbstractConv(BaseAbstractConv):
...
@@ -1922,8 +2049,34 @@ class AbstractConv(BaseAbstractConv):
for
i
in
range
(
self
.
convdim
))]
=
img
for
i
in
range
(
self
.
convdim
))]
=
img
img
=
new_img
img
=
new_img
if
not
self
.
filter_flip
:
if
not
self
.
filter_flip
:
kern
=
kern
[(
slice
(
None
),
slice
(
None
))
+
(
slice
(
None
,
None
,
-
1
),)
*
self
.
convdim
]
kern
=
kern
[(
slice
(
None
),)
*
(
kern
.
ndim
-
self
.
convdim
)
+
(
slice
(
None
,
None
,
-
1
),)
*
self
.
convdim
]
conv_out
=
self
.
conv
(
img
,
kern
,
mode
=
"valid"
,
dilation
=
self
.
filter_dilation
,
num_groups
=
self
.
num_groups
)
if
self
.
unshared
:
out_shape
=
get_conv_output_shape
(
img
.
shape
,
kern
.
shape
,
mode
,
self
.
subsample
,
self
.
filter_dilation
)
if
kern
.
shape
[
1
:
1
+
self
.
convdim
]
!=
out_shape
[
2
:
2
+
self
.
convdim
]:
raise
ValueError
(
'Kernel shape {} does not match '
'computed output size {}'
.
format
(
kern
.
shape
[
1
:
1
+
self
.
convdim
],
out_shape
[
2
:
2
+
self
.
convdim
]))
if
any
(
self
.
subsample
[
i
]
>
1
for
i
in
range
(
self
.
convdim
)):
# Expand regions in kernel to correct for subsampling
out_shape
=
get_conv_output_shape
(
img
.
shape
,
kern
.
shape
,
mode
,
(
1
,)
*
self
.
convdim
,
self
.
filter_dilation
)
exp_kern_shp
=
kern
.
shape
[:
1
]
+
out_shape
[
2
:
2
+
self
.
convdim
]
+
\
kern
.
shape
[
1
+
self
.
convdim
:]
exp_kern
=
np
.
zeros
(
exp_kern_shp
,
dtype
=
kern
.
dtype
)
exp_kern
[(
slice
(
None
),)
+
tuple
(
slice
(
None
,
None
,
self
.
subsample
[
i
])
for
i
in
range
(
self
.
convdim
))
+
(
slice
(
None
),)
*
(
self
.
convdim
+
1
)]
=
kern
kern
=
exp_kern
# from (nFilters, out_rows, out_cols, nChannels, kH, kW)
# to (nFilters, nChannels, out_rows, out_cols, kH, kW)
axes_order
=
(
0
,
1
+
self
.
convdim
,)
+
tuple
(
range
(
1
,
1
+
self
.
convdim
))
+
\
tuple
(
range
(
2
+
self
.
convdim
,
kern
.
ndim
))
kern
=
kern
.
transpose
(
axes_order
)
conv_out
=
self
.
conv
(
img
,
kern
,
mode
=
"valid"
,
dilation
=
self
.
filter_dilation
,
num_groups
=
self
.
num_groups
,
unshared
=
self
.
unshared
)
conv_out
=
conv_out
[(
slice
(
None
),
slice
(
None
))
+
conv_out
=
conv_out
[(
slice
(
None
),
slice
(
None
))
+
tuple
(
slice
(
None
,
None
,
self
.
subsample
[
i
])
tuple
(
slice
(
None
,
None
,
self
.
subsample
[
i
])
for
i
in
range
(
self
.
convdim
))]
for
i
in
range
(
self
.
convdim
))]
...
@@ -1934,6 +2087,8 @@ class AbstractConv(BaseAbstractConv):
...
@@ -1934,6 +2087,8 @@ class AbstractConv(BaseAbstractConv):
if
self
.
num_groups
>
1
:
if
self
.
num_groups
>
1
:
raise
NotImplementedError
(
raise
NotImplementedError
(
'Rop not implemented for grouped convolutions'
)
'Rop not implemented for grouped convolutions'
)
if
self
.
unshared
:
raise
NotImplementedError
(
'Rop not implemented for unshared convolution'
)
rval
=
None
rval
=
None
if
eval_points
[
0
]
is
not
None
:
if
eval_points
[
0
]
is
not
None
:
rval
=
self
.
make_node
(
eval_points
[
0
],
inputs
[
1
])
.
outputs
[
0
]
rval
=
self
.
make_node
(
eval_points
[
0
],
inputs
[
1
])
.
outputs
[
0
]
...
@@ -1953,8 +2108,12 @@ class AbstractConv(BaseAbstractConv):
...
@@ -1953,8 +2108,12 @@ class AbstractConv(BaseAbstractConv):
imshp
=
[
imshp
[
i
]
if
self
.
imshp
[
i
]
is
None
else
self
.
imshp
[
i
]
imshp
=
[
imshp
[
i
]
if
self
.
imshp
[
i
]
is
None
else
self
.
imshp
[
i
]
for
i
in
range
(
2
+
self
.
convdim
)]
for
i
in
range
(
2
+
self
.
convdim
)]
if
self
.
kshp
is
not
None
:
if
self
.
kshp
is
not
None
:
kshp
=
[
kshp
[
i
]
if
self
.
kshp
[
i
]
is
None
else
self
.
kshp
[
i
]
if
self
.
unshared
:
for
i
in
range
(
2
+
self
.
convdim
)]
kshp
=
[
kshp
[
i
]
if
self
.
kshp
[
i
]
is
None
else
self
.
kshp
[
i
]
for
i
in
range
(
2
+
2
*
self
.
convdim
)]
else
:
kshp
=
[
kshp
[
i
]
if
self
.
kshp
[
i
]
is
None
else
self
.
kshp
[
i
]
for
i
in
range
(
2
+
self
.
convdim
)]
res
=
get_conv_output_shape
(
imshp
,
kshp
,
self
.
border_mode
,
res
=
get_conv_output_shape
(
imshp
,
kshp
,
self
.
border_mode
,
self
.
subsample
,
self
.
filter_dilation
)
self
.
subsample
,
self
.
filter_dilation
)
return
[
res
]
return
[
res
]
...
@@ -1973,14 +2132,16 @@ class AbstractConv2d(AbstractConv):
...
@@ -1973,14 +2132,16 @@ class AbstractConv2d(AbstractConv):
subsample
=
(
1
,
1
),
subsample
=
(
1
,
1
),
filter_flip
=
True
,
filter_flip
=
True
,
filter_dilation
=
(
1
,
1
),
filter_dilation
=
(
1
,
1
),
num_groups
=
1
):
num_groups
=
1
,
unshared
=
False
):
super
(
AbstractConv2d
,
self
)
.
__init__
(
convdim
=
2
,
super
(
AbstractConv2d
,
self
)
.
__init__
(
convdim
=
2
,
imshp
=
imshp
,
kshp
=
kshp
,
imshp
=
imshp
,
kshp
=
kshp
,
border_mode
=
border_mode
,
border_mode
=
border_mode
,
subsample
=
subsample
,
subsample
=
subsample
,
filter_flip
=
filter_flip
,
filter_flip
=
filter_flip
,
filter_dilation
=
filter_dilation
,
filter_dilation
=
filter_dilation
,
num_groups
=
num_groups
)
num_groups
=
num_groups
,
unshared
=
unshared
)
def
grad
(
self
,
inp
,
grads
):
def
grad
(
self
,
inp
,
grads
):
bottom
,
weights
=
inp
bottom
,
weights
=
inp
...
@@ -1991,14 +2152,16 @@ class AbstractConv2d(AbstractConv):
...
@@ -1991,14 +2152,16 @@ class AbstractConv2d(AbstractConv):
self
.
subsample
,
self
.
subsample
,
self
.
filter_flip
,
self
.
filter_flip
,
self
.
filter_dilation
,
self
.
filter_dilation
,
num_groups
=
self
.
num_groups
)(
num_groups
=
self
.
num_groups
,
unshared
=
self
.
unshared
)(
weights
,
top
,
bottom
.
shape
[
-
2
:],
add_assert_shape
=
False
)
weights
,
top
,
bottom
.
shape
[
-
2
:],
add_assert_shape
=
False
)
d_weights
=
AbstractConv2d_gradWeights
(
self
.
imshp
,
self
.
kshp
,
d_weights
=
AbstractConv2d_gradWeights
(
self
.
imshp
,
self
.
kshp
,
self
.
border_mode
,
self
.
border_mode
,
self
.
subsample
,
self
.
subsample
,
self
.
filter_flip
,
self
.
filter_flip
,
self
.
filter_dilation
,
self
.
filter_dilation
,
num_groups
=
self
.
num_groups
)(
num_groups
=
self
.
num_groups
,
unshared
=
self
.
unshared
)(
bottom
,
top
,
weights
.
shape
[
-
2
:],
add_assert_shape
=
False
)
bottom
,
top
,
weights
.
shape
[
-
2
:],
add_assert_shape
=
False
)
...
@@ -2085,14 +2248,16 @@ class AbstractConv_gradWeights(BaseAbstractConv):
...
@@ -2085,14 +2248,16 @@ class AbstractConv_gradWeights(BaseAbstractConv):
subsample
=
None
,
subsample
=
None
,
filter_flip
=
True
,
filter_flip
=
True
,
filter_dilation
=
None
,
filter_dilation
=
None
,
num_groups
=
1
):
num_groups
=
1
,
unshared
=
False
):
super
(
AbstractConv_gradWeights
,
self
)
.
__init__
(
convdim
=
convdim
,
super
(
AbstractConv_gradWeights
,
self
)
.
__init__
(
convdim
=
convdim
,
imshp
=
imshp
,
kshp
=
kshp
,
imshp
=
imshp
,
kshp
=
kshp
,
border_mode
=
border_mode
,
border_mode
=
border_mode
,
subsample
=
subsample
,
subsample
=
subsample
,
filter_flip
=
filter_flip
,
filter_flip
=
filter_flip
,
filter_dilation
=
filter_dilation
,
filter_dilation
=
filter_dilation
,
num_groups
=
num_groups
)
num_groups
=
num_groups
,
unshared
=
unshared
)
# Update shape/height_width
# Update shape/height_width
def
make_node
(
self
,
img
,
topgrad
,
shape
,
add_assert_shape
=
True
):
def
make_node
(
self
,
img
,
topgrad
,
shape
,
add_assert_shape
=
True
):
...
@@ -2115,8 +2280,12 @@ class AbstractConv_gradWeights(BaseAbstractConv):
...
@@ -2115,8 +2280,12 @@ class AbstractConv_gradWeights(BaseAbstractConv):
'image does not match given imshp.'
)
'image does not match given imshp.'
)
shape
=
as_tensor_variable
(
shape
)
shape
=
as_tensor_variable
(
shape
)
broadcastable
=
[
topgrad
.
broadcastable
[
1
],
if
self
.
unshared
:
img
.
broadcastable
[
1
]]
+
([
False
]
*
self
.
convdim
)
broadcastable
=
[
topgrad
.
broadcastable
[
1
]]
+
([
False
]
*
self
.
convdim
)
+
\
[
img
.
broadcastable
[
1
]]
+
([
False
]
*
self
.
convdim
)
else
:
broadcastable
=
[
topgrad
.
broadcastable
[
1
],
img
.
broadcastable
[
1
]]
+
([
False
]
*
self
.
convdim
)
output
=
img
.
type
.
clone
(
broadcastable
=
broadcastable
)()
output
=
img
.
type
.
clone
(
broadcastable
=
broadcastable
)()
return
Apply
(
self
,
[
img
,
topgrad
,
shape
],
[
output
])
return
Apply
(
self
,
[
img
,
topgrad
,
shape
],
[
output
])
...
@@ -2134,6 +2303,9 @@ class AbstractConv_gradWeights(BaseAbstractConv):
...
@@ -2134,6 +2303,9 @@ class AbstractConv_gradWeights(BaseAbstractConv):
'invalid border_mode {}, which must be either '
'invalid border_mode {}, which must be either '
'"valid", "full", "half", an integer or a tuple of'
'"valid", "full", "half", an integer or a tuple of'
' integers'
.
format
(
mode
))
' integers'
.
format
(
mode
))
if
self
.
unshared
and
self
.
convdim
!=
2
:
raise
NotImplementedError
(
'Unshared convolution not implemented for
%
dD'
%
self
.
convdim
)
dil_shape
=
tuple
((
shape
[
i
]
-
1
)
*
self
.
filter_dilation
[
i
]
+
1
dil_shape
=
tuple
((
shape
[
i
]
-
1
)
*
self
.
filter_dilation
[
i
]
+
1
for
i
in
range
(
self
.
convdim
))
for
i
in
range
(
self
.
convdim
))
...
@@ -2166,9 +2338,7 @@ class AbstractConv_gradWeights(BaseAbstractConv):
...
@@ -2166,9 +2338,7 @@ class AbstractConv_gradWeights(BaseAbstractConv):
topgrad
=
new_topgrad
topgrad
=
new_topgrad
axes_order
=
(
1
,
0
)
+
tuple
(
range
(
2
,
self
.
convdim
+
2
))
axes_order
=
(
1
,
0
)
+
tuple
(
range
(
2
,
self
.
convdim
+
2
))
flip_filters
=
((
slice
(
None
),
slice
(
None
))
+
topgrad
=
topgrad
.
transpose
(
axes_order
)
(
slice
(
None
,
None
,
-
1
),)
*
self
.
convdim
)
topgrad
=
topgrad
.
transpose
(
axes_order
)[
flip_filters
]
img
=
img
.
transpose
(
axes_order
)
img
=
img
.
transpose
(
axes_order
)
def
correct_for_groups
(
mat
):
def
correct_for_groups
(
mat
):
...
@@ -2182,15 +2352,36 @@ class AbstractConv_gradWeights(BaseAbstractConv):
...
@@ -2182,15 +2352,36 @@ class AbstractConv_gradWeights(BaseAbstractConv):
if
self
.
num_groups
>
1
:
if
self
.
num_groups
>
1
:
img
=
correct_for_groups
(
img
)
img
=
correct_for_groups
(
img
)
kern
=
self
.
conv
(
img
,
topgrad
,
mode
=
"valid"
,
num_groups
=
self
.
num_groups
)
if
self
.
unshared
:
flip_kern
=
((
slice
(
None
),)
*
(
2
+
self
.
convdim
)
+
(
slice
(
None
,
None
,
-
1
),)
*
self
.
convdim
)
kern
=
self
.
conv
(
img
,
topgrad
,
mode
=
"valid"
,
num_groups
=
self
.
num_groups
,
unshared
=
True
,
direction
=
"backprop weights"
)
if
any
(
self
.
subsample
[
i
]
>
1
for
i
in
range
(
self
.
convdim
)):
sub_slice
=
(
slice
(
None
),)
*
2
+
\
tuple
(
slice
(
None
,
None
,
self
.
subsample
[
i
])
for
i
in
range
(
0
,
self
.
convdim
))
+
\
(
slice
(
None
),)
*
self
.
convdim
kern
=
kern
[
sub_slice
]
# from (nChannels, nFilters, out_rows, out_cols, kH, kW)
# to (nFilters, out_rows, out_cols, nChannels, kH, kW)
kern_axes
=
(
1
,)
+
tuple
(
range
(
2
,
self
.
convdim
+
2
))
+
(
0
,)
+
\
tuple
(
range
(
self
.
convdim
+
2
,
kern
.
ndim
))
else
:
flip_topgrad
=
flip_kern
=
((
slice
(
None
),
slice
(
None
))
+
(
slice
(
None
,
None
,
-
1
),)
*
self
.
convdim
)
topgrad
=
topgrad
[
flip_topgrad
]
kern
=
self
.
conv
(
img
,
topgrad
,
mode
=
"valid"
,
num_groups
=
self
.
num_groups
)
kern_axes
=
(
1
,
0
)
+
tuple
(
range
(
2
,
self
.
convdim
+
2
))
kern
=
kern
.
transpose
(
kern_axes
)
if
any
(
self
.
filter_dilation
[
i
]
>
1
for
i
in
range
(
self
.
convdim
)):
if
any
(
self
.
filter_dilation
[
i
]
>
1
for
i
in
range
(
self
.
convdim
)):
kern
=
kern
[(
slice
(
None
),
slice
(
None
)
)
+
kern
=
kern
[(
slice
(
None
),
)
*
(
kern
.
ndim
-
self
.
convdim
)
+
tuple
(
slice
(
None
,
None
,
self
.
filter_dilation
[
i
])
tuple
(
slice
(
None
,
None
,
self
.
filter_dilation
[
i
])
for
i
in
range
(
self
.
convdim
))]
for
i
in
range
(
self
.
convdim
))]
if
self
.
filter_flip
:
if
self
.
filter_flip
:
kern
=
kern
.
transpose
(
axes_order
)[
flip_filters
]
kern
=
kern
[
flip_kern
]
else
:
kern
=
kern
.
transpose
(
axes_order
)
o
[
0
]
=
node
.
outputs
[
0
]
.
type
.
filter
(
kern
)
o
[
0
]
=
node
.
outputs
[
0
]
.
type
.
filter
(
kern
)
def
connection_pattern
(
self
,
node
):
def
connection_pattern
(
self
,
node
):
...
@@ -2203,15 +2394,24 @@ class AbstractConv_gradWeights(BaseAbstractConv):
...
@@ -2203,15 +2394,24 @@ class AbstractConv_gradWeights(BaseAbstractConv):
# from the shapes of inputs.
# from the shapes of inputs.
imshp
=
input_shapes
[
0
]
imshp
=
input_shapes
[
0
]
topshp
=
input_shapes
[
1
]
topshp
=
input_shapes
[
1
]
kshp
=
self
.
kshp
[:]
if
self
.
kshp
is
not
None
else
[
None
]
*
(
2
+
self
.
convdim
)
if
self
.
num_groups
>
1
:
if
self
.
kshp
:
fallback_kshp
=
([
topshp
[
1
],
imshp
[
1
]
//
self
.
num_groups
]
+
kshp
=
self
.
kshp
else
:
if
self
.
unshared
:
kshp
=
[
None
]
*
(
2
+
2
*
self
.
convdim
)
else
:
kshp
=
[
None
]
*
(
2
+
self
.
convdim
)
if
self
.
unshared
:
fallback_kshp
=
([
topshp
[
1
],
topshp
[
2
],
topshp
[
3
],
imshp
[
1
]
//
self
.
num_groups
]
+
[
node
.
inputs
[
2
][
i
]
for
i
in
range
(
self
.
convdim
)])
[
node
.
inputs
[
2
][
i
]
for
i
in
range
(
self
.
convdim
)])
kshp
=
[
fallback_kshp
[
i
]
if
kshp
[
i
]
is
None
else
kshp
[
i
]
for
i
in
range
(
2
+
2
*
self
.
convdim
)]
else
:
else
:
fallback_kshp
=
([
topshp
[
1
],
imshp
[
1
]]
+
fallback_kshp
=
([
topshp
[
1
],
imshp
[
1
]
//
self
.
num_groups
]
+
[
node
.
inputs
[
2
][
i
]
for
i
in
range
(
self
.
convdim
)])
[
node
.
inputs
[
2
][
i
]
for
i
in
range
(
self
.
convdim
)])
kshp
=
[
fallback_kshp
[
i
]
if
kshp
[
i
]
is
None
else
kshp
[
i
]
kshp
=
[
fallback_kshp
[
i
]
if
kshp
[
i
]
is
None
else
kshp
[
i
]
for
i
in
range
(
2
+
self
.
convdim
)]
for
i
in
range
(
2
+
self
.
convdim
)]
return
[
kshp
]
return
[
kshp
]
...
@@ -2232,14 +2432,16 @@ class AbstractConv2d_gradWeights(AbstractConv_gradWeights):
...
@@ -2232,14 +2432,16 @@ class AbstractConv2d_gradWeights(AbstractConv_gradWeights):
subsample
=
(
1
,
1
),
subsample
=
(
1
,
1
),
filter_flip
=
True
,
filter_flip
=
True
,
filter_dilation
=
(
1
,
1
),
filter_dilation
=
(
1
,
1
),
num_groups
=
1
):
num_groups
=
1
,
unshared
=
False
):
super
(
AbstractConv2d_gradWeights
,
self
)
.
__init__
(
convdim
=
2
,
super
(
AbstractConv2d_gradWeights
,
self
)
.
__init__
(
convdim
=
2
,
imshp
=
imshp
,
kshp
=
kshp
,
imshp
=
imshp
,
kshp
=
kshp
,
border_mode
=
border_mode
,
border_mode
=
border_mode
,
subsample
=
subsample
,
subsample
=
subsample
,
filter_flip
=
filter_flip
,
filter_flip
=
filter_flip
,
filter_dilation
=
filter_dilation
,
filter_dilation
=
filter_dilation
,
num_groups
=
num_groups
)
num_groups
=
num_groups
,
unshared
=
unshared
)
def
grad
(
self
,
inp
,
grads
):
def
grad
(
self
,
inp
,
grads
):
bottom
,
top
=
inp
[:
2
]
bottom
,
top
=
inp
[:
2
]
...
@@ -2249,16 +2451,18 @@ class AbstractConv2d_gradWeights(AbstractConv_gradWeights):
...
@@ -2249,16 +2451,18 @@ class AbstractConv2d_gradWeights(AbstractConv_gradWeights):
self
.
subsample
,
self
.
subsample
,
self
.
filter_flip
,
self
.
filter_flip
,
self
.
filter_dilation
,
self
.
filter_dilation
,
self
.
num_groups
)(
weights
,
self
.
num_groups
,
top
,
self
.
unshared
)(
weights
,
bottom
.
shape
[
-
2
:])
top
,
bottom
.
shape
[
-
2
:])
d_top
=
AbstractConv2d
(
self
.
imshp
,
d_top
=
AbstractConv2d
(
self
.
imshp
,
self
.
kshp
,
self
.
kshp
,
self
.
border_mode
,
self
.
border_mode
,
self
.
subsample
,
self
.
subsample
,
self
.
filter_flip
,
self
.
filter_flip
,
self
.
filter_dilation
,
self
.
filter_dilation
,
self
.
num_groups
)(
bottom
,
weights
)
self
.
num_groups
,
self
.
unshared
)(
bottom
,
weights
)
# Make sure that the broadcastable pattern of the inputs is used
# Make sure that the broadcastable pattern of the inputs is used
# for the gradients, even if the grad opts are not able to infer
# for the gradients, even if the grad opts are not able to infer
# that the dimensions are broadcastable.
# that the dimensions are broadcastable.
...
@@ -2350,14 +2554,16 @@ class AbstractConv_gradInputs(BaseAbstractConv):
...
@@ -2350,14 +2554,16 @@ class AbstractConv_gradInputs(BaseAbstractConv):
subsample
=
None
,
subsample
=
None
,
filter_flip
=
True
,
filter_flip
=
True
,
filter_dilation
=
None
,
filter_dilation
=
None
,
num_groups
=
1
):
num_groups
=
1
,
unshared
=
False
):
super
(
AbstractConv_gradInputs
,
self
)
.
__init__
(
convdim
=
convdim
,
super
(
AbstractConv_gradInputs
,
self
)
.
__init__
(
convdim
=
convdim
,
imshp
=
imshp
,
kshp
=
kshp
,
imshp
=
imshp
,
kshp
=
kshp
,
border_mode
=
border_mode
,
border_mode
=
border_mode
,
subsample
=
subsample
,
subsample
=
subsample
,
filter_flip
=
filter_flip
,
filter_flip
=
filter_flip
,
filter_dilation
=
filter_dilation
,
filter_dilation
=
filter_dilation
,
num_groups
=
num_groups
)
num_groups
=
num_groups
,
unshared
=
unshared
)
# Update shape/height_width
# Update shape/height_width
def
make_node
(
self
,
kern
,
topgrad
,
shape
,
add_assert_shape
=
True
):
def
make_node
(
self
,
kern
,
topgrad
,
shape
,
add_assert_shape
=
True
):
...
@@ -2370,10 +2576,19 @@ class AbstractConv_gradInputs(BaseAbstractConv):
...
@@ -2370,10 +2576,19 @@ class AbstractConv_gradInputs(BaseAbstractConv):
broadcastable
=
topgrad
.
broadcastable
)
broadcastable
=
topgrad
.
broadcastable
)
topgrad
=
gtype
.
filter_variable
(
topgrad
)
topgrad
=
gtype
.
filter_variable
(
topgrad
)
if
kern
.
type
.
ndim
!=
2
+
self
.
convdim
:
if
self
.
unshared
:
raise
TypeError
(
'kern must be
%
dD tensor'
%
(
2
+
self
.
convdim
))
if
self
.
convdim
!=
2
:
raise
NotImplementedError
(
'Unshared convolution not implemented for
%
dD'
%
self
.
convdim
)
elif
kern
.
type
.
ndim
!=
2
+
2
*
self
.
convdim
:
raise
TypeError
(
'kern must be
%
dD tensor for unshared convolution'
%
(
2
+
2
*
self
.
convdim
))
else
:
if
kern
.
type
.
ndim
!=
2
+
self
.
convdim
:
raise
TypeError
(
'kern must be
%
dD tensor'
%
(
2
+
self
.
convdim
))
if
topgrad
.
type
.
ndim
!=
2
+
self
.
convdim
:
if
topgrad
.
type
.
ndim
!=
2
+
self
.
convdim
:
raise
TypeError
(
'topgrad must be
%
dD tensor'
%
(
2
+
self
.
convdim
))
raise
TypeError
(
'topgrad must be
%
dD tensor'
%
(
2
+
self
.
convdim
))
if
add_assert_shape
:
if
add_assert_shape
:
kern
=
assert_shape
(
kern
,
self
.
kshp
,
kern
=
assert_shape
(
kern
,
self
.
kshp
,
...
@@ -2386,7 +2601,7 @@ class AbstractConv_gradInputs(BaseAbstractConv):
...
@@ -2386,7 +2601,7 @@ class AbstractConv_gradInputs(BaseAbstractConv):
False
]
+
([
False
]
*
self
.
convdim
)
False
]
+
([
False
]
*
self
.
convdim
)
else
:
else
:
broadcastable
=
[
topgrad
.
type
.
broadcastable
[
0
],
broadcastable
=
[
topgrad
.
type
.
broadcastable
[
0
],
kern
.
type
.
broadcastable
[
1
]]
+
([
False
]
*
self
.
convdim
)
kern
.
type
.
broadcastable
[
-
self
.
convdim
-
1
]]
+
([
False
]
*
self
.
convdim
)
output
=
kern
.
type
.
clone
(
broadcastable
=
broadcastable
)()
output
=
kern
.
type
.
clone
(
broadcastable
=
broadcastable
)()
return
Apply
(
self
,
[
kern
,
topgrad
,
shape
],
[
output
])
return
Apply
(
self
,
[
kern
,
topgrad
,
shape
],
[
output
])
...
@@ -2403,9 +2618,12 @@ class AbstractConv_gradInputs(BaseAbstractConv):
...
@@ -2403,9 +2618,12 @@ class AbstractConv_gradInputs(BaseAbstractConv):
'invalid border_mode {}, which must be either '
'invalid border_mode {}, which must be either '
'"valid", "full", "half", an integer or a tuple of'
'"valid", "full", "half", an integer or a tuple of'
' integers'
.
format
(
mode
))
' integers'
.
format
(
mode
))
if
self
.
unshared
and
self
.
convdim
!=
2
:
raise
NotImplementedError
(
'Unshared convolution not implemented for
%
dD'
%
self
.
convdim
)
imshp
=
self
.
imshp
[:]
if
self
.
imshp
is
not
None
else
[
None
]
*
(
2
+
self
.
convdim
)
imshp
=
self
.
imshp
[:]
if
self
.
imshp
is
not
None
else
[
None
]
*
(
2
+
self
.
convdim
)
fallback_imshp
=
([
topgrad
.
shape
[
0
],
kern
.
shape
[
1
]]
+
fallback_imshp
=
([
topgrad
.
shape
[
0
],
kern
.
shape
[
-
self
.
convdim
-
1
]]
+
[
shape
[
i
]
for
i
in
range
(
self
.
convdim
)])
[
shape
[
i
]
for
i
in
range
(
self
.
convdim
)])
imshp
=
[
fallback_imshp
[
i
]
if
imshp
[
i
]
is
None
else
imshp
[
i
]
imshp
=
[
fallback_imshp
[
i
]
if
imshp
[
i
]
is
None
else
imshp
[
i
]
for
i
in
range
(
2
+
self
.
convdim
)]
for
i
in
range
(
2
+
self
.
convdim
)]
...
@@ -2419,8 +2637,9 @@ class AbstractConv_gradInputs(BaseAbstractConv):
...
@@ -2419,8 +2637,9 @@ class AbstractConv_gradInputs(BaseAbstractConv):
'has shape {}'
.
format
(
tuple
(
expected_topgrad_shape
),
'has shape {}'
.
format
(
tuple
(
expected_topgrad_shape
),
tuple
(
topgrad
.
shape
)))
tuple
(
topgrad
.
shape
)))
dil_kernshp
=
tuple
((
kern
.
shape
[
i
+
2
]
-
1
)
*
self
.
filter_dilation
[
i
]
+
1
dil_kernshp
=
tuple
((
kern
.
shape
[
-
self
.
convdim
+
i
]
-
1
)
*
self
.
filter_dilation
[
i
]
+
1
for
i
in
range
(
self
.
convdim
))
for
i
in
range
(
self
.
convdim
))
pad
=
(
0
,)
*
self
.
convdim
pad
=
(
0
,)
*
self
.
convdim
if
mode
==
"full"
:
if
mode
==
"full"
:
pad
=
tuple
(
dil_kernshp
[
i
]
-
1
for
i
in
range
(
self
.
convdim
))
pad
=
tuple
(
dil_kernshp
[
i
]
-
1
for
i
in
range
(
self
.
convdim
))
...
@@ -2438,25 +2657,54 @@ class AbstractConv_gradInputs(BaseAbstractConv):
...
@@ -2438,25 +2657,54 @@ class AbstractConv_gradInputs(BaseAbstractConv):
for
i
in
range
(
self
.
convdim
))]
=
topgrad
for
i
in
range
(
self
.
convdim
))]
=
topgrad
topgrad
=
new_topgrad
topgrad
=
new_topgrad
axes_order
=
(
1
,
0
)
+
tuple
(
range
(
2
,
self
.
convdim
+
2
))
if
self
.
unshared
:
flip_filters
=
((
slice
(
None
),
slice
(
None
))
+
# Expand regions in kernel to correct for subsampling
(
slice
(
None
,
None
,
-
1
),)
*
self
.
convdim
)
exp_kern_shp
=
kern
.
shape
[:
1
]
+
topgrad
.
shape
[
2
:]
+
kern
.
shape
[
1
+
self
.
convdim
:]
exp_kern
=
np
.
zeros
(
exp_kern_shp
,
dtype
=
kern
.
dtype
)
exp_kern
[(
slice
(
None
),)
+
tuple
(
slice
(
None
,
None
,
self
.
subsample
[
i
])
for
i
in
range
(
self
.
convdim
))
+
(
slice
(
None
),)
*
(
self
.
convdim
+
1
)]
=
kern
kern
=
exp_kern
def
correct_for_groups
(
mat
):
def
correct_for_groups
(
mat
):
mshp0
=
mat
.
shape
[
0
]
//
self
.
num_groups
mshp0
=
mat
.
shape
[
0
]
//
self
.
num_groups
mshp1
=
mat
.
shape
[
1
]
*
self
.
num_groups
mshp1
=
mat
.
shape
[
-
self
.
convdim
-
1
]
*
self
.
num_groups
mat
=
mat
.
reshape
((
self
.
num_groups
,
mshp0
)
+
mat
.
shape
[
1
:])
mat
=
mat
.
reshape
((
self
.
num_groups
,
mshp0
)
+
mat
.
shape
[
1
:])
mat
=
mat
.
transpose
((
1
,
0
,
2
)
+
tuple
(
range
(
3
,
3
+
self
.
convdim
)))
if
self
.
unshared
:
mat
=
mat
.
reshape
((
mshp0
,
mshp1
)
+
mat
.
shape
[
-
self
.
convdim
:])
# for 2D -> (1, 2, 3, 0, 4, 5, 6)
mat
=
mat
.
transpose
(
tuple
(
range
(
1
,
2
+
self
.
convdim
))
+
(
0
,)
+
tuple
(
range
(
2
+
self
.
convdim
,
mat
.
ndim
)))
mat
=
mat
.
reshape
((
mshp0
,)
+
mat
.
shape
[
1
:
1
+
self
.
convdim
]
+
(
mshp1
,)
+
mat
.
shape
[
-
self
.
convdim
:])
else
:
mat
=
mat
.
transpose
((
1
,
0
,
2
)
+
tuple
(
range
(
3
,
3
+
self
.
convdim
)))
mat
=
mat
.
reshape
((
mshp0
,
mshp1
)
+
mat
.
shape
[
-
self
.
convdim
:])
return
mat
return
mat
kern
=
correct_for_groups
(
kern
)
kern
=
correct_for_groups
(
kern
)
kern
=
kern
.
transpose
(
axes_order
)
if
self
.
filter_flip
:
if
self
.
unshared
:
topgrad
=
topgrad
[
flip_filters
]
# from (nFilters, out_rows, out_cols, nChannels, kH, kW)
img
=
self
.
conv
(
topgrad
,
kern
,
mode
=
"full"
,
dilation
=
self
.
filter_dilation
,
num_groups
=
self
.
num_groups
)
# to (nChannels, nFilters, out_rows, out_cols, kH, kW)
if
self
.
filter_flip
:
axes_order
=
(
1
+
self
.
convdim
,
0
,)
+
tuple
(
range
(
1
,
1
+
self
.
convdim
))
+
\
img
=
img
[
flip_filters
]
tuple
(
range
(
2
+
self
.
convdim
,
kern
.
ndim
))
kern
=
kern
.
transpose
(
axes_order
)
if
not
self
.
filter_flip
:
kern
=
kern
[(
slice
(
None
),)
*
(
kern
.
ndim
-
self
.
convdim
)
+
(
slice
(
None
,
None
,
-
1
),)
*
self
.
convdim
]
img
=
self
.
conv
(
topgrad
,
kern
,
mode
=
"full"
,
dilation
=
self
.
filter_dilation
,
num_groups
=
self
.
num_groups
,
unshared
=
True
,
direction
=
"backprop inputs"
)
else
:
axes_order
=
(
1
,
0
)
+
tuple
(
range
(
2
,
2
+
self
.
convdim
))
kern
=
kern
.
transpose
(
axes_order
)
flip_filters
=
((
slice
(
None
),
slice
(
None
))
+
(
slice
(
None
,
None
,
-
1
),)
*
self
.
convdim
)
if
self
.
filter_flip
:
topgrad
=
topgrad
[
flip_filters
]
img
=
self
.
conv
(
topgrad
,
kern
,
mode
=
"full"
,
dilation
=
self
.
filter_dilation
,
num_groups
=
self
.
num_groups
)
if
self
.
filter_flip
:
img
=
img
[
flip_filters
]
if
any
(
p
>
0
for
p
in
pad
):
if
any
(
p
>
0
for
p
in
pad
):
img
=
img
[(
slice
(
None
),
slice
(
None
))
+
img
=
img
[(
slice
(
None
),
slice
(
None
))
+
tuple
(
slice
(
pad
[
i
],
img
.
shape
[
i
+
2
]
-
pad
[
i
])
tuple
(
slice
(
pad
[
i
],
img
.
shape
[
i
+
2
]
-
pad
[
i
])
...
@@ -2475,10 +2723,10 @@ class AbstractConv_gradInputs(BaseAbstractConv):
...
@@ -2475,10 +2723,10 @@ class AbstractConv_gradInputs(BaseAbstractConv):
topshp
=
input_shapes
[
1
]
topshp
=
input_shapes
[
1
]
imshp
=
self
.
imshp
[:]
if
self
.
imshp
is
not
None
else
[
None
]
*
(
2
+
self
.
convdim
)
imshp
=
self
.
imshp
[:]
if
self
.
imshp
is
not
None
else
[
None
]
*
(
2
+
self
.
convdim
)
if
self
.
num_groups
>
1
:
if
self
.
num_groups
>
1
:
fallback_imshp
=
([
topshp
[
0
],
kshp
[
1
]
*
self
.
num_groups
]
+
fallback_imshp
=
([
topshp
[
0
],
kshp
[
-
self
.
convdim
-
1
]
*
self
.
num_groups
]
+
[
node
.
inputs
[
2
][
i
]
for
i
in
range
(
self
.
convdim
)])
[
node
.
inputs
[
2
][
i
]
for
i
in
range
(
self
.
convdim
)])
else
:
else
:
fallback_imshp
=
([
topshp
[
0
],
kshp
[
1
]]
+
fallback_imshp
=
([
topshp
[
0
],
kshp
[
-
self
.
convdim
-
1
]]
+
[
node
.
inputs
[
2
][
i
]
for
i
in
range
(
self
.
convdim
)])
[
node
.
inputs
[
2
][
i
]
for
i
in
range
(
self
.
convdim
)])
imshp
=
[
fallback_imshp
[
i
]
if
imshp
[
i
]
is
None
else
imshp
[
i
]
imshp
=
[
fallback_imshp
[
i
]
if
imshp
[
i
]
is
None
else
imshp
[
i
]
for
i
in
range
(
2
+
self
.
convdim
)]
for
i
in
range
(
2
+
self
.
convdim
)]
...
@@ -2503,14 +2751,16 @@ class AbstractConv2d_gradInputs(AbstractConv_gradInputs):
...
@@ -2503,14 +2751,16 @@ class AbstractConv2d_gradInputs(AbstractConv_gradInputs):
subsample
=
(
1
,
1
),
subsample
=
(
1
,
1
),
filter_flip
=
True
,
filter_flip
=
True
,
filter_dilation
=
(
1
,
1
),
filter_dilation
=
(
1
,
1
),
num_groups
=
1
):
num_groups
=
1
,
unshared
=
False
):
super
(
AbstractConv2d_gradInputs
,
self
)
.
__init__
(
convdim
=
2
,
super
(
AbstractConv2d_gradInputs
,
self
)
.
__init__
(
convdim
=
2
,
imshp
=
imshp
,
kshp
=
kshp
,
imshp
=
imshp
,
kshp
=
kshp
,
border_mode
=
border_mode
,
border_mode
=
border_mode
,
subsample
=
subsample
,
subsample
=
subsample
,
filter_flip
=
filter_flip
,
filter_flip
=
filter_flip
,
filter_dilation
=
filter_dilation
,
filter_dilation
=
filter_dilation
,
num_groups
=
num_groups
)
num_groups
=
num_groups
,
unshared
=
unshared
)
def
grad
(
self
,
inp
,
grads
):
def
grad
(
self
,
inp
,
grads
):
weights
,
top
=
inp
[:
2
]
weights
,
top
=
inp
[:
2
]
...
@@ -2520,7 +2770,8 @@ class AbstractConv2d_gradInputs(AbstractConv_gradInputs):
...
@@ -2520,7 +2770,8 @@ class AbstractConv2d_gradInputs(AbstractConv_gradInputs):
self
.
subsample
,
self
.
subsample
,
self
.
filter_flip
,
self
.
filter_flip
,
self
.
filter_dilation
,
self
.
filter_dilation
,
self
.
num_groups
)(
self
.
num_groups
,
self
.
unshared
)(
bottom
,
top
,
bottom
,
top
,
weights
.
shape
[
-
2
:])
weights
.
shape
[
-
2
:])
d_top
=
AbstractConv2d
(
self
.
imshp
,
self
.
kshp
,
d_top
=
AbstractConv2d
(
self
.
imshp
,
self
.
kshp
,
...
@@ -2528,7 +2779,8 @@ class AbstractConv2d_gradInputs(AbstractConv_gradInputs):
...
@@ -2528,7 +2779,8 @@ class AbstractConv2d_gradInputs(AbstractConv_gradInputs):
self
.
subsample
,
self
.
subsample
,
self
.
filter_flip
,
self
.
filter_flip
,
self
.
filter_dilation
,
self
.
filter_dilation
,
self
.
num_groups
)(
bottom
,
weights
)
self
.
num_groups
,
self
.
unshared
)(
bottom
,
weights
)
# Make sure that the broadcastable pattern of the inputs is used
# Make sure that the broadcastable pattern of the inputs is used
# for the gradients, even if the grad opts are not able to infer
# for the gradients, even if the grad opts are not able to infer
# that the dimensions are broadcastable.
# that the dimensions are broadcastable.
...
...
theano/tensor/nnet/c_code/corr_gemm.c
浏览文件 @
9592125c
...
@@ -107,7 +107,8 @@ PyArrayObject* corrMM(PyArrayObject* bottom,
...
@@ -107,7 +107,8 @@ PyArrayObject* corrMM(PyArrayObject* bottom,
const
int
dilW
=
1
,
const
int
dilW
=
1
,
const
int
padH
=
0
,
const
int
padH
=
0
,
const
int
padW
=
0
,
const
int
padW
=
0
,
const
int
numgroups
=
1
)
const
int
numgroups
=
1
,
const
int
unshared
=
0
)
{
{
if
(
PyArray_NDIM
(
bottom
)
!=
4
)
if
(
PyArray_NDIM
(
bottom
)
!=
4
)
{
{
...
@@ -120,9 +121,9 @@ PyArrayObject* corrMM(PyArrayObject* bottom,
...
@@ -120,9 +121,9 @@ PyArrayObject* corrMM(PyArrayObject* bottom,
return
NULL
;
return
NULL
;
}
}
if
(
PyArray_NDIM
(
weight
)
!=
4
)
if
(
PyArray_NDIM
(
weight
)
!=
(
unshared
?
6
:
4
)
)
{
{
PyErr_
SetString
(
PyExc_ValueError
,
"CorrMM requires weight of 4D"
);
PyErr_
Format
(
PyExc_ValueError
,
"CorrMM requires weight of %%dD"
,
unshared
?
6
:
4
);
return
NULL
;
return
NULL
;
}
}
if
(
PyArray_TYPE
(
weight
)
!=
%
(
float_typenum
)
s
)
if
(
PyArray_TYPE
(
weight
)
!=
%
(
float_typenum
)
s
)
...
@@ -152,11 +153,12 @@ PyArrayObject* corrMM(PyArrayObject* bottom,
...
@@ -152,11 +153,12 @@ PyArrayObject* corrMM(PyArrayObject* bottom,
const
int
nChannels
=
PyArray_DIMS
(
bottom
)[
1
];
const
int
nChannels
=
PyArray_DIMS
(
bottom
)[
1
];
const
int
bottomHeight
=
PyArray_DIMS
(
bottom
)[
2
];
const
int
bottomHeight
=
PyArray_DIMS
(
bottom
)[
2
];
const
int
bottomWidth
=
PyArray_DIMS
(
bottom
)[
3
];
const
int
bottomWidth
=
PyArray_DIMS
(
bottom
)[
3
];
// weights: (nFilters, nChannels, rows, columns)
// normal weights: (nFilters, nChannels, rows, columns)
// unshared weights: (nFilters, topHeight, topWidth, nChannels, rows, columns)
const
int
nFilters
=
PyArray_DIMS
(
weight
)[
0
];
const
int
nFilters
=
PyArray_DIMS
(
weight
)[
0
];
const
int
kH
=
PyArray_DIMS
(
weight
)[
2
];
const
int
kH
=
PyArray_DIMS
(
weight
)[
unshared
?
4
:
2
];
const
int
kW
=
PyArray_DIMS
(
weight
)[
3
];
const
int
kW
=
PyArray_DIMS
(
weight
)[
unshared
?
5
:
3
];
if
(
nChannels
!=
(
PyArray_DIMS
(
weight
)[
1
]
*
numgroups
)
)
{
if
(
nChannels
!=
PyArray_DIMS
(
weight
)[
unshared
?
3
:
1
]
*
numgroups
)
{
PyErr_SetString
(
PyExc_ValueError
,
PyErr_SetString
(
PyExc_ValueError
,
"CorrMM images and kernel must have the same stack size
\n
"
);
"CorrMM images and kernel must have the same stack size
\n
"
);
return
NULL
;
return
NULL
;
...
@@ -179,22 +181,57 @@ PyArrayObject* corrMM(PyArrayObject* bottom,
...
@@ -179,22 +181,57 @@ PyArrayObject* corrMM(PyArrayObject* bottom,
const
int
topHeight
=
_CONV_FLOORDIV_X
(
topHeightNoDH
,
dH
)
+
1
;
const
int
topHeight
=
_CONV_FLOORDIV_X
(
topHeightNoDH
,
dH
)
+
1
;
const
int
topWidth
=
_CONV_FLOORDIV_X
(
topWidthNoDW
,
dW
)
+
1
;
const
int
topWidth
=
_CONV_FLOORDIV_X
(
topWidthNoDW
,
dW
)
+
1
;
#undef _CONV_FLOORDIV
#undef _CONV_FLOORDIV
if
(
batchSize
!=
PyArray_DIMS
(
top
)[
0
]
||
if
(
unshared
)
{
nFilters
!=
PyArray_DIMS
(
top
)[
1
]
||
if
(
topHeight
!=
PyArray_DIMS
(
weight
)[
1
]
||
topHeight
!=
PyArray_DIMS
(
top
)[
2
]
||
topWidth
!=
PyArray_DIMS
(
weight
)[
2
])
{
topWidth
!=
PyArray_DIMS
(
top
)[
3
])
{
PyErr_Format
(
PyExc_ValueError
,
PyErr_Format
(
PyExc_ValueError
,
"CorrMM regions in kernel must match output regions:
\n
"
"CorrMM shape inconsistency:
\n
"
" bottom shape: %%d %%d %%d %%d
\n
"
" bottom shape: %%d %%d %%d %%d
\n
"
" weight shape: %%d %%ld %%ld %%d %%d %%d"
" weight shape: %%d %%d %%d %%d
\n
"
" (expected %%d %%d %%d %%d %%d %%d)
\n
"
" top shape: %%ld %%ld %%ld %%ld (expected %%d %%d %%d %%d)
\n
"
,
" top shape(calculated): %%d %%d %%d %%d
\n
"
,
batchSize
,
nChannels
,
bottomHeight
,
bottomWidth
,
batchSize
,
nChannels
,
bottomHeight
,
bottomWidth
,
nFilters
,
nChannels
/
numgroups
,
kH
,
kW
,
nFilters
,
PyArray_DIMS
(
weight
)[
1
],
PyArray_DIMS
(
top
)[
0
],
PyArray_DIMS
(
top
)[
1
],
PyArray_DIMS
(
weight
)[
2
],
nChannels
/
numgroups
,
kH
,
kW
,
PyArray_DIMS
(
top
)[
2
],
PyArray_DIMS
(
top
)[
3
],
nFilters
,
topHeight
,
topWidth
,
nChannels
/
numgroups
,
kH
,
kW
,
batchSize
,
nFilters
,
topHeight
,
topWidth
);
batchSize
,
nFilters
,
topHeight
,
topWidth
);
return
NULL
;
return
NULL
;
}
if
(
batchSize
!=
PyArray_DIMS
(
top
)[
0
]
||
nFilters
!=
PyArray_DIMS
(
top
)[
1
]
||
topHeight
!=
PyArray_DIMS
(
top
)[
2
]
||
topWidth
!=
PyArray_DIMS
(
top
)[
3
])
{
PyErr_Format
(
PyExc_ValueError
,
"CorrMM shape inconsistency:
\n
"
" bottom shape: %%d %%d %%d %%d
\n
"
" weight shape: %%d %%d %%d %%d %%d %%d
\n
"
" top shape: %%ld %%ld %%ld %%ld (expected %%d %%d %%d %%d)
\n
"
,
batchSize
,
nChannels
,
bottomHeight
,
bottomWidth
,
nFilters
,
topHeight
,
topWidth
,
nChannels
/
numgroups
,
kH
,
kW
,
PyArray_DIMS
(
top
)[
0
],
PyArray_DIMS
(
top
)[
1
],
PyArray_DIMS
(
top
)[
2
],
PyArray_DIMS
(
top
)[
3
],
batchSize
,
nFilters
,
topHeight
,
topWidth
);
return
NULL
;
}
}
}
else
{
if
(
batchSize
!=
PyArray_DIMS
(
top
)[
0
]
||
nFilters
!=
PyArray_DIMS
(
top
)[
1
]
||
topHeight
!=
PyArray_DIMS
(
top
)[
2
]
||
topWidth
!=
PyArray_DIMS
(
top
)[
3
])
{
PyErr_Format
(
PyExc_ValueError
,
"CorrMM shape inconsistency:
\n
"
" bottom shape: %%d %%d %%d %%d
\n
"
" weight shape: %%d %%d %%d %%d
\n
"
" top shape: %%ld %%ld %%ld %%ld (expected %%d %%d %%d %%d)
\n
"
,
batchSize
,
nChannels
,
bottomHeight
,
bottomWidth
,
nFilters
,
nChannels
/
numgroups
,
kH
,
kW
,
PyArray_DIMS
(
top
)[
0
],
PyArray_DIMS
(
top
)[
1
],
PyArray_DIMS
(
top
)[
2
],
PyArray_DIMS
(
top
)[
3
],
batchSize
,
nFilters
,
topHeight
,
topWidth
);
return
NULL
;
}
}
// Create temporary columns
// Create temporary columns
int
max_threads
=
%
(
omp_get_max_threads
)
s
;
int
max_threads
=
%
(
omp_get_max_threads
)
s
;
...
@@ -230,8 +267,10 @@ PyArrayObject* corrMM(PyArrayObject* bottom,
...
@@ -230,8 +267,10 @@ PyArrayObject* corrMM(PyArrayObject* bottom,
const
int
group_col_stride
=
(
K_
*
N_
);
const
int
group_col_stride
=
(
K_
*
N_
);
const
int
group_weight_stride
=
(
PyArray_STRIDES
(
weight
)[
0
]
*
nFilters
/
numgroups
)
/%
(
n_bytes
)
f
;
const
int
group_weight_stride
=
(
PyArray_STRIDES
(
weight
)[
0
]
*
nFilters
/
numgroups
)
/%
(
n_bytes
)
f
;
const
int
M_
=
nFilters
/
numgroups
;
const
int
M_
=
nFilters
/
numgroups
;
const
int
one_int
=
1
;
const
%
(
c_float_type
)
s
one
=
1
.
0
;
const
%
(
c_float_type
)
s
one
=
1
.
0
;
const
%
(
c_float_type
)
s
zero
=
0
.
0
;
const
%
(
c_float_type
)
s
zero
=
0
.
0
;
const
int
ldw
=
(
K_
*
N_
);
char
NTrans
=
'N'
;
char
NTrans
=
'N'
;
char
Trans
=
'T'
;
char
Trans
=
'T'
;
PyArrayObject
*
output
;
PyArrayObject
*
output
;
...
@@ -266,15 +305,30 @@ PyArrayObject* corrMM(PyArrayObject* bottom,
...
@@ -266,15 +305,30 @@ PyArrayObject* corrMM(PyArrayObject* bottom,
im2col
((
%
(
float_type
)
s
*
)
PyArray_DATA
(
bottom
)
+
n
*
batch_bottom_stride
,
nChannels
,
im2col
((
%
(
float_type
)
s
*
)
PyArray_DATA
(
bottom
)
+
n
*
batch_bottom_stride
,
nChannels
,
bottomHeight
,
bottomWidth
,
kH
,
kW
,
dilH
,
dilW
,
padH
,
padW
,
dH
,
dW
,
bottomHeight
,
bottomWidth
,
kH
,
kW
,
dilH
,
dilW
,
padH
,
padW
,
dH
,
dW
,
(
%
(
float_type
)
s
*
)
PyArray_DATA
(
col
)
+
tid
*
col_stride
);
(
%
(
float_type
)
s
*
)
PyArray_DATA
(
col
)
+
tid
*
col_stride
);
for
(
int
g
=
0
;
g
<
numgroups
;
++
g
){
// Second, gemm
// Second, gemm
if
(
unshared
)
{
%
(
gemm
)
s
(
&
NTrans
,
&
NTrans
,
for
(
int
g
=
0
;
g
<
numgroups
;
++
g
)
{
&
N_
,
&
M_
,
&
K_
,
for
(
int
reg
=
0
;
reg
<
N_
;
++
reg
)
{
&
one
,
%
(
gemv
)
s
(
&
Trans
,
&
K_
,
&
M_
,
(
%
(
float_type
)
s
*
)
PyArray_DATA
(
col
)
+
tid
*
col_stride
+
g
*
group_col_stride
,
&
N_
,
&
one
,
(
%
(
float_type
)
s
*
)
PyArray_DATA
(
weight
)
+
g
*
group_weight_stride
,
&
K_
,
(
%
(
float_type
)
s
*
)
PyArray_DATA
(
weight
)
+
g
*
group_weight_stride
+
reg
*
K_
,
&
ldw
,
&
zero
,
(
%
(
float_type
)
s
*
)
PyArray_DATA
(
col
)
+
tid
*
col_stride
+
g
*
group_col_stride
+
reg
,
&
N_
,
(
%
(
float_type
)
s
*
)
PyArray_DATA
(
top
)
+
n
*
batch_top_stride
+
g
*
group_top_stride
,
&
N_
);
&
zero
,
(
%
(
float_type
)
s
*
)
PyArray_DATA
(
top
)
+
n
*
batch_top_stride
+
g
*
group_top_stride
+
reg
,
&
N_
);
}
}
}
else
{
for
(
int
g
=
0
;
g
<
numgroups
;
++
g
){
// Second, gemm
%
(
gemm
)
s
(
&
NTrans
,
&
NTrans
,
&
N_
,
&
M_
,
&
K_
,
&
one
,
(
%
(
float_type
)
s
*
)
PyArray_DATA
(
col
)
+
tid
*
col_stride
+
g
*
group_col_stride
,
&
N_
,
(
%
(
float_type
)
s
*
)
PyArray_DATA
(
weight
)
+
g
*
group_weight_stride
,
&
K_
,
&
zero
,
(
%
(
float_type
)
s
*
)
PyArray_DATA
(
top
)
+
n
*
batch_top_stride
+
g
*
group_top_stride
,
&
N_
);
}
}
}
}
}
// Restore to previous blas threads
// Restore to previous blas threads
...
@@ -316,7 +370,10 @@ PyArrayObject* corrMM(PyArrayObject* bottom,
...
@@ -316,7 +370,10 @@ PyArrayObject* corrMM(PyArrayObject* bottom,
output
=
weight
;
output
=
weight
;
npy_intp
weight_dim
[
2
];
npy_intp
weight_dim
[
2
];
weight_dim
[
0
]
=
(
npy_intp
)
max_threads
;
weight_dim
[
0
]
=
(
npy_intp
)
max_threads
;
weight_dim
[
1
]
=
(
npy_intp
)(
M_
*
K_
*
numgroups
);
if
(
unshared
)
weight_dim
[
1
]
=
(
npy_intp
)(
M_
*
N_
*
K_
*
numgroups
);
else
weight_dim
[
1
]
=
(
npy_intp
)(
M_
*
K_
*
numgroups
);
PyArrayObject
*
local_weight
=
(
PyArrayObject
*
)
PyArray_ZEROS
(
2
,
PyArrayObject
*
local_weight
=
(
PyArrayObject
*
)
PyArray_ZEROS
(
2
,
weight_dim
,
PyArray_TYPE
(
weight
),
0
);
weight_dim
,
PyArray_TYPE
(
weight
),
0
);
...
@@ -341,26 +398,46 @@ PyArrayObject* corrMM(PyArrayObject* bottom,
...
@@ -341,26 +398,46 @@ PyArrayObject* corrMM(PyArrayObject* bottom,
im2col
((
%
(
float_type
)
s
*
)
PyArray_DATA
(
bottom
)
+
n
*
batch_bottom_stride
,
im2col
((
%
(
float_type
)
s
*
)
PyArray_DATA
(
bottom
)
+
n
*
batch_bottom_stride
,
nChannels
,
bottomHeight
,
bottomWidth
,
kH
,
kW
,
dilH
,
dilW
,
padH
,
padW
,
dH
,
dW
,
nChannels
,
bottomHeight
,
bottomWidth
,
kH
,
kW
,
dilH
,
dilW
,
padH
,
padW
,
dH
,
dW
,
(
%
(
float_type
)
s
*
)
PyArray_DATA
(
col
)
+
tid
*
col_stride
);
(
%
(
float_type
)
s
*
)
PyArray_DATA
(
col
)
+
tid
*
col_stride
);
for
(
int
g
=
0
;
g
<
numgroups
;
++
g
){
// Second, gemm
// Second, gemm
// Note that we accumulate into weight. We do so by setting beta = 0
// Note that we accumulate into weight. We do so by setting beta = 0
// for the first iteration and beta = 1 for subsequent ones. (This
// for the first iteration and beta = 1 for subsequent ones. (This
// is faster than setting weight to all zeros before the loop.)
// is faster than setting weight to all zeros before the loop.)
if
(
unshared
)
{
%
(
gemm
)
s
(
&
Trans
,
&
NTrans
,
for
(
int
g
=
0
;
g
<
numgroups
;
++
g
)
{
&
K_
,
&
M_
,
&
N_
,
for
(
int
reg
=
0
;
reg
<
N_
;
++
reg
)
{
&
one
,
%
(
gemm
)
s
(
&
Trans
,
&
NTrans
,
(
%
(
float_type
)
s
*
)
PyArray_DATA
(
col
)
+
tid
*
col_stride
+
g
*
group_col_stride
,
&
N_
,
&
K_
,
&
M_
,
&
one_int
,
(
%
(
float_type
)
s
*
)
PyArray_DATA
(
top
)
+
g
*
group_top_stride
+
n
*
batch_top_stride
,
&
N_
,
&
one
,
(
n
==
0
)
?
&
zero
:
&
one
,
(
%
(
float_type
)
s
*
)
PyArray_DATA
(
col
)
+
tid
*
col_stride
+
g
*
group_col_stride
+
reg
,
&
N_
,
(
%
(
float_type
)
s
*
)
PyArray_DATA
(
local_weight
)
+
g
*
group_weight_stride
+
(
%
(
float_type
)
s
*
)
PyArray_DATA
(
top
)
+
g
*
group_top_stride
+
n
*
batch_top_stride
+
reg
,
&
N_
,
tid
*
weight_dim
[
1
],
&
K_
);
(
n
==
0
)
?
&
zero
:
&
one
,
(
%
(
float_type
)
s
*
)
PyArray_DATA
(
local_weight
)
+
g
*
group_weight_stride
+
reg
*
K_
+
tid
*
weight_dim
[
1
],
&
ldw
);
}
}
}
else
{
for
(
int
g
=
0
;
g
<
numgroups
;
++
g
){
// Second, gemm
// Note that we accumulate into weight. We do so by setting beta = 0
// for the first iteration and beta = 1 for subsequent ones. (This
// is faster than setting weight to all zeros before the loop.)
%
(
gemm
)
s
(
&
Trans
,
&
NTrans
,
&
K_
,
&
M_
,
&
N_
,
&
one
,
(
%
(
float_type
)
s
*
)
PyArray_DATA
(
col
)
+
tid
*
col_stride
+
g
*
group_col_stride
,
&
N_
,
(
%
(
float_type
)
s
*
)
PyArray_DATA
(
top
)
+
g
*
group_top_stride
+
n
*
batch_top_stride
,
&
N_
,
(
n
==
0
)
?
&
zero
:
&
one
,
(
%
(
float_type
)
s
*
)
PyArray_DATA
(
local_weight
)
+
g
*
group_weight_stride
+
tid
*
weight_dim
[
1
],
&
K_
);
}
}
}
}
}
// Restore to previous blas threads
// Restore to previous blas threads
%
(
blas_set_num_threads
)
s
(
blas_threads_saved
);
%
(
blas_set_num_threads
)
s
(
blas_threads_saved
);
//aggregate weights
//aggregate weights
memset
((
%
(
float_type
)
s
*
)
PyArray_DATA
(
weight
),
0
,
M_
*
K_
*
sizeof
(
%
(
float_type
)
s
));
memset
((
%
(
float_type
)
s
*
)
PyArray_DATA
(
weight
),
0
,
weight_dim
[
1
]
*
sizeof
(
%
(
float_type
)
s
));
/*
/*
* Put index "j" into outer loop to get the
* Put index "j" into outer loop to get the
* correct result when openmp is used.
* correct result when openmp is used.
...
@@ -416,15 +493,29 @@ PyArrayObject* corrMM(PyArrayObject* bottom,
...
@@ -416,15 +493,29 @@ PyArrayObject* corrMM(PyArrayObject* bottom,
%
(
omp_flags
)
s
%
(
omp_flags
)
s
for
(
int
n
=
0
;
n
<
batchSize
;
++
n
)
{
for
(
int
n
=
0
;
n
<
batchSize
;
++
n
)
{
int
tid
=
%
(
omp_get_thread_num
)
s
;
int
tid
=
%
(
omp_get_thread_num
)
s
;
for
(
int
g
=
0
;
g
<
numgroups
;
++
g
){
if
(
unshared
)
{
// gemm into columns
for
(
int
g
=
0
;
g
<
numgroups
;
++
g
){
%
(
gemm
)
s
(
&
NTrans
,
&
Trans
,
for
(
int
reg
=
0
;
reg
<
N_
;
++
reg
){
&
N_
,
&
K_
,
&
M_
,
%
(
gemm
)
s
(
&
NTrans
,
&
Trans
,
&
one
,
&
one_int
,
&
K_
,
&
M_
,
(
%
(
float_type
)
s
*
)
PyArray_DATA
(
top
)
+
g
*
group_top_stride
+
n
*
batch_top_stride
,
&
N_
,
&
one
,
(
%
(
float_type
)
s
*
)
PyArray_DATA
(
weight
)
+
g
*
group_weight_stride
,
&
K_
,
(
%
(
float_type
)
s
*
)
PyArray_DATA
(
top
)
+
g
*
group_top_stride
+
n
*
batch_top_stride
+
reg
,
&
N_
,
&
zero
,
(
%
(
float_type
)
s
*
)
PyArray_DATA
(
weight
)
+
g
*
group_weight_stride
+
reg
*
K_
,
&
ldw
,
(
%
(
float_type
)
s
*
)
PyArray_DATA
(
col
)
+
tid
*
col_stride
+
g
*
group_col_stride
,
&
N_
);
&
zero
,
(
%
(
float_type
)
s
*
)
PyArray_DATA
(
col
)
+
tid
*
col_stride
+
g
*
group_col_stride
+
reg
,
&
N_
);
}
}
}
else
{
for
(
int
g
=
0
;
g
<
numgroups
;
++
g
)
{
%
(
gemm
)
s
(
&
NTrans
,
&
Trans
,
&
N_
,
&
K_
,
&
M_
,
&
one
,
(
%
(
float_type
)
s
*
)
PyArray_DATA
(
top
)
+
g
*
group_top_stride
+
n
*
batch_top_stride
,
&
N_
,
(
%
(
float_type
)
s
*
)
PyArray_DATA
(
weight
)
+
g
*
group_weight_stride
,
&
K_
,
&
zero
,
(
%
(
float_type
)
s
*
)
PyArray_DATA
(
col
)
+
tid
*
col_stride
+
g
*
group_col_stride
,
&
N_
);
}
}
}
// col2im back to the data
// col2im back to the data
col2im
((
%
(
float_type
)
s
*
)
PyArray_DATA
(
col
)
+
tid
*
col_stride
,
nChannels
,
bottomHeight
,
bottomWidth
,
col2im
((
%
(
float_type
)
s
*
)
PyArray_DATA
(
col
)
+
tid
*
col_stride
,
nChannels
,
bottomHeight
,
bottomWidth
,
...
@@ -474,4 +565,3 @@ PyArrayObject* corrMM(PyArrayObject* bottom,
...
@@ -474,4 +565,3 @@ PyArrayObject* corrMM(PyArrayObject* bottom,
// in here output is just aliased to one of bottom, weights, or top.
// in here output is just aliased to one of bottom, weights, or top.
return
output
;
return
output
;
}
}
theano/tensor/nnet/corr.py
浏览文件 @
9592125c
...
@@ -8,7 +8,7 @@ import theano
...
@@ -8,7 +8,7 @@ import theano
from
theano
import
Apply
from
theano
import
Apply
from
theano
import
gof
from
theano
import
gof
from
theano.gof
import
ParamsType
,
EnumList
from
theano.gof
import
ParamsType
,
EnumList
from
theano.scalar
import
int64
from
theano.scalar
import
int64
,
int8
from
theano.tensor
import
as_tensor_variable
,
TensorType
from
theano.tensor
import
as_tensor_variable
,
TensorType
from
theano.tensor.nnet.abstract_conv
import
get_conv_output_shape
from
theano.tensor.nnet.abstract_conv
import
get_conv_output_shape
from
theano.tensor
import
blas_headers
from
theano.tensor
import
blas_headers
...
@@ -42,9 +42,11 @@ class BaseCorrMM(gof.OpenMPOp):
...
@@ -42,9 +42,11 @@ class BaseCorrMM(gof.OpenMPOp):
Perform dilated correlation (default: (1,1))
Perform dilated correlation (default: (1,1))
num_groups
num_groups
Perform grouped convolutions (default: 1)
Perform grouped convolutions (default: 1)
unshared
Perform unshared correlation (default: False)
"""
"""
check_broadcast
=
False
check_broadcast
=
False
__props__
=
(
'border_mode'
,
'subsample'
,
'filter_dilation'
,
'num_groups'
)
__props__
=
(
'border_mode'
,
'subsample'
,
'filter_dilation'
,
'num_groups'
,
'unshared'
)
_direction
=
None
_direction
=
None
...
@@ -54,10 +56,10 @@ class BaseCorrMM(gof.OpenMPOp):
...
@@ -54,10 +56,10 @@ class BaseCorrMM(gof.OpenMPOp):
dH
=
int64
,
dW
=
int64
,
dH
=
int64
,
dW
=
int64
,
dilH
=
int64
,
dilW
=
int64
,
dilH
=
int64
,
dilW
=
int64
,
padH
=
int64
,
padW
=
int64
,
padH
=
int64
,
padW
=
int64
,
num_groups
=
int64
)
num_groups
=
int64
,
unshared
=
int8
)
def
__init__
(
self
,
border_mode
=
"valid"
,
subsample
=
(
1
,
1
),
def
__init__
(
self
,
border_mode
=
"valid"
,
subsample
=
(
1
,
1
),
filter_dilation
=
(
1
,
1
),
num_groups
=
1
,
openmp
=
None
):
filter_dilation
=
(
1
,
1
),
num_groups
=
1
,
unshared
=
False
,
openmp
=
None
):
super
(
BaseCorrMM
,
self
)
.
__init__
(
openmp
=
openmp
)
super
(
BaseCorrMM
,
self
)
.
__init__
(
openmp
=
openmp
)
if
isinstance
(
border_mode
,
integer_types
):
if
isinstance
(
border_mode
,
integer_types
):
if
border_mode
<
0
:
if
border_mode
<
0
:
...
@@ -85,6 +87,7 @@ class BaseCorrMM(gof.OpenMPOp):
...
@@ -85,6 +87,7 @@ class BaseCorrMM(gof.OpenMPOp):
raise
ValueError
(
"filter_dilation must have two elements"
)
raise
ValueError
(
"filter_dilation must have two elements"
)
self
.
subsample
=
tuple
(
subsample
)
self
.
subsample
=
tuple
(
subsample
)
self
.
filter_dilation
=
tuple
(
filter_dilation
)
self
.
filter_dilation
=
tuple
(
filter_dilation
)
self
.
unshared
=
unshared
if
not
theano
.
config
.
blas
.
ldflags
:
if
not
theano
.
config
.
blas
.
ldflags
:
# Theano will use a NumPy C implementation of [sd]gemm_ instead.
# Theano will use a NumPy C implementation of [sd]gemm_ instead.
...
@@ -130,12 +133,13 @@ class BaseCorrMM(gof.OpenMPOp):
...
@@ -130,12 +133,13 @@ class BaseCorrMM(gof.OpenMPOp):
padW
=
property
(
lambda
self
:
self
.
pad
[
1
])
padW
=
property
(
lambda
self
:
self
.
pad
[
1
])
def
__str__
(
self
):
def
__str__
(
self
):
return
'
%
s{
%
s,
%
s,
%
s,
%
s}'
%
(
return
'
%
s{
%
s,
%
s,
%
s,
%
s
%
s
}'
%
(
self
.
__class__
.
__name__
,
self
.
__class__
.
__name__
,
self
.
border_mode
,
self
.
border_mode
,
str
(
self
.
subsample
),
str
(
self
.
subsample
),
str
(
self
.
filter_dilation
),
str
(
self
.
filter_dilation
),
str
(
self
.
num_groups
))
str
(
self
.
num_groups
),
str
(
self
.
unshared
))
@staticmethod
@staticmethod
def
as_common_dtype
(
in1
,
in2
):
def
as_common_dtype
(
in1
,
in2
):
...
@@ -179,7 +183,7 @@ class BaseCorrMM(gof.OpenMPOp):
...
@@ -179,7 +183,7 @@ class BaseCorrMM(gof.OpenMPOp):
def
c_code_cache_version
(
self
):
def
c_code_cache_version
(
self
):
# raise this whenever modifying any of the support_code_files
# raise this whenever modifying any of the support_code_files
return
(
7
,
self
.
openmp
,
blas_header_version
())
return
(
9
,
self
.
openmp
,
blas_header_version
())
def
c_support_code_apply
(
self
,
node
,
nodename
):
def
c_support_code_apply
(
self
,
node
,
nodename
):
# REMEMBER TO RAISE c_code_cache_version when changing any of
# REMEMBER TO RAISE c_code_cache_version when changing any of
...
@@ -189,12 +193,14 @@ class BaseCorrMM(gof.OpenMPOp):
...
@@ -189,12 +193,14 @@ class BaseCorrMM(gof.OpenMPOp):
assert
dtype
in
(
'float32'
,
'float64'
)
assert
dtype
in
(
'float32'
,
'float64'
)
if
dtype
==
'float32'
:
if
dtype
==
'float32'
:
sub
[
'gemm'
]
=
'sgemm_'
sub
[
'gemm'
]
=
'sgemm_'
sub
[
'gemv'
]
=
'sgemv_'
sub
[
'float_type'
]
=
'npy_float'
sub
[
'float_type'
]
=
'npy_float'
sub
[
'float_typenum'
]
=
'NPY_FLOAT'
sub
[
'float_typenum'
]
=
'NPY_FLOAT'
sub
[
'n_bytes'
]
=
4
sub
[
'n_bytes'
]
=
4
sub
[
'c_float_type'
]
=
'float'
sub
[
'c_float_type'
]
=
'float'
else
:
else
:
sub
[
'gemm'
]
=
'dgemm_'
sub
[
'gemm'
]
=
'dgemm_'
sub
[
'gemv'
]
=
'dgemv_'
sub
[
'float_type'
]
=
'npy_double'
sub
[
'float_type'
]
=
'npy_double'
sub
[
'float_typenum'
]
=
'NPY_DOUBLE'
sub
[
'float_typenum'
]
=
'NPY_DOUBLE'
sub
[
'n_bytes'
]
=
8
sub
[
'n_bytes'
]
=
8
...
@@ -287,6 +293,7 @@ class BaseCorrMM(gof.OpenMPOp):
...
@@ -287,6 +293,7 @@ class BaseCorrMM(gof.OpenMPOp):
int padH =
%(params)
s->padH;
int padH =
%(params)
s->padH;
int padW =
%(params)
s->padW;
int padW =
%(params)
s->padW;
int numgroups =
%(params)
s->num_groups;
int numgroups =
%(params)
s->num_groups;
int unshared =
%(params)
s->unshared;
PyArrayObject * bottom =
%(bottom)
s;
PyArrayObject * bottom =
%(bottom)
s;
PyArrayObject * weights =
%(weights)
s;
PyArrayObject * weights =
%(weights)
s;
...
@@ -310,13 +317,17 @@ class BaseCorrMM(gof.OpenMPOp):
...
@@ -310,13 +317,17 @@ class BaseCorrMM(gof.OpenMPOp):
break;
break;
}
}
int wdim, odim;
wdim = unshared ? 6 : 4;
odim = 4; //Can be set to 6 later for unshared backprop wrt weights
// Obtain or infer kernel width and height
// Obtain or infer kernel width and height
// (we need to know it early to be able to handle auto-padding)
// (we need to know it early to be able to handle auto-padding)
int kH, kW, dil_kH, dil_kW;
int kH, kW, dil_kH, dil_kW;
if (direction != 1) {
if (direction != 1) {
// weight is an input variable, we can just read its shape
// weight is an input variable, we can just read its shape
kH = PyArray_DIMS(weights)[2];
kH = PyArray_DIMS(weights)[
wdim-
2];
kW = PyArray_DIMS(weights)[
3
];
kW = PyArray_DIMS(weights)[
wdim-1
];
}
}
else {
else {
if (
%(height)
s != -1) {
if (
%(height)
s != -1) {
...
@@ -370,28 +381,45 @@ class BaseCorrMM(gof.OpenMPOp):
...
@@ -370,28 +381,45 @@ class BaseCorrMM(gof.OpenMPOp):
}
}
// Infer output shape
// Infer output shape
npy_intp out_dim[4];
npy_intp out_dim[6];
out_dim[4] = out_dim[5] = 0; //Only used for unshared backprop wrt weights
switch(direction) {
switch(direction) {
case 0: // forward pass
case 0: // forward pass
// output is top: (batchsize, num_filters, height, width)
// output is top: (batchsize, num_filters, height, width)
// height and width: top = (bottom + 2*pad - ((weight-1)*dil + 1)) / sample + 1
// height and width: top = (bottom + 2*pad - ((weight-1)*dil + 1)) / sample + 1
out_dim[0] = (npy_intp)PyArray_DIMS(bottom)[0];
out_dim[0] = (npy_intp)PyArray_DIMS(bottom)[0];
out_dim[1] = (npy_intp)PyArray_DIMS(weights)[0];
out_dim[1] = (npy_intp)PyArray_DIMS(weights)[0];
out_dim[2] = (npy_intp)((PyArray_DIMS(bottom)[2] + 2*padH - ((PyArray_DIMS(weights)[2]-1)*dilH + 1)) / dH + 1);
out_dim[2] = (npy_intp)((PyArray_DIMS(bottom)[2] + 2*padH - ((PyArray_DIMS(weights)[
wdim-
2]-1)*dilH + 1)) / dH + 1);
out_dim[3] = (npy_intp)((PyArray_DIMS(bottom)[3] + 2*padW - ((PyArray_DIMS(weights)[
3
]-1)*dilW + 1)) / dW + 1);
out_dim[3] = (npy_intp)((PyArray_DIMS(bottom)[3] + 2*padW - ((PyArray_DIMS(weights)[
wdim-1
]-1)*dilW + 1)) / dW + 1);
if (out_dim[0] < 0 || out_dim[1] < 0 || out_dim[2] <= 0 || out_dim[3] <= 0)
if (out_dim[0] < 0 || out_dim[1] < 0 || out_dim[2] <= 0 || out_dim[3] <= 0)
{
{
PyErr_Format(PyExc_ValueError,
if (unshared) {
"CorrMM: impossible output shape
\\
n"
PyErr_Format(PyExc_ValueError,
" bottom shape:
%%
ld x
%%
ld x
%%
ld x
%%
ld
\\
n"
"CorrMM: impossible output shape
\\
n"
" weights shape:
%%
ld x
%%
ld x
%%
ld x
%%
ld
\\
n"
" bottom shape:
%%
ld x
%%
ld x
%%
ld x
%%
ld
\\
n"
" top shape:
%%
ld x
%%
ld x
%%
ld x
%%
ld
\\
n",
" weights shape:
%%
ld x
%%
ld x
%%
ld x
%%
ld x
%%
ld x
%%
ld
\\
n"
(long int)PyArray_DIMS(bottom)[0], (long int)PyArray_DIMS(bottom)[1],
" top shape:
%%
ld x
%%
ld x
%%
ld x
%%
ld
\\
n",
(long int)PyArray_DIMS(bottom)[2], (long int)PyArray_DIMS(bottom)[3],
(long int)PyArray_DIMS(bottom)[0], (long int)PyArray_DIMS(bottom)[1],
(long int)PyArray_DIMS(weights)[0], (long int)PyArray_DIMS(weights)[1],
(long int)PyArray_DIMS(bottom)[2], (long int)PyArray_DIMS(bottom)[3],
(long int)PyArray_DIMS(weights)[2], (long int)PyArray_DIMS(weights)[3],
(long int)PyArray_DIMS(weights)[0], (long int)PyArray_DIMS(weights)[1],
(long int)out_dim[0], (long int)out_dim[1], (long int)out_dim[2],
(long int)PyArray_DIMS(weights)[2], (long int)PyArray_DIMS(weights)[3],
(long int)out_dim[3]);
(long int)PyArray_DIMS(weights)[4], (long int)PyArray_DIMS(weights)[5],
(long int)out_dim[0], (long int)out_dim[1], (long int)out_dim[2],
(long int)out_dim[3]);
}
else {
PyErr_Format(PyExc_ValueError,
"CorrMM: impossible output shape
\\
n"
" bottom shape:
%%
ld x
%%
ld x
%%
ld x
%%
ld
\\
n"
" weights shape:
%%
ld x
%%
ld x
%%
ld x
%%
ld
\\
n"
" top shape:
%%
ld x
%%
ld x
%%
ld x
%%
ld
\\
n",
(long int)PyArray_DIMS(bottom)[0], (long int)PyArray_DIMS(bottom)[1],
(long int)PyArray_DIMS(bottom)[2], (long int)PyArray_DIMS(bottom)[3],
(long int)PyArray_DIMS(weights)[0], (long int)PyArray_DIMS(weights)[1],
(long int)PyArray_DIMS(weights)[2], (long int)PyArray_DIMS(weights)[3],
(long int)out_dim[0], (long int)out_dim[1], (long int)out_dim[2],
(long int)out_dim[3]);
}
%(fail)
s
%(fail)
s
}
}
break;
break;
...
@@ -399,46 +427,90 @@ class BaseCorrMM(gof.OpenMPOp):
...
@@ -399,46 +427,90 @@ class BaseCorrMM(gof.OpenMPOp):
// output is weights: (num_filters, num_channels, height, width)
// output is weights: (num_filters, num_channels, height, width)
// height and width: weights = (bottom + 2*pad - (top - 1) * sample - 1) / dil + 1
// height and width: weights = (bottom + 2*pad - (top - 1) * sample - 1) / dil + 1
out_dim[0] = (npy_intp)PyArray_DIMS(top)[1];
out_dim[0] = (npy_intp)PyArray_DIMS(top)[1];
out_dim[1] = (npy_intp)PyArray_DIMS(bottom)[1] / numgroups;
if (unshared){
out_dim[2] = (npy_intp)kH; // already inferred further above
odim = 6;
out_dim[3] = (npy_intp)kW; // how convenient
out_dim[1] = (npy_intp)PyArray_DIMS(top)[2];
if (out_dim[0] < 0 || out_dim[1] < 0 || out_dim[2] <= 0 || out_dim[3] <= 0)
out_dim[2] = (npy_intp)PyArray_DIMS(top)[3];
{
}
PyErr_Format(PyExc_ValueError,
out_dim[wdim-3] = (npy_intp)PyArray_DIMS(bottom)[1] / numgroups;
"CorrMM backprop wrt. weights: impossible output shape
\\
n"
out_dim[wdim-2] = (npy_intp)kH; // already inferred further above
" bottom shape:
%%
ld x
%%
ld x
%%
ld x
%%
ld
\\
n"
out_dim[wdim-1] = (npy_intp)kW; // how convenient
" weights shape:
%%
ld x
%%
ld x
%%
ld x
%%
ld
\\
n"
if (unshared) {
" top shape:
%%
ld x
%%
ld x
%%
ld x
%%
ld
\\
n",
if (out_dim[0] < 0 || out_dim[1] <= 0 || out_dim[2] <= 0 || out_dim[3] < 0
(long int)PyArray_DIMS(bottom)[0], (long int)PyArray_DIMS(bottom)[1],
|| out_dim[4] <= 0 || out_dim[5] <= 0){
(long int)PyArray_DIMS(bottom)[2], (long int)PyArray_DIMS(bottom)[3],
PyErr_Format(PyExc_ValueError,
(long int)out_dim[0], (long int)out_dim[1], (long int)out_dim[2],
"CorrMM backprop wrt. weights: impossible output shape
\\
n"
(long int)out_dim[3],
" bottom shape:
%%
ld x
%%
ld x
%%
ld x
%%
ld
\\
n"
(long int)PyArray_DIMS(top)[0], (long int)PyArray_DIMS(top)[1],
" weights shape:
%%
ld x
%%
ld x
%%
ld x
%%
ld x
%%
ld x
%%
ld
\\
n"
(long int)PyArray_DIMS(top)[2], (long int)PyArray_DIMS(top)[3]);
" top shape:
%%
ld x
%%
ld x
%%
ld x
%%
ld
\\
n",
%(fail)
s
(long int)PyArray_DIMS(bottom)[0], (long int)PyArray_DIMS(bottom)[1],
(long int)PyArray_DIMS(bottom)[2], (long int)PyArray_DIMS(bottom)[3],
(long int)out_dim[0], (long int)out_dim[1], (long int)out_dim[2],
(long int)out_dim[3], (long int)out_dim[4], (long int)out_dim[5],
(long int)PyArray_DIMS(top)[0], (long int)PyArray_DIMS(top)[1],
(long int)PyArray_DIMS(top)[2], (long int)PyArray_DIMS(top)[3]);
%(fail)
s
}
}
else {
if (out_dim[0] < 0 || out_dim[1] < 0 || out_dim[2] <= 0 || out_dim[3] <= 0)
{
PyErr_Format(PyExc_ValueError,
"CorrMM backprop wrt. weights: impossible output shape
\\
n"
" bottom shape:
%%
ld x
%%
ld x
%%
ld x
%%
ld
\\
n"
" weights shape:
%%
ld x
%%
ld x
%%
ld x
%%
ld
\\
n"
" top shape:
%%
ld x
%%
ld x
%%
ld x
%%
ld
\\
n",
(long int)PyArray_DIMS(bottom)[0], (long int)PyArray_DIMS(bottom)[1],
(long int)PyArray_DIMS(bottom)[2], (long int)PyArray_DIMS(bottom)[3],
(long int)out_dim[0], (long int)out_dim[1], (long int)out_dim[2],
(long int)out_dim[3],
(long int)PyArray_DIMS(top)[0], (long int)PyArray_DIMS(top)[1],
(long int)PyArray_DIMS(top)[2], (long int)PyArray_DIMS(top)[3]);
%(fail)
s
}
}
}
break;
break;
case 2: // backprop wrt. inputs
case 2: // backprop wrt. inputs
// output is bottom: (batchsize, num_channels, height, width)
// output is bottom: (batchsize, num_channels, height, width)
// height and width: bottom = (top - 1) * sample + (weights-1)*dil + 1 - 2*pad
// height and width: bottom = (top - 1) * sample + (weights-1)*dil + 1 - 2*pad
out_dim[0] = (npy_intp)PyArray_DIMS(top)[0];
out_dim[0] = (npy_intp)PyArray_DIMS(top)[0];
out_dim[1] = (npy_intp)PyArray_DIMS(weights)[1] * numgroups;
out_dim[1] = (npy_intp)PyArray_DIMS(weights)[wdim-3] * numgroups;
out_dim[2] = (npy_intp)((
%(height)
s != -1) ?
%(height)
s : (PyArray_DIMS(top)[2] - 1) * dH + (PyArray_DIMS(weights)[2]-1)*dilH + 1 - 2*padH);
out_dim[2] = (npy_intp)((
%(height)
s != -1) ?
%(height)
s : (PyArray_DIMS(top)[2] - 1) * dH + (PyArray_DIMS(weights)[wdim-2]-1)*dilH + 1 - 2*padH);
out_dim[3] = (npy_intp)((
%(width)
s != -1) ?
%(width)
s : (PyArray_DIMS(top)[3] - 1) * dW + (PyArray_DIMS(weights)[3]-1)*dilW + 1 - 2*padW);
out_dim[3] = (npy_intp)((
%(width)
s != -1) ?
%(width)
s : (PyArray_DIMS(top)[3] - 1) * dW + (PyArray_DIMS(weights)[wdim-1]-1)*dilW + 1 - 2*padW);
if (out_dim[0] < 0 || out_dim[1] < 0 || out_dim[2] <= 0 || out_dim[3] <= 0)
if (unshared) {
{
if (out_dim[0] < 0 || out_dim[1] < 0 || out_dim[2] <= 0 || out_dim[3] <= 0)
PyErr_Format(PyExc_ValueError,
{
"CorrMM backprop wrt. inputs: impossible output shape
\\
n"
PyErr_Format(PyExc_ValueError,
" bottom shape:
%%
ld x
%%
ld x
%%
ld x
%%
ld
\\
n"
"CorrMM backprop wrt. inputs: impossible output shape
\\
n"
" weights shape:
%%
ld x
%%
ld x
%%
ld x
%%
ld
\\
n"
" bottom shape:
%%
ld x
%%
ld x
%%
ld x
%%
ld
\\
n"
" top shape:
%%
ld x
%%
ld x
%%
ld x
%%
ld
\\
n",
" weights shape:
%%
ld x
%%
ld x
%%
ld x
%%
ld x
%%
ld x
%%
ld
\\
n"
(long int)out_dim[0], (long int)out_dim[1], (long int)out_dim[2],
" top shape:
%%
ld x
%%
ld x
%%
ld x
%%
ld
\\
n",
(long int)out_dim[3],
(long int)out_dim[0], (long int)out_dim[1], (long int)out_dim[2],
(long int)PyArray_DIMS(weights)[0], (long int)PyArray_DIMS(weights)[1],
(long int)out_dim[3],
(long int)PyArray_DIMS(weights)[2], (long int)PyArray_DIMS(weights)[3],
(long int)PyArray_DIMS(weights)[0], (long int)PyArray_DIMS(weights)[1],
(long int)PyArray_DIMS(top)[0], (long int)PyArray_DIMS(top)[1],
(long int)PyArray_DIMS(weights)[2], (long int)PyArray_DIMS(weights)[3],
(long int)PyArray_DIMS(top)[2], (long int)PyArray_DIMS(top)[3]);
(long int)PyArray_DIMS(weights)[4], (long int)PyArray_DIMS(weights)[5],
%(fail)
s
(long int)PyArray_DIMS(top)[0], (long int)PyArray_DIMS(top)[1],
(long int)PyArray_DIMS(top)[2], (long int)PyArray_DIMS(top)[3]);
%(fail)
s
}
}
else {
if (out_dim[0] < 0 || out_dim[1] < 0 || out_dim[2] <= 0 || out_dim[3] <= 0)
{
PyErr_Format(PyExc_ValueError,
"CorrMM backprop wrt. inputs: impossible output shape
\\
n"
" bottom shape:
%%
ld x
%%
ld x
%%
ld x
%%
ld
\\
n"
" weights shape:
%%
ld x
%%
ld x
%%
ld x
%%
ld
\\
n"
" top shape:
%%
ld x
%%
ld x
%%
ld x
%%
ld
\\
n",
(long int)out_dim[0], (long int)out_dim[1], (long int)out_dim[2],
(long int)out_dim[3],
(long int)PyArray_DIMS(weights)[0], (long int)PyArray_DIMS(weights)[1],
(long int)PyArray_DIMS(weights)[2], (long int)PyArray_DIMS(weights)[3],
(long int)PyArray_DIMS(top)[0], (long int)PyArray_DIMS(top)[1],
(long int)PyArray_DIMS(top)[2], (long int)PyArray_DIMS(top)[3]);
%(fail)
s
}
}
}
break;
break;
default:
default:
...
@@ -448,13 +520,19 @@ class BaseCorrMM(gof.OpenMPOp):
...
@@ -448,13 +520,19 @@ class BaseCorrMM(gof.OpenMPOp):
// Prepare output array
// Prepare output array
int typenum;
int typenum;
if ( !(*out
int failure;
&& PyArray_NDIM(*out)==4
failure = !(*out
&& PyArray_NDIM(*out)==odim
&& PyArray_IS_C_CONTIGUOUS(*out)
&& PyArray_IS_C_CONTIGUOUS(*out)
&& PyArray_DIMS(*out)[0]==out_dim[0]
&& PyArray_DIMS(*out)[0]==out_dim[0]
&& PyArray_DIMS(*out)[1]==out_dim[1]
&& PyArray_DIMS(*out)[1]==out_dim[1]
&& PyArray_DIMS(*out)[2]==out_dim[2]
&& PyArray_DIMS(*out)[2]==out_dim[2]
&& PyArray_DIMS(*out)[3]==out_dim[3]))
&& PyArray_DIMS(*out)[3]==out_dim[3]);
if (odim == 6){
failure = failure || !(PyArray_DIMS(*out)[4]==out_dim[4]
&& PyArray_DIMS(*out)[5]==out_dim[5]);
}
if ( failure )
{
{
Py_XDECREF(*out);
Py_XDECREF(*out);
if (direction != 1) {
if (direction != 1) {
...
@@ -464,21 +542,29 @@ class BaseCorrMM(gof.OpenMPOp):
...
@@ -464,21 +542,29 @@ class BaseCorrMM(gof.OpenMPOp):
typenum = PyArray_TYPE(bottom);
typenum = PyArray_TYPE(bottom);
}
}
//Change to PyArray_ZEROS which is faster than PyArray_EMPTY.
//Change to PyArray_ZEROS which is faster than PyArray_EMPTY.
*out = (PyArrayObject*)PyArray_ZEROS(
4
,
*out = (PyArrayObject*)PyArray_ZEROS(
odim
,
out_dim,
out_dim,
typenum,
typenum,
0);
0);
if (NULL == *out)
if (NULL == *out)
{
{
PyErr_Format(PyExc_RuntimeError,
if (odim == 4) {
"BaseCorrMM: Failed to allocate output of
%%
lld x
%%
lld x
%%
lld x
%%
lld",
PyErr_Format(PyExc_RuntimeError,
(long long)out_dim[0], (long long)out_dim[1], (long long)out_dim[2], (long long)out_dim[3]);
"BaseCorrMM: Failed to allocate output of
%%
lld x
%%
lld x
%%
lld x
%%
lld",
(long long)out_dim[0], (long long)out_dim[1], (long long)out_dim[2], (long long)out_dim[3]);
}
if (odim == 6) {
PyErr_Format(PyExc_RuntimeError,
"BaseCorrMM: Failed to allocate output of
%%
lld x
%%
lld x
%%
lld x
%%
lld
%%
lld
%%
lld",
(long long)out_dim[0], (long long)out_dim[1], (long long)out_dim[2], (long long)out_dim[3],
(long long)out_dim[4], (long long)out_dim[5]);
}
%(fail)
s
%(fail)
s
}
}
}
}
// Call corrMM code
// Call corrMM code
out2 = corrMM(
%(bottom)
s,
%(weights)
s,
%(top)
s, direction, dH, dW, dilH, dilW, padH, padW, numgroups
);
out2 = corrMM(
%(bottom)
s,
%(weights)
s,
%(top)
s, direction, dH, dW, dilH, dilW, padH, padW, numgroups
, unshared
);
if (out2==NULL){
if (out2==NULL){
%(fail)
s
%(fail)
s
}
}
...
@@ -514,6 +600,13 @@ class CorrMM(BaseCorrMM):
...
@@ -514,6 +600,13 @@ class CorrMM(BaseCorrMM):
The filter dilation operation applied to each input image.
The filter dilation operation applied to each input image.
Should be a tuple with 2 elements.
Should be a tuple with 2 elements.
Set to `(1, 1)` to disable filter dilation.
Set to `(1, 1)` to disable filter dilation.
num_groups
Divides the image, kernel and output tensors into num_groups
separate groups. Each which carry out convolutions separately.
Should be an integer.
unshared
Boolean value. If true, then a different filter will be applied to
each region of the input image.
"""
"""
...
@@ -525,8 +618,12 @@ class CorrMM(BaseCorrMM):
...
@@ -525,8 +618,12 @@ class CorrMM(BaseCorrMM):
img
,
kern
=
self
.
as_common_dtype
(
img
,
kern
)
img
,
kern
=
self
.
as_common_dtype
(
img
,
kern
)
if
img
.
type
.
ndim
!=
4
:
if
img
.
type
.
ndim
!=
4
:
raise
TypeError
(
'img must be 4D tensor'
)
raise
TypeError
(
'img must be 4D tensor'
)
if
kern
.
type
.
ndim
!=
4
:
if
self
.
unshared
is
True
:
raise
TypeError
(
'kern must be 4D tensor'
)
if
kern
.
type
.
ndim
!=
6
:
raise
TypeError
(
'kern must be 6D tensor'
)
else
:
if
kern
.
type
.
ndim
!=
4
:
raise
TypeError
(
'kern must be 4D tensor'
)
broadcastable
=
[
img
.
type
.
broadcastable
[
0
],
kern
.
type
.
broadcastable
[
0
],
broadcastable
=
[
img
.
type
.
broadcastable
[
0
],
kern
.
type
.
broadcastable
[
0
],
False
,
False
]
False
,
False
]
...
@@ -555,13 +652,15 @@ class CorrMM(BaseCorrMM):
...
@@ -555,13 +652,15 @@ class CorrMM(BaseCorrMM):
d_bottom
=
CorrMM_gradInputs
(
self
.
border_mode
,
d_bottom
=
CorrMM_gradInputs
(
self
.
border_mode
,
self
.
subsample
,
self
.
subsample
,
self
.
filter_dilation
,
self
.
filter_dilation
,
self
.
num_groups
)(
weights
,
top
,
self
.
num_groups
,
bottom
.
shape
[
-
2
:])
self
.
unshared
)(
weights
,
top
,
bottom
.
shape
[
-
2
:])
d_weights
=
CorrMM_gradWeights
(
self
.
border_mode
,
d_weights
=
CorrMM_gradWeights
(
self
.
border_mode
,
self
.
subsample
,
self
.
subsample
,
self
.
filter_dilation
,
self
.
filter_dilation
,
self
.
num_groups
)(
bottom
,
top
,
self
.
num_groups
,
weights
.
shape
[
-
2
:])
self
.
unshared
)(
bottom
,
top
,
weights
.
shape
[
-
2
:])
return
d_bottom
,
d_weights
return
d_bottom
,
d_weights
...
@@ -595,8 +694,12 @@ class CorrMM_gradWeights(BaseCorrMM):
...
@@ -595,8 +694,12 @@ class CorrMM_gradWeights(BaseCorrMM):
else
:
else
:
height_width
=
[
as_tensor_variable
(
shape
[
0
])
.
astype
(
'int64'
),
as_tensor_variable
(
shape
[
1
])
.
astype
(
'int64'
)]
height_width
=
[
as_tensor_variable
(
shape
[
0
])
.
astype
(
'int64'
),
as_tensor_variable
(
shape
[
1
])
.
astype
(
'int64'
)]
broadcastable
=
[
topgrad
.
type
.
broadcastable
[
1
],
img
.
type
.
broadcastable
[
1
],
if
self
.
unshared
is
True
:
False
,
False
]
broadcastable
=
[
topgrad
.
type
.
broadcastable
[
0
],
False
,
False
,
img
.
type
.
broadcastable
[
1
],
False
,
False
]
else
:
broadcastable
=
[
topgrad
.
type
.
broadcastable
[
0
],
img
.
type
.
broadcastable
[
1
],
False
,
False
]
dtype
=
img
.
type
.
dtype
dtype
=
img
.
type
.
dtype
return
Apply
(
self
,
[
img
,
topgrad
]
+
height_width
,
return
Apply
(
self
,
[
img
,
topgrad
]
+
height_width
,
[
TensorType
(
dtype
,
broadcastable
)()])
[
TensorType
(
dtype
,
broadcastable
)()])
...
@@ -633,7 +736,10 @@ class CorrMM_gradWeights(BaseCorrMM):
...
@@ -633,7 +736,10 @@ class CorrMM_gradWeights(BaseCorrMM):
kW
=
2
-
imshp
[
1
]
+
(
topshp
[
1
]
-
1
)
*
dW
kW
=
2
-
imshp
[
1
]
+
(
topshp
[
1
]
-
1
)
*
dW
else
:
else
:
kW
=
imshp
[
1
]
+
2
*
padW
-
(
topshp
[
1
]
-
1
)
*
dW
kW
=
imshp
[
1
]
+
2
*
padW
-
(
topshp
[
1
]
-
1
)
*
dW
return
[(
nkern
,
ssize
,
kH
,
kW
)]
if
self
.
unshared
is
True
:
return
[(
nkern
,
topshp
[
0
],
topshp
[
1
],
ssize
,
kH
,
kW
)]
else
:
return
[(
nkern
,
ssize
,
kH
,
kW
)]
def
c_code
(
self
,
node
,
nodename
,
inp
,
out_
,
sub
):
def
c_code
(
self
,
node
,
nodename
,
inp
,
out_
,
sub
):
bottom
,
top
=
inp
[:
2
]
bottom
,
top
=
inp
[:
2
]
...
@@ -649,12 +755,14 @@ class CorrMM_gradWeights(BaseCorrMM):
...
@@ -649,12 +755,14 @@ class CorrMM_gradWeights(BaseCorrMM):
d_bottom
=
CorrMM_gradInputs
(
self
.
border_mode
,
d_bottom
=
CorrMM_gradInputs
(
self
.
border_mode
,
self
.
subsample
,
self
.
subsample
,
self
.
filter_dilation
,
self
.
filter_dilation
,
self
.
num_groups
)(
weights
,
top
,
self
.
num_groups
,
bottom
.
shape
[
-
2
:])
self
.
unshared
)(
weights
,
top
,
bottom
.
shape
[
-
2
:])
d_top
=
CorrMM
(
self
.
border_mode
,
d_top
=
CorrMM
(
self
.
border_mode
,
self
.
subsample
,
self
.
subsample
,
self
.
filter_dilation
,
self
.
filter_dilation
,
self
.
num_groups
)(
bottom
,
weights
)
self
.
num_groups
,
self
.
unshared
)(
bottom
,
weights
)
d_height_width
=
((
theano
.
gradient
.
DisconnectedType
()(),)
*
2
d_height_width
=
((
theano
.
gradient
.
DisconnectedType
()(),)
*
2
if
len
(
inp
)
==
4
else
())
if
len
(
inp
)
==
4
else
())
return
(
d_bottom
,
d_top
)
+
d_height_width
return
(
d_bottom
,
d_top
)
+
d_height_width
...
@@ -684,8 +792,12 @@ class CorrMM_gradInputs(BaseCorrMM):
...
@@ -684,8 +792,12 @@ class CorrMM_gradInputs(BaseCorrMM):
kern
=
as_tensor_variable
(
kern
)
kern
=
as_tensor_variable
(
kern
)
topgrad
=
as_tensor_variable
(
topgrad
)
topgrad
=
as_tensor_variable
(
topgrad
)
kern
,
topgrad
=
self
.
as_common_dtype
(
kern
,
topgrad
)
kern
,
topgrad
=
self
.
as_common_dtype
(
kern
,
topgrad
)
if
kern
.
type
.
ndim
!=
4
:
if
self
.
unshared
is
True
:
raise
TypeError
(
'kern must be 4D tensor'
)
if
kern
.
type
.
ndim
!=
6
:
raise
TypeError
(
'kern must be 6D tensor'
)
else
:
if
kern
.
type
.
ndim
!=
4
:
raise
TypeError
(
'kern must be 4D tensor'
)
if
topgrad
.
type
.
ndim
!=
4
:
if
topgrad
.
type
.
ndim
!=
4
:
raise
TypeError
(
'topgrad must be 4D tensor'
)
raise
TypeError
(
'topgrad must be 4D tensor'
)
if
shape
is
None
:
if
shape
is
None
:
...
@@ -700,7 +812,7 @@ class CorrMM_gradInputs(BaseCorrMM):
...
@@ -700,7 +812,7 @@ class CorrMM_gradInputs(BaseCorrMM):
broadcastable
=
[
topgrad
.
type
.
broadcastable
[
0
],
False
,
broadcastable
=
[
topgrad
.
type
.
broadcastable
[
0
],
False
,
False
,
False
]
False
,
False
]
else
:
else
:
broadcastable
=
[
topgrad
.
type
.
broadcastable
[
0
],
kern
.
type
.
broadcastable
[
1
],
broadcastable
=
[
topgrad
.
type
.
broadcastable
[
0
],
kern
.
type
.
broadcastable
[
-
3
],
False
,
False
]
False
,
False
]
dtype
=
kern
.
type
.
dtype
dtype
=
kern
.
type
.
dtype
return
Apply
(
self
,
[
kern
,
topgrad
]
+
height_width
,
return
Apply
(
self
,
[
kern
,
topgrad
]
+
height_width
,
...
@@ -719,7 +831,7 @@ class CorrMM_gradInputs(BaseCorrMM):
...
@@ -719,7 +831,7 @@ class CorrMM_gradInputs(BaseCorrMM):
dH
,
dW
=
self
.
subsample
dH
,
dW
=
self
.
subsample
kshp
=
input_shape
[
0
]
kshp
=
input_shape
[
0
]
topshp
=
input_shape
[
1
]
topshp
=
input_shape
[
1
]
ssize
,
kshp
=
kshp
[
1
],
list
(
kshp
[
2
:])
ssize
,
kshp
=
kshp
[
-
3
],
list
(
kshp
[
-
2
:])
ssize
=
ssize
*
self
.
num_groups
ssize
=
ssize
*
self
.
num_groups
bsize
,
topshp
=
topshp
[
0
],
list
(
topshp
[
2
:])
bsize
,
topshp
=
topshp
[
0
],
list
(
topshp
[
2
:])
height_width
=
node
.
inputs
[
-
2
:]
height_width
=
node
.
inputs
[
-
2
:]
...
@@ -762,13 +874,15 @@ class CorrMM_gradInputs(BaseCorrMM):
...
@@ -762,13 +874,15 @@ class CorrMM_gradInputs(BaseCorrMM):
d_weights
=
CorrMM_gradWeights
(
self
.
border_mode
,
d_weights
=
CorrMM_gradWeights
(
self
.
border_mode
,
self
.
subsample
,
self
.
subsample
,
self
.
filter_dilation
,
self
.
filter_dilation
,
self
.
num_groups
)(
bottom
,
self
.
num_groups
,
top
,
self
.
unshared
)(
bottom
,
weights
.
shape
[
-
2
:])
top
,
weights
.
shape
[
-
2
:])
d_top
=
CorrMM
(
self
.
border_mode
,
d_top
=
CorrMM
(
self
.
border_mode
,
self
.
subsample
,
self
.
subsample
,
self
.
filter_dilation
,
self
.
filter_dilation
,
self
.
num_groups
)(
bottom
,
weights
)
self
.
num_groups
,
self
.
unshared
)(
bottom
,
weights
)
d_height_width
=
((
theano
.
gradient
.
DisconnectedType
()(),)
*
d_height_width
=
((
theano
.
gradient
.
DisconnectedType
()(),)
*
2
if
len
(
inp
)
==
4
else
())
2
if
len
(
inp
)
==
4
else
())
return
(
d_weights
,
d_top
)
+
d_height_width
return
(
d_weights
,
d_top
)
+
d_height_width
...
...
theano/tensor/nnet/opt.py
浏览文件 @
9592125c
...
@@ -82,12 +82,14 @@ def local_abstractconv_gemm(node):
...
@@ -82,12 +82,14 @@ def local_abstractconv_gemm(node):
# need to flip the kernel if necessary
# need to flip the kernel if necessary
if
node
.
op
.
filter_flip
:
if
node
.
op
.
filter_flip
:
kern
=
kern
[:,
:,
::
-
1
,
::
-
1
]
flip
=
(
slice
(
None
),)
*
(
kern
.
ndim
-
2
)
+
\
(
slice
(
None
,
None
,
-
1
),)
*
2
kern
=
kern
[
flip
]
rval
=
CorrMM
(
border_mode
=
node
.
op
.
border_mode
,
rval
=
CorrMM
(
border_mode
=
node
.
op
.
border_mode
,
subsample
=
node
.
op
.
subsample
,
subsample
=
node
.
op
.
subsample
,
filter_dilation
=
node
.
op
.
filter_dilation
,
filter_dilation
=
node
.
op
.
filter_dilation
,
num_groups
=
node
.
op
.
num_groups
)(
img
,
kern
)
num_groups
=
node
.
op
.
num_groups
,
unshared
=
node
.
op
.
unshared
)(
img
,
kern
)
copy_stack_trace
(
node
.
outputs
[
0
],
rval
)
copy_stack_trace
(
node
.
outputs
[
0
],
rval
)
return
[
rval
]
return
[
rval
]
...
@@ -134,12 +136,15 @@ def local_abstractconv_gradweight_gemm(node):
...
@@ -134,12 +136,15 @@ def local_abstractconv_gradweight_gemm(node):
rval
=
CorrMM_gradWeights
(
border_mode
=
node
.
op
.
border_mode
,
rval
=
CorrMM_gradWeights
(
border_mode
=
node
.
op
.
border_mode
,
subsample
=
node
.
op
.
subsample
,
subsample
=
node
.
op
.
subsample
,
filter_dilation
=
node
.
op
.
filter_dilation
,
filter_dilation
=
node
.
op
.
filter_dilation
,
num_groups
=
node
.
op
.
num_groups
)(
img
,
topgrad
,
shape
)
num_groups
=
node
.
op
.
num_groups
,
unshared
=
node
.
op
.
unshared
)(
img
,
topgrad
,
shape
)
copy_stack_trace
(
node
.
outputs
[
0
],
rval
)
copy_stack_trace
(
node
.
outputs
[
0
],
rval
)
# need to flip the kernel if necessary
# need to flip the kernel if necessary
if
node
.
op
.
filter_flip
:
if
node
.
op
.
filter_flip
:
rval
=
rval
[:,
:,
::
-
1
,
::
-
1
]
flip
=
(
slice
(
None
),)
*
(
rval
.
ndim
-
2
)
+
\
(
slice
(
None
,
None
,
-
1
),)
*
2
rval
=
rval
[
flip
]
rval
=
theano
.
tensor
.
patternbroadcast
(
rval
,
node
.
outputs
[
0
]
.
broadcastable
)
rval
=
theano
.
tensor
.
patternbroadcast
(
rval
,
node
.
outputs
[
0
]
.
broadcastable
)
copy_stack_trace
(
node
.
outputs
[
0
],
rval
)
copy_stack_trace
(
node
.
outputs
[
0
],
rval
)
...
@@ -189,12 +194,14 @@ def local_abstractconv_gradinputs_gemm(node):
...
@@ -189,12 +194,14 @@ def local_abstractconv_gradinputs_gemm(node):
# need to flip the kernel if necessary
# need to flip the kernel if necessary
if
node
.
op
.
filter_flip
:
if
node
.
op
.
filter_flip
:
kern
=
kern
[:,
:,
::
-
1
,
::
-
1
]
flip
=
(
slice
(
None
),)
*
(
kern
.
ndim
-
2
)
+
\
(
slice
(
None
,
None
,
-
1
),)
*
2
kern
=
kern
[
flip
]
rval
=
CorrMM_gradInputs
(
border_mode
=
node
.
op
.
border_mode
,
rval
=
CorrMM_gradInputs
(
border_mode
=
node
.
op
.
border_mode
,
subsample
=
node
.
op
.
subsample
,
subsample
=
node
.
op
.
subsample
,
filter_dilation
=
node
.
op
.
filter_dilation
,
filter_dilation
=
node
.
op
.
filter_dilation
,
num_groups
=
node
.
op
.
num_groups
)(
kern
,
topgrad
,
num_groups
=
node
.
op
.
num_groups
,
shape
)
unshared
=
node
.
op
.
unshared
)(
kern
,
topgrad
,
shape
)
copy_stack_trace
(
node
.
outputs
[
0
],
rval
)
copy_stack_trace
(
node
.
outputs
[
0
],
rval
)
return
[
rval
]
return
[
rval
]
...
@@ -242,7 +249,7 @@ def local_conv2d_cpu(node):
...
@@ -242,7 +249,7 @@ def local_conv2d_cpu(node):
if
not
node
.
op
.
filter_flip
:
if
not
node
.
op
.
filter_flip
:
# Not tested yet
# Not tested yet
return
None
return
None
if
node
.
op
.
num_groups
>
1
:
if
node
.
op
.
num_groups
>
1
or
node
.
op
.
unshared
:
return
None
return
None
rval
=
conv2d
(
img
,
kern
,
rval
=
conv2d
(
img
,
kern
,
...
@@ -270,7 +277,7 @@ def local_conv2d_gradweight_cpu(node):
...
@@ -270,7 +277,7 @@ def local_conv2d_gradweight_cpu(node):
if
not
node
.
op
.
filter_flip
:
if
not
node
.
op
.
filter_flip
:
# Not tested yet
# Not tested yet
return
return
if
node
.
op
.
num_groups
>
1
:
if
node
.
op
.
num_groups
>
1
or
node
.
op
.
unshared
:
return
None
return
None
if
node
.
op
.
border_mode
==
'valid'
and
\
if
node
.
op
.
border_mode
==
'valid'
and
\
...
@@ -370,7 +377,7 @@ def local_conv2d_gradinputs_cpu(node):
...
@@ -370,7 +377,7 @@ def local_conv2d_gradinputs_cpu(node):
if
not
node
.
op
.
filter_flip
:
if
not
node
.
op
.
filter_flip
:
# Not tested yet
# Not tested yet
return
None
return
None
if
node
.
op
.
num_groups
>
1
:
if
node
.
op
.
num_groups
>
1
or
node
.
op
.
unshared
:
return
None
return
None
# Conv 3d implementation, needed when subsample > 2
# Conv 3d implementation, needed when subsample > 2
...
...
theano/tensor/nnet/tests/test_abstract_conv.py
浏览文件 @
9592125c
...
@@ -1744,3 +1744,154 @@ class Separable_conv(unittest.TestCase):
...
@@ -1744,3 +1744,154 @@ class Separable_conv(unittest.TestCase):
fun
=
theano
.
function
([
x_sym
,
dfilter_sym
,
pfilter_sym
],
sep_op
,
mode
=
'FAST_RUN'
)
fun
=
theano
.
function
([
x_sym
,
dfilter_sym
,
pfilter_sym
],
sep_op
,
mode
=
'FAST_RUN'
)
top
=
fun
(
x
[:,
:,
:
3
,
:
3
,
:
3
],
depthwise_filter
,
pointwise_filter
)
top
=
fun
(
x
[:,
:,
:
3
,
:
3
,
:
3
],
depthwise_filter
,
pointwise_filter
)
utt
.
assert_allclose
(
top
,
precomp_output
)
utt
.
assert_allclose
(
top
,
precomp_output
)
class TestUnsharedConv(unittest.TestCase):
    """Compare unshared 2D convolution ops with their shared counterparts.

    Each test builds an op with ``unshared=True`` and a reference op with
    ``unshared=False``, then checks the unshared result one output location
    at a time against the reference.  The kernels used here are 6D; their
    axes 1 and 2 are indexed with the same ``(row, col)`` pair as the last
    two axes of the output / top gradient.

    ``mode`` and the ``*_op`` attributes are class-level so that a subclass
    can swap in another compilation mode and assert that a specific
    implementation appears in the compiled graph.
    """

    # Ops instantiated by the tests.
    conv2d = theano.tensor.nnet.abstract_conv.AbstractConv2d
    conv2d_gradw = theano.tensor.nnet.abstract_conv.AbstractConv2d_gradWeights
    conv2d_gradi = theano.tensor.nnet.abstract_conv.AbstractConv2d_gradInputs
    # Op classes that must be present in the compiled function's graph.
    conv2d_op = theano.tensor.nnet.abstract_conv.AbstractConv2d
    conv2d_gradw_op = theano.tensor.nnet.abstract_conv.AbstractConv2d_gradWeights
    conv2d_gradi_op = theano.tensor.nnet.abstract_conv.AbstractConv2d_gradInputs

    mode = theano.compile.mode.Mode(optimizer='None')

    def setUp(self):
        # Parallel lists: entry k of every list describes test case k.
        self.img_shape = [(2, 2, 4, 4), (3, 2, 4, 2),
                          (3, 3, 5, 3), (3, 4, 4, 4)]
        self.kern_shape = [(2, 2, 2, 2, 3, 3), (2, 4, 2, 2, 4, 2),
                           (3, 2, 1, 1, 3, 3), (4, 3, 3, 2, 4, 2)]
        self.topgrad_shape = [(2, 2, 2, 2), (3, 2, 4, 2),
                              (3, 3, 2, 1), (3, 4, 3, 3)]
        self.border_mode = ['valid', 'full', 'valid', 'full']
        self.subsample = [(1, 1), (2, 2), (2, 1), (3, 2)]
        self.filter_dilation = (1, 1)
        self.num_groups = [1, 1, 3, 2]

        # Gradient verification is enabled for every case.  If speed is a
        # concern the flags could be sampled instead, e.g.
        # np.random.choice([True, False], 4, p=[0.5, 0.5]).
        self.verify_flags = [True] * 4

        self.ref_mode = 'FAST_RUN'
        if theano.config.cxx == "":
            raise SkipTest("CorrMM needs cxx")

    def test_fwd(self):
        # Forward pass: at every output location (row, col) the unshared
        # result must equal what the shared op yields there when it is
        # given the kernel slice kern[:, row, col, ...].
        floatX = theano.config.floatX
        kern6d_type = theano.tensor.TensorType(floatX, (False,) * 6)
        img_var = theano.tensor.tensor4('img')
        kern_var = kern6d_type('kern')
        ref_kern_var = theano.tensor.tensor4('ref_kern')

        cases = zip(self.img_shape, self.kern_shape, self.border_mode,
                    self.subsample, self.num_groups, self.verify_flags)
        for imshp, kshp, border, stride, ngroups, check_grad in cases:
            img = np.random.random(imshp).astype(floatX)
            kern = np.random.random(kshp).astype(floatX)

            # Arguments common to the unshared op and its reference.
            op_kwargs = dict(border_mode=border,
                             subsample=stride,
                             filter_dilation=self.filter_dilation,
                             num_groups=ngroups)

            unshared_conv_op = self.conv2d(unshared=True, **op_kwargs)
            unshared_fn = theano.function(
                [img_var, kern_var],
                unshared_conv_op(img_var, kern_var),
                mode=self.mode)
            graph_nodes = unshared_fn.maker.fgraph.toposort()
            assert any(isinstance(node.op, self.conv2d_op)
                       for node in graph_nodes)
            unshared_out = unshared_fn(img, kern)

            # Shape of a single location's kernel: kernel axes 1, 2 dropped.
            shared_kshp = kshp[:1] + kshp[3:]

            ref_conv_op = self.conv2d(unshared=False, **op_kwargs)
            ref_fn = theano.function(
                [img_var, ref_kern_var],
                ref_conv_op(img_var, ref_kern_var),
                mode=self.mode)

            for row, col in np.ndindex(kshp[1], kshp[2]):
                loc_kern = kern[:, row, col, ...].reshape(shared_kshp)
                ref_out = ref_fn(img, loc_kern)
                utt.assert_allclose(ref_out[:, :, row, col],
                                    unshared_out[:, :, row, col])

            if check_grad:
                utt.verify_grad(unshared_conv_op, [img, kern],
                                mode=self.mode, eps=1)

    def test_gradweight(self):
        # Weight gradient: slice [:, row, col, ...] of the unshared result
        # must equal the shared weight gradient computed from a top
        # gradient that is zero everywhere except at (row, col).
        floatX = theano.config.floatX
        img_var = theano.tensor.tensor4('img')
        top_var = theano.tensor.tensor4('top')

        cases = zip(self.img_shape, self.kern_shape, self.topgrad_shape,
                    self.border_mode, self.subsample, self.num_groups,
                    self.verify_flags)
        for imshp, kshp, topshp, border, stride, ngroups, check_grad in cases:
            img = np.random.random(imshp).astype(floatX)
            top = np.random.random(topshp).astype(floatX)

            # Arguments common to the unshared op and its reference.
            op_kwargs = dict(border_mode=border,
                             subsample=stride,
                             filter_dilation=self.filter_dilation,
                             num_groups=ngroups)

            unshared_conv_op = self.conv2d_gradw(unshared=True, **op_kwargs)
            unshared_sym = unshared_conv_op(
                img_var, top_var, tensor.as_tensor_variable(kshp[-2:]))
            unshared_fn = theano.function([img_var, top_var], unshared_sym,
                                          mode=self.mode)
            graph_nodes = unshared_fn.maker.fgraph.toposort()
            assert any(isinstance(node.op, self.conv2d_gradw_op)
                       for node in graph_nodes)
            unshared_out = unshared_fn(img, top)

            # Shape of a single location's kernel: kernel axes 1, 2 dropped.
            shared_kshp = kshp[:1] + kshp[3:]

            ref_conv_op = self.conv2d_gradw(unshared=False, **op_kwargs)
            ref_sym = ref_conv_op(
                img_var, top_var,
                tensor.as_tensor_variable(shared_kshp[-2:]))
            ref_fn = theano.function([img_var, top_var], ref_sym,
                                     mode=self.mode)

            for row, col in np.ndindex(topshp[2], topshp[3]):
                # Keep only the top gradient at the current location.
                top_at_loc = np.zeros_like(top)
                top_at_loc[:, :, row, col] = top[:, :, row, col]
                ref_out = ref_fn(img, top_at_loc)
                utt.assert_allclose(unshared_out[:, row, col, ...], ref_out)

            if check_grad:
                # verify_grad only supplies the numeric inputs, so the
                # kernel height/width argument is bound here.
                def conv_gradweight(inputs_val, output_val):
                    return unshared_conv_op(
                        inputs_val, output_val,
                        tensor.as_tensor_variable(kshp[-2:]))

                utt.verify_grad(conv_gradweight, [img, top],
                                mode=self.mode, eps=1)

    def test_gradinput(self):
        # Input gradient: the unshared result must equal the sum, over all
        # output locations, of the shared input gradient computed from the
        # kernel slice of that location and a top gradient that is zero
        # everywhere else.
        floatX = theano.config.floatX
        kern6d_type = theano.tensor.TensorType(floatX, (False,) * 6)
        kern_var = kern6d_type('kern')
        top_var = theano.tensor.tensor4('top')
        ref_kern_var = theano.tensor.tensor4('ref_kern')

        cases = zip(self.img_shape, self.kern_shape, self.topgrad_shape,
                    self.border_mode, self.subsample, self.num_groups,
                    self.verify_flags)
        for imshp, kshp, topshp, border, stride, ngroups, check_grad in cases:
            # Shape of a single location's kernel: kernel axes 1, 2 dropped.
            shared_kshp = kshp[:1] + kshp[3:]

            kern = np.random.random(kshp).astype(floatX)
            top = np.random.random(topshp).astype(floatX)

            # Arguments common to the unshared op and its reference.
            op_kwargs = dict(border_mode=border,
                             subsample=stride,
                             filter_dilation=self.filter_dilation,
                             num_groups=ngroups)

            unshared_conv_op = self.conv2d_gradi(unshared=True, **op_kwargs)
            unshared_sym = unshared_conv_op(
                kern_var, top_var, tensor.as_tensor_variable(imshp[-2:]))
            unshared_fn = theano.function([kern_var, top_var], unshared_sym,
                                          mode=self.mode)
            graph_nodes = unshared_fn.maker.fgraph.toposort()
            assert any(isinstance(node.op, self.conv2d_gradi_op)
                       for node in graph_nodes)
            unshared_out = unshared_fn(kern, top)

            ref_conv_op = self.conv2d_gradi(unshared=False, **op_kwargs)
            ref_sym = ref_conv_op(
                ref_kern_var, top_var,
                tensor.as_tensor_variable(imshp[-2:]))
            ref_fn = theano.function([ref_kern_var, top_var], ref_sym,
                                     mode=self.mode)

            # Accumulate the per-location reference gradients.
            ref_total = np.zeros(imshp)
            for row, col in np.ndindex(topshp[2], topshp[3]):
                loc_kern = kern[:, row, col, ...].reshape(shared_kshp)
                top_at_loc = np.zeros_like(top)
                top_at_loc[:, :, row, col] = top[:, :, row, col]
                ref_total += ref_fn(loc_kern, top_at_loc)

            utt.assert_allclose(ref_total, unshared_out)

            if check_grad:
                # verify_grad only supplies the numeric inputs, so the
                # image height/width argument is bound here.
                def conv_gradinputs(filters_val, output_val):
                    return unshared_conv_op(
                        filters_val, output_val,
                        tensor.as_tensor_variable(imshp[-2:]))

                utt.verify_grad(conv_gradinputs, [kern, top],
                                mode=self.mode, eps=1)
theano/tensor/nnet/tests/test_corr.py
浏览文件 @
9592125c
...
@@ -10,7 +10,7 @@ import theano
...
@@ -10,7 +10,7 @@ import theano
import
theano.tensor
as
T
import
theano.tensor
as
T
from
theano.tests
import
unittest_tools
as
utt
from
theano.tests
import
unittest_tools
as
utt
from
theano.tensor.nnet
import
corr
,
conv
from
theano.tensor.nnet
import
corr
,
conv
from
theano.tensor.nnet.tests.test_abstract_conv
import
Grouped_conv_noOptim
from
theano.tensor.nnet.tests.test_abstract_conv
import
Grouped_conv_noOptim
,
TestUnsharedConv
class
TestCorr2D
(
utt
.
InferShapeTester
):
class
TestCorr2D
(
utt
.
InferShapeTester
):
...
@@ -452,6 +452,16 @@ class TestGroupCorr2d(Grouped_conv_noOptim):
...
@@ -452,6 +452,16 @@ class TestGroupCorr2d(Grouped_conv_noOptim):
utt
.
assert_allclose
(
gconv_output
,
conv_output
)
utt
.
assert_allclose
(
gconv_output
,
conv_output
)
class TestUnsharedCorr2d(TestUnsharedConv):
    """Run the unshared-convolution tests against the CorrMM ops.

    The inherited tests assert that the ``*_op`` classes set here appear in
    the compiled graph.
    """

    # Under FAST_COMPILE, compile with FAST_RUN (minus 'gpuarray') instead;
    # otherwise pass mode=None to theano.function.
    mode = (theano.compile.get_mode("FAST_RUN").excluding('gpuarray')
            if theano.config.mode == "FAST_COMPILE" else None)

    conv2d_op = corr.CorrMM
    conv2d_gradw_op = corr.CorrMM_gradWeights
    conv2d_gradi_op = corr.CorrMM_gradInputs
if
__name__
==
'__main__'
:
if
__name__
==
'__main__'
:
t
=
TestCorr2D
(
'setUp'
)
t
=
TestCorr2D
(
'setUp'
)
...
...
编写
预览
Markdown
格式
0%
重试
或
添加新文件
添加附件
取消
您添加了
0
人
到此讨论。请谨慎行事。
请先完成此评论的编辑!
取消
请
注册
或者
登录
后发表评论