Skip to content
项目
群组
代码片段
帮助
当前项目
正在载入...
登录 / 注册
切换导航面板
P
pytensor
项目
项目
详情
活动
周期分析
仓库
仓库
文件
提交
分支
标签
贡献者
图表
比较
统计图
议题
0
议题
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
CI / CD
CI / CD
流水线
作业
日程
统计图
Wiki
Wiki
代码片段
代码片段
成员
成员
折叠边栏
关闭边栏
活动
图像
聊天
创建新问题
作业
提交
问题看板
Open sidebar
testgroup
pytensor
Commits
ea62004b
提交
ea62004b
authored
9月 15, 2017
作者:
Frédéric Bastien
提交者:
GitHub
9月 15, 2017
浏览文件
操作
浏览文件
下载
差异文件
Merge pull request #6331 from vikramnitin9/dilated_causal
Dilated causal convolution
上级
c3c477df
321e2fc6
隐藏空白字符变更
内嵌
并排
正在显示
10 个修改的文件
包含
725 行增加
和
273 行删除
+725
-273
blas.py
theano/gpuarray/blas.py
+65
-42
corr_gemm.c
theano/gpuarray/c_code/corr_gemm.c
+33
-30
dnn.py
theano/gpuarray/dnn.py
+16
-0
test_gemmcorr.py
theano/gpuarray/tests/test_gemmcorr.py
+12
-0
__init__.py
theano/tensor/nnet/__init__.py
+6
-3
abstract_conv.py
theano/tensor/nnet/abstract_conv.py
+295
-105
corr_gemm.c
theano/tensor/nnet/c_code/corr_gemm.c
+21
-18
corr.py
theano/tensor/nnet/corr.py
+105
-75
test_abstract_conv.py
theano/tensor/nnet/tests/test_abstract_conv.py
+154
-0
test_corr.py
theano/tensor/nnet/tests/test_corr.py
+18
-0
没有找到文件。
theano/gpuarray/blas.py
浏览文件 @
ea62004b
...
@@ -449,8 +449,8 @@ class BaseGpuCorrMM(CGpuKernelBase):
...
@@ -449,8 +449,8 @@ class BaseGpuCorrMM(CGpuKernelBase):
Parameters
Parameters
----------
----------
border_mode : {'valid', 'full', 'half'}
border_mode : {'valid', 'full', 'half'}
Additionally, the padding size could be directly specified by an integer
Additionally, the padding size could be directly specified by an integer
,
or a pair of integers
a pair of integers, or two pairs of integers.
subsample
subsample
Perform subsampling of the output (default: (1, 1)).
Perform subsampling of the output (default: (1, 1)).
filter_dilation
filter_dilation
...
@@ -468,16 +468,33 @@ class BaseGpuCorrMM(CGpuKernelBase):
...
@@ -468,16 +468,33 @@ class BaseGpuCorrMM(CGpuKernelBase):
def
__init__
(
self
,
border_mode
=
"valid"
,
subsample
=
(
1
,
1
),
def
__init__
(
self
,
border_mode
=
"valid"
,
subsample
=
(
1
,
1
),
filter_dilation
=
(
1
,
1
),
num_groups
=
1
,
unshared
=
False
):
filter_dilation
=
(
1
,
1
),
num_groups
=
1
,
unshared
=
False
):
if
isinstance
(
border_mode
,
integer_types
):
if
isinstance
(
border_mode
,
integer_types
):
border_mode
=
(
border_mode
,
border_mode
)
if
border_mode
<
0
:
if
isinstance
(
border_mode
,
tuple
):
raise
ValueError
(
pad_h
,
pad_w
=
map
(
int
,
border_mode
)
'invalid border_mode {}, which must be a '
border_mode
=
(
pad_h
,
pad_w
)
'non-negative integer'
.
format
(
border_mode
))
if
not
((
isinstance
(
border_mode
,
tuple
)
and
min
(
border_mode
)
>=
0
)
or
border_mode
=
((
border_mode
,
border_mode
),)
*
2
border_mode
in
(
'valid'
,
'full'
,
'half'
)):
elif
isinstance
(
border_mode
,
tuple
):
if
len
(
border_mode
)
!=
2
:
raise
ValueError
(
'invalid border_mode {} which must be a '
'tuple of length 2'
.
format
(
border_mode
))
border
=
()
for
mode
in
border_mode
:
if
isinstance
(
mode
,
tuple
)
and
len
(
mode
)
==
2
and
\
min
(
mode
)
>=
0
:
border
+=
((
int
(
mode
[
0
]),
int
(
mode
[
1
])),)
elif
mode
>=
0
:
border
+=
((
int
(
mode
),
int
(
mode
)),)
else
:
raise
ValueError
(
'invalid border mode {}. The tuple can only contain '
'integers or tuples of length 2'
.
format
(
border_mode
))
border_mode
=
border
elif
border_mode
not
in
(
'valid'
,
'full'
,
'half'
):
raise
ValueError
(
raise
ValueError
(
'invalid border_mode {}, which must be either '
'invalid border_mode {}, which must be either '
'"valid", "full", "half", an integer or a
pair of
'
'"valid", "full", "half", an integer or a
tuple
'
'
integers
'
.
format
(
border_mode
))
'
of length 2
'
.
format
(
border_mode
))
self
.
border_mode
=
border_mode
self
.
border_mode
=
border_mode
if
len
(
subsample
)
!=
2
:
if
len
(
subsample
)
!=
2
:
raise
ValueError
(
"subsample must have two elements"
)
raise
ValueError
(
"subsample must have two elements"
)
...
@@ -495,7 +512,7 @@ class BaseGpuCorrMM(CGpuKernelBase):
...
@@ -495,7 +512,7 @@ class BaseGpuCorrMM(CGpuKernelBase):
def
pad
(
self
):
def
pad
(
self
):
if
self
.
border_mode
!=
'valid'
:
if
self
.
border_mode
!=
'valid'
:
return
self
.
border_mode
return
self
.
border_mode
return
(
0
,
0
)
return
(
(
0
,
0
),)
*
2
def
__str__
(
self
):
def
__str__
(
self
):
return
'
%
s{
%
s,
%
s,
%
s,
%
s,
%
s}'
%
(
return
'
%
s{
%
s,
%
s,
%
s,
%
s,
%
s}'
%
(
...
@@ -537,7 +554,7 @@ class BaseGpuCorrMM(CGpuKernelBase):
...
@@ -537,7 +554,7 @@ class BaseGpuCorrMM(CGpuKernelBase):
def
c_code_cache_version
(
self
):
def
c_code_cache_version
(
self
):
# Raise this whenever modifying the C code (including the file).
# Raise this whenever modifying the C code (including the file).
return
(
1
1
,)
return
(
1
2
,)
def
c_code_helper
(
self
,
bottom
,
weights
,
top
,
direction
,
sub
,
height
=
None
,
width
=
None
):
def
c_code_helper
(
self
,
bottom
,
weights
,
top
,
direction
,
sub
,
height
=
None
,
width
=
None
):
"""
"""
...
@@ -587,14 +604,14 @@ class BaseGpuCorrMM(CGpuKernelBase):
...
@@ -587,14 +604,14 @@ class BaseGpuCorrMM(CGpuKernelBase):
numgroups
=
self
.
num_groups
numgroups
=
self
.
num_groups
unshared
=
int
(
self
.
unshared
)
unshared
=
int
(
self
.
unshared
)
if
self
.
border_mode
==
"half"
:
if
self
.
border_mode
==
"half"
:
padH
=
padW
=
-
1
padH
_l
=
padH_r
=
padW_l
=
padW_r
=
-
1
elif
self
.
border_mode
==
"full"
:
elif
self
.
border_mode
==
"full"
:
padH
=
padW
=
-
2
padH
_l
=
padH_r
=
padW_l
=
padW_r
=
-
2
elif
isinstance
(
self
.
border_mode
,
tuple
):
elif
isinstance
(
self
.
border_mode
,
tuple
):
padH
,
padW
=
self
.
border_mode
(
padH_l
,
padH_r
),
(
padW_l
,
padW_r
)
=
self
.
border_mode
else
:
else
:
assert
self
.
border_mode
==
"valid"
assert
self
.
border_mode
==
"valid"
padH
=
padW
=
0
padH
_l
=
padH_r
=
padW_l
=
padW_r
=
0
if
direction
==
"forward"
:
if
direction
==
"forward"
:
direction
=
0
direction
=
0
out
=
top
out
=
top
...
@@ -613,13 +630,13 @@ class BaseGpuCorrMM(CGpuKernelBase):
...
@@ -613,13 +630,13 @@ class BaseGpuCorrMM(CGpuKernelBase):
if
height
:
if
height
:
height
=
'(*(npy_int*)(PyArray_DATA(
%
s)))'
%
height
height
=
'(*(npy_int*)(PyArray_DATA(
%
s)))'
%
height
else
:
else
:
if
((
direction
!=
0
)
and
(
dH
!=
1
))
or
((
direction
==
1
)
and
(
padH
==
-
1
)):
if
((
direction
!=
0
)
and
(
dH
!=
1
))
or
((
direction
==
1
)
and
(
padH
_l
==
-
1
or
padH_r
==
-
1
)):
raise
ValueError
(
"height must be given for backprop with vertical sampling or pad='half'"
)
raise
ValueError
(
"height must be given for backprop with vertical sampling or pad='half'"
)
height
=
'-1'
height
=
'-1'
if
width
:
if
width
:
width
=
'(*(npy_int*)(PyArray_DATA(
%
s)))'
%
width
width
=
'(*(npy_int*)(PyArray_DATA(
%
s)))'
%
width
else
:
else
:
if
((
direction
!=
0
)
and
(
dW
!=
1
))
or
((
direction
==
1
)
and
(
padW
==
-
1
)):
if
((
direction
!=
0
)
and
(
dW
!=
1
))
or
((
direction
==
1
)
and
(
padW
_l
==
-
1
or
padW_r
==
-
1
)):
raise
ValueError
(
"width must be given for backprop with horizontal sampling or pad='half'"
)
raise
ValueError
(
"width must be given for backprop with horizontal sampling or pad='half'"
)
width
=
'-1'
width
=
'-1'
...
@@ -635,8 +652,10 @@ class BaseGpuCorrMM(CGpuKernelBase):
...
@@ -635,8 +652,10 @@ class BaseGpuCorrMM(CGpuKernelBase):
size_t dW =
%(dW)
s;
size_t dW =
%(dW)
s;
size_t dilH =
%(dilH)
s;
size_t dilH =
%(dilH)
s;
size_t dilW =
%(dilW)
s;
size_t dilW =
%(dilW)
s;
int padH =
%(padH)
s;
int padH_l =
%(padH_l)
s;
int padW =
%(padW)
s;
int padH_r =
%(padH_r)
s;
int padW_l =
%(padW_l)
s;
int padW_r =
%(padW_r)
s;
int numgroups =
%(numgroups)
s;
int numgroups =
%(numgroups)
s;
int unshared =
%(unshared)
s;
int unshared =
%(unshared)
s;
...
@@ -662,22 +681,22 @@ class BaseGpuCorrMM(CGpuKernelBase):
...
@@ -662,22 +681,22 @@ class BaseGpuCorrMM(CGpuKernelBase):
// kernel height is specified (perhaps vertical subsampling or half padding)
// kernel height is specified (perhaps vertical subsampling or half padding)
kH =
%(height)
s;
kH =
%(height)
s;
}
}
else if (padH == -2) {
else if (padH
_l == -2 || padH_r
== -2) {
// vertical full padding, we can infer the kernel height
// vertical full padding, we can infer the kernel height
kH = (2 - PyGpuArray_DIMS(bottom)[2] + (PyGpuArray_DIMS(top)[2] - 1) * dH - 1) / dilH + 1;
kH = (2 - PyGpuArray_DIMS(bottom)[2] + (PyGpuArray_DIMS(top)[2] - 1) * dH - 1) / dilH + 1;
}
}
else {
else {
// explicit padding, we can infer the kernel height
// explicit padding, we can infer the kernel height
kH = (PyGpuArray_DIMS(bottom)[2] +
2*padH
- (PyGpuArray_DIMS(top)[2] - 1) * dH - 1) / dilH + 1 ;
kH = (PyGpuArray_DIMS(bottom)[2] +
padH_l + padH_r
- (PyGpuArray_DIMS(top)[2] - 1) * dH - 1) / dilH + 1 ;
}
}
if (
%(width)
s != -1) {
if (
%(width)
s != -1) {
kW =
%(width)
s;
kW =
%(width)
s;
}
}
else if (padW == -2) {
else if (padW
_l == -2 || padW_r
== -2) {
kW = (2 - PyGpuArray_DIMS(bottom)[3] + (PyGpuArray_DIMS(top)[3] - 1) * dW - 1) / dilW + 1;
kW = (2 - PyGpuArray_DIMS(bottom)[3] + (PyGpuArray_DIMS(top)[3] - 1) * dW - 1) / dilW + 1;
}
}
else {
else {
kW = (PyGpuArray_DIMS(bottom)[3] +
2*padW
- (PyGpuArray_DIMS(top)[3] - 1) * dW - 1) / dilW + 1;
kW = (PyGpuArray_DIMS(bottom)[3] +
padW_l + padW_r
- (PyGpuArray_DIMS(top)[3] - 1) * dW - 1) / dilW + 1;
}
}
}
}
...
@@ -686,23 +705,23 @@ class BaseGpuCorrMM(CGpuKernelBase):
...
@@ -686,23 +705,23 @@ class BaseGpuCorrMM(CGpuKernelBase):
dil_kW = (kW - 1) * dilW + 1;
dil_kW = (kW - 1) * dilW + 1;
// Auto-padding if requested
// Auto-padding if requested
if (padH == -1) { // vertical half padding
if (padH
_l == -1 || padH_r
== -1) { // vertical half padding
padH = dil_kH / 2;
padH
_l = padH_r
= dil_kH / 2;
}
}
else if (padH == -2) { // vertical full padding
else if (padH
_l == -2 || padH_r
== -2) { // vertical full padding
padH = dil_kH - 1;
padH
_l = padH_r
= dil_kH - 1;
}
}
else if (padH < 0) {
else if (padH
_l < 0 || padH_r
< 0) {
PyErr_SetString(PyExc_ValueError, "BaseGpuCorrMM: padH must be >= -2");
PyErr_SetString(PyExc_ValueError, "BaseGpuCorrMM: padH must be >= -2");
%(fail)
s
%(fail)
s
}
}
if (padW == -1) { // horizontal half padding
if (padW
_l == -1 || padW_r
== -1) { // horizontal half padding
padW = dil_kW / 2;
padW
_l = padW_r
= dil_kW / 2;
}
}
else if (padW == -2) { // horizontal full padding
else if (padW
_l == -2 || padW_r
== -2) { // horizontal full padding
padW = dil_kW - 1;
padW
_l = padW_r
= dil_kW - 1;
}
}
else if (padW < 0) {
else if (padW
_l < 0 || padW_r
< 0) {
PyErr_SetString(PyExc_ValueError, "BaseGpuCorrMM: padW must be >= -2");
PyErr_SetString(PyExc_ValueError, "BaseGpuCorrMM: padW must be >= -2");
%(fail)
s
%(fail)
s
}
}
...
@@ -718,11 +737,11 @@ class BaseGpuCorrMM(CGpuKernelBase):
...
@@ -718,11 +737,11 @@ class BaseGpuCorrMM(CGpuKernelBase):
switch(direction) {
switch(direction) {
case 0: // forward pass
case 0: // forward pass
// output is top: (batchsize, num_filters, height, width)
// output is top: (batchsize, num_filters, height, width)
// height and width: top = (bottom +
2*pad
- ((weight-1)*dil + 1)) / sample + 1
// height and width: top = (bottom +
pad_l + pad_r
- ((weight-1)*dil + 1)) / sample + 1
out_dim[0] = PyGpuArray_DIMS(bottom)[0];
out_dim[0] = PyGpuArray_DIMS(bottom)[0];
out_dim[1] = PyGpuArray_DIMS(weights)[0];
out_dim[1] = PyGpuArray_DIMS(weights)[0];
out_dim[2] = (PyGpuArray_DIMS(bottom)[2] +
2*padH
- ((PyGpuArray_DIMS(weights)[wdim-2]-1)*dilH + 1)) / dH + 1;
out_dim[2] = (PyGpuArray_DIMS(bottom)[2] +
padH_l + padH_r
- ((PyGpuArray_DIMS(weights)[wdim-2]-1)*dilH + 1)) / dH + 1;
out_dim[3] = (PyGpuArray_DIMS(bottom)[3] +
2*padW
- ((PyGpuArray_DIMS(weights)[wdim-1]-1)*dilW + 1)) / dW + 1;
out_dim[3] = (PyGpuArray_DIMS(bottom)[3] +
padW_l + padW_r
- ((PyGpuArray_DIMS(weights)[wdim-1]-1)*dilW + 1)) / dW + 1;
out_typecode = bottom->ga.typecode;
out_typecode = bottom->ga.typecode;
out_context = bottom->context;
out_context = bottom->context;
if (out_dim[0] < 0 || out_dim[1] < 0 || out_dim[2] <= 0 || out_dim[3] <= 0)
if (out_dim[0] < 0 || out_dim[1] < 0 || out_dim[2] <= 0 || out_dim[3] <= 0)
...
@@ -810,8 +829,8 @@ class BaseGpuCorrMM(CGpuKernelBase):
...
@@ -810,8 +829,8 @@ class BaseGpuCorrMM(CGpuKernelBase):
// height and width: bottom = (top - 1) * sample + (weights-1)*dil + 1 - 2*pad
// height and width: bottom = (top - 1) * sample + (weights-1)*dil + 1 - 2*pad
out_dim[0] = PyGpuArray_DIMS(top)[0];
out_dim[0] = PyGpuArray_DIMS(top)[0];
out_dim[1] = PyGpuArray_DIMS(weights)[wdim-3] * numgroups;
out_dim[1] = PyGpuArray_DIMS(weights)[wdim-3] * numgroups;
out_dim[2] = (
%(height)
s != -1) ?
%(height)
s : (PyGpuArray_DIMS(top)[2] - 1) * dH + (PyGpuArray_DIMS(weights)[wdim-2]-1)*dilH + 1 -
2*padH
;
out_dim[2] = (
%(height)
s != -1) ?
%(height)
s : (PyGpuArray_DIMS(top)[2] - 1) * dH + (PyGpuArray_DIMS(weights)[wdim-2]-1)*dilH + 1 -
padH_l - padH_r
;
out_dim[3] = (
%(width)
s != -1) ?
%(width)
s : (PyGpuArray_DIMS(top)[3] - 1) * dW + (PyGpuArray_DIMS(weights)[wdim-1]-1)*dilW + 1 -
2*padW
;
out_dim[3] = (
%(width)
s != -1) ?
%(width)
s : (PyGpuArray_DIMS(top)[3] - 1) * dW + (PyGpuArray_DIMS(weights)[wdim-1]-1)*dilW + 1 -
padW_l - padW_r
;
out_typecode = top->ga.typecode;
out_typecode = top->ga.typecode;
out_context = top->context;
out_context = top->context;
if (unshared) {
if (unshared) {
...
@@ -884,7 +903,8 @@ class BaseGpuCorrMM(CGpuKernelBase):
...
@@ -884,7 +903,8 @@ class BaseGpuCorrMM(CGpuKernelBase):
}
}
// Call GPU code
// Call GPU code
out2 = corrMM(
%(bottom)
s,
%(weights)
s,
%(top)
s, direction, dH, dW, dilH, dilW, padH, padW, numgroups, unshared);
out2 = corrMM(
%(bottom)
s,
%(weights)
s,
%(top)
s, direction, dH, dW, dilH, dilW,
padH_l, padH_r, padW_l, padW_r, numgroups, unshared);
if (out2==NULL){
if (out2==NULL){
%(fail)
s
%(fail)
s
}
}
...
@@ -907,8 +927,11 @@ class GpuCorrMM(BaseGpuCorrMM):
...
@@ -907,8 +927,11 @@ class GpuCorrMM(BaseGpuCorrMM):
``'valid'`` for ``(0, 0)`` (valid convolution, no padding), ``'full'``
``'valid'`` for ``(0, 0)`` (valid convolution, no padding), ``'full'``
for ``(kernel_rows - 1, kernel_columns - 1)`` (full convolution),
for ``(kernel_rows - 1, kernel_columns - 1)`` (full convolution),
``'half'`` for ``(kernel_rows // 2, kernel_columns // 2)`` (same
``'half'`` for ``(kernel_rows // 2, kernel_columns // 2)`` (same
convolution for odd-sized kernels). Note that the two widths are each
convolution for odd-sized kernels).
applied twice, once per side (left and right, top and bottom).
If it is a tuple containing 2 pairs of integers, then these specify
the padding to be applied on each side ((left, right), (top, bottom)).
Otherwise, each width is applied twice, once per side (left and right,
top and bottom).
subsample
subsample
The subsample operation applied to each output image.
The subsample operation applied to each output image.
Should be a tuple with 2 elements.
Should be a tuple with 2 elements.
...
...
theano/gpuarray/c_code/corr_gemm.c
浏览文件 @
ea62004b
...
@@ -43,7 +43,7 @@ KERNEL void dilated_im2col_kernel(const ga_size n,
...
@@ -43,7 +43,7 @@ KERNEL void dilated_im2col_kernel(const ga_size n,
const
ga_size
height
,
const
ga_size
width
,
const
ga_size
height
,
const
ga_size
width
,
const
ga_size
kernel_h
,
const
ga_size
kernel_w
,
const
ga_size
kernel_h
,
const
ga_size
kernel_w
,
const
ga_size
dilation_h
,
const
ga_size
dilation_w
,
const
ga_size
dilation_h
,
const
ga_size
dilation_w
,
const
ga_size
pad_h
,
const
ga_size
pad_w
,
const
ga_size
pad_h
l
,
const
ga_size
pad_wl
,
const
ga_size
stride_h
,
const
ga_size
stride_w
,
const
ga_size
stride_h
,
const
ga_size
stride_w
,
const
ga_size
height_col
,
const
ga_size
width_col
,
const
ga_size
height_col
,
const
ga_size
width_col
,
GLOBAL_MEM
DTYPE_INPUT_0
*
data_col
,
GLOBAL_MEM
DTYPE_INPUT_0
*
data_col
,
...
@@ -58,8 +58,8 @@ KERNEL void dilated_im2col_kernel(const ga_size n,
...
@@ -58,8 +58,8 @@ KERNEL void dilated_im2col_kernel(const ga_size n,
const
ga_size
w_col
=
index
%
width_col
;
const
ga_size
w_col
=
index
%
width_col
;
const
ga_size
c_im
=
h_index
/
height_col
;
const
ga_size
c_im
=
h_index
/
height_col
;
const
ga_size
c_col
=
c_im
*
kernel_h
*
kernel_w
;
const
ga_size
c_col
=
c_im
*
kernel_h
*
kernel_w
;
const
ga_size
h_offset
=
h_col
*
stride_h
-
pad_h
;
const
ga_size
h_offset
=
h_col
*
stride_h
-
pad_h
l
;
const
ga_size
w_offset
=
w_col
*
stride_w
-
pad_w
;
const
ga_size
w_offset
=
w_col
*
stride_w
-
pad_w
l
;
GLOBAL_MEM
DTYPE_INPUT_0
*
data_col_ptr
=
data_col
;
GLOBAL_MEM
DTYPE_INPUT_0
*
data_col_ptr
=
data_col
;
data_col_ptr
+=
(
c_col
*
height_col
+
h_col
)
*
width_col
+
w_col
;
data_col_ptr
+=
(
c_col
*
height_col
+
h_col
)
*
width_col
+
w_col
;
GLOBAL_MEM
const
DTYPE_INPUT_0
*
data_im_ptr
=
data_im
+
data_im_offset
;
GLOBAL_MEM
const
DTYPE_INPUT_0
*
data_im_ptr
=
data_im
+
data_im_offset
;
...
@@ -88,7 +88,7 @@ KERNEL void im2col_kernel(const ga_size n,
...
@@ -88,7 +88,7 @@ KERNEL void im2col_kernel(const ga_size n,
// data_im_offset is an offset of elements in the array
// data_im_offset is an offset of elements in the array
const
ga_size
height
,
const
ga_size
width
,
const
ga_size
height
,
const
ga_size
width
,
const
ga_size
kernel_h
,
const
ga_size
kernel_w
,
const
ga_size
kernel_h
,
const
ga_size
kernel_w
,
const
ga_size
pad_h
,
const
ga_size
pad_w
,
const
ga_size
pad_h
l
,
const
ga_size
pad_wl
,
const
ga_size
stride_h
,
const
ga_size
stride_w
,
const
ga_size
stride_h
,
const
ga_size
stride_w
,
const
ga_size
height_col
,
const
ga_size
width_col
,
const
ga_size
height_col
,
const
ga_size
width_col
,
GLOBAL_MEM
DTYPE_INPUT_0
*
data_col
,
GLOBAL_MEM
DTYPE_INPUT_0
*
data_col
,
...
@@ -103,8 +103,8 @@ KERNEL void im2col_kernel(const ga_size n,
...
@@ -103,8 +103,8 @@ KERNEL void im2col_kernel(const ga_size n,
const
ga_size
w_col
=
index
%
width_col
;
const
ga_size
w_col
=
index
%
width_col
;
const
ga_size
c_im
=
h_index
/
height_col
;
const
ga_size
c_im
=
h_index
/
height_col
;
const
ga_size
c_col
=
c_im
*
kernel_h
*
kernel_w
;
const
ga_size
c_col
=
c_im
*
kernel_h
*
kernel_w
;
const
ga_size
h_offset
=
h_col
*
stride_h
-
pad_h
;
const
ga_size
h_offset
=
h_col
*
stride_h
-
pad_h
l
;
const
ga_size
w_offset
=
w_col
*
stride_w
-
pad_w
;
const
ga_size
w_offset
=
w_col
*
stride_w
-
pad_w
l
;
GLOBAL_MEM
DTYPE_INPUT_0
*
data_col_ptr
=
data_col
;
GLOBAL_MEM
DTYPE_INPUT_0
*
data_col_ptr
=
data_col
;
data_col_ptr
+=
(
c_col
*
height_col
+
h_col
)
*
width_col
+
w_col
;
data_col_ptr
+=
(
c_col
*
height_col
+
h_col
)
*
width_col
+
w_col
;
GLOBAL_MEM
const
DTYPE_INPUT_0
*
data_im_ptr
=
data_im
+
data_im_offset
;
GLOBAL_MEM
const
DTYPE_INPUT_0
*
data_im_ptr
=
data_im
+
data_im_offset
;
...
@@ -131,7 +131,7 @@ KERNEL void dilated_col2im_kernel(const ga_size n,
...
@@ -131,7 +131,7 @@ KERNEL void dilated_col2im_kernel(const ga_size n,
const
ga_size
height
,
const
ga_size
width
,
const
ga_size
channels
,
const
ga_size
height
,
const
ga_size
width
,
const
ga_size
channels
,
const
ga_size
kernel_h
,
const
ga_size
kernel_w
,
const
ga_size
kernel_h
,
const
ga_size
kernel_w
,
const
ga_size
dilation_h
,
const
ga_size
dilation_w
,
const
ga_size
dilation_h
,
const
ga_size
dilation_w
,
const
ga_size
pad_h
,
const
ga_size
pad_w
,
const
ga_size
pad_h
l
,
const
ga_size
pad_wl
,
const
ga_size
stride_h
,
const
ga_size
stride_w
,
const
ga_size
stride_h
,
const
ga_size
stride_w
,
const
ga_size
height_col
,
const
ga_size
width_col
,
const
ga_size
height_col
,
const
ga_size
width_col
,
GLOBAL_MEM
DTYPE_INPUT_0
*
data_im
,
GLOBAL_MEM
DTYPE_INPUT_0
*
data_im
,
...
@@ -145,8 +145,8 @@ KERNEL void dilated_col2im_kernel(const ga_size n,
...
@@ -145,8 +145,8 @@ KERNEL void dilated_col2im_kernel(const ga_size n,
for
(
ga_size
index
=
GID_0
*
LDIM_0
+
LID_0
;
for
(
ga_size
index
=
GID_0
*
LDIM_0
+
LID_0
;
index
<
(
n
);
index
+=
LDIM_0
*
GDIM_0
)
{
index
<
(
n
);
index
+=
LDIM_0
*
GDIM_0
)
{
DTYPE_INPUT_0
val
=
0
;
DTYPE_INPUT_0
val
=
0
;
const
ga_size
w_im
=
index
%
width
+
pad_w
;
const
ga_size
w_im
=
index
%
width
+
pad_w
l
;
const
ga_size
h_im
=
(
index
/
width
)
%
height
+
pad_h
;
const
ga_size
h_im
=
(
index
/
width
)
%
height
+
pad_h
l
;
const
ga_size
c_im
=
index
/
(
width
*
height
);
const
ga_size
c_im
=
index
/
(
width
*
height
);
ga_size
kernel_extent_w
=
(
kernel_w
-
1
)
*
dilation_w
+
1
;
ga_size
kernel_extent_w
=
(
kernel_w
-
1
)
*
dilation_w
+
1
;
ga_size
kernel_extent_h
=
(
kernel_h
-
1
)
*
dilation_h
+
1
;
ga_size
kernel_extent_h
=
(
kernel_h
-
1
)
*
dilation_h
+
1
;
...
@@ -182,7 +182,7 @@ KERNEL void col2im_kernel(const ga_size n,
...
@@ -182,7 +182,7 @@ KERNEL void col2im_kernel(const ga_size n,
GLOBAL_MEM
const
DTYPE_INPUT_0
*
data_col
,
const
ga_size
offset_col
,
GLOBAL_MEM
const
DTYPE_INPUT_0
*
data_col
,
const
ga_size
offset_col
,
const
ga_size
height
,
const
ga_size
width
,
const
ga_size
channels
,
const
ga_size
height
,
const
ga_size
width
,
const
ga_size
channels
,
const
ga_size
kernel_h
,
const
ga_size
kernel_w
,
const
ga_size
kernel_h
,
const
ga_size
kernel_w
,
const
ga_size
pad_h
,
const
ga_size
pad_w
,
const
ga_size
pad_h
l
,
const
ga_size
pad_wl
,
const
ga_size
stride_h
,
const
ga_size
stride_w
,
const
ga_size
stride_h
,
const
ga_size
stride_w
,
const
ga_size
height_col
,
const
ga_size
width_col
,
const
ga_size
height_col
,
const
ga_size
width_col
,
GLOBAL_MEM
DTYPE_INPUT_0
*
data_im
,
GLOBAL_MEM
DTYPE_INPUT_0
*
data_im
,
...
@@ -196,8 +196,8 @@ KERNEL void col2im_kernel(const ga_size n,
...
@@ -196,8 +196,8 @@ KERNEL void col2im_kernel(const ga_size n,
for
(
ga_size
index
=
GID_0
*
LDIM_0
+
LID_0
;
for
(
ga_size
index
=
GID_0
*
LDIM_0
+
LID_0
;
index
<
(
n
);
index
+=
LDIM_0
*
GDIM_0
)
{
index
<
(
n
);
index
+=
LDIM_0
*
GDIM_0
)
{
DTYPE_INPUT_0
val
=
0
;
DTYPE_INPUT_0
val
=
0
;
const
ga_size
w_im
=
index
%
width
+
pad_w
;
const
ga_size
w_im
=
index
%
width
+
pad_w
l
;
const
ga_size
h_im
=
(
index
/
width
)
%
height
+
pad_h
;
const
ga_size
h_im
=
(
index
/
width
)
%
height
+
pad_h
l
;
const
ga_size
c_im
=
index
/
(
width
*
height
);
const
ga_size
c_im
=
index
/
(
width
*
height
);
// compute the start and end of the output
// compute the start and end of the output
const
ga_size
w_col_start
=
const
ga_size
w_col_start
=
...
@@ -259,15 +259,16 @@ int rgemm(cb_order o, cb_transpose tA, cb_transpose tB,
...
@@ -259,15 +259,16 @@ int rgemm(cb_order o, cb_transpose tA, cb_transpose tB,
int
im2col
(
GpuArray
*
data_im
,
const
size_t
data_im_offset
,
const
size_t
channels
,
int
im2col
(
GpuArray
*
data_im
,
const
size_t
data_im_offset
,
const
size_t
channels
,
const
size_t
height
,
const
size_t
width
,
const
size_t
kernel_h
,
const
size_t
kernel_w
,
const
size_t
height
,
const
size_t
width
,
const
size_t
kernel_h
,
const
size_t
kernel_w
,
const
size_t
dilation_h
,
const
size_t
dilation_w
,
const
size_t
dilation_h
,
const
size_t
dilation_w
,
const
size_t
pad_h
,
const
size_t
pad_w
,
const
size_t
pad_hl
,
const
size_t
pad_hr
,
const
size_t
pad_wl
,
const
size_t
pad_wr
,
const
size_t
stride_h
,
const
size_t
stride_w
,
const
size_t
stride_h
,
const
size_t
stride_w
,
GpuArray
*
data_col
)
{
GpuArray
*
data_col
)
{
// We are going to launch channels * height_col * width_col kernels, each
// We are going to launch channels * height_col * width_col kernels, each
// kernel responsible for copying a single-channel grid.
// kernel responsible for copying a single-channel grid.
size_t
dil_kernel_h
=
(
kernel_h
-
1
)
*
dilation_h
+
1
;
size_t
dil_kernel_h
=
(
kernel_h
-
1
)
*
dilation_h
+
1
;
size_t
dil_kernel_w
=
(
kernel_w
-
1
)
*
dilation_w
+
1
;
size_t
dil_kernel_w
=
(
kernel_w
-
1
)
*
dilation_w
+
1
;
size_t
height_col
=
(
height
+
2
*
pad_h
-
dil_kernel_h
)
/
stride_h
+
1
;
size_t
height_col
=
(
height
+
pad_hl
+
pad_hr
-
dil_kernel_h
)
/
stride_h
+
1
;
size_t
width_col
=
(
width
+
2
*
pad_w
-
dil_kernel_w
)
/
stride_w
+
1
;
size_t
width_col
=
(
width
+
pad_wl
+
pad_wr
-
dil_kernel_w
)
/
stride_w
+
1
;
size_t
num_kernels
=
channels
*
height_col
*
width_col
;
size_t
num_kernels
=
channels
*
height_col
*
width_col
;
int
err
;
int
err
;
if
(
dilation_h
!=
1
||
dilation_w
!=
1
)
{
if
(
dilation_h
!=
1
||
dilation_w
!=
1
)
{
...
@@ -275,7 +276,7 @@ int im2col(GpuArray *data_im, const size_t data_im_offset, const size_t channels
...
@@ -275,7 +276,7 @@ int im2col(GpuArray *data_im, const size_t data_im_offset, const size_t channels
1
,
&
num_kernels
,
0
,
1
,
&
num_kernels
,
0
,
num_kernels
,
data_im
->
data
,
data_im
->
offset
,
data_im_offset
,
num_kernels
,
data_im
->
data
,
data_im
->
offset
,
data_im_offset
,
height
,
width
,
kernel_h
,
kernel_w
,
height
,
width
,
kernel_h
,
kernel_w
,
dilation_h
,
dilation_w
,
pad_h
,
pad_w
,
stride_h
,
stride_w
,
height_col
,
dilation_h
,
dilation_w
,
pad_h
l
,
pad_wl
,
stride_h
,
stride_w
,
height_col
,
width_col
,
data_col
->
data
,
data_col
->
offset
);
width_col
,
data_col
->
data
,
data_col
->
offset
);
if
(
err
!=
GA_NO_ERROR
)
{
if
(
err
!=
GA_NO_ERROR
)
{
PyErr_Format
(
PyExc_RuntimeError
,
PyErr_Format
(
PyExc_RuntimeError
,
...
@@ -287,7 +288,7 @@ int im2col(GpuArray *data_im, const size_t data_im_offset, const size_t channels
...
@@ -287,7 +288,7 @@ int im2col(GpuArray *data_im, const size_t data_im_offset, const size_t channels
1
,
&
num_kernels
,
0
,
1
,
&
num_kernels
,
0
,
num_kernels
,
data_im
->
data
,
data_im
->
offset
,
data_im_offset
,
num_kernels
,
data_im
->
data
,
data_im
->
offset
,
data_im_offset
,
height
,
width
,
kernel_h
,
kernel_w
,
height
,
width
,
kernel_h
,
kernel_w
,
pad_h
,
pad_w
,
stride_h
,
stride_w
,
height_col
,
pad_h
l
,
pad_wl
,
stride_h
,
stride_w
,
height_col
,
width_col
,
data_col
->
data
,
data_col
->
offset
);
width_col
,
data_col
->
data
,
data_col
->
offset
);
if
(
err
!=
GA_NO_ERROR
)
{
if
(
err
!=
GA_NO_ERROR
)
{
PyErr_Format
(
PyExc_RuntimeError
,
PyErr_Format
(
PyExc_RuntimeError
,
...
@@ -301,12 +302,12 @@ int im2col(GpuArray *data_im, const size_t data_im_offset, const size_t channels
...
@@ -301,12 +302,12 @@ int im2col(GpuArray *data_im, const size_t data_im_offset, const size_t channels
int
col2im
(
GpuArray
*
data_col
,
const
size_t
channels
,
int
col2im
(
GpuArray
*
data_col
,
const
size_t
channels
,
const
size_t
height
,
const
size_t
width
,
const
size_t
patch_h
,
const
size_t
patch_w
,
const
size_t
height
,
const
size_t
width
,
const
size_t
patch_h
,
const
size_t
patch_w
,
const
size_t
dilation_h
,
const
size_t
dilation_w
,
const
size_t
dilation_h
,
const
size_t
dilation_w
,
const
size_t
pad_h
,
const
size_t
pad_w
,
const
size_t
stride_h
,
const
size_t
pad_h
l
,
const
size_t
pad_hr
,
const
size_t
pad_wl
,
const
size_t
pad_wr
,
const
size_t
stride_w
,
GpuArray
*
data_im
,
const
size_t
data_im_offset
)
{
const
size_t
stride_
h
,
const
size_t
stride_
w
,
GpuArray
*
data_im
,
const
size_t
data_im_offset
)
{
size_t
dil_patch_h
=
(
patch_h
-
1
)
*
dilation_h
+
1
;
size_t
dil_patch_h
=
(
patch_h
-
1
)
*
dilation_h
+
1
;
size_t
dil_patch_w
=
(
patch_w
-
1
)
*
dilation_w
+
1
;
size_t
dil_patch_w
=
(
patch_w
-
1
)
*
dilation_w
+
1
;
size_t
height_col
=
(
height
+
2
*
pad_h
-
dil_patch_h
)
/
stride_h
+
1
;
size_t
height_col
=
(
height
+
pad_hl
+
pad_hr
-
dil_patch_h
)
/
stride_h
+
1
;
size_t
width_col
=
(
width
+
2
*
pad_w
-
dil_patch_w
)
/
stride_w
+
1
;
size_t
width_col
=
(
width
+
pad_wl
+
pad_wr
-
dil_patch_w
)
/
stride_w
+
1
;
size_t
num_kernels
=
channels
*
height
*
width
;
size_t
num_kernels
=
channels
*
height
*
width
;
// To avoid involving atomic operations, we will launch one kernel per
// To avoid involving atomic operations, we will launch one kernel per
// bottom dimension, and then in the kernel add up the top dimensions.
// bottom dimension, and then in the kernel add up the top dimensions.
...
@@ -316,7 +317,7 @@ int col2im(GpuArray *data_col, const size_t channels,
...
@@ -316,7 +317,7 @@ int col2im(GpuArray *data_col, const size_t channels,
1
,
&
num_kernels
,
0
,
1
,
&
num_kernels
,
0
,
num_kernels
,
data_col
->
data
,
data_col
->
offset
,
num_kernels
,
data_col
->
data
,
data_col
->
offset
,
height
,
width
,
channels
,
patch_h
,
patch_w
,
height
,
width
,
channels
,
patch_h
,
patch_w
,
dilation_h
,
dilation_w
,
pad_h
,
pad_w
,
stride_h
,
stride_w
,
dilation_h
,
dilation_w
,
pad_h
l
,
pad_wl
,
stride_h
,
stride_w
,
height_col
,
width_col
,
data_im
->
data
,
data_im
->
offset
,
data_im_offset
);
height_col
,
width_col
,
data_im
->
data
,
data_im
->
offset
,
data_im_offset
);
if
(
err
!=
GA_NO_ERROR
)
{
if
(
err
!=
GA_NO_ERROR
)
{
PyErr_Format
(
PyExc_RuntimeError
,
PyErr_Format
(
PyExc_RuntimeError
,
...
@@ -328,7 +329,7 @@ int col2im(GpuArray *data_col, const size_t channels,
...
@@ -328,7 +329,7 @@ int col2im(GpuArray *data_col, const size_t channels,
1
,
&
num_kernels
,
0
,
1
,
&
num_kernels
,
0
,
num_kernels
,
data_col
->
data
,
data_col
->
offset
,
num_kernels
,
data_col
->
data
,
data_col
->
offset
,
height
,
width
,
channels
,
patch_h
,
patch_w
,
height
,
width
,
channels
,
patch_h
,
patch_w
,
pad_h
,
pad_w
,
stride_h
,
stride_w
,
pad_h
l
,
pad_wl
,
stride_h
,
stride_w
,
height_col
,
width_col
,
data_im
->
data
,
data_im
->
offset
,
data_im_offset
);
height_col
,
width_col
,
data_im
->
data
,
data_im
->
offset
,
data_im_offset
);
if
(
err
!=
GA_NO_ERROR
)
{
if
(
err
!=
GA_NO_ERROR
)
{
PyErr_Format
(
PyExc_RuntimeError
,
PyErr_Format
(
PyExc_RuntimeError
,
...
@@ -352,8 +353,10 @@ PyGpuArrayObject* corrMM(PyGpuArrayObject *const bottom,
...
@@ -352,8 +353,10 @@ PyGpuArrayObject* corrMM(PyGpuArrayObject *const bottom,
const
size_t
dW
=
1
,
const
size_t
dW
=
1
,
const
size_t
dilH
=
1
,
const
size_t
dilH
=
1
,
const
size_t
dilW
=
1
,
const
size_t
dilW
=
1
,
const
size_t
padH
=
0
,
const
size_t
padH_l
=
0
,
const
size_t
padW
=
0
,
const
size_t
padH_r
=
0
,
const
size_t
padW_l
=
0
,
const
size_t
padW_r
=
0
,
const
size_t
numgroups
=
1
,
const
size_t
numgroups
=
1
,
const
size_t
unshared
=
0
)
const
size_t
unshared
=
0
)
{
{
...
@@ -448,8 +451,8 @@ PyGpuArrayObject* corrMM(PyGpuArrayObject *const bottom,
...
@@ -448,8 +451,8 @@ PyGpuArrayObject* corrMM(PyGpuArrayObject *const bottom,
const
size_t
dil_kH
=
(
kH
-
1
)
*
dilH
+
1
;
const
size_t
dil_kH
=
(
kH
-
1
)
*
dilH
+
1
;
const
size_t
dil_kW
=
(
kW
-
1
)
*
dilW
+
1
;
const
size_t
dil_kW
=
(
kW
-
1
)
*
dilW
+
1
;
// top: (batchSize, nFilters, topHeight, topWidth)
// top: (batchSize, nFilters, topHeight, topWidth)
const
size_t
topHeightNoDH
=
(
bottomHeight
+
2
*
padH
-
dil_kH
);
const
size_t
topHeightNoDH
=
(
bottomHeight
+
padH_l
+
padH_r
-
dil_kH
);
const
size_t
topWidthNoDW
=
(
bottomWidth
+
2
*
padW
-
dil_kW
);
const
size_t
topWidthNoDW
=
(
bottomWidth
+
padW_l
+
padW_r
-
dil_kW
);
// the above values might be negative so we need to use Python-like
// the above values might be negative so we need to use Python-like
// flooring integer division to be compatible with get_conv_output.
// flooring integer division to be compatible with get_conv_output.
// note: this macro implements Python's // for negative x only
// note: this macro implements Python's // for negative x only
...
@@ -563,7 +566,7 @@ PyGpuArrayObject* corrMM(PyGpuArrayObject *const bottom,
...
@@ -563,7 +566,7 @@ PyGpuArrayObject* corrMM(PyGpuArrayObject *const bottom,
err
=
im2col
(
&
bottom
->
ga
,
n
*
batch_bottom_stride
,
err
=
im2col
(
&
bottom
->
ga
,
n
*
batch_bottom_stride
,
nChannels
,
bottomHeight
,
nChannels
,
bottomHeight
,
bottomWidth
,
kH
,
kW
,
dilH
,
dilW
,
bottomWidth
,
kH
,
kW
,
dilH
,
dilW
,
padH
,
padW
,
dH
,
dW
,
&
col
->
ga
);
padH
_l
,
padH_r
,
padW_l
,
padW_r
,
dH
,
dW
,
&
col
->
ga
);
if
(
err
!=
GA_NO_ERROR
)
{
if
(
err
!=
GA_NO_ERROR
)
{
Py_DECREF
(
col
);
Py_DECREF
(
col
);
return
NULL
;
return
NULL
;
...
@@ -623,7 +626,7 @@ PyGpuArrayObject* corrMM(PyGpuArrayObject *const bottom,
...
@@ -623,7 +626,7 @@ PyGpuArrayObject* corrMM(PyGpuArrayObject *const bottom,
err
=
im2col
(
&
bottom
->
ga
,
n
*
batch_bottom_stride
,
err
=
im2col
(
&
bottom
->
ga
,
n
*
batch_bottom_stride
,
nChannels
,
bottomHeight
,
nChannels
,
bottomHeight
,
bottomWidth
,
kH
,
kW
,
dilH
,
dilW
,
bottomWidth
,
kH
,
kW
,
dilH
,
dilW
,
padH
,
padW
,
dH
,
dW
,
&
col
->
ga
);
padH
_l
,
padH_r
,
padW_l
,
padW_r
,
dH
,
dW
,
&
col
->
ga
);
if
(
err
!=
GA_NO_ERROR
)
{
if
(
err
!=
GA_NO_ERROR
)
{
Py_DECREF
(
col
);
Py_DECREF
(
col
);
return
NULL
;
return
NULL
;
...
@@ -717,7 +720,7 @@ PyGpuArrayObject* corrMM(PyGpuArrayObject *const bottom,
...
@@ -717,7 +720,7 @@ PyGpuArrayObject* corrMM(PyGpuArrayObject *const bottom,
}
}
// col2im back to the data
// col2im back to the data
err
=
col2im
(
&
col
->
ga
,
nChannels
,
bottomHeight
,
bottomWidth
,
err
=
col2im
(
&
col
->
ga
,
nChannels
,
bottomHeight
,
bottomWidth
,
kH
,
kW
,
dilH
,
dilW
,
padH
,
padW
,
kH
,
kW
,
dilH
,
dilW
,
padH
_l
,
padH_r
,
padW_l
,
padW_r
,
dH
,
dW
,
&
bottom
->
ga
,
n
*
batch_bottom_stride
);
dH
,
dW
,
&
bottom
->
ga
,
n
*
batch_bottom_stride
);
if
(
err
!=
GA_NO_ERROR
)
{
if
(
err
!=
GA_NO_ERROR
)
{
Py_DECREF
(
col
);
Py_DECREF
(
col
);
...
...
theano/gpuarray/dnn.py
浏览文件 @
ea62004b
...
@@ -3084,6 +3084,10 @@ def local_abstractconv_cudnn_graph(op, context_name, inputs, outputs):
...
@@ -3084,6 +3084,10 @@ def local_abstractconv_cudnn_graph(op, context_name, inputs, outputs):
if
op
.
unshared
:
if
op
.
unshared
:
return
None
return
None
if
isinstance
(
op
.
border_mode
,
tuple
)
and
any
(
isinstance
(
p
,
tuple
)
for
p
in
op
.
border_mode
):
# Asymmetric padding not yet supported
return
None
inp1
=
inputs
[
0
]
inp1
=
inputs
[
0
]
inp2
=
inputs
[
1
]
inp2
=
inputs
[
1
]
...
@@ -3180,6 +3184,9 @@ def local_abstractconv_cudnn(node):
...
@@ -3180,6 +3184,9 @@ def local_abstractconv_cudnn(node):
return
return
if
node
.
op
.
unshared
:
if
node
.
op
.
unshared
:
return
None
return
None
if
isinstance
(
node
.
op
.
border_mode
,
tuple
)
and
any
(
isinstance
(
p
,
tuple
)
for
p
in
node
.
op
.
border_mode
):
# Asymmetric padding not yet supported
return
None
if
isinstance
(
node
.
op
,
AbstractConv2d
):
if
isinstance
(
node
.
op
,
AbstractConv2d
):
with
inherit_stack_trace
(
node
.
outputs
):
with
inherit_stack_trace
(
node
.
outputs
):
return
local_abstractconv_cudnn_graph
(
node
.
op
,
ctx
,
node
.
inputs
,
node
.
outputs
)
return
local_abstractconv_cudnn_graph
(
node
.
op
,
ctx
,
node
.
inputs
,
node
.
outputs
)
...
@@ -3198,6 +3205,9 @@ def local_abstractconv_cudnn_alt(node):
...
@@ -3198,6 +3205,9 @@ def local_abstractconv_cudnn_alt(node):
return
None
return
None
if
node
.
op
.
unshared
:
if
node
.
op
.
unshared
:
return
None
return
None
if
isinstance
(
node
.
op
.
border_mode
,
tuple
)
and
any
(
isinstance
(
p
,
tuple
)
for
p
in
node
.
op
.
border_mode
):
# Asymmetric padding not yet supported
return
None
inp1
=
node
.
inputs
[
0
]
inp1
=
node
.
inputs
[
0
]
inp2
=
node
.
inputs
[
1
]
inp2
=
node
.
inputs
[
1
]
...
@@ -3407,6 +3417,9 @@ def local_abstractconv_gw_cudnn(node):
...
@@ -3407,6 +3417,9 @@ def local_abstractconv_gw_cudnn(node):
return
return
if
node
.
op
.
unshared
:
if
node
.
op
.
unshared
:
return
None
return
None
if
isinstance
(
node
.
op
.
border_mode
,
tuple
)
and
any
(
isinstance
(
p
,
tuple
)
for
p
in
node
.
op
.
border_mode
):
# Asymmetric padding not yet supported
return
None
if
isinstance
(
node
.
op
,
AbstractConv2d_gradWeights
):
if
isinstance
(
node
.
op
,
AbstractConv2d_gradWeights
):
with
inherit_stack_trace
(
node
.
outputs
):
with
inherit_stack_trace
(
node
.
outputs
):
return
local_abstractconv_cudnn_graph
(
node
.
op
,
ctx
,
node
.
inputs
,
node
.
outputs
)
return
local_abstractconv_cudnn_graph
(
node
.
op
,
ctx
,
node
.
inputs
,
node
.
outputs
)
...
@@ -3422,6 +3435,9 @@ def local_abstractconv_gi_cudnn(node):
...
@@ -3422,6 +3435,9 @@ def local_abstractconv_gi_cudnn(node):
return
return
if
node
.
op
.
unshared
:
if
node
.
op
.
unshared
:
return
None
return
None
if
isinstance
(
node
.
op
.
border_mode
,
tuple
)
and
any
(
isinstance
(
p
,
tuple
)
for
p
in
node
.
op
.
border_mode
):
# Asymmetric padding not yet supported
return
None
if
isinstance
(
node
.
op
,
AbstractConv2d_gradInputs
):
if
isinstance
(
node
.
op
,
AbstractConv2d_gradInputs
):
with
inherit_stack_trace
(
node
.
outputs
):
with
inherit_stack_trace
(
node
.
outputs
):
return
local_abstractconv_cudnn_graph
(
node
.
op
,
ctx
,
node
.
inputs
,
node
.
outputs
)
return
local_abstractconv_cudnn_graph
(
node
.
op
,
ctx
,
node
.
inputs
,
node
.
outputs
)
...
...
theano/gpuarray/tests/test_gemmcorr.py
浏览文件 @
ea62004b
...
@@ -12,6 +12,7 @@ from ..type import gpuarray_shared_constructor
...
@@ -12,6 +12,7 @@ from ..type import gpuarray_shared_constructor
from
..blas
import
GpuCorrMM
,
GpuCorrMM_gradWeights
,
GpuCorrMM_gradInputs
from
..blas
import
GpuCorrMM
,
GpuCorrMM_gradWeights
,
GpuCorrMM_gradInputs
from
.config
import
mode_with_gpu
,
mode_without_gpu
,
ref_cast
from
.config
import
mode_with_gpu
,
mode_without_gpu
,
ref_cast
from
theano.tensor.nnet.tests.test_abstract_conv
import
Grouped_conv_noOptim
,
TestUnsharedConv
from
theano.tensor.nnet.tests.test_abstract_conv
import
Grouped_conv_noOptim
,
TestUnsharedConv
from
theano.tensor.nnet.tests.test_abstract_conv
import
TestAsymmetricPadding
,
TestCausalConv
class
TestCorrMM
(
unittest
.
TestCase
):
class
TestCorrMM
(
unittest
.
TestCase
):
...
@@ -272,3 +273,14 @@ class TestUnsharedGpuCorr2d(TestUnsharedConv):
...
@@ -272,3 +273,14 @@ class TestUnsharedGpuCorr2d(TestUnsharedConv):
conv2d_op
=
GpuCorrMM
conv2d_op
=
GpuCorrMM
conv2d_gradw_op
=
GpuCorrMM_gradWeights
conv2d_gradw_op
=
GpuCorrMM_gradWeights
conv2d_gradi_op
=
GpuCorrMM_gradInputs
conv2d_gradi_op
=
GpuCorrMM_gradInputs
class
TestAsymmetricGpu
(
TestAsymmetricPadding
):
mode
=
mode_with_gpu
conv2d_op
=
GpuCorrMM
conv2d_gradw_op
=
GpuCorrMM_gradWeights
conv2d_gradi_op
=
GpuCorrMM_gradInputs
class
TestCausalGpuCorr
(
TestCausalConv
):
mode
=
mode_with_gpu
theano/tensor/nnet/__init__.py
浏览文件 @
ea62004b
...
@@ -72,7 +72,7 @@ def conv2d(input, filters, input_shape=None, filter_shape=None,
...
@@ -72,7 +72,7 @@ def conv2d(input, filters, input_shape=None, filter_shape=None,
You can give ``None`` for any element of the list to specify that this
You can give ``None`` for any element of the list to specify that this
element is not known at compile time.
element is not known at compile time.
border_mode: str, int or
tuple of two int
border_mode: str, int or
a tuple of two ints or pairs of ints
Either of the following:
Either of the following:
``'valid'``: apply filter wherever it completely overlaps with the
``'valid'``: apply filter wherever it completely overlaps with the
...
@@ -85,8 +85,11 @@ def conv2d(input, filters, input_shape=None, filter_shape=None,
...
@@ -85,8 +85,11 @@ def conv2d(input, filters, input_shape=None, filter_shape=None,
leads to the output shape being equal to the input shape.
leads to the output shape being equal to the input shape.
``int``: pad input with a symmetric border of zeros of the given
``int``: pad input with a symmetric border of zeros of the given
width, then perform a valid convolution.
width, then perform a valid convolution.
``(int1, int2)``: pad input with a symmetric border of ``int1`` rows
``(int1, int2)``: (for 2D) pad input with a symmetric border of ``int1``,
and ``int2`` columns, then perform a valid convolution.
``int2``, then perform a valid convolution.
``(int1, (int2, int3))`` or ``((int1, int2), int3)``: (for 2D)
pad input with one symmetric border of ``int1`` or ``int3``, and
one asymmetric border of ``(int2, int3)`` or ``(int1, int2)``.
subsample: tuple of len 2
subsample: tuple of len 2
Factor by which to subsample the output.
Factor by which to subsample the output.
...
...
theano/tensor/nnet/abstract_conv.py
浏览文件 @
ea62004b
...
@@ -53,9 +53,10 @@ def get_conv_output_shape(image_shape, kernel_shape,
...
@@ -53,9 +53,10 @@ def get_conv_output_shape(image_shape, kernel_shape,
input channels, height and width of the kernel.
input channels, height and width of the kernel.
None where undefined.
None where undefined.
border_mode: string, int (symbolic or numeric) or tuple of int (symbolic
border_mode: string, int (symbolic or numeric) or tuple of int (symbolic
or numeric). If it is a string, it must be 'valid', 'half' or 'full'.
or numeric) or pairs of ints. If it is a string, it must be 'valid',
If it is a tuple, its two (or three) elements respectively correspond
'half' or 'full'. If it is a tuple, its two (or three) elements respectively
to the padding on height and width (and possibly depth) axis.
correspond to the padding on height and width (and possibly depth)
axis. For asymmetric padding, provide a pair of ints for each dimension.
subsample: tuple of int (symbolic or numeric). Its two or three elements
subsample: tuple of int (symbolic or numeric). Its two or three elements
espectively correspond to the subsampling on height and width (and
espectively correspond to the subsampling on height and width (and
possibly depth) axis.
possibly depth) axis.
...
@@ -103,9 +104,11 @@ def get_conv_shape_1axis(image_shape, kernel_shape, border_mode,
...
@@ -103,9 +104,11 @@ def get_conv_shape_1axis(image_shape, kernel_shape, border_mode,
given axis. None if undefined.
given axis. None if undefined.
kernel_shape: int or None. Corresponds to the kernel shape on a given
kernel_shape: int or None. Corresponds to the kernel shape on a given
axis. None if undefined.
axis. None if undefined.
border_mode: string
or int
. If it is a string, it must be
border_mode: string
, int or tuple of 2 ints
. If it is a string, it must be
'valid', 'half' or 'full'. If it is an integer, it must correspond to
'valid', 'half' or 'full'. If it is an integer, it must correspond to
the padding on the considered axis.
the padding on the considered axis. If it is a tuple, its two elements
must correspond to the asymmetric padding (e.g., left and right) on
the considered axis.
subsample: int. It must correspond to the subsampling on the
subsample: int. It must correspond to the subsampling on the
considered axis.
considered axis.
dilation: int. It must correspond to the dilation on the
dilation: int. It must correspond to the dilation on the
...
@@ -123,22 +126,26 @@ def get_conv_shape_1axis(image_shape, kernel_shape, border_mode,
...
@@ -123,22 +126,26 @@ def get_conv_shape_1axis(image_shape, kernel_shape, border_mode,
# Implicit dilated kernel shape
# Implicit dilated kernel shape
dil_kernel_shape
=
(
kernel_shape
-
1
)
*
dilation
+
1
dil_kernel_shape
=
(
kernel_shape
-
1
)
*
dilation
+
1
if
border_mode
==
"half"
:
if
border_mode
==
"half"
:
pad
=
dil_kernel_shape
//
2
pad
_l
=
pad_r
=
dil_kernel_shape
//
2
elif
border_mode
==
"full"
:
elif
border_mode
==
"full"
:
pad
=
dil_kernel_shape
-
1
pad
_l
=
pad_r
=
dil_kernel_shape
-
1
elif
border_mode
==
"valid"
:
elif
border_mode
==
"valid"
:
pad
=
0
pad
_l
=
pad_r
=
0
else
:
else
:
pad
=
border_mode
if
isinstance
(
border_mode
,
tuple
):
if
pad
<
0
:
pad_l
,
pad_r
=
border_mode
else
:
pad_l
=
pad_r
=
border_mode
if
pad_l
<
0
or
pad_r
<
0
:
raise
ValueError
(
"border_mode must be >= 0"
)
raise
ValueError
(
"border_mode must be >= 0"
)
# In case of symbolic shape, we want to build the smallest graph
# In case of symbolic shape, we want to build the smallest graph
# (image_shape + 2 * pad - dil_kernel_shape) // subsample + 1
# (image_shape + 2 * pad - dil_kernel_shape) // subsample + 1
if
pad
==
0
:
out_shp
=
(
image_shape
-
dil_kernel_shape
)
out_shp
=
(
image_shape
-
dil_kernel_shape
)
if
pad_l
!=
0
:
else
:
out_shp
+=
pad_l
out_shp
=
(
image_shape
+
2
*
pad
-
dil_kernel_shape
)
if
pad_r
!=
0
:
out_shp
+=
pad_r
if
subsample
!=
1
:
if
subsample
!=
1
:
out_shp
=
out_shp
//
subsample
out_shp
=
out_shp
//
subsample
out_shp
=
out_shp
+
1
out_shp
=
out_shp
+
1
...
@@ -168,9 +175,10 @@ def get_conv_gradweights_shape(image_shape, top_shape,
...
@@ -168,9 +175,10 @@ def get_conv_gradweights_shape(image_shape, top_shape,
to: batch size, number of output channels, height and width (and
to: batch size, number of output channels, height and width (and
possibly depth) of the image. None where undefined.
possibly depth) of the image. None where undefined.
border_mode: string, int (symbolic or numeric) or tuple of int (symbolic
border_mode: string, int (symbolic or numeric) or tuple of int (symbolic
or numeric). If it is a string, it must be 'valid', 'half' or 'full'.
or numeric) or pairs of ints. If it is a string, it must be 'valid',
If it is a tuple, its two (or three) elements respectively correspond
'half' or 'full'. If it is a tuple, its two (or three) elements respectively
to the padding on height and width (and possibly depth) axis.
correspond to the padding on height and width (and possibly depth)
axis. For asymmetric padding, provide a pair of ints for each dimension.
subsample: tuple of int (symbolic or numeric). Its two or three elements
subsample: tuple of int (symbolic or numeric). Its two or three elements
respectively correspond to the subsampling on height and width (and
respectively correspond to the subsampling on height and width (and
possibly depth) axis.
possibly depth) axis.
...
@@ -227,9 +235,11 @@ def get_conv_gradweights_shape_1axis(image_shape, top_shape, border_mode,
...
@@ -227,9 +235,11 @@ def get_conv_gradweights_shape_1axis(image_shape, top_shape, border_mode,
given axis. None if undefined.
given axis. None if undefined.
top_shape: int or None. Corresponds to the top shape on a given axis.
top_shape: int or None. Corresponds to the top shape on a given axis.
None if undefined.
None if undefined.
border_mode: string
or int
. If it is a string, it must be
border_mode: string
, int or tuple of 2 ints
. If it is a string, it must be
'valid', 'half' or 'full'. If it is an integer, it must correspond to
'valid', 'half' or 'full'. If it is an integer, it must correspond to
the padding on the considered axis.
the padding on the considered axis. If it is a tuple, its two elements
must correspond to the asymmetric padding (e.g., left and right) on
the considered axis.
subsample: int. It must correspond to the subsampling on the
subsample: int. It must correspond to the subsampling on the
considered axis.
considered axis.
dilation: int. It must correspond to the dilation on the
dilation: int. It must correspond to the dilation on the
...
@@ -252,9 +262,14 @@ def get_conv_gradweights_shape_1axis(image_shape, top_shape, border_mode,
...
@@ -252,9 +262,14 @@ def get_conv_gradweights_shape_1axis(image_shape, top_shape, border_mode,
elif
border_mode
==
"valid"
:
elif
border_mode
==
"valid"
:
kernel_shape
=
image_shape
-
top_shape
kernel_shape
=
image_shape
-
top_shape
else
:
else
:
if
border_mode
<
0
:
if
isinstance
(
border_mode
,
tuple
):
pad_l
,
pad_r
=
border_mode
else
:
pad_l
=
pad_r
=
border_mode
if
pad_l
<
0
or
pad_r
<
0
:
raise
ValueError
(
"border_mode must be >= 0"
)
raise
ValueError
(
"border_mode must be >= 0"
)
kernel_shape
=
(
image_shape
+
2
*
border_mode
-
top_shape
)
kernel_shape
=
(
image_shape
+
pad_l
+
pad_r
-
top_shape
)
if
dilation
>
1
:
if
dilation
>
1
:
kernel_shape
=
kernel_shape
/
dilation
kernel_shape
=
kernel_shape
/
dilation
...
@@ -284,9 +299,10 @@ def get_conv_gradinputs_shape(kernel_shape, top_shape,
...
@@ -284,9 +299,10 @@ def get_conv_gradinputs_shape(kernel_shape, top_shape,
to: batch size, number of output channels, height and width (and
to: batch size, number of output channels, height and width (and
possibly depth) of the image. None where undefined.
possibly depth) of the image. None where undefined.
border_mode: string, int (symbolic or numeric) or tuple of int (symbolic
border_mode: string, int (symbolic or numeric) or tuple of int (symbolic
or numeric). If it is a string, it must be 'valid', 'half' or 'full'.
or numeric) or pairs of ints. If it is a string, it must be 'valid',
If it is a tuple, its two (or three) elements respectively correspond
'half' or 'full'. If it is a tuple, its two (or three) elements respectively
to the padding on height and width (and possibly depth) axis.
correspond to the padding on height and width (and possibly depth)
axis. For asymmetric padding, provide a pair of ints for each dimension.
subsample: tuple of int (symbolic or numeric). Its two or three elements
subsample: tuple of int (symbolic or numeric). Its two or three elements
respectively correspond to the subsampling on height and width (and
respectively correspond to the subsampling on height and width (and
possibly depth) axis.
possibly depth) axis.
...
@@ -340,9 +356,11 @@ def get_conv_gradinputs_shape_1axis(kernel_shape, top_shape, border_mode,
...
@@ -340,9 +356,11 @@ def get_conv_gradinputs_shape_1axis(kernel_shape, top_shape, border_mode,
axis. None if undefined.
axis. None if undefined.
top_shape: int or None. Corresponds to the top shape on a given axis.
top_shape: int or None. Corresponds to the top shape on a given axis.
None if undefined.
None if undefined.
border_mode: string
or int
. If it is a string, it must be
border_mode: string
, int or tuple of 2 ints
. If it is a string, it must be
'valid', 'half' or 'full'. If it is an integer, it must correspond to
'valid', 'half' or 'full'. If it is an integer, it must correspond to
the padding on the considered axis.
the padding on the considered axis. If it is a tuple, its two elements
must correspond to the asymmetric padding (e.g., left and right) on
the considered axis.
subsample: int. It must correspond to the subsampling on the
subsample: int. It must correspond to the subsampling on the
considered axis.
considered axis.
dilation: int. It must correspond to the dilation on the
dilation: int. It must correspond to the dilation on the
...
@@ -363,23 +381,27 @@ def get_conv_gradinputs_shape_1axis(kernel_shape, top_shape, border_mode,
...
@@ -363,23 +381,27 @@ def get_conv_gradinputs_shape_1axis(kernel_shape, top_shape, border_mode,
# Implicit dilated kernel shape
# Implicit dilated kernel shape
dil_kernel_shape
=
(
kernel_shape
-
1
)
*
dilation
+
1
dil_kernel_shape
=
(
kernel_shape
-
1
)
*
dilation
+
1
if
border_mode
==
"half"
:
if
border_mode
==
"half"
:
pad
=
dil_kernel_shape
//
2
pad
_l
=
pad_r
=
dil_kernel_shape
//
2
elif
border_mode
==
"full"
:
elif
border_mode
==
"full"
:
pad
=
dil_kernel_shape
-
1
pad
_l
=
pad_r
=
dil_kernel_shape
-
1
elif
border_mode
==
"valid"
:
elif
border_mode
==
"valid"
:
pad
=
0
pad
_l
=
pad_r
=
0
else
:
else
:
pad
=
border_mode
if
isinstance
(
border_mode
,
tuple
):
if
pad
<
0
:
pad_l
,
pad_r
=
border_mode
else
:
pad_l
=
pad_r
=
border_mode
if
pad_l
<
0
or
pad_r
<
0
:
raise
ValueError
(
"border_mode must be >= 0"
)
raise
ValueError
(
"border_mode must be >= 0"
)
# In case of symbolic shape, we want to build the smallest graph
# In case of symbolic shape, we want to build the smallest graph
# image_shape = (top_shape - 1) * s - 2 * pad + dil_kernel_shape + a
# image_shape = (top_shape - 1) * s - 2 * pad + dil_kernel_shape + a
# where 0 <= a < subsample, but we have checked that subsample == 1
# where 0 <= a < subsample, but we have checked that subsample == 1
if
pad
==
0
:
image_shape
=
(
top_shape
+
dil_kernel_shape
-
1
)
image_shape
=
(
top_shape
+
dil_kernel_shape
-
1
)
if
pad_l
>
0
:
else
:
image_shape
-=
pad_l
image_shape
=
(
top_shape
-
2
*
pad
+
dil_kernel_shape
-
1
)
if
pad_r
>
0
:
image_shape
-=
pad_r
return
image_shape
return
image_shape
...
@@ -405,9 +427,10 @@ def check_conv_gradinputs_shape(image_shape, kernel_shape, output_shape,
...
@@ -405,9 +427,10 @@ def check_conv_gradinputs_shape(image_shape, kernel_shape, output_shape,
to: batch size, number of output channels, height and width
to: batch size, number of output channels, height and width
(and possibly depth) of the output. None where undefined.
(and possibly depth) of the output. None where undefined.
border_mode: string, int (symbolic or numeric) or tuple of int (symbolic
border_mode: string, int (symbolic or numeric) or tuple of int (symbolic
or numeric). If it is a string, it must be 'valid', 'half' or 'full'.
or numeric) or pairs of ints. If it is a string, it must be 'valid',
If it is a tuple, its two (or three) elements respectively correspond
'half' or 'full'. If it is a tuple, its two (or three) elements respectively
to the padding on height and width (and possibly depth) axis.
correspond to the padding on height and width (and possibly depth)
axis. For asymmetric padding, provide a pair of ints for each dimension.
subsample: tuple of int (symbolic or numeric). Its two or three elements
subsample: tuple of int (symbolic or numeric). Its two or three elements
respectively correspond to the subsampling on height and width (and
respectively correspond to the subsampling on height and width (and
possibly depth) axis.
possibly depth) axis.
...
@@ -533,6 +556,63 @@ def assert_shape(x, expected_shape, msg='Unexpected shape.'):
...
@@ -533,6 +556,63 @@ def assert_shape(x, expected_shape, msg='Unexpected shape.'):
return
x
return
x
def border_mode_to_pad(mode, convdim, kshp):
    """
    Computes a tuple for padding given the border_mode parameter

    Parameters
    ----------
    mode : int or tuple
        One of "valid", "full", "half", an integer, or a tuple where each
        member is either an integer or a tuple of 2 positive integers.
    convdim : int
        The dimensionality of the convolution.
    kshp : List/tuple of length 'convdim', indicating the size of the
        kernel in the spatial dimensions.

    Returns
    -------
    A tuple containing 'convdim' elements, each of which is a tuple of
    two positive integers corresponding to the padding on the left
    and the right sides respectively.

    """

    if isinstance(mode, tuple):
        if len(mode) != convdim:
            raise ValueError('invalid border_mode {} which must be a '
                             'tuple of length {}'.format(mode, convdim))
        border = ()
        for m in mode:
            if isinstance(m, integer_types) and m >= 0:
                # symmetric padding for this axis
                border += ((m, m),)
            elif (isinstance(m, tuple) and m and
                    all(isinstance(b, integer_types) for b in m) and
                    min(m) >= 0):
                # asymmetric padding for this axis.
                # NOTE: the type/emptiness checks run before min() so an
                # empty tuple or a tuple with non-integer members raises
                # the intended ValueError below rather than an obscure
                # TypeError/ValueError from min() itself.
                if len(m) != 2:
                    raise NotImplementedError(
                        'Asymmetric padding not implemented '
                        'for {}d'.format(len(m)))
                border += ((m[0], m[1]),)
            else:
                raise ValueError(
                    'invalid border mode {}. The tuple can only contain '
                    'integers or tuples of length 2'.format(mode))
        pad = border
    elif mode == 'full':
        # pad by (kernel size - 1) on both sides of every spatial axis
        pad = tuple((kshp[i] - 1,) * 2 for i in range(convdim))
    elif mode == 'half':
        # pad by half the kernel size (floored) on both sides
        pad = tuple((kshp[i] // 2,) * 2 for i in range(convdim))
    elif mode == 'valid':
        pad = ((0, 0),) * convdim
    else:
        raise ValueError('invalid border_mode {}, which must be either '
                         '"valid", "full", "half", an integer or a tuple '
                         'of length {}'.format(mode, convdim))
    return pad
def
conv2d
(
input
,
def
conv2d
(
input
,
filters
,
filters
,
input_shape
=
None
,
input_shape
=
None
,
...
@@ -632,6 +712,12 @@ def separable_conv2d(input,
...
@@ -632,6 +712,12 @@ def separable_conv2d(input,
width, then perform a valid convolution.
width, then perform a valid convolution.
``(int1, int2)``: pad input with a symmetric border of ``int1`` rows
``(int1, int2)``: pad input with a symmetric border of ``int1`` rows
and ``int2`` columns, then perform a valid convolution.
and ``int2`` columns, then perform a valid convolution.
``(int1, (int2, int3))`` or ``((int1, int2), int3)``:
pad input with one symmetric border of ``int1`` or ``int3``, and
one asymmetric border of ``(int2, int3)`` or ``(int1, int2)``.
``((int1, int2), (int3, int4))``: pad input with an asymmetric
border of ``(int1, int2)`` along one dimension and ``(int3, int4)``
along the second dimension.
subsample: tuple of len 2
subsample: tuple of len 2
Factor by which to subsample the output.
Factor by which to subsample the output.
...
@@ -959,7 +1045,7 @@ def conv2d_grad_wrt_inputs(output_grad,
...
@@ -959,7 +1045,7 @@ def conv2d_grad_wrt_inputs(output_grad,
Optional, possibly used to choose an optimal implementation.
Optional, possibly used to choose an optimal implementation.
You can give ``None`` for any element of the list to specify that
You can give ``None`` for any element of the list to specify that
this element is not known at compile time.
this element is not known at compile time.
border_mode
: str, int or tuple of two int
border_mode
: str, int or a tuple of two ints or pairs of ints
Either of the following:
Either of the following:
``'valid'``
``'valid'``
...
@@ -986,6 +1072,14 @@ def conv2d_grad_wrt_inputs(output_grad,
...
@@ -986,6 +1072,14 @@ def conv2d_grad_wrt_inputs(output_grad,
pad input with a symmetric border of ``int1`` rows and
pad input with a symmetric border of ``int1`` rows and
``int2`` columns, then perform a valid convolution.
``int2`` columns, then perform a valid convolution.
``(int1, (int2, int3))`` or ``((int1, int2), int3)``
pad input with one symmetric border of ``int1`` or ``int3``, and
one asymmetric border of ``(int2, int3)`` or ``(int1, int2)``.
``((int1, int2), (int3, int4))``
pad input with an asymmetric border of ``(int1, int2)`` along one dimension and ``(int3, int4)``
along the second dimension.
subsample : tuple of len 2
subsample : tuple of len 2
The subsampling used in the forward pass. Also called strides
The subsampling used in the forward pass. Also called strides
elsewhere.
elsewhere.
...
@@ -1245,7 +1339,7 @@ def conv2d_grad_wrt_weights(input,
...
@@ -1245,7 +1339,7 @@ def conv2d_grad_wrt_weights(input,
Optional, possibly used to choose an optimal implementation.
Optional, possibly used to choose an optimal implementation.
You can give ``None`` for any element of the list to specify
You can give ``None`` for any element of the list to specify
that this element is not known at compile time.
that this element is not known at compile time.
border_mode
: str, int or tuple of two
ints
border_mode
: str, int or a tuple of two ints or pairs of
ints
Either of the following:
Either of the following:
``'valid'``
``'valid'``
...
@@ -1271,6 +1365,14 @@ def conv2d_grad_wrt_weights(input,
...
@@ -1271,6 +1365,14 @@ def conv2d_grad_wrt_weights(input,
``(int1, int2)``
``(int1, int2)``
pad input with a symmetric border of ``int1`` rows and
pad input with a symmetric border of ``int1`` rows and
``int2`` columns, then perform a valid convolution.
``int2`` columns, then perform a valid convolution.
``(int1, (int2, int3))`` or ``((int1, int2), int3)``
pad input with one symmetric border of ``int1`` or ``int3``, and
one asymmetric border of ``(int2, int3)`` or ``(int1, int2)``.
``((int1, int2), (int3, int4))``
pad input with an asymmetric border of ``(int1, int2)`` along
one dimension and ``(int3, int4)`` along the second dimension.
subsample : tuple of len 2
subsample : tuple of len 2
The subsampling used in the forward pass of the convolutional
The subsampling used in the forward pass of the convolutional
operation. Also called strides elsewhere.
operation. Also called strides elsewhere.
...
@@ -1484,6 +1586,108 @@ def conv3d_grad_wrt_weights(input,
...
@@ -1484,6 +1586,108 @@ def conv3d_grad_wrt_weights(input,
return
gradWeight_op
(
input
,
output_grad
,
filter_shape
[
-
3
:])
return
gradWeight_op
(
input
,
output_grad
,
filter_shape
[
-
3
:])
def causal_conv1d(input, filters, filter_shape, input_shape=None,
                  subsample=1, filter_flip=True, filter_dilation=1,
                  num_groups=1, unshared=False):
    """
    Computes (dilated) causal convolution

    The output at time t depends only on the inputs till t-1. Used for
    modelling temporal data.
    See [WaveNet: A Generative Model for Raw Audio, section 2.1]
    (https://arxiv.org/abs/1609.03499).

    Parameters
    ----------
    input : symbolic 3D tensor
        mini-batch of feature vector stacks, of shape
        (batch_size, input_channels, input_length)
        See the optional parameter ``input_shape``
    filters : symbolic 3D tensor
        Set of filters used in the CNN, of shape
        (output_channels, input_channels, filter_length)
    filter_shape : [None/int/Constant] * 2 + [Tensor/int/Constant]
        The shape of the filters parameter.
        A tuple/list of len 3, with the first two dimensions
        being None or int or Constant and the last dimension being
        Tensor or int or Constant.
        Not optional, since the filter length is needed to calculate
        the left padding for causality.
    input_shape : None or [None/int/Constant] * 3
        The shape of the input parameter.
        None, or a tuple/list of len 3.
        Optional, possibly used to choose an optimal implementation.
    subsample : int
        The factor by which to subsample the output. Also called strides
        elsewhere.
    filter_dilation : int
        Factor by which to subsample (stride) the input. Also called
        dilation factor.
    num_groups : int
        Divides the image, kernel and output tensors into num_groups
        separate groups. Each which carry out convolutions separately
    unshared : bool
        If true, then unshared or 'locally connected' convolution will be
        performed. A different filter will be used for each region of the
        input.

    Returns
    -------
    Symbolic 3D tensor.
        Set of feature vectors generated by convolutional layer. Tensor is
        of shape (batch_size, output_channels, output_length)

    Raises
    ------
    ValueError
        If ``input`` or ``filters`` is not 3D, or if ``input_shape`` /
        ``filter_shape`` does not have length 3.

    Notes
    -----
    :note: Currently, this is implemented with the 2D convolution ops.

    """
    input = as_tensor_variable(input)
    filters = as_tensor_variable(filters)

    if input.ndim != 3:
        raise ValueError('Input should be 3D for causal convolution.')
    if filters.ndim != 3:
        raise ValueError('Filters should be 3D for causal convolution')

    # Append a broadcastable width-1 axis so the 1D convolution can be
    # expressed with the existing 2D convolution ops.
    input = input.dimshuffle(0, 1, 2, 'x')
    filters = filters.dimshuffle(0, 1, 2, 'x')

    # Explicit ValueErrors instead of asserts: filter_shape is documented
    # as mandatory and asserts are stripped when Python runs with -O.
    if input_shape is not None:
        if len(input_shape) != 3:
            raise ValueError('input_shape should be a tuple/list of len 3 '
                             'for causal convolution')
        input_shape = tuple(input_shape)
        input_shape += (1,)
    if len(filter_shape) != 3:
        raise ValueError('filter_shape should be a tuple/list of len 3 '
                         'for causal convolution')
    filter_shape = tuple(filter_shape)
    filter_shape += (1,)

    # Asymmetric left-only padding of the time axis by the dilated
    # receptive field minus one, so that no output timestep depends on
    # future inputs (causality).
    left_pad = filter_dilation * (filter_shape[2] - 1)
    subsample = (subsample, 1)
    filter_dilation = (filter_dilation, 1)

    conv_op = AbstractConv2d(imshp=input_shape, kshp=filter_shape,
                             border_mode=((left_pad, 0), 0),
                             subsample=subsample, filter_flip=filter_flip,
                             filter_dilation=filter_dilation,
                             num_groups=num_groups, unshared=unshared)
    output = conv_op(input, filters)

    # Drop the dummy width axis again to return a 3D tensor.
    return output[:, :, :, 0]
def
bilinear_kernel_2D
(
ratio
,
normalize
=
True
):
def
bilinear_kernel_2D
(
ratio
,
normalize
=
True
):
"""Compute 2D kernel for bilinear upsampling
"""Compute 2D kernel for bilinear upsampling
...
@@ -1689,7 +1893,7 @@ class BaseAbstractConv(Op):
...
@@ -1689,7 +1893,7 @@ class BaseAbstractConv(Op):
element is not known at compile time.
element is not known at compile time.
kshp is defined w.r.t the forward conv.
kshp is defined w.r.t the forward conv.
border_mode: str, int or tuple of ``convdim``
ints
border_mode: str, int or a tuple of two ints or pairs of
ints
Either of the following:
Either of the following:
``'valid'``: apply filter wherever it completely overlaps with the
``'valid'``: apply filter wherever it completely overlaps with the
...
@@ -1704,6 +1908,12 @@ class BaseAbstractConv(Op):
...
@@ -1704,6 +1908,12 @@ class BaseAbstractConv(Op):
width, then perform a valid convolution.
width, then perform a valid convolution.
``(int1, int2)``: (for 2D) pad input with a symmetric border of ``int1``,
``(int1, int2)``: (for 2D) pad input with a symmetric border of ``int1``,
``int2``, then perform a valid convolution.
``int2``, then perform a valid convolution.
``(int1, (int2, int3))`` or ``((int1, int2), int3)``: (for 2D)
pad input with one symmetric border of ``int1`` or ``int3``, and
one asymmetric border of ``(int2, int3)`` or ``(int1, int2)``.
``((int1, int2), (int3, int4))``: (for 2D) pad input with an asymmetric
border of ``(int1, int2)`` along one dimension and ``(int3, int4)``
along the second dimension.
``(int1, int2, int3)``: (for 3D) pad input with a symmetric border of
``(int1, int2, int3)``: (for 3D) pad input with a symmetric border of
``int1``, ``int2`` and ``int3``, then perform a valid convolution.
``int1``, ``int2`` and ``int3``, then perform a valid convolution.
...
@@ -1751,21 +1961,40 @@ class BaseAbstractConv(Op):
...
@@ -1751,21 +1961,40 @@ class BaseAbstractConv(Op):
filter_dilation
=
(
1
,)
*
convdim
filter_dilation
=
(
1
,)
*
convdim
if
isinstance
(
border_mode
,
integer_types
):
if
isinstance
(
border_mode
,
integer_types
):
if
border_mode
<
0
:
raise
ValueError
(
'invalid border_mode {}, which must be a '
'non-negative integer'
.
format
(
border_mode
))
border_mode
=
(
border_mode
,)
*
convdim
border_mode
=
(
border_mode
,)
*
convdim
if
isinstance
(
border_mode
,
tuple
):
el
if
isinstance
(
border_mode
,
tuple
):
if
len
(
border_mode
)
!=
convdim
:
if
len
(
border_mode
)
!=
convdim
:
raise
ValueError
(
raise
ValueError
(
'border mode must have exactly {} values, '
'invalid border_mode {}, which must be a '
'but was {}'
.
format
(
convdim
,
border_mode
))
'tuple of length {}'
.
format
(
border_mode
,
convdim
))
border_mode
=
tuple
(
map
(
int
,
border_mode
))
new_border_mode
=
()
if
border_mode
==
(
0
,)
*
convdim
:
for
mode
in
border_mode
:
border_mode
=
'valid'
if
not
((
isinstance
(
mode
,
integer_types
)
and
mode
>=
0
)
or
if
not
((
isinstance
(
border_mode
,
tuple
)
and
min
(
border_mode
)
>=
0
)
or
(
isinstance
(
mode
,
tuple
)
and
len
(
mode
)
==
2
and
min
(
mode
)
>=
0
and
border_mode
in
(
'valid'
,
'full'
,
'half'
)):
all
(
isinstance
(
m
,
integer_types
)
for
m
in
mode
))):
raise
ValueError
(
'invalid border mode {}. The tuple can only contain integers '
' or pairs of integers'
.
format
(
border_mode
))
if
isinstance
(
mode
,
tuple
):
if
convdim
!=
2
:
raise
NotImplementedError
(
'Asymmetric padding not implemented for {}D'
.
format
(
convdim
))
if
mode
[
0
]
==
mode
[
1
]:
mode
=
mode
[
0
]
new_border_mode
+=
(
mode
,)
border_mode
=
new_border_mode
elif
border_mode
not
in
(
'valid'
,
'full'
,
'half'
):
raise
ValueError
(
raise
ValueError
(
'invalid border_mode {}, which must be either '
'invalid border_mode {}, which must be either '
'"valid", "full", "half", an integer or a tuple of {}'
'"valid", "full", "half", an integer or a tuple '
' integers'
.
format
(
border_mode
,
convdim
))
'of length {}'
.
format
(
border_mode
,
convdim
))
if
isinstance
(
border_mode
,
tuple
)
and
\
all
(
mode
==
(
0
,
0
)
or
mode
==
0
for
mode
in
border_mode
):
border_mode
=
'valid'
self
.
imshp
=
tuple
(
imshp
)
if
imshp
else
(
None
,)
*
(
2
+
convdim
)
self
.
imshp
=
tuple
(
imshp
)
if
imshp
else
(
None
,)
*
(
2
+
convdim
)
for
imshp_i
in
self
.
imshp
:
for
imshp_i
in
self
.
imshp
:
...
@@ -2025,27 +2254,16 @@ class AbstractConv(BaseAbstractConv):
...
@@ -2025,27 +2254,16 @@ class AbstractConv(BaseAbstractConv):
%
self
.
convdim
)
%
self
.
convdim
)
o
,
=
out_
o
,
=
out_
mode
=
self
.
border_mode
mode
=
self
.
border_mode
pad
=
border_mode_to_pad
(
mode
,
self
.
convdim
,
dil_kernshp
)
if
not
((
isinstance
(
mode
,
tuple
)
and
min
(
mode
)
>=
0
)
or
if
any
(
p
!=
(
0
,
0
)
for
p
in
pad
):
mode
in
(
'valid'
,
'full'
,
'half'
)):
raise
ValueError
(
'invalid border_mode {}, which must be either '
'"valid", "full", "half", an integer or a tuple of'
' integers'
.
format
(
mode
))
if
mode
==
"full"
:
mode
=
tuple
(
dil_kernshp
[
i
]
-
1
for
i
in
range
(
self
.
convdim
))
elif
mode
==
"half"
:
mode
=
tuple
(
dil_kernshp
[
i
]
//
2
for
i
in
range
(
self
.
convdim
))
if
isinstance
(
mode
,
tuple
):
pad
=
tuple
(
int
(
mode
[
i
])
for
i
in
range
(
self
.
convdim
))
mode
=
"valid"
mode
=
"valid"
new_img
=
np
.
zeros
((
img
.
shape
[
0
],
img
.
shape
[
1
])
+
new_img
=
np
.
zeros
((
img
.
shape
[
0
],
img
.
shape
[
1
])
+
tuple
(
img
.
shape
[
i
+
2
]
+
2
*
pad
[
i
]
tuple
(
img
.
shape
[
i
+
2
]
+
pad
[
i
][
0
]
+
pad
[
i
][
1
]
for
i
in
range
(
self
.
convdim
)),
for
i
in
range
(
self
.
convdim
)),
dtype
=
img
.
dtype
)
dtype
=
img
.
dtype
)
new_img
[(
slice
(
None
),
slice
(
None
))
+
new_img
[(
slice
(
None
),
slice
(
None
))
+
tuple
(
slice
(
pad
[
i
]
,
img
.
shape
[
i
+
2
]
+
pad
[
i
])
tuple
(
slice
(
pad
[
i
]
[
0
],
img
.
shape
[
i
+
2
]
+
pad
[
i
][
0
])
for
i
in
range
(
self
.
convdim
))]
=
img
for
i
in
range
(
self
.
convdim
))]
=
img
img
=
new_img
img
=
new_img
if
not
self
.
filter_flip
:
if
not
self
.
filter_flip
:
...
@@ -2080,7 +2298,6 @@ class AbstractConv(BaseAbstractConv):
...
@@ -2080,7 +2298,6 @@ class AbstractConv(BaseAbstractConv):
conv_out
=
conv_out
[(
slice
(
None
),
slice
(
None
))
+
conv_out
=
conv_out
[(
slice
(
None
),
slice
(
None
))
+
tuple
(
slice
(
None
,
None
,
self
.
subsample
[
i
])
tuple
(
slice
(
None
,
None
,
self
.
subsample
[
i
])
for
i
in
range
(
self
.
convdim
))]
for
i
in
range
(
self
.
convdim
))]
o
[
0
]
=
node
.
outputs
[
0
]
.
type
.
filter
(
conv_out
)
o
[
0
]
=
node
.
outputs
[
0
]
.
type
.
filter
(
conv_out
)
def
R_op
(
self
,
inputs
,
eval_points
):
def
R_op
(
self
,
inputs
,
eval_points
):
...
@@ -2296,34 +2513,21 @@ class AbstractConv_gradWeights(BaseAbstractConv):
...
@@ -2296,34 +2513,21 @@ class AbstractConv_gradWeights(BaseAbstractConv):
o
,
=
out_
o
,
=
out_
mode
=
self
.
border_mode
if
not
((
isinstance
(
mode
,
tuple
)
and
min
(
mode
)
>=
0
)
or
mode
in
(
'valid'
,
'full'
,
'half'
)):
raise
ValueError
(
'invalid border_mode {}, which must be either '
'"valid", "full", "half", an integer or a tuple of'
' integers'
.
format
(
mode
))
if
self
.
unshared
and
self
.
convdim
!=
2
:
if
self
.
unshared
and
self
.
convdim
!=
2
:
raise
NotImplementedError
(
'Unshared convolution not implemented for
%
dD'
raise
NotImplementedError
(
'Unshared convolution not implemented for
%
dD'
%
self
.
convdim
)
%
self
.
convdim
)
dil_shape
=
tuple
((
shape
[
i
]
-
1
)
*
self
.
filter_dilation
[
i
]
+
1
dil_shape
=
tuple
((
shape
[
i
]
-
1
)
*
self
.
filter_dilation
[
i
]
+
1
for
i
in
range
(
self
.
convdim
))
for
i
in
range
(
self
.
convdim
))
if
mode
==
"full"
:
pad
=
border_mode_to_pad
(
self
.
border_mode
,
self
.
convdim
,
dil_shape
)
mode
=
tuple
(
dil_shape
[
i
]
-
1
for
i
in
range
(
self
.
convdim
))
elif
mode
==
"half"
:
mode
=
tuple
(
dil_shape
[
i
]
//
2
for
i
in
range
(
self
.
convdim
))
if
isinstance
(
mode
,
tuple
):
pad
=
tuple
(
int
(
mode
[
i
])
for
i
in
range
(
self
.
convdim
))
mode
=
"valid"
if
any
(
p
!=
(
0
,
0
)
for
p
in
pad
):
new_img
=
np
.
zeros
((
img
.
shape
[
0
],
img
.
shape
[
1
])
+
new_img
=
np
.
zeros
((
img
.
shape
[
0
],
img
.
shape
[
1
])
+
tuple
(
img
.
shape
[
i
+
2
]
+
2
*
pad
[
i
]
tuple
(
img
.
shape
[
i
+
2
]
+
pad
[
i
][
0
]
+
pad
[
i
][
1
]
for
i
in
range
(
self
.
convdim
)),
for
i
in
range
(
self
.
convdim
)),
dtype
=
img
.
dtype
)
dtype
=
img
.
dtype
)
new_img
[(
slice
(
None
),
slice
(
None
))
+
new_img
[(
slice
(
None
),
slice
(
None
))
+
tuple
(
slice
(
pad
[
i
]
,
img
.
shape
[
i
+
2
]
+
pad
[
i
])
tuple
(
slice
(
pad
[
i
]
[
0
],
img
.
shape
[
i
+
2
]
+
pad
[
i
][
0
])
for
i
in
range
(
self
.
convdim
))]
=
img
for
i
in
range
(
self
.
convdim
))]
=
img
img
=
new_img
img
=
new_img
...
@@ -2611,16 +2815,13 @@ class AbstractConv_gradInputs(BaseAbstractConv):
...
@@ -2611,16 +2815,13 @@ class AbstractConv_gradInputs(BaseAbstractConv):
topgrad
=
np
.
asarray
(
topgrad
)
topgrad
=
np
.
asarray
(
topgrad
)
o
,
=
out_
o
,
=
out_
mode
=
self
.
border_mode
if
not
((
isinstance
(
mode
,
tuple
)
and
min
(
mode
)
>=
0
)
or
mode
in
(
'valid'
,
'full'
,
'half'
)):
raise
ValueError
(
'invalid border_mode {}, which must be either '
'"valid", "full", "half", an integer or a tuple of'
' integers'
.
format
(
mode
))
if
self
.
unshared
and
self
.
convdim
!=
2
:
if
self
.
unshared
and
self
.
convdim
!=
2
:
raise
NotImplementedError
(
'Unshared convolution not implemented for
%
dD'
raise
NotImplementedError
(
'Unshared convolution not implemented for
%
dD'
%
self
.
convdim
)
%
self
.
convdim
)
dil_kernshp
=
tuple
((
kern
.
shape
[
-
self
.
convdim
+
i
]
-
1
)
*
self
.
filter_dilation
[
i
]
+
1
for
i
in
range
(
self
.
convdim
))
pad
=
border_mode_to_pad
(
self
.
border_mode
,
self
.
convdim
,
dil_kernshp
)
imshp
=
self
.
imshp
[:]
if
self
.
imshp
is
not
None
else
[
None
]
*
(
2
+
self
.
convdim
)
imshp
=
self
.
imshp
[:]
if
self
.
imshp
is
not
None
else
[
None
]
*
(
2
+
self
.
convdim
)
fallback_imshp
=
([
topgrad
.
shape
[
0
],
kern
.
shape
[
-
self
.
convdim
-
1
]]
+
fallback_imshp
=
([
topgrad
.
shape
[
0
],
kern
.
shape
[
-
self
.
convdim
-
1
]]
+
...
@@ -2636,20 +2837,9 @@ class AbstractConv_gradInputs(BaseAbstractConv):
...
@@ -2636,20 +2837,9 @@ class AbstractConv_gradInputs(BaseAbstractConv):
'would produce an output of shape {}, but the given topgrad '
'would produce an output of shape {}, but the given topgrad '
'has shape {}'
.
format
(
tuple
(
expected_topgrad_shape
),
'has shape {}'
.
format
(
tuple
(
expected_topgrad_shape
),
tuple
(
topgrad
.
shape
)))
tuple
(
topgrad
.
shape
)))
dil_kernshp
=
tuple
((
kern
.
shape
[
-
self
.
convdim
+
i
]
-
1
)
*
self
.
filter_dilation
[
i
]
+
1
for
i
in
range
(
self
.
convdim
))
pad
=
(
0
,)
*
self
.
convdim
if
mode
==
"full"
:
pad
=
tuple
(
dil_kernshp
[
i
]
-
1
for
i
in
range
(
self
.
convdim
))
elif
mode
==
"half"
:
pad
=
tuple
(
dil_kernshp
[
i
]
//
2
for
i
in
range
(
self
.
convdim
))
elif
isinstance
(
mode
,
tuple
):
pad
=
tuple
(
mode
[
i
]
for
i
in
range
(
self
.
convdim
))
if
any
(
self
.
subsample
[
i
]
>
1
for
i
in
range
(
self
.
convdim
)):
if
any
(
self
.
subsample
[
i
]
>
1
for
i
in
range
(
self
.
convdim
)):
new_shape
=
((
topgrad
.
shape
[
0
],
topgrad
.
shape
[
1
])
+
new_shape
=
((
topgrad
.
shape
[
0
],
topgrad
.
shape
[
1
])
+
tuple
(
shape
[
i
]
+
2
*
pad
[
i
]
-
dil_kernshp
[
i
]
+
1
tuple
(
shape
[
i
]
+
pad
[
i
][
0
]
+
pad
[
i
][
1
]
-
dil_kernshp
[
i
]
+
1
for
i
in
range
(
self
.
convdim
)))
for
i
in
range
(
self
.
convdim
)))
new_topgrad
=
np
.
zeros
((
new_shape
),
dtype
=
topgrad
.
dtype
)
new_topgrad
=
np
.
zeros
((
new_shape
),
dtype
=
topgrad
.
dtype
)
new_topgrad
[(
slice
(
None
),
slice
(
None
))
+
new_topgrad
[(
slice
(
None
),
slice
(
None
))
+
...
@@ -2705,9 +2895,9 @@ class AbstractConv_gradInputs(BaseAbstractConv):
...
@@ -2705,9 +2895,9 @@ class AbstractConv_gradInputs(BaseAbstractConv):
if
self
.
filter_flip
:
if
self
.
filter_flip
:
img
=
img
[
flip_filters
]
img
=
img
[
flip_filters
]
if
any
(
p
>
0
for
p
in
pad
):
if
any
(
p
!=
(
0
,
0
)
for
p
in
pad
):
img
=
img
[(
slice
(
None
),
slice
(
None
))
+
img
=
img
[(
slice
(
None
),
slice
(
None
))
+
tuple
(
slice
(
pad
[
i
]
,
img
.
shape
[
i
+
2
]
-
pad
[
i
])
tuple
(
slice
(
pad
[
i
]
[
0
],
img
.
shape
[
i
+
2
]
-
pad
[
i
][
1
])
for
i
in
range
(
self
.
convdim
))]
for
i
in
range
(
self
.
convdim
))]
o
[
0
]
=
node
.
outputs
[
0
]
.
type
.
filter
(
img
)
o
[
0
]
=
node
.
outputs
[
0
]
.
type
.
filter
(
img
)
...
...
theano/tensor/nnet/c_code/corr_gemm.c
浏览文件 @
ea62004b
...
@@ -31,23 +31,23 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
...
@@ -31,23 +31,23 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
void
im2col
(
const
%
(
float_type
)
s
*
data_im
,
const
int
channels
,
void
im2col
(
const
%
(
float_type
)
s
*
data_im
,
const
int
channels
,
const
int
height
,
const
int
width
,
const
int
kernel_h
,
const
int
kernel_w
,
const
int
height
,
const
int
width
,
const
int
kernel_h
,
const
int
kernel_w
,
const
int
dilation_h
,
const
int
dilation_w
,
const
int
dilation_h
,
const
int
dilation_w
,
const
int
pad_h
,
const
int
pad_w
,
const
int
pad_h
l
,
const
int
pad_hr
,
const
int
pad_wl
,
const
int
pad_wr
,
const
int
stride_h
,
const
int
stride_w
,
const
int
stride_h
,
const
int
stride_w
,
%
(
float_type
)
s
*
data_col
)
{
%
(
float_type
)
s
*
data_col
)
{
// Implicit dilated kernel size
// Implicit dilated kernel size
int
dil_kernel_h
=
(
kernel_h
-
1
)
*
dilation_h
+
1
;
int
dil_kernel_h
=
(
kernel_h
-
1
)
*
dilation_h
+
1
;
int
dil_kernel_w
=
(
kernel_w
-
1
)
*
dilation_w
+
1
;
int
dil_kernel_w
=
(
kernel_w
-
1
)
*
dilation_w
+
1
;
int
height_col
=
(
height
+
2
*
pad_h
-
dil_kernel_h
)
/
stride_h
+
1
;
int
height_col
=
(
height
+
pad_hl
+
pad_hr
-
dil_kernel_h
)
/
stride_h
+
1
;
int
width_col
=
(
width
+
2
*
pad_w
-
dil_kernel_w
)
/
stride_w
+
1
;
int
width_col
=
(
width
+
pad_wl
+
pad_wr
-
dil_kernel_w
)
/
stride_w
+
1
;
int
channels_col
=
channels
*
kernel_h
*
kernel_w
;
int
channels_col
=
channels
*
kernel_h
*
kernel_w
;
for
(
int
c
=
0
;
c
<
channels_col
;
++
c
)
{
for
(
int
c
=
0
;
c
<
channels_col
;
++
c
)
{
int
w_offset
=
c
%%
kernel_w
;
int
w_offset
=
c
%%
kernel_w
;
int
h_offset
=
(
c
/
kernel_w
)
%%
kernel_h
;
int
h_offset
=
(
c
/
kernel_w
)
%%
kernel_h
;
int
c_im
=
c
/
kernel_h
/
kernel_w
;
int
c_im
=
c
/
kernel_h
/
kernel_w
;
for
(
int
h
=
0
;
h
<
height_col
;
++
h
)
{
for
(
int
h
=
0
;
h
<
height_col
;
++
h
)
{
int
h_pad
=
h
*
stride_h
-
pad_h
+
h_offset
*
dilation_h
;
int
h_pad
=
h
*
stride_h
-
pad_h
l
+
h_offset
*
dilation_h
;
for
(
int
w
=
0
;
w
<
width_col
;
++
w
)
{
for
(
int
w
=
0
;
w
<
width_col
;
++
w
)
{
int
w_pad
=
w
*
stride_w
-
pad_w
+
w_offset
*
dilation_w
;
int
w_pad
=
w
*
stride_w
-
pad_w
l
+
w_offset
*
dilation_w
;
if
(
h_pad
>=
0
&&
h_pad
<
height
&&
w_pad
>=
0
&&
w_pad
<
width
)
if
(
h_pad
>=
0
&&
h_pad
<
height
&&
w_pad
>=
0
&&
w_pad
<
width
)
data_col
[(
npy_intp
)(
c
*
height_col
+
h
)
*
width_col
+
w
]
=
data_col
[(
npy_intp
)(
c
*
height_col
+
h
)
*
width_col
+
w
]
=
data_im
[(
npy_intp
)(
c_im
*
height
+
h_pad
)
*
width
+
w_pad
];
data_im
[(
npy_intp
)(
c_im
*
height
+
h_pad
)
*
width
+
w_pad
];
...
@@ -64,13 +64,14 @@ void im2col(const %(float_type)s* data_im, const int channels,
...
@@ -64,13 +64,14 @@ void im2col(const %(float_type)s* data_im, const int channels,
void
col2im
(
const
%
(
float_type
)
s
*
data_col
,
const
int
channels
,
void
col2im
(
const
%
(
float_type
)
s
*
data_col
,
const
int
channels
,
const
int
height
,
const
int
width
,
const
int
patch_h
,
const
int
patch_w
,
const
int
height
,
const
int
width
,
const
int
patch_h
,
const
int
patch_w
,
const
int
dilation_h
,
const
int
dilation_w
,
const
int
dilation_h
,
const
int
dilation_w
,
const
int
pad_h
,
const
int
pad_w
,
const
int
stride_h
,
const
int
pad_hl
,
const
int
pad_hr
,
const
int
pad_wl
,
const
int
pad_wr
,
const
int
stride_w
,
%
(
float_type
)
s
*
data_im
)
{
const
int
stride_h
,
const
int
stride_w
,
%
(
float_type
)
s
*
data_im
)
{
// Implicit dilated patch
// Implicit dilated patch
int
dil_patch_h
=
(
patch_h
-
1
)
*
dilation_h
+
1
;
int
dil_patch_h
=
(
patch_h
-
1
)
*
dilation_h
+
1
;
int
dil_patch_w
=
(
patch_w
-
1
)
*
dilation_w
+
1
;
int
dil_patch_w
=
(
patch_w
-
1
)
*
dilation_w
+
1
;
int
height_col
=
(
height
+
2
*
pad_h
-
dil_patch_h
)
/
stride_h
+
1
;
int
height_col
=
(
height
+
pad_hl
+
pad_hr
-
dil_patch_h
)
/
stride_h
+
1
;
int
width_col
=
(
width
+
2
*
pad_w
-
dil_patch_w
)
/
stride_w
+
1
;
int
width_col
=
(
width
+
pad_wl
+
pad_wr
-
dil_patch_w
)
/
stride_w
+
1
;
int
num_kernels
=
channels
*
height
*
width
;
int
num_kernels
=
channels
*
height
*
width
;
int
channels_col
=
channels
*
patch_h
*
patch_w
;
int
channels_col
=
channels
*
patch_h
*
patch_w
;
for
(
int
c
=
0
;
c
<
channels_col
;
++
c
)
{
for
(
int
c
=
0
;
c
<
channels_col
;
++
c
)
{
...
@@ -78,9 +79,9 @@ void col2im(const %(float_type)s* data_col, const int channels,
...
@@ -78,9 +79,9 @@ void col2im(const %(float_type)s* data_col, const int channels,
int
h_offset
=
(
c
/
patch_w
)
%%
patch_h
;
int
h_offset
=
(
c
/
patch_w
)
%%
patch_h
;
int
c_im
=
c
/
patch_h
/
patch_w
;
int
c_im
=
c
/
patch_h
/
patch_w
;
for
(
int
h
=
0
;
h
<
height_col
;
++
h
)
{
for
(
int
h
=
0
;
h
<
height_col
;
++
h
)
{
int
h_pad
=
h
*
stride_h
-
pad_h
+
h_offset
*
dilation_h
;
int
h_pad
=
h
*
stride_h
-
pad_h
l
+
h_offset
*
dilation_h
;
for
(
int
w
=
0
;
w
<
width_col
;
++
w
)
{
for
(
int
w
=
0
;
w
<
width_col
;
++
w
)
{
int
w_pad
=
w
*
stride_w
-
pad_w
+
w_offset
*
dilation_w
;
int
w_pad
=
w
*
stride_w
-
pad_w
l
+
w_offset
*
dilation_w
;
if
(
h_pad
>=
0
&&
h_pad
<
height
&&
w_pad
>=
0
&&
w_pad
<
width
)
if
(
h_pad
>=
0
&&
h_pad
<
height
&&
w_pad
>=
0
&&
w_pad
<
width
)
data_im
[(
npy_intp
)(
c_im
*
height
+
h_pad
)
*
width
+
w_pad
]
+=
data_im
[(
npy_intp
)(
c_im
*
height
+
h_pad
)
*
width
+
w_pad
]
+=
data_col
[(
npy_intp
)(
c
*
height_col
+
h
)
*
width_col
+
w
];
data_col
[(
npy_intp
)(
c
*
height_col
+
h
)
*
width_col
+
w
];
...
@@ -105,8 +106,10 @@ PyArrayObject* corrMM(PyArrayObject* bottom,
...
@@ -105,8 +106,10 @@ PyArrayObject* corrMM(PyArrayObject* bottom,
const
int
dW
=
1
,
const
int
dW
=
1
,
const
int
dilH
=
1
,
const
int
dilH
=
1
,
const
int
dilW
=
1
,
const
int
dilW
=
1
,
const
int
padH
=
0
,
const
int
padH_l
=
0
,
const
int
padW
=
0
,
const
int
padH_r
=
0
,
const
int
padW_l
=
0
,
const
int
padW_r
=
0
,
const
int
numgroups
=
1
,
const
int
numgroups
=
1
,
const
int
unshared
=
0
)
const
int
unshared
=
0
)
{
{
...
@@ -172,8 +175,8 @@ PyArrayObject* corrMM(PyArrayObject* bottom,
...
@@ -172,8 +175,8 @@ PyArrayObject* corrMM(PyArrayObject* bottom,
const
int
dil_kH
=
(
kH
-
1
)
*
dilH
+
1
;
const
int
dil_kH
=
(
kH
-
1
)
*
dilH
+
1
;
const
int
dil_kW
=
(
kW
-
1
)
*
dilW
+
1
;
const
int
dil_kW
=
(
kW
-
1
)
*
dilW
+
1
;
// top: (batchSize, nFilters, topHeight, topWidth)
// top: (batchSize, nFilters, topHeight, topWidth)
const
int
topHeightNoDH
=
(
bottomHeight
+
2
*
padH
-
dil_kH
);
const
int
topHeightNoDH
=
(
bottomHeight
+
padH_l
+
padH_r
-
dil_kH
);
const
int
topWidthNoDW
=
(
bottomWidth
+
2
*
padW
-
dil_kW
);
const
int
topWidthNoDW
=
(
bottomWidth
+
padW_l
+
padW_r
-
dil_kW
);
// the above values might be negative so we need to use Python-like
// the above values might be negative so we need to use Python-like
// flooring integer division to be compatible with get_conv_output.
// flooring integer division to be compatible with get_conv_output.
// note: this macro implements Python's // for negative x only
// note: this macro implements Python's // for negative x only
...
@@ -303,7 +306,7 @@ PyArrayObject* corrMM(PyArrayObject* bottom,
...
@@ -303,7 +306,7 @@ PyArrayObject* corrMM(PyArrayObject* bottom,
int
tid
=
%
(
omp_get_thread_num
)
s
;
int
tid
=
%
(
omp_get_thread_num
)
s
;
// First, im2col
// First, im2col
im2col
((
%
(
float_type
)
s
*
)
PyArray_DATA
(
bottom
)
+
n
*
batch_bottom_stride
,
nChannels
,
im2col
((
%
(
float_type
)
s
*
)
PyArray_DATA
(
bottom
)
+
n
*
batch_bottom_stride
,
nChannels
,
bottomHeight
,
bottomWidth
,
kH
,
kW
,
dilH
,
dilW
,
padH
,
padW
,
dH
,
dW
,
bottomHeight
,
bottomWidth
,
kH
,
kW
,
dilH
,
dilW
,
padH
_l
,
padH_r
,
padW_l
,
padW_r
,
dH
,
dW
,
(
%
(
float_type
)
s
*
)
PyArray_DATA
(
col
)
+
tid
*
col_stride
);
(
%
(
float_type
)
s
*
)
PyArray_DATA
(
col
)
+
tid
*
col_stride
);
// Second, gemm
// Second, gemm
if
(
unshared
)
{
if
(
unshared
)
{
...
@@ -396,7 +399,7 @@ PyArrayObject* corrMM(PyArrayObject* bottom,
...
@@ -396,7 +399,7 @@ PyArrayObject* corrMM(PyArrayObject* bottom,
int
tid
=
%
(
omp_get_thread_num
)
s
;
int
tid
=
%
(
omp_get_thread_num
)
s
;
// First, im2col
// First, im2col
im2col
((
%
(
float_type
)
s
*
)
PyArray_DATA
(
bottom
)
+
n
*
batch_bottom_stride
,
im2col
((
%
(
float_type
)
s
*
)
PyArray_DATA
(
bottom
)
+
n
*
batch_bottom_stride
,
nChannels
,
bottomHeight
,
bottomWidth
,
kH
,
kW
,
dilH
,
dilW
,
padH
,
padW
,
dH
,
dW
,
nChannels
,
bottomHeight
,
bottomWidth
,
kH
,
kW
,
dilH
,
dilW
,
padH
_l
,
padH_r
,
padW_l
,
padW_r
,
dH
,
dW
,
(
%
(
float_type
)
s
*
)
PyArray_DATA
(
col
)
+
tid
*
col_stride
);
(
%
(
float_type
)
s
*
)
PyArray_DATA
(
col
)
+
tid
*
col_stride
);
// Second, gemm
// Second, gemm
// Note that we accumulate into weight. We do so by setting beta = 0
// Note that we accumulate into weight. We do so by setting beta = 0
...
@@ -519,7 +522,7 @@ PyArrayObject* corrMM(PyArrayObject* bottom,
...
@@ -519,7 +522,7 @@ PyArrayObject* corrMM(PyArrayObject* bottom,
}
}
// col2im back to the data
// col2im back to the data
col2im
((
%
(
float_type
)
s
*
)
PyArray_DATA
(
col
)
+
tid
*
col_stride
,
nChannels
,
bottomHeight
,
bottomWidth
,
col2im
((
%
(
float_type
)
s
*
)
PyArray_DATA
(
col
)
+
tid
*
col_stride
,
nChannels
,
bottomHeight
,
bottomWidth
,
kH
,
kW
,
dilH
,
dilW
,
padH
,
padW
,
kH
,
kW
,
dilH
,
dilW
,
padH
_l
,
padH_r
,
padW_l
,
padW_r
,
dH
,
dW
,
(
%
(
float_type
)
s
*
)
PyArray_DATA
(
bottom
)
+
n
*
batch_bottom_stride
);
dH
,
dW
,
(
%
(
float_type
)
s
*
)
PyArray_DATA
(
bottom
)
+
n
*
batch_bottom_stride
);
}
}
// Restore to previous blas threads
// Restore to previous blas threads
...
...
theano/tensor/nnet/corr.py
浏览文件 @
ea62004b
...
@@ -34,8 +34,8 @@ class BaseCorrMM(gof.OpenMPOp):
...
@@ -34,8 +34,8 @@ class BaseCorrMM(gof.OpenMPOp):
Parameters
Parameters
----------
----------
border_mode : {'valid', 'full', 'half'}
border_mode : {'valid', 'full', 'half'}
Additionally, the padding size could be directly specified by an integer
Additionally, the padding size could be directly specified by an integer
,
or a pair of integers
a pair of integers, or two pairs of integers.
subsample
subsample
Perform subsampling of the output (default: (1, 1)).
Perform subsampling of the output (default: (1, 1)).
filter_dilation
filter_dilation
...
@@ -55,7 +55,8 @@ class BaseCorrMM(gof.OpenMPOp):
...
@@ -55,7 +55,8 @@ class BaseCorrMM(gof.OpenMPOp):
(
'DIRECTION_BACKPROP_INPUTS'
,
'backprop inputs'
)),
# 2
(
'DIRECTION_BACKPROP_INPUTS'
,
'backprop inputs'
)),
# 2
dH
=
int64
,
dW
=
int64
,
dH
=
int64
,
dW
=
int64
,
dilH
=
int64
,
dilW
=
int64
,
dilH
=
int64
,
dilW
=
int64
,
padH
=
int64
,
padW
=
int64
,
padH_l
=
int64
,
padH_r
=
int64
,
padW_l
=
int64
,
padW_r
=
int64
,
num_groups
=
int64
,
unshared
=
int8
)
num_groups
=
int64
,
unshared
=
int8
)
def
__init__
(
self
,
border_mode
=
"valid"
,
subsample
=
(
1
,
1
),
def
__init__
(
self
,
border_mode
=
"valid"
,
subsample
=
(
1
,
1
),
...
@@ -66,20 +67,29 @@ class BaseCorrMM(gof.OpenMPOp):
...
@@ -66,20 +67,29 @@ class BaseCorrMM(gof.OpenMPOp):
raise
ValueError
(
raise
ValueError
(
'invalid border_mode {}, which must be a '
'invalid border_mode {}, which must be a '
'non-negative integer'
.
format
(
border_mode
))
'non-negative integer'
.
format
(
border_mode
))
border_mode
=
(
border_mode
,
border_mode
)
border_mode
=
(
(
border_mode
,
border_mode
),)
*
2
if
isinstance
(
border_mode
,
tuple
):
el
if
isinstance
(
border_mode
,
tuple
):
if
len
(
border_mode
)
!=
2
or
border_mode
[
0
]
<
0
or
border_mode
[
1
]
<
0
:
if
len
(
border_mode
)
!=
2
:
raise
ValueError
(
raise
ValueError
(
'invalid border_mode {}, which must be a '
'invalid border_mode {} which must be a '
'pair of non-negative integers'
.
format
(
border_mode
))
'tuple of length 2'
.
format
(
border_mode
))
pad_h
,
pad_w
=
map
(
int
,
border_mode
)
border
=
()
border_mode
=
(
pad_h
,
pad_w
)
for
mode
in
border_mode
:
if
not
((
isinstance
(
border_mode
,
tuple
)
and
min
(
border_mode
)
>=
0
)
or
if
isinstance
(
mode
,
tuple
)
and
len
(
mode
)
==
2
and
\
border_mode
in
(
'valid'
,
'full'
,
'half'
)):
min
(
mode
)
>=
0
:
border
+=
((
int
(
mode
[
0
]),
int
(
mode
[
1
])),)
elif
mode
>=
0
:
border
+=
((
int
(
mode
),
int
(
mode
)),)
else
:
raise
ValueError
(
'invalid border mode {}. The tuple can only contain '
'integers or tuples of length 2'
.
format
(
border_mode
))
border_mode
=
border
elif
border_mode
not
in
(
'valid'
,
'full'
,
'half'
):
raise
ValueError
(
raise
ValueError
(
'invalid border_mode {}, which must be either '
'invalid border_mode {}, which must be either '
'"valid", "full", "half", an integer or a
pair of
'
'"valid", "full", "half", an integer or a
tuple
'
' integers'
.
format
(
border_mode
))
'
of two integers or a pair of
integers'
.
format
(
border_mode
))
self
.
border_mode
=
border_mode
self
.
border_mode
=
border_mode
if
len
(
subsample
)
!=
2
:
if
len
(
subsample
)
!=
2
:
raise
ValueError
(
"subsample must have two elements"
)
raise
ValueError
(
"subsample must have two elements"
)
...
@@ -110,14 +120,14 @@ class BaseCorrMM(gof.OpenMPOp):
...
@@ -110,14 +120,14 @@ class BaseCorrMM(gof.OpenMPOp):
@property
@property
def
pad
(
self
):
def
pad
(
self
):
if
self
.
border_mode
==
"half"
:
if
self
.
border_mode
==
"half"
:
return
(
-
1
,
-
1
)
return
(
(
-
1
,
-
1
),)
*
2
elif
self
.
border_mode
==
"full"
:
elif
self
.
border_mode
==
"full"
:
return
(
-
2
,
-
2
)
return
(
(
-
2
,
-
2
),)
*
2
elif
isinstance
(
self
.
border_mode
,
tuple
):
elif
isinstance
(
self
.
border_mode
,
tuple
):
return
self
.
border_mode
return
self
.
border_mode
else
:
else
:
assert
self
.
border_mode
==
"valid"
assert
self
.
border_mode
==
"valid"
return
(
0
,
0
)
return
(
(
0
,
0
),)
*
2
# Direction should be converted to real enum value,
# Direction should be converted to real enum value,
# as it is compared to integer later in c_code_helper().
# as it is compared to integer later in c_code_helper().
...
@@ -129,8 +139,10 @@ class BaseCorrMM(gof.OpenMPOp):
...
@@ -129,8 +139,10 @@ class BaseCorrMM(gof.OpenMPOp):
dilH
=
property
(
lambda
self
:
self
.
filter_dilation
[
0
])
dilH
=
property
(
lambda
self
:
self
.
filter_dilation
[
0
])
dilW
=
property
(
lambda
self
:
self
.
filter_dilation
[
1
])
dilW
=
property
(
lambda
self
:
self
.
filter_dilation
[
1
])
padH
=
property
(
lambda
self
:
self
.
pad
[
0
])
padH_l
=
property
(
lambda
self
:
self
.
pad
[
0
][
0
])
padW
=
property
(
lambda
self
:
self
.
pad
[
1
])
padH_r
=
property
(
lambda
self
:
self
.
pad
[
0
][
1
])
padW_l
=
property
(
lambda
self
:
self
.
pad
[
1
][
0
])
padW_r
=
property
(
lambda
self
:
self
.
pad
[
1
][
1
])
def
__str__
(
self
):
def
__str__
(
self
):
return
'
%
s{
%
s,
%
s,
%
s,
%
s
%
s}'
%
(
return
'
%
s{
%
s,
%
s,
%
s,
%
s
%
s}'
%
(
...
@@ -183,7 +195,7 @@ class BaseCorrMM(gof.OpenMPOp):
...
@@ -183,7 +195,7 @@ class BaseCorrMM(gof.OpenMPOp):
def
c_code_cache_version
(
self
):
def
c_code_cache_version
(
self
):
# raise this whenever modifying any of the support_code_files
# raise this whenever modifying any of the support_code_files
return
(
9
,
self
.
openmp
,
blas_header_version
())
return
(
10
,
self
.
openmp
,
blas_header_version
())
def
c_support_code_apply
(
self
,
node
,
nodename
):
def
c_support_code_apply
(
self
,
node
,
nodename
):
# REMEMBER TO RAISE c_code_cache_version when changing any of
# REMEMBER TO RAISE c_code_cache_version when changing any of
...
@@ -271,13 +283,13 @@ class BaseCorrMM(gof.OpenMPOp):
...
@@ -271,13 +283,13 @@ class BaseCorrMM(gof.OpenMPOp):
if
height
:
if
height
:
height
=
'(*(npy_int64 *)(PyArray_DATA(
%
s)))'
%
height
height
=
'(*(npy_int64 *)(PyArray_DATA(
%
s)))'
%
height
else
:
else
:
if
((
self
.
direction
!=
0
)
and
(
self
.
dH
!=
1
))
or
((
self
.
direction
==
1
)
and
(
self
.
padH
==
-
1
)):
if
((
self
.
direction
!=
0
)
and
(
self
.
dH
!=
1
))
or
((
self
.
direction
==
1
)
and
(
self
.
padH
_l
==
-
1
or
self
.
padH_r
==
-
1
)):
raise
ValueError
(
"height must be given for backprop with vertical sampling or border_mode='half'"
)
raise
ValueError
(
"height must be given for backprop with vertical sampling or border_mode='half'"
)
height
=
'-1'
height
=
'-1'
if
width
:
if
width
:
width
=
'(*(npy_int64 *)(PyArray_DATA(
%
s)))'
%
width
width
=
'(*(npy_int64 *)(PyArray_DATA(
%
s)))'
%
width
else
:
else
:
if
((
self
.
direction
!=
0
)
and
(
self
.
dW
!=
1
))
or
((
self
.
direction
==
1
)
and
(
self
.
padW
==
-
1
)):
if
((
self
.
direction
!=
0
)
and
(
self
.
dW
!=
1
))
or
((
self
.
direction
==
1
)
and
(
self
.
padW
_l
==
-
1
or
self
.
padW_r
==
-
1
)):
raise
ValueError
(
"width must be given for backprop with horizontal sampling or border_mode='half'"
)
raise
ValueError
(
"width must be given for backprop with horizontal sampling or border_mode='half'"
)
width
=
'-1'
width
=
'-1'
...
@@ -290,8 +302,10 @@ class BaseCorrMM(gof.OpenMPOp):
...
@@ -290,8 +302,10 @@ class BaseCorrMM(gof.OpenMPOp):
int dW =
%(params)
s->dW;
int dW =
%(params)
s->dW;
int dilH =
%(params)
s->dilH;
int dilH =
%(params)
s->dilH;
int dilW =
%(params)
s->dilW;
int dilW =
%(params)
s->dilW;
int padH =
%(params)
s->padH;
int padH_l =
%(params)
s->padH_l;
int padW =
%(params)
s->padW;
int padH_r =
%(params)
s->padH_r;
int padW_l =
%(params)
s->padW_l;
int padW_r =
%(params)
s->padW_r;
int numgroups =
%(params)
s->num_groups;
int numgroups =
%(params)
s->num_groups;
int unshared =
%(params)
s->unshared;
int unshared =
%(params)
s->unshared;
...
@@ -334,23 +348,23 @@ class BaseCorrMM(gof.OpenMPOp):
...
@@ -334,23 +348,23 @@ class BaseCorrMM(gof.OpenMPOp):
// kernel height is specified (perhaps vertical subsampling or half padding)
// kernel height is specified (perhaps vertical subsampling or half padding)
kH =
%(height)
s;
kH =
%(height)
s;
}
}
else if (padH == -2) {
else if (padH
_l == -2 || padH_r
== -2) {
// vertical full padding, we can infer the kernel height
// vertical full padding, we can infer the kernel height
kH = (2 - PyArray_DIMS(bottom)[2] + (PyArray_DIMS(top)[2] - 1) * dH - 1)/ dilH + 1;
kH = (2 - PyArray_DIMS(bottom)[2] + (PyArray_DIMS(top)[2] - 1) * dH - 1)/ dilH + 1;
}
}
else {
else {
// explicit padding, we can infer the kernel height
// explicit padding, we can infer the kernel height
kH = (PyArray_DIMS(bottom)[2] +
2*padH
- (PyArray_DIMS(top)[2] - 1) * dH - 1) / dilH +1;
kH = (PyArray_DIMS(bottom)[2] +
padH_l + padH_r
- (PyArray_DIMS(top)[2] - 1) * dH - 1) / dilH +1;
}
}
if (
%(width)
s != -1) {
if (
%(width)
s != -1) {
// kernel width is specified (perhaps horizontal subsampling or half padding)
// kernel width is specified (perhaps horizontal subsampling or half padding)
kW =
%(width)
s;
kW =
%(width)
s;
}
}
else if (padW == -2) {
else if (padW
_l == -2 || padW_r
== -2) {
kW = (2 - PyArray_DIMS(bottom)[3] + (PyArray_DIMS(top)[3] - 1) * dW - 1) / dilW + 1;
kW = (2 - PyArray_DIMS(bottom)[3] + (PyArray_DIMS(top)[3] - 1) * dW - 1) / dilW + 1;
}
}
else {
else {
kW = (PyArray_DIMS(bottom)[3] +
2*padW
- (PyArray_DIMS(top)[3] - 1) * dW - 1) / dilW + 1;
kW = (PyArray_DIMS(bottom)[3] +
padW_l + padW_r
- (PyArray_DIMS(top)[3] - 1) * dW - 1) / dilW + 1;
}
}
}
}
...
@@ -359,24 +373,24 @@ class BaseCorrMM(gof.OpenMPOp):
...
@@ -359,24 +373,24 @@ class BaseCorrMM(gof.OpenMPOp):
dil_kW = (kW - 1) * dilW + 1;
dil_kW = (kW - 1) * dilW + 1;
// Auto-padding if requested
// Auto-padding if requested
if (padH == -1) { // vertical half padding
if (padH
_l == -1 || padH_r
== -1) { // vertical half padding
padH = dil_kH / 2;
padH
_l = padH_r
= dil_kH / 2;
}
}
else if (padH == -2) { // vertical full padding
else if (padH
_l == -2 || padH_r
== -2) { // vertical full padding
padH = dil_kH - 1;
padH
_l = padH_r
= dil_kH - 1;
}
}
else if (padH
< 0
) {
else if (padH
_l < -2 || padH_r < -2
) {
PyErr_SetString(PyExc_ValueError, "BaseCorrMM: padH must be >= -2");
PyErr_SetString(PyExc_ValueError, "BaseCorrMM: padH
_l and padH_r
must be >= -2");
%(fail)
s
%(fail)
s
}
}
if (padW == -1) { // horizontal half padding
if (padW
_l == -1 || padW_r
== -1) { // horizontal half padding
padW = dil_kW / 2;
padW
_l = padW_r
= dil_kW / 2;
}
}
else if (padW == -2) { // horizontal full padding
else if (padW
_l == -2 || padW_r
== -2) { // horizontal full padding
padW = dil_kW - 1;
padW
_l = padW_r
= dil_kW - 1;
}
}
else if (padW
< 0
) {
else if (padW
_l < -2 || padW_r < -2
) {
PyErr_SetString(PyExc_ValueError, "BaseCorrMM: padW must be >= -2");
PyErr_SetString(PyExc_ValueError, "BaseCorrMM: padW
_l and padW_r
must be >= -2");
%(fail)
s
%(fail)
s
}
}
...
@@ -386,11 +400,11 @@ class BaseCorrMM(gof.OpenMPOp):
...
@@ -386,11 +400,11 @@ class BaseCorrMM(gof.OpenMPOp):
switch(direction) {
switch(direction) {
case 0: // forward pass
case 0: // forward pass
// output is top: (batchsize, num_filters, height, width)
// output is top: (batchsize, num_filters, height, width)
// height and width: top = (bottom +
2*pad
- ((weight-1)*dil + 1)) / sample + 1
// height and width: top = (bottom +
pad_l + pad_r
- ((weight-1)*dil + 1)) / sample + 1
out_dim[0] = (npy_intp)PyArray_DIMS(bottom)[0];
out_dim[0] = (npy_intp)PyArray_DIMS(bottom)[0];
out_dim[1] = (npy_intp)PyArray_DIMS(weights)[0];
out_dim[1] = (npy_intp)PyArray_DIMS(weights)[0];
out_dim[2] = (npy_intp)((PyArray_DIMS(bottom)[2] +
2*padH
- ((PyArray_DIMS(weights)[wdim-2]-1)*dilH + 1)) / dH + 1);
out_dim[2] = (npy_intp)((PyArray_DIMS(bottom)[2] +
padH_l + padH_r
- ((PyArray_DIMS(weights)[wdim-2]-1)*dilH + 1)) / dH + 1);
out_dim[3] = (npy_intp)((PyArray_DIMS(bottom)[3] +
2*padW
- ((PyArray_DIMS(weights)[wdim-1]-1)*dilW + 1)) / dW + 1);
out_dim[3] = (npy_intp)((PyArray_DIMS(bottom)[3] +
padW_l + padW_r
- ((PyArray_DIMS(weights)[wdim-1]-1)*dilW + 1)) / dW + 1);
if (out_dim[0] < 0 || out_dim[1] < 0 || out_dim[2] <= 0 || out_dim[3] <= 0)
if (out_dim[0] < 0 || out_dim[1] < 0 || out_dim[2] <= 0 || out_dim[3] <= 0)
{
{
if (unshared) {
if (unshared) {
...
@@ -425,7 +439,7 @@ class BaseCorrMM(gof.OpenMPOp):
...
@@ -425,7 +439,7 @@ class BaseCorrMM(gof.OpenMPOp):
break;
break;
case 1: // backprop wrt. weights
case 1: // backprop wrt. weights
// output is weights: (num_filters, num_channels, height, width)
// output is weights: (num_filters, num_channels, height, width)
// height and width: weights = (bottom +
2*pad
- (top - 1) * sample - 1) / dil + 1
// height and width: weights = (bottom +
pad_l + pad_r
- (top - 1) * sample - 1) / dil + 1
out_dim[0] = (npy_intp)PyArray_DIMS(top)[1];
out_dim[0] = (npy_intp)PyArray_DIMS(top)[1];
if (unshared){
if (unshared){
odim = 6;
odim = 6;
...
@@ -475,8 +489,8 @@ class BaseCorrMM(gof.OpenMPOp):
...
@@ -475,8 +489,8 @@ class BaseCorrMM(gof.OpenMPOp):
// height and width: bottom = (top - 1) * sample + (weights-1)*dil + 1 - 2*pad
// height and width: bottom = (top - 1) * sample + (weights-1)*dil + 1 - 2*pad
out_dim[0] = (npy_intp)PyArray_DIMS(top)[0];
out_dim[0] = (npy_intp)PyArray_DIMS(top)[0];
out_dim[1] = (npy_intp)PyArray_DIMS(weights)[wdim-3] * numgroups;
out_dim[1] = (npy_intp)PyArray_DIMS(weights)[wdim-3] * numgroups;
out_dim[2] = (npy_intp)((
%(height)
s != -1) ?
%(height)
s : (PyArray_DIMS(top)[2] - 1) * dH + (PyArray_DIMS(weights)[wdim-2]-1)*dilH + 1 -
2*padH
);
out_dim[2] = (npy_intp)((
%(height)
s != -1) ?
%(height)
s : (PyArray_DIMS(top)[2] - 1) * dH + (PyArray_DIMS(weights)[wdim-2]-1)*dilH + 1 -
padH_l - padH_r
);
out_dim[3] = (npy_intp)((
%(width)
s != -1) ?
%(width)
s : (PyArray_DIMS(top)[3] - 1) * dW + (PyArray_DIMS(weights)[wdim-1]-1)*dilW + 1 -
2*padW
);
out_dim[3] = (npy_intp)((
%(width)
s != -1) ?
%(width)
s : (PyArray_DIMS(top)[3] - 1) * dW + (PyArray_DIMS(weights)[wdim-1]-1)*dilW + 1 -
padW_l - padW_r
);
if (unshared) {
if (unshared) {
if (out_dim[0] < 0 || out_dim[1] < 0 || out_dim[2] <= 0 || out_dim[3] <= 0)
if (out_dim[0] < 0 || out_dim[1] < 0 || out_dim[2] <= 0 || out_dim[3] <= 0)
{
{
...
@@ -564,7 +578,8 @@ class BaseCorrMM(gof.OpenMPOp):
...
@@ -564,7 +578,8 @@ class BaseCorrMM(gof.OpenMPOp):
}
}
// Call corrMM code
// Call corrMM code
out2 = corrMM(
%(bottom)
s,
%(weights)
s,
%(top)
s, direction, dH, dW, dilH, dilW, padH, padW, numgroups, unshared);
out2 = corrMM(
%(bottom)
s,
%(weights)
s,
%(top)
s, direction, dH, dW, dilH, dilW,
padH_l, padH_r, padW_l, padW_r, numgroups, unshared);
if (out2==NULL){
if (out2==NULL){
%(fail)
s
%(fail)
s
}
}
...
@@ -588,8 +603,11 @@ class CorrMM(BaseCorrMM):
...
@@ -588,8 +603,11 @@ class CorrMM(BaseCorrMM):
``'valid'`` for ``(0, 0)`` (valid convolution, no padding), ``'full'``
``'valid'`` for ``(0, 0)`` (valid convolution, no padding), ``'full'``
for ``(kernel_rows - 1, kernel_columns - 1)`` (full convolution),
for ``(kernel_rows - 1, kernel_columns - 1)`` (full convolution),
``'half'`` for ``(kernel_rows // 2, kernel_columns // 2)`` (same
``'half'`` for ``(kernel_rows // 2, kernel_columns // 2)`` (same
convolution for odd-sized kernels). Note that the two widths are each
convolution for odd-sized kernels).
applied twice, once per side (left and right, top and bottom).
If it is a tuple containing 2 pairs of integers, then these specify
the padding to be applied on each side ((left, right), (top, bottom)).
Otherwise, each width is applied twice, once per side (left and right,
top and bottom).
subsample
subsample
The subsample operation applied to each output image.
The subsample operation applied to each output image.
Should be a tuple with 2 elements.
Should be a tuple with 2 elements.
...
@@ -706,14 +724,20 @@ class CorrMM_gradWeights(BaseCorrMM):
...
@@ -706,14 +724,20 @@ class CorrMM_gradWeights(BaseCorrMM):
def
infer_shape
(
self
,
node
,
input_shape
):
def
infer_shape
(
self
,
node
,
input_shape
):
if
self
.
border_mode
==
"half"
:
if
self
.
border_mode
==
"half"
:
padH
=
padW
=
-
1
padH
_l
=
padH_r
=
padW_l
=
padW_r
=
-
1
elif
self
.
border_mode
==
"full"
:
elif
self
.
border_mode
==
"full"
:
padH
=
padW
=
-
2
padH
_l
=
padH_r
=
padW_l
=
padW_r
=
-
2
elif
isinstance
(
self
.
border_mode
,
tuple
):
elif
isinstance
(
self
.
border_mode
,
tuple
):
padH
,
padW
=
self
.
border_mode
border
=
()
for
mode
in
self
.
border_mode
:
if
isinstance
(
mode
,
tuple
):
border
+=
((
int
(
mode
[
0
]),
int
(
mode
[
1
])),)
else
:
border
+=
((
int
(
mode
),
int
(
mode
)),)
(
padH_l
,
padH_r
),
(
padW_l
,
padW_r
)
=
border
else
:
else
:
assert
self
.
border_mode
==
"valid"
assert
self
.
border_mode
==
"valid"
padH
=
padW
=
0
padH
_l
=
padH_r
=
padW_l
=
padW_r
=
0
dH
,
dW
=
self
.
subsample
dH
,
dW
=
self
.
subsample
imshp
=
input_shape
[
0
]
imshp
=
input_shape
[
0
]
topshp
=
input_shape
[
1
]
topshp
=
input_shape
[
1
]
...
@@ -721,21 +745,21 @@ class CorrMM_gradWeights(BaseCorrMM):
...
@@ -721,21 +745,21 @@ class CorrMM_gradWeights(BaseCorrMM):
ssize
=
ssize
//
self
.
num_groups
ssize
=
ssize
//
self
.
num_groups
nkern
,
topshp
=
topshp
[
1
],
list
(
topshp
[
2
:])
nkern
,
topshp
=
topshp
[
1
],
list
(
topshp
[
2
:])
height_width
=
node
.
inputs
[
-
2
:]
height_width
=
node
.
inputs
[
-
2
:]
if
((
dH
!=
1
)
or
(
padH
==
-
1
)):
if
((
dH
!=
1
)
or
(
padH
_l
==
-
1
)
or
(
padH_r
==
-
1
)):
# vertical subsampling or half padding, kernel height is specified
# vertical subsampling or half padding, kernel height is specified
kH
=
height_width
[
0
]
kH
=
height_width
[
0
]
elif
padH
==
-
2
:
elif
(
padH_l
==
-
2
)
or
(
padH_r
==
-
2
)
:
# vertical full padding, we can infer the kernel height
# vertical full padding, we can infer the kernel height
kH
=
2
-
imshp
[
0
]
+
(
topshp
[
0
]
-
1
)
*
dH
kH
=
2
-
imshp
[
0
]
+
(
topshp
[
0
]
-
1
)
*
dH
else
:
else
:
# explicit padding, we can infer the kernel height
# explicit padding, we can infer the kernel height
kH
=
imshp
[
0
]
+
2
*
padH
-
(
topshp
[
0
]
-
1
)
*
dH
kH
=
imshp
[
0
]
+
padH_l
+
padH_r
-
(
topshp
[
0
]
-
1
)
*
dH
if
((
dW
!=
1
)
or
(
padW
==
-
1
)):
if
((
dW
!=
1
)
or
(
padW
_l
==
-
1
)
or
(
padW_r
==
-
1
)):
kW
=
height_width
[
1
]
kW
=
height_width
[
1
]
elif
(
padW
==
-
2
):
elif
(
padW
_l
==
-
2
)
or
(
padW_r
==
-
2
):
kW
=
2
-
imshp
[
1
]
+
(
topshp
[
1
]
-
1
)
*
dW
kW
=
2
-
imshp
[
1
]
+
(
topshp
[
1
]
-
1
)
*
dW
else
:
else
:
kW
=
imshp
[
1
]
+
2
*
padW
-
(
topshp
[
1
]
-
1
)
*
dW
kW
=
imshp
[
1
]
+
padW_l
+
padW_r
-
(
topshp
[
1
]
-
1
)
*
dW
if
self
.
unshared
is
True
:
if
self
.
unshared
is
True
:
return
[(
nkern
,
topshp
[
0
],
topshp
[
1
],
ssize
,
kH
,
kW
)]
return
[(
nkern
,
topshp
[
0
],
topshp
[
1
],
ssize
,
kH
,
kW
)]
else
:
else
:
...
@@ -820,14 +844,20 @@ class CorrMM_gradInputs(BaseCorrMM):
...
@@ -820,14 +844,20 @@ class CorrMM_gradInputs(BaseCorrMM):
def
infer_shape
(
self
,
node
,
input_shape
):
def
infer_shape
(
self
,
node
,
input_shape
):
if
self
.
border_mode
==
"half"
:
if
self
.
border_mode
==
"half"
:
padH
=
padW
=
-
1
padH
_l
=
padH_r
=
padW_l
=
padW_r
=
-
1
elif
self
.
border_mode
==
"full"
:
elif
self
.
border_mode
==
"full"
:
padH
=
padW
=
-
2
padH
_l
=
padH_r
=
padW_l
=
padW_r
=
-
2
elif
isinstance
(
self
.
border_mode
,
tuple
):
elif
isinstance
(
self
.
border_mode
,
tuple
):
padH
,
padW
=
self
.
border_mode
border
=
()
for
mode
in
self
.
border_mode
:
if
isinstance
(
mode
,
tuple
):
border
+=
((
int
(
mode
[
0
]),
int
(
mode
[
1
])),)
else
:
border
+=
((
int
(
mode
),
int
(
mode
)),)
(
padH_l
,
padH_r
),
(
padW_l
,
padW_r
)
=
border
else
:
else
:
assert
self
.
border_mode
==
"valid"
assert
self
.
border_mode
==
"valid"
padH
=
padW
=
0
padH
_l
=
padH_r
=
padW_l
=
padW_r
=
0
dH
,
dW
=
self
.
subsample
dH
,
dW
=
self
.
subsample
kshp
=
input_shape
[
0
]
kshp
=
input_shape
[
0
]
topshp
=
input_shape
[
1
]
topshp
=
input_shape
[
1
]
...
@@ -835,27 +865,27 @@ class CorrMM_gradInputs(BaseCorrMM):
...
@@ -835,27 +865,27 @@ class CorrMM_gradInputs(BaseCorrMM):
ssize
=
ssize
*
self
.
num_groups
ssize
=
ssize
*
self
.
num_groups
bsize
,
topshp
=
topshp
[
0
],
list
(
topshp
[
2
:])
bsize
,
topshp
=
topshp
[
0
],
list
(
topshp
[
2
:])
height_width
=
node
.
inputs
[
-
2
:]
height_width
=
node
.
inputs
[
-
2
:]
if
padH
==
-
1
:
if
padH
_l
==
-
1
or
padH_r
==
-
1
:
padH
=
kshp
[
0
]
//
2
padH
_l
=
padH_r
=
kshp
[
0
]
//
2
elif
padH
==
-
2
:
elif
padH
_l
==
-
2
or
padH_r
==
-
2
:
padH
=
kshp
[
0
]
-
1
padH
_l
=
padH_r
=
kshp
[
0
]
-
1
elif
padH
<
-
2
:
elif
padH
_l
<
-
2
or
padH_r
<
-
2
:
raise
ValueError
(
'CorrMM_gradInputs: border_mode must be >= 0.'
)
raise
ValueError
(
'CorrMM_gradInputs: border_mode must be >= 0.'
)
if
padW
==
-
1
:
if
padW
_l
==
-
1
or
padW_r
==
-
1
:
padW
=
kshp
[
1
]
//
2
padW
_l
=
padW_r
=
kshp
[
1
]
//
2
elif
padW
==
-
2
:
elif
padW
_l
==
-
2
or
padW_r
==
-
2
:
padW
=
kshp
[
1
]
-
1
padW
_l
=
padW_r
=
kshp
[
1
]
-
1
elif
padW
<
-
2
:
elif
padW
_l
<
-
2
or
padW_r
<
-
2
:
raise
ValueError
(
'CorrMM_gradInputs: border_mode must be >= 0.'
)
raise
ValueError
(
'CorrMM_gradInputs: border_mode must be >= 0.'
)
if
dH
!=
1
:
if
dH
!=
1
:
out_shp0
=
height_width
[
0
]
out_shp0
=
height_width
[
0
]
else
:
else
:
out_shp0
=
(
topshp
[
0
]
-
1
)
*
dH
+
kshp
[
0
]
-
2
*
padH
out_shp0
=
(
topshp
[
0
]
-
1
)
*
dH
+
kshp
[
0
]
-
padH_l
-
padH_r
if
dW
!=
1
:
if
dW
!=
1
:
out_shp1
=
height_width
[
1
]
out_shp1
=
height_width
[
1
]
else
:
else
:
out_shp1
=
(
topshp
[
1
]
-
1
)
*
dW
+
kshp
[
1
]
-
2
*
padW
out_shp1
=
(
topshp
[
1
]
-
1
)
*
dW
+
kshp
[
1
]
-
padW_l
-
padW_r
out_shp
=
(
out_shp0
,
out_shp1
)
out_shp
=
(
out_shp0
,
out_shp1
)
return
[(
bsize
,
ssize
)
+
out_shp
]
return
[(
bsize
,
ssize
)
+
out_shp
]
...
...
theano/tensor/nnet/tests/test_abstract_conv.py
浏览文件 @
ea62004b
...
@@ -24,6 +24,7 @@ from theano.tensor.nnet.abstract_conv import bilinear_kernel_1D
...
@@ -24,6 +24,7 @@ from theano.tensor.nnet.abstract_conv import bilinear_kernel_1D
from
theano.tensor.nnet.abstract_conv
import
bilinear_kernel_2D
from
theano.tensor.nnet.abstract_conv
import
bilinear_kernel_2D
from
theano.tensor.nnet.abstract_conv
import
bilinear_upsampling
from
theano.tensor.nnet.abstract_conv
import
bilinear_upsampling
from
theano.tensor.nnet.abstract_conv
import
separable_conv2d
,
separable_conv3d
from
theano.tensor.nnet.abstract_conv
import
separable_conv2d
,
separable_conv3d
from
theano.tensor.nnet.abstract_conv
import
causal_conv1d
from
theano.tensor.nnet.corr
import
(
CorrMM
,
CorrMM_gradWeights
,
from
theano.tensor.nnet.corr
import
(
CorrMM
,
CorrMM_gradWeights
,
CorrMM_gradInputs
)
CorrMM_gradInputs
)
from
theano.tensor.nnet.corr3d
import
(
Corr3dMM
,
Corr3dMM_gradWeights
,
from
theano.tensor.nnet.corr3d
import
(
Corr3dMM
,
Corr3dMM_gradWeights
,
...
@@ -1894,3 +1895,156 @@ class TestUnsharedConv(unittest.TestCase):
...
@@ -1894,3 +1895,156 @@ class TestUnsharedConv(unittest.TestCase):
if
verify
:
if
verify
:
utt
.
verify_grad
(
conv_gradinputs
,
[
kern
,
top
],
mode
=
self
.
mode
,
eps
=
1
)
utt
.
verify_grad
(
conv_gradinputs
,
[
kern
,
top
],
mode
=
self
.
mode
,
eps
=
1
)
class
TestAsymmetricPadding
(
unittest
.
TestCase
):
conv2d
=
theano
.
tensor
.
nnet
.
abstract_conv
.
AbstractConv2d
conv2d_gradw
=
theano
.
tensor
.
nnet
.
abstract_conv
.
AbstractConv2d_gradWeights
conv2d_gradi
=
theano
.
tensor
.
nnet
.
abstract_conv
.
AbstractConv2d_gradInputs
conv2d_op
=
theano
.
tensor
.
nnet
.
abstract_conv
.
AbstractConv2d
conv2d_gradw_op
=
theano
.
tensor
.
nnet
.
abstract_conv
.
AbstractConv2d_gradWeights
conv2d_gradi_op
=
theano
.
tensor
.
nnet
.
abstract_conv
.
AbstractConv2d_gradInputs
mode
=
theano
.
compile
.
mode
.
Mode
(
optimizer
=
'None'
)
img_shape
=
[(
2
,
2
,
4
,
4
),
(
3
,
2
,
4
,
2
),
(
3
,
3
,
5
,
3
)]
kern_shape
=
[(
4
,
2
,
2
,
2
),
(
2
,
2
,
4
,
2
),
(
2
,
3
,
3
,
3
)]
topgrad_shape
=
[(
2
,
4
,
6
,
6
),
(
3
,
2
,
3
,
4
),
(
3
,
2
,
6
,
1
)]
border_mode
=
[((
1
,
2
),
(
2
,
1
)),
((
1
,
1
),
(
0
,
3
)),
((
2
,
1
),
(
0
,
0
))]
def
test_fwd
(
self
):
img_sym
=
theano
.
tensor
.
tensor4
(
'img'
)
kern_sym
=
theano
.
tensor
.
tensor4
(
'kern'
)
for
imshp
,
kshp
,
pad
in
zip
(
self
.
img_shape
,
self
.
kern_shape
,
self
.
border_mode
):
img
=
np
.
random
.
random
(
imshp
)
.
astype
(
theano
.
config
.
floatX
)
kern
=
np
.
random
.
random
(
kshp
)
.
astype
(
theano
.
config
.
floatX
)
asymmetric_conv_op
=
self
.
conv2d
(
border_mode
=
pad
,
subsample
=
(
1
,
1
),
filter_dilation
=
(
1
,
1
))
asymmetric_out_sym
=
asymmetric_conv_op
(
img_sym
,
kern_sym
)
asymmetric_func
=
theano
.
function
([
img_sym
,
kern_sym
],
asymmetric_out_sym
,
mode
=
self
.
mode
)
assert
any
([
isinstance
(
node
.
op
,
self
.
conv2d_op
)
for
node
in
asymmetric_func
.
maker
.
fgraph
.
toposort
()])
asymmetric_output
=
asymmetric_func
(
img
,
kern
)
ref_conv_op
=
self
.
conv2d
(
border_mode
=
"valid"
,
subsample
=
(
1
,
1
),
filter_dilation
=
(
1
,
1
))
ref_out_sym
=
ref_conv_op
(
img_sym
,
kern_sym
)
ref_func
=
theano
.
function
([
img_sym
,
kern_sym
],
ref_out_sym
,
mode
=
self
.
mode
)
exp_imshp
=
(
imshp
[
0
],
imshp
[
1
],
imshp
[
2
]
+
pad
[
0
][
0
]
+
pad
[
0
][
1
],
imshp
[
3
]
+
pad
[
1
][
0
]
+
pad
[
1
][
1
])
exp_img
=
np
.
zeros
(
exp_imshp
,
dtype
=
theano
.
config
.
floatX
)
exp_img
[:,
:,
pad
[
0
][
0
]:
imshp
[
2
]
+
pad
[
0
][
0
],
pad
[
1
][
0
]:
imshp
[
3
]
+
pad
[
1
][
0
]]
=
img
ref_output
=
ref_func
(
exp_img
,
kern
)
utt
.
assert_allclose
(
asymmetric_output
,
ref_output
)
utt
.
verify_grad
(
asymmetric_conv_op
,
[
img
,
kern
],
mode
=
self
.
mode
,
eps
=
1
)
def
test_gradweight
(
self
):
img_sym
=
theano
.
tensor
.
tensor4
(
'img'
)
top_sym
=
theano
.
tensor
.
tensor4
(
'top'
)
for
imshp
,
kshp
,
topshp
,
pad
in
zip
(
self
.
img_shape
,
self
.
kern_shape
,
self
.
topgrad_shape
,
self
.
border_mode
):
img
=
np
.
random
.
random
(
imshp
)
.
astype
(
theano
.
config
.
floatX
)
top
=
np
.
random
.
random
(
topshp
)
.
astype
(
theano
.
config
.
floatX
)
asymmetric_conv_op
=
self
.
conv2d_gradw
(
border_mode
=
pad
,
subsample
=
(
1
,
1
),
filter_dilation
=
(
1
,
1
))
asymmetric_out_sym
=
asymmetric_conv_op
(
img_sym
,
top_sym
,
kshp
[
-
2
:])
asymmetric_func
=
theano
.
function
([
img_sym
,
top_sym
],
asymmetric_out_sym
,
mode
=
self
.
mode
)
assert
any
([
isinstance
(
node
.
op
,
self
.
conv2d_gradw_op
)
for
node
in
asymmetric_func
.
maker
.
fgraph
.
toposort
()])
asymmetric_output
=
asymmetric_func
(
img
,
top
)
ref_conv_op
=
self
.
conv2d_gradw
(
border_mode
=
"valid"
,
subsample
=
(
1
,
1
),
filter_dilation
=
(
1
,
1
))
ref_out_sym
=
ref_conv_op
(
img_sym
,
top_sym
,
kshp
[
-
2
:])
ref_func
=
theano
.
function
([
img_sym
,
top_sym
],
ref_out_sym
,
mode
=
self
.
mode
)
exp_imshp
=
(
imshp
[
0
],
imshp
[
1
],
imshp
[
2
]
+
pad
[
0
][
0
]
+
pad
[
0
][
1
],
imshp
[
3
]
+
pad
[
1
][
0
]
+
pad
[
1
][
1
])
exp_img
=
np
.
zeros
(
exp_imshp
,
dtype
=
theano
.
config
.
floatX
)
exp_img
[:,
:,
pad
[
0
][
0
]:
imshp
[
2
]
+
pad
[
0
][
0
],
pad
[
1
][
0
]:
imshp
[
3
]
+
pad
[
1
][
0
]]
=
img
ref_output
=
ref_func
(
exp_img
,
top
)
utt
.
assert_allclose
(
asymmetric_output
,
ref_output
)
def
conv_gradweight
(
inputs_val
,
output_val
):
return
asymmetric_conv_op
(
inputs_val
,
output_val
,
tensor
.
as_tensor_variable
(
kshp
[
-
2
:]))
utt
.
verify_grad
(
conv_gradweight
,
[
img
,
top
],
mode
=
self
.
mode
,
eps
=
1
)
def
test_gradinput
(
self
):
kern_sym
=
theano
.
tensor
.
tensor4
(
'kern'
)
top_sym
=
theano
.
tensor
.
tensor4
(
'top'
)
for
imshp
,
kshp
,
topshp
,
pad
in
zip
(
self
.
img_shape
,
self
.
kern_shape
,
self
.
topgrad_shape
,
self
.
border_mode
):
kern
=
np
.
random
.
random
(
kshp
)
.
astype
(
theano
.
config
.
floatX
)
top
=
np
.
random
.
random
(
topshp
)
.
astype
(
theano
.
config
.
floatX
)
asymmetric_conv_op
=
self
.
conv2d_gradi
(
border_mode
=
pad
,
subsample
=
(
1
,
1
),
filter_dilation
=
(
1
,
1
))
asymmetric_out_sym
=
asymmetric_conv_op
(
kern_sym
,
top_sym
,
imshp
[
-
2
:])
asymmetric_func
=
theano
.
function
([
kern_sym
,
top_sym
],
asymmetric_out_sym
,
mode
=
self
.
mode
)
assert
any
([
isinstance
(
node
.
op
,
self
.
conv2d_gradi_op
)
for
node
in
asymmetric_func
.
maker
.
fgraph
.
toposort
()])
asymmetric_output
=
asymmetric_func
(
kern
,
top
)
ref_conv_op
=
self
.
conv2d_gradi
(
border_mode
=
"valid"
,
subsample
=
(
1
,
1
),
filter_dilation
=
(
1
,
1
))
exp_imshp
=
[
imshp
[
2
]
+
pad
[
0
][
0
]
+
pad
[
0
][
1
],
imshp
[
3
]
+
pad
[
1
][
0
]
+
pad
[
1
][
1
]]
ref_out_sym
=
ref_conv_op
(
kern_sym
,
top_sym
,
exp_imshp
)
ref_func
=
theano
.
function
([
kern_sym
,
top_sym
],
ref_out_sym
,
mode
=
self
.
mode
)
ref_output
=
ref_func
(
kern
,
top
)
ref_output
=
ref_output
[:,
:,
pad
[
0
][
0
]:
imshp
[
2
]
+
pad
[
0
][
0
],
pad
[
1
][
0
]:
imshp
[
3
]
+
pad
[
1
][
0
]]
utt
.
assert_allclose
(
asymmetric_output
,
ref_output
)
def
conv_gradinputs
(
filters_val
,
output_val
):
return
asymmetric_conv_op
(
filters_val
,
output_val
,
tensor
.
as_tensor_variable
(
imshp
[
-
2
:]))
utt
.
verify_grad
(
conv_gradinputs
,
[
kern
,
top
],
mode
=
self
.
mode
,
eps
=
1
)
class
TestCausalConv
(
unittest
.
TestCase
):
mode
=
theano
.
compile
.
mode
.
Mode
(
optimizer
=
'None'
)
img
=
np
.
array
([[[
2
,
4
,
9
,
5
,
8
],
[
0
,
0
,
4
,
0
,
5
]],
[[
2
,
5
,
8
,
5
,
5
],
[
1
,
3
,
0
,
7
,
9
]],
[[
7
,
0
,
7
,
1
,
0
],
[
0
,
1
,
4
,
7
,
2
]]])
.
astype
(
theano
.
config
.
floatX
)
kern
=
np
.
array
([[[
5
,
3
,
1
],
[
3
,
1
,
0
]],
[[
6
,
4
,
9
],
[
2
,
2
,
7
]]])
.
astype
(
theano
.
config
.
floatX
)
dilation
=
2
precomp_top
=
np
.
array
([[[
10
,
20
,
63
,
37
,
88
],
[
12
,
24
,
70
,
46
,
120
]],
[[
13
,
34
,
47
,
64
,
78
],
[
14
,
36
,
58
,
70
,
105
]],
[[
35
,
3
,
68
,
27
,
38
],
[
42
,
2
,
78
,
22
,
103
]]])
.
astype
(
theano
.
config
.
floatX
)
def
test_interface
(
self
):
img_sym
=
theano
.
tensor
.
tensor3
(
'img'
)
kern_sym
=
theano
.
tensor
.
tensor3
(
'kern'
)
sym_out
=
causal_conv1d
(
img_sym
,
kern_sym
,
self
.
kern
.
shape
,
filter_dilation
=
self
.
dilation
)
causal_func
=
theano
.
function
([
img_sym
,
kern_sym
],
sym_out
,
mode
=
self
.
mode
)
output
=
causal_func
(
self
.
img
,
self
.
kern
)
utt
.
assert_allclose
(
output
,
self
.
precomp_top
)
def
causal_conv_fn
(
inputs_val
,
filters_val
):
return
causal_conv1d
(
inputs_val
,
filters_val
,
self
.
kern
.
shape
,
filter_dilation
=
1
)
utt
.
verify_grad
(
causal_conv_fn
,
[
self
.
img
,
self
.
kern
],
mode
=
self
.
mode
,
eps
=
1
)
theano/tensor/nnet/tests/test_corr.py
浏览文件 @
ea62004b
...
@@ -11,6 +11,7 @@ import theano.tensor as T
...
@@ -11,6 +11,7 @@ import theano.tensor as T
from
theano.tests
import
unittest_tools
as
utt
from
theano.tests
import
unittest_tools
as
utt
from
theano.tensor.nnet
import
corr
,
conv
from
theano.tensor.nnet
import
corr
,
conv
from
theano.tensor.nnet.tests.test_abstract_conv
import
Grouped_conv_noOptim
,
TestUnsharedConv
from
theano.tensor.nnet.tests.test_abstract_conv
import
Grouped_conv_noOptim
,
TestUnsharedConv
from
theano.tensor.nnet.tests.test_abstract_conv
import
TestAsymmetricPadding
,
TestCausalConv
class
TestCorr2D
(
utt
.
InferShapeTester
):
class
TestCorr2D
(
utt
.
InferShapeTester
):
...
@@ -454,6 +455,23 @@ class TestUnsharedCorr2d(TestUnsharedConv):
...
@@ -454,6 +455,23 @@ class TestUnsharedCorr2d(TestUnsharedConv):
conv2d_gradi_op
=
corr
.
CorrMM_gradInputs
conv2d_gradi_op
=
corr
.
CorrMM_gradInputs
class
TestAsymmetricCorr
(
TestAsymmetricPadding
):
if
theano
.
config
.
mode
==
"FAST_COMPILE"
:
mode
=
theano
.
compile
.
get_mode
(
"FAST_RUN"
)
.
excluding
(
'gpuarray'
)
else
:
mode
=
None
conv2d_op
=
corr
.
CorrMM
conv2d_gradw_op
=
corr
.
CorrMM_gradWeights
conv2d_gradi_op
=
corr
.
CorrMM_gradInputs
class
TestCausalCorr
(
TestCausalConv
):
if
theano
.
config
.
mode
==
"FAST_COMPILE"
:
mode
=
theano
.
compile
.
get_mode
(
"FAST_RUN"
)
.
excluding
(
'gpuarray'
)
else
:
mode
=
None
if
__name__
==
'__main__'
:
if
__name__
==
'__main__'
:
t
=
TestCorr2D
(
'setUp'
)
t
=
TestCorr2D
(
'setUp'
)
...
...
编写
预览
Markdown
格式
0%
重试
或
添加新文件
添加附件
取消
您添加了
0
人
到此讨论。请谨慎行事。
请先完成此评论的编辑!
取消
请
注册
或者
登录
后发表评论