Skip to content
项目
群组
代码片段
帮助
当前项目
正在载入...
登录 / 注册
切换导航面板
P
pytensor
项目
项目
详情
活动
周期分析
仓库
仓库
文件
提交
分支
标签
贡献者
图表
比较
统计图
议题
0
议题
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
CI / CD
CI / CD
流水线
作业
日程
统计图
Wiki
Wiki
代码片段
代码片段
成员
成员
折叠边栏
关闭边栏
活动
图像
聊天
创建新问题
作业
提交
问题看板
Open sidebar
testgroup
pytensor
Commits
a668c6c5
提交
a668c6c5
authored
7月 01, 2016
作者:
Pascal Lamblin
提交者:
GitHub
7月 01, 2016
浏览文件
操作
浏览文件
下载
差异文件
Merge pull request #4587 from niasla/dilated_convolution
Implementation of 2D dilated convolution/correlation.
上级
d78f44f6
2dcf3753
隐藏空白字符变更
内嵌
并排
正在显示
13 个修改的文件
包含
651 行增加
和
320 行删除
+651
-320
dnn.py
theano/gpuarray/dnn.py
+3
-0
blas.py
theano/sandbox/cuda/blas.py
+79
-44
corr_gemm.cu
theano/sandbox/cuda/corr_gemm.cu
+145
-46
dnn.py
theano/sandbox/cuda/dnn.py
+3
-0
opt.py
theano/sandbox/cuda/opt.py
+21
-9
test_abstractconv.py
theano/sandbox/cuda/tests/test_abstractconv.py
+22
-15
__init__.py
theano/tensor/nnet/__init__.py
+7
-2
abstract_conv.py
theano/tensor/nnet/abstract_conv.py
+104
-52
corr.py
theano/tensor/nnet/corr.py
+70
-40
corr_gemm.c
theano/tensor/nnet/corr_gemm.c
+44
-28
opt.py
theano/tensor/nnet/opt.py
+13
-6
test_abstract_conv.py
theano/tensor/nnet/tests/test_abstract_conv.py
+107
-70
test_corr.py
theano/tensor/nnet/tests/test_corr.py
+33
-8
没有找到文件。
theano/gpuarray/dnn.py
浏览文件 @
a668c6c5
...
@@ -1393,6 +1393,9 @@ def local_abstractconv_cudnn(node):
...
@@ -1393,6 +1393,9 @@ def local_abstractconv_cudnn(node):
inp1
=
node
.
inputs
[
0
]
inp1
=
node
.
inputs
[
0
]
inp2
=
node
.
inputs
[
1
]
inp2
=
node
.
inputs
[
1
]
if
(
node
.
op
.
filter_dilation
!=
(
1
,
1
)):
return
None
if
not
isinstance
(
inp1
.
type
,
GpuArrayType
):
if
not
isinstance
(
inp1
.
type
,
GpuArrayType
):
return
None
return
None
...
...
theano/sandbox/cuda/blas.py
浏览文件 @
a668c6c5
...
@@ -855,15 +855,17 @@ class BaseGpuCorrMM(GpuOp):
...
@@ -855,15 +855,17 @@ class BaseGpuCorrMM(GpuOp):
or a pair of integers
or a pair of integers
subsample
subsample
Perform subsampling of the output (default: (1, 1)).
Perform subsampling of the output (default: (1, 1)).
filter_dilation
Perform subsampling of the input, also known as dilation (default: (1, 1)).
pad
pad
*deprecated*, now you should always use border_mode.
*deprecated*, now you should always use border_mode.
"""
"""
check_broadcast
=
False
check_broadcast
=
False
__props__
=
(
'border_mode'
,
'subsample'
)
__props__
=
(
'border_mode'
,
'subsample'
,
'filter_dilation'
)
def
__init__
(
self
,
border_mode
=
"valid"
,
subsample
=
(
1
,
1
),
pad
=
(
0
,
0
)):
def
__init__
(
self
,
border_mode
=
"valid"
,
subsample
=
(
1
,
1
),
filter_dilation
=
(
1
,
1
),
pad
=
(
0
,
0
)):
if
pad
!=
(
0
,
0
):
if
pad
!=
(
0
,
0
):
_logger
.
warning
(
_logger
.
warning
(
'do not use pad for BaseGpuCorrMM; please set padding in '
'do not use pad for BaseGpuCorrMM; please set padding in '
...
@@ -885,7 +887,10 @@ class BaseGpuCorrMM(GpuOp):
...
@@ -885,7 +887,10 @@ class BaseGpuCorrMM(GpuOp):
self
.
border_mode
=
border_mode
self
.
border_mode
=
border_mode
if
len
(
subsample
)
!=
2
:
if
len
(
subsample
)
!=
2
:
raise
ValueError
(
"subsample must have two elements"
)
raise
ValueError
(
"subsample must have two elements"
)
self
.
subsample
=
subsample
if
len
(
filter_dilation
)
!=
2
:
raise
ValueError
(
"filter_dilation must have two elements"
)
self
.
subsample
=
tuple
(
subsample
)
self
.
filter_dilation
=
tuple
(
filter_dilation
)
@property
@property
def
pad
(
self
):
def
pad
(
self
):
...
@@ -894,10 +899,11 @@ class BaseGpuCorrMM(GpuOp):
...
@@ -894,10 +899,11 @@ class BaseGpuCorrMM(GpuOp):
return
(
0
,
0
)
return
(
0
,
0
)
def
__str__
(
self
):
def
__str__
(
self
):
return
'
%
s{
%
s,
%
s}'
%
(
return
'
%
s{
%
s,
%
s
,
%
s
}'
%
(
self
.
__class__
.
__name__
,
self
.
__class__
.
__name__
,
self
.
border_mode
,
self
.
border_mode
,
str
(
self
.
subsample
))
str
(
self
.
subsample
),
str
(
self
.
filter_dilation
))
def
flops
(
self
,
inp
,
outp
):
def
flops
(
self
,
inp
,
outp
):
"""
"""
...
@@ -922,7 +928,7 @@ class BaseGpuCorrMM(GpuOp):
...
@@ -922,7 +928,7 @@ class BaseGpuCorrMM(GpuOp):
def
c_code_cache_version
(
self
):
def
c_code_cache_version
(
self
):
# raise this whenever modifying any of the support_code_files
# raise this whenever modifying any of the support_code_files
return
(
0
,
2
4
)
return
(
0
,
2
6
)
def
c_support_code_apply
(
self
,
node
,
nodename
):
def
c_support_code_apply
(
self
,
node
,
nodename
):
# REMEMBER TO RAISE c_code_cache_version when changing any of
# REMEMBER TO RAISE c_code_cache_version when changing any of
...
@@ -976,6 +982,7 @@ class BaseGpuCorrMM(GpuOp):
...
@@ -976,6 +982,7 @@ class BaseGpuCorrMM(GpuOp):
"""
"""
dH
,
dW
=
self
.
subsample
dH
,
dW
=
self
.
subsample
dilH
,
dilW
=
self
.
filter_dilation
if
self
.
border_mode
==
"half"
:
if
self
.
border_mode
==
"half"
:
padH
=
padW
=
-
1
padH
=
padW
=
-
1
elif
self
.
border_mode
==
"full"
:
elif
self
.
border_mode
==
"full"
:
...
@@ -1022,6 +1029,8 @@ class BaseGpuCorrMM(GpuOp):
...
@@ -1022,6 +1029,8 @@ class BaseGpuCorrMM(GpuOp):
// Optional args
// Optional args
int dH =
%(dH)
s;
int dH =
%(dH)
s;
int dW =
%(dW)
s;
int dW =
%(dW)
s;
int dilH =
%(dilH)
s;
int dilW =
%(dilW)
s;
int padH =
%(padH)
s;
int padH =
%(padH)
s;
int padW =
%(padW)
s;
int padW =
%(padW)
s;
...
@@ -1045,39 +1054,43 @@ class BaseGpuCorrMM(GpuOp):
...
@@ -1045,39 +1054,43 @@ class BaseGpuCorrMM(GpuOp):
}
}
else if (padH == -2) {
else if (padH == -2) {
// vertical full padding, we can infer the kernel height
// vertical full padding, we can infer the kernel height
kH =
2 - CudaNdarray_HOST_DIMS(bottom)[2] + (CudaNdarray_HOST_DIMS(top)[2] - 1) * dH
;
kH =
(2 - CudaNdarray_HOST_DIMS(bottom)[2] + (CudaNdarray_HOST_DIMS(top)[2] - 1)*dH - 1) / dilH + 1
;
}
}
else {
else {
// explicit padding, we can infer the kernel height
// explicit padding, we can infer the kernel height
kH =
CudaNdarray_HOST_DIMS(bottom)[2] + 2*padH - (CudaNdarray_HOST_DIMS(top)[2] - 1) * dH
;
kH =
(CudaNdarray_HOST_DIMS(bottom)[2] + 2*padH - (CudaNdarray_HOST_DIMS(top)[2] - 1)*dH - 1) / dilH + 1
;
}
}
if ((dW != 1) || (padW == -1)) {
if ((dW != 1) || (padW == -1)) {
kW =
%(width)
s;
kW =
%(width)
s;
}
}
else if (padW == -2) {
else if (padW == -2) {
kW =
2 - CudaNdarray_HOST_DIMS(bottom)[3] + (CudaNdarray_HOST_DIMS(top)[3] - 1) * dW
;
kW =
(2 - CudaNdarray_HOST_DIMS(bottom)[3] + (CudaNdarray_HOST_DIMS(top)[3] - 1) * dW - 1) / dilW + 1
;
}
}
else {
else {
kW =
CudaNdarray_HOST_DIMS(bottom)[3] + 2*padW - (CudaNdarray_HOST_DIMS(top)[3] - 1) * dW
;
kW =
(CudaNdarray_HOST_DIMS(bottom)[3] + 2*padW - (CudaNdarray_HOST_DIMS(top)[3] - 1) * dW - 1) / dilW + 1
;
}
}
}
}
// Implicit dilated kernel size
int dil_kH = (kH - 1) * dilH + 1;
int dil_kW = (kW - 1) * dilW + 1;
// Auto-padding if requested
// Auto-padding if requested
if (padH == -1) { // vertical half padding
if (padH == -1) { // vertical half padding
padH = kH / 2;
padH =
dil_
kH / 2;
}
}
else if (padH == -2) { // vertical full padding
else if (padH == -2) { // vertical full padding
padH = kH - 1;
padH =
dil_
kH - 1;
}
}
else if (padH < 0) {
else if (padH < 0) {
PyErr_SetString(PyExc_ValueError, "BaseGpuCorrMM: padH must be >= -2");
PyErr_SetString(PyExc_ValueError, "BaseGpuCorrMM: padH must be >= -2");
%(fail)
s
%(fail)
s
}
}
if (padW == -1) { // horizontal half padding
if (padW == -1) { // horizontal half padding
padW = kW / 2;
padW =
dil_
kW / 2;
}
}
else if (padW == -2) { // horizontal full padding
else if (padW == -2) { // horizontal full padding
padW = kW - 1;
padW =
dil_
kW - 1;
}
}
else if (padW < 0) {
else if (padW < 0) {
PyErr_SetString(PyExc_ValueError, "BaseGpuCorrMM: padW must be >= -2");
PyErr_SetString(PyExc_ValueError, "BaseGpuCorrMM: padW must be >= -2");
...
@@ -1089,15 +1102,15 @@ class BaseGpuCorrMM(GpuOp):
...
@@ -1089,15 +1102,15 @@ class BaseGpuCorrMM(GpuOp):
switch(direction) {
switch(direction) {
case 0: // forward pass
case 0: // forward pass
// output is top: (batchsize, num_filters, height, width)
// output is top: (batchsize, num_filters, height, width)
// height and width: top = (bottom + 2*pad -
weight
) / sample + 1
// height and width: top = (bottom + 2*pad -
((weight-1)*dil + 1)
) / sample + 1
out_dim[0] = CudaNdarray_HOST_DIMS(bottom)[0];
out_dim[0] = CudaNdarray_HOST_DIMS(bottom)[0];
out_dim[1] = CudaNdarray_HOST_DIMS(weights)[0];
out_dim[1] = CudaNdarray_HOST_DIMS(weights)[0];
out_dim[2] = (CudaNdarray_HOST_DIMS(bottom)[2] + 2*padH -
CudaNdarray_HOST_DIMS(weights)[2]
) / dH + 1;
out_dim[2] = (CudaNdarray_HOST_DIMS(bottom)[2] + 2*padH -
((CudaNdarray_HOST_DIMS(weights)[2]-1)*dilH + 1)
) / dH + 1;
out_dim[3] = (CudaNdarray_HOST_DIMS(bottom)[3] + 2*padW -
CudaNdarray_HOST_DIMS(weights)[3]
) / dW + 1;
out_dim[3] = (CudaNdarray_HOST_DIMS(bottom)[3] + 2*padW -
((CudaNdarray_HOST_DIMS(weights)[3]-1)*dilW + 1)
) / dW + 1;
break;
break;
case 1: // backprop wrt. weights
case 1: // backprop wrt. weights
// output is weights: (num_filters, num_channels, height, width)
// output is weights: (num_filters, num_channels, height, width)
// height and width: weights =
bottom + 2*pad - (top - 1) * sample
// height and width: weights =
(bottom + 2*pad - (top - 1) * sample - 1) / dil + 1
out_dim[0] = CudaNdarray_HOST_DIMS(top)[1];
out_dim[0] = CudaNdarray_HOST_DIMS(top)[1];
out_dim[1] = CudaNdarray_HOST_DIMS(bottom)[1];
out_dim[1] = CudaNdarray_HOST_DIMS(bottom)[1];
out_dim[2] = kH; // already inferred further above
out_dim[2] = kH; // already inferred further above
...
@@ -1105,11 +1118,11 @@ class BaseGpuCorrMM(GpuOp):
...
@@ -1105,11 +1118,11 @@ class BaseGpuCorrMM(GpuOp):
break;
break;
case 2: // backprop wrt. inputs
case 2: // backprop wrt. inputs
// output is bottom: (batchsize, num_channels, height, width)
// output is bottom: (batchsize, num_channels, height, width)
// height and width: bottom = (top - 1) * sample +
weights
- 2*pad
// height and width: bottom = (top - 1) * sample +
(weights-1)*dil + 1
- 2*pad
out_dim[0] = CudaNdarray_HOST_DIMS(top)[0];
out_dim[0] = CudaNdarray_HOST_DIMS(top)[0];
out_dim[1] = CudaNdarray_HOST_DIMS(weights)[1];
out_dim[1] = CudaNdarray_HOST_DIMS(weights)[1];
out_dim[2] = (dH != 1) ?
%(height)
s : (CudaNdarray_HOST_DIMS(top)[2] - 1) * dH +
CudaNdarray_HOST_DIMS(weights)[2]
- 2*padH;
out_dim[2] = (dH != 1) ?
%(height)
s : (CudaNdarray_HOST_DIMS(top)[2] - 1) * dH +
(CudaNdarray_HOST_DIMS(weights)[2]-1)*dilH + 1
- 2*padH;
out_dim[3] = (dW != 1) ?
%(width)
s : (CudaNdarray_HOST_DIMS(top)[3] - 1) * dW +
CudaNdarray_HOST_DIMS(weights)[3]
- 2*padW;
out_dim[3] = (dW != 1) ?
%(width)
s : (CudaNdarray_HOST_DIMS(top)[3] - 1) * dW +
(CudaNdarray_HOST_DIMS(weights)[3]-1)*dilW + 1
- 2*padW;
break;
break;
default:
default:
PyErr_SetString(PyExc_ValueError, "BaseGpuCorrMM: direction must be 0, 1, or 2
\\
n");
PyErr_SetString(PyExc_ValueError, "BaseGpuCorrMM: direction must be 0, 1, or 2
\\
n");
...
@@ -1137,7 +1150,7 @@ class BaseGpuCorrMM(GpuOp):
...
@@ -1137,7 +1150,7 @@ class BaseGpuCorrMM(GpuOp):
}
}
// Call CUDA code
// Call CUDA code
out2 = corrMM(
%(bottom)
s,
%(weights)
s,
%(top)
s, direction, dH, dW, padH, padW);
out2 = corrMM(
%(bottom)
s,
%(weights)
s,
%(top)
s, direction, dH, dW,
dilH, dilW,
padH, padW);
if (out2==NULL){
if (out2==NULL){
%(fail)
s
%(fail)
s
}
}
...
@@ -1168,6 +1181,10 @@ class GpuCorrMM(BaseGpuCorrMM):
...
@@ -1168,6 +1181,10 @@ class GpuCorrMM(BaseGpuCorrMM):
`(sv, sh)` is equivalent to `GpuCorrMM(...)(...)[:,:,::sv, ::sh]`,
`(sv, sh)` is equivalent to `GpuCorrMM(...)(...)[:,:,::sv, ::sh]`,
but faster.
but faster.
Set to `(1, 1)` to disable subsampling.
Set to `(1, 1)` to disable subsampling.
filter_dilation
The filter dilation operation applied to each input image.
Should be a tuple with 2 elements.
Set to `(1, 1)` to disable filter dilation.
pad
pad
Deprecated alias for `border_mode`.
Deprecated alias for `border_mode`.
...
@@ -1198,8 +1215,10 @@ class GpuCorrMM(BaseGpuCorrMM):
...
@@ -1198,8 +1215,10 @@ class GpuCorrMM(BaseGpuCorrMM):
"""
"""
def
__init__
(
self
,
border_mode
=
"valid"
,
def
__init__
(
self
,
border_mode
=
"valid"
,
subsample
=
(
1
,
1
),
subsample
=
(
1
,
1
),
filter_dilation
=
(
1
,
1
),
pad
=
(
0
,
0
)):
pad
=
(
0
,
0
)):
super
(
GpuCorrMM
,
self
)
.
__init__
(
border_mode
,
subsample
,
pad
)
super
(
GpuCorrMM
,
self
)
.
__init__
(
border_mode
,
subsample
,
filter_dilation
,
pad
)
def
make_node
(
self
,
img
,
kern
):
def
make_node
(
self
,
img
,
kern
):
img
=
as_cuda_ndarray_variable
(
img
)
img
=
as_cuda_ndarray_variable
(
img
)
...
@@ -1223,9 +1242,13 @@ class GpuCorrMM(BaseGpuCorrMM):
...
@@ -1223,9 +1242,13 @@ class GpuCorrMM(BaseGpuCorrMM):
bottom
,
weights
=
inp
bottom
,
weights
=
inp
top
,
=
grads
top
,
=
grads
top
=
gpu_contiguous
(
top
)
top
=
gpu_contiguous
(
top
)
d_bottom
=
GpuCorrMM_gradInputs
(
self
.
border_mode
,
self
.
subsample
)(
d_bottom
=
GpuCorrMM_gradInputs
(
self
.
border_mode
,
self
.
subsample
,
self
.
filter_dilation
)(
weights
,
top
,
bottom
.
shape
[
-
2
:])
weights
,
top
,
bottom
.
shape
[
-
2
:])
d_weights
=
GpuCorrMM_gradWeights
(
self
.
border_mode
,
self
.
subsample
)(
d_weights
=
GpuCorrMM_gradWeights
(
self
.
border_mode
,
self
.
subsample
,
self
.
filter_dilation
)(
bottom
,
top
,
weights
.
shape
[
-
2
:])
bottom
,
top
,
weights
.
shape
[
-
2
:])
return
d_bottom
,
d_weights
return
d_bottom
,
d_weights
...
@@ -1243,8 +1266,12 @@ class GpuCorrMM_gradWeights(BaseGpuCorrMM):
...
@@ -1243,8 +1266,12 @@ class GpuCorrMM_gradWeights(BaseGpuCorrMM):
def
__init__
(
self
,
border_mode
=
"valid"
,
def
__init__
(
self
,
border_mode
=
"valid"
,
subsample
=
(
1
,
1
),
subsample
=
(
1
,
1
),
filter_dilation
=
(
1
,
1
),
pad
=
(
0
,
0
)):
pad
=
(
0
,
0
)):
super
(
GpuCorrMM_gradWeights
,
self
)
.
__init__
(
border_mode
,
subsample
,
pad
)
super
(
GpuCorrMM_gradWeights
,
self
)
.
__init__
(
border_mode
,
subsample
,
filter_dilation
,
pad
)
def
make_node
(
self
,
img
,
topgrad
,
shape
=
None
):
def
make_node
(
self
,
img
,
topgrad
,
shape
=
None
):
img
=
as_cuda_ndarray_variable
(
img
)
img
=
as_cuda_ndarray_variable
(
img
)
...
@@ -1278,12 +1305,13 @@ class GpuCorrMM_gradWeights(BaseGpuCorrMM):
...
@@ -1278,12 +1305,13 @@ class GpuCorrMM_gradWeights(BaseGpuCorrMM):
bottom
,
top
=
inp
[:
2
]
bottom
,
top
=
inp
[:
2
]
weights
,
=
grads
weights
,
=
grads
weights
=
gpu_contiguous
(
weights
)
weights
=
gpu_contiguous
(
weights
)
d_bottom
=
GpuCorrMM_gradInputs
(
d_bottom
=
GpuCorrMM_gradInputs
(
self
.
border_mode
,
self
.
border_mode
,
self
.
subsample
)(
weights
,
self
.
subsample
,
top
,
self
.
filter_dilation
)(
weights
,
bottom
.
shape
[
-
2
:])
top
,
bottom
.
shape
[
-
2
:])
d_top
=
GpuCorrMM
(
d_top
=
GpuCorrMM
(
self
.
border_mode
,
self
.
subsample
)(
bottom
,
weights
)
self
.
border_mode
,
self
.
subsample
,
self
.
filter_dilation
)(
bottom
,
weights
)
d_height_width
=
(
d_height_width
=
(
theano
.
gradient
.
DisconnectedType
()(),
theano
.
gradient
.
DisconnectedType
()(),
)
*
2
if
len
(
inp
)
==
4
else
()
)
*
2
if
len
(
inp
)
==
4
else
()
...
@@ -1309,8 +1337,10 @@ class GpuCorrMM_gradInputs(BaseGpuCorrMM):
...
@@ -1309,8 +1337,10 @@ class GpuCorrMM_gradInputs(BaseGpuCorrMM):
def
__init__
(
self
,
border_mode
=
"valid"
,
def
__init__
(
self
,
border_mode
=
"valid"
,
subsample
=
(
1
,
1
),
subsample
=
(
1
,
1
),
filter_dilation
=
(
1
,
1
),
pad
=
(
0
,
0
)):
pad
=
(
0
,
0
)):
super
(
GpuCorrMM_gradInputs
,
self
)
.
__init__
(
border_mode
,
subsample
,
pad
)
super
(
GpuCorrMM_gradInputs
,
self
)
.
__init__
(
border_mode
,
subsample
,
filter_dilation
,
pad
)
def
make_node
(
self
,
kern
,
topgrad
,
shape
=
None
):
def
make_node
(
self
,
kern
,
topgrad
,
shape
=
None
):
kern
=
as_cuda_ndarray_variable
(
kern
)
kern
=
as_cuda_ndarray_variable
(
kern
)
...
@@ -1341,11 +1371,14 @@ class GpuCorrMM_gradInputs(BaseGpuCorrMM):
...
@@ -1341,11 +1371,14 @@ class GpuCorrMM_gradInputs(BaseGpuCorrMM):
weights
,
top
=
inp
[:
2
]
weights
,
top
=
inp
[:
2
]
bottom
,
=
grads
bottom
,
=
grads
bottom
=
gpu_contiguous
(
bottom
)
bottom
=
gpu_contiguous
(
bottom
)
d_weights
=
GpuCorrMM_gradWeights
(
d_weights
=
GpuCorrMM_gradWeights
(
self
.
border_mode
,
self
.
border_mode
,
self
.
subsample
)(
self
.
subsample
,
bottom
,
top
,
weights
.
shape
[
-
2
:])
self
.
filter_dilation
)(
bottom
,
d_top
=
GpuCorrMM
(
top
,
self
.
border_mode
,
self
.
subsample
)(
bottom
,
weights
)
weights
.
shape
[
-
2
:])
d_top
=
GpuCorrMM
(
self
.
border_mode
,
self
.
subsample
,
self
.
filter_dilation
)(
bottom
,
weights
)
d_height_width
=
(
d_height_width
=
(
theano
.
gradient
.
DisconnectedType
()(),
theano
.
gradient
.
DisconnectedType
()(),
)
*
2
if
len
(
inp
)
==
4
else
()
)
*
2
if
len
(
inp
)
==
4
else
()
...
@@ -1871,12 +1904,14 @@ class GpuCorr3dMM_gradInputs(BaseGpuCorr3dMM):
...
@@ -1871,12 +1904,14 @@ class GpuCorr3dMM_gradInputs(BaseGpuCorr3dMM):
weights
,
top
=
inp
[:
2
]
weights
,
top
=
inp
[:
2
]
bottom
,
=
grads
bottom
,
=
grads
bottom
=
gpu_contiguous
(
bottom
)
bottom
=
gpu_contiguous
(
bottom
)
d_weights
=
GpuCorr3dMM_gradWeights
(
d_weights
=
GpuCorr3dMM_gradWeights
(
self
.
border_mode
,
self
.
border_mode
,
self
.
subsample
,
self
.
pad
)(
self
.
subsample
,
bottom
,
top
,
weights
.
shape
[
-
3
:])
self
.
pad
)(
bottom
,
d_top
=
GpuCorr3dMM
(
top
,
self
.
border_mode
,
self
.
subsample
,
self
.
pad
)(
weights
.
shape
[
-
3
:])
bottom
,
weights
)
d_top
=
GpuCorr3dMM
(
self
.
border_mode
,
self
.
subsample
,
self
.
pad
)(
bottom
,
weights
)
d_height_width_depth
=
(
theano
.
gradient
.
DisconnectedType
()(),)
\
d_height_width_depth
=
(
theano
.
gradient
.
DisconnectedType
()(),)
\
*
3
if
len
(
inp
)
==
5
else
()
*
3
if
len
(
inp
)
==
5
else
()
return
(
d_weights
,
d_top
)
+
d_height_width_depth
return
(
d_weights
,
d_top
)
+
d_height_width_depth
...
...
theano/sandbox/cuda/corr_gemm.cu
浏览文件 @
a668c6c5
...
@@ -52,6 +52,39 @@ inline int GET_BLOCKS(const int N) {
...
@@ -52,6 +52,39 @@ inline int GET_BLOCKS(const int N) {
// (borrowed from Caffe: https://github.com/BVLC/caffe/blob/master/src/caffe/util/im2col.cu)
// (borrowed from Caffe: https://github.com/BVLC/caffe/blob/master/src/caffe/util/im2col.cu)
// Kernels for fast unfold + copy
// Kernels for fast unfold + copy
// CUDA kernel for the case of dilation
__global__ void dilated_im2col_kernel(const int n, const float* data_im,
const int height, const int width, const int kernel_h, const int kernel_w,
const int dilation_h, const int dilation_w,
const int pad_h, const int pad_w,
const int stride_h, const int stride_w,
const int height_col, const int width_col,
float* data_col) {
CUDA_KERNEL_LOOP(index, n) {
const int h_index = index / width_col;
const int h_col = h_index % height_col;
const int w_col = index % width_col;
const int c_im = h_index / height_col;
const int c_col = c_im * kernel_h * kernel_w;
const int h_offset = h_col * stride_h - pad_h;
const int w_offset = w_col * stride_w - pad_w;
float* data_col_ptr = data_col;
data_col_ptr += (c_col * height_col + h_col) * width_col + w_col;
const float* data_im_ptr = data_im;
data_im_ptr += (c_im * height + h_offset) * width + w_offset;
for (int i = 0; i < kernel_h; ++i) {
for (int j = 0; j < kernel_w; ++j) {
int h_im = h_offset + i * dilation_h;
int w_im = w_offset + j * dilation_w;
*data_col_ptr =
(h_im >= 0 && w_im >= 0 && h_im < height && w_im < width) ?
data_im_ptr[i * dilation_h * width + j * dilation_w] : 0;
data_col_ptr += height_col * width_col;
}
}
}
}
__global__ void im2col_kernel(const int n, const float* data_im,
__global__ void im2col_kernel(const int n, const float* data_im,
const int height, const int width, const int kernel_h, const int kernel_w,
const int height, const int width, const int kernel_h, const int kernel_w,
const int pad_h, const int pad_w,
const int pad_h, const int pad_w,
...
@@ -59,23 +92,24 @@ __global__ void im2col_kernel(const int n, const float* data_im,
...
@@ -59,23 +92,24 @@ __global__ void im2col_kernel(const int n, const float* data_im,
const int height_col, const int width_col,
const int height_col, const int width_col,
float* data_col) {
float* data_col) {
CUDA_KERNEL_LOOP(index, n) {
CUDA_KERNEL_LOOP(index, n) {
int w_out = index %
width_col;
const int h_index = index /
width_col;
int h_index = index / width
_col;
const int h_col = h_index % height
_col;
int h_out = h_index % height
_col;
const int w_col = index % width
_col;
int channel_in
= h_index / height_col;
const int c_im
= h_index / height_col;
int channel_out = channel_in
* kernel_h * kernel_w;
const int c_col = c_im
* kernel_h * kernel_w;
int h_in = h_out
* stride_h - pad_h;
const int h_offset = h_col
* stride_h - pad_h;
int w_in = w_out
* stride_w - pad_w;
const int w_offset = w_col
* stride_w - pad_w;
float* data_col_ptr = data_col;
float* data_col_ptr = data_col;
data_col_ptr += (c
hannel_out * height_col + h_out) * width_col + w_out
;
data_col_ptr += (c
_col * height_col + h_col) * width_col + w_col
;
const float* data_im_ptr = data_im;
const float* data_im_ptr = data_im;
data_im_ptr += (c
hannel_in * height + h_in) * width + w_in
;
data_im_ptr += (c
_im * height + h_offset) * width + w_offset
;
for (int i = 0; i < kernel_h; ++i) {
for (int i = 0; i < kernel_h; ++i) {
for (int j = 0; j < kernel_w; ++j) {
for (int j = 0; j < kernel_w; ++j) {
int h = h_in + i;
int h_im = h_offset + i ;
int w = w_in + j;
int w_im = w_offset + j ;
*data_col_ptr = (h >= 0 && w >= 0 && h < height && w < width) ?
*data_col_ptr =
data_im_ptr[i * width + j] : 0;
(h_im >= 0 && w_im >= 0 && h_im < height && w_im < width) ?
data_im_ptr[i * width + j] : 0;
data_col_ptr += height_col * width_col;
data_col_ptr += height_col * width_col;
}
}
}
}
...
@@ -84,52 +118,97 @@ __global__ void im2col_kernel(const int n, const float* data_im,
...
@@ -84,52 +118,97 @@ __global__ void im2col_kernel(const int n, const float* data_im,
void im2col(const float* data_im, const int channels,
void im2col(const float* data_im, const int channels,
const int height, const int width, const int kernel_h, const int kernel_w,
const int height, const int width, const int kernel_h, const int kernel_w,
const int dilation_h, const int dilation_w,
const int pad_h, const int pad_w,
const int pad_h, const int pad_w,
const int stride_h, const int stride_w,
const int stride_h, const int stride_w,
float* data_col) {
float* data_col) {
// We are going to launch channels * height_col * width_col kernels, each
// We are going to launch channels * height_col * width_col kernels, each
// kernel responsible for copying a single-channel grid.
// kernel responsible for copying a single-channel grid.
int height_col = (height + 2 * pad_h - kernel_h) / stride_h + 1;
int dil_kernel_h = (kernel_h - 1) * dilation_h + 1;
int width_col = (width + 2 * pad_w - kernel_w) / stride_w + 1;
int dil_kernel_w = (kernel_w - 1) * dilation_w + 1;
int height_col = (height + 2 * pad_h - dil_kernel_h) / stride_h + 1;
int width_col = (width + 2 * pad_w - dil_kernel_w) / stride_w + 1;
int num_kernels = channels * height_col * width_col;
int num_kernels = channels * height_col * width_col;
im2col_kernel<<<GET_BLOCKS(num_kernels),
if(dilation_h != 1 || dilation_w != 1){
dilated_im2col_kernel<<<GET_BLOCKS(num_kernels),
CUDA_NUM_THREADS>>>(
CUDA_NUM_THREADS>>>(
num_kernels, data_im, height, width, kernel_h, kernel_w,
pad_h,
num_kernels, data_im, height, width, kernel_h, kernel_w,
pad_w, stride_h, stride_w, height_col,
dilation_h, dilation_w, pad_h,
pad_w, stride_h, stride_w, height_col,
width_col, data_col);
width_col, data_col);
}
else{
im2col_kernel<<<GET_BLOCKS(num_kernels),
CUDA_NUM_THREADS>>>(
num_kernels, data_im, height, width, kernel_h, kernel_w,
pad_h, pad_w, stride_h, stride_w, height_col,
width_col, data_col);
}
}
}
__global__ void col2im_kernel(const int n, const float* data_col,
// CUDA kernel for the case of dilation
__global__ void dilated_col2im_kernel(const int n, const float* data_col,
const int height, const int width, const int channels,
const int height, const int width, const int channels,
const int patch_h, const int patch_w,
const int kernel_h, const int kernel_w,
const int dilation_h, const int dilation_w,
const int pad_h, const int pad_w,
const int pad_h, const int pad_w,
const int stride_h, const int stride_w,
const int stride_h, const int stride_w,
const int height_col, const int width_col,
const int height_col, const int width_col,
float* data_im) {
float* data_im) {
CUDA_KERNEL_LOOP(index, n) {
CUDA_KERNEL_LOOP(index, n) {
float val = 0;
float val = 0;
int w = index % width + pad_w;
const int w_im = index % width + pad_w;
int h = (index / width) % height + pad_h;
const int h_im = (index / width) % height + pad_h;
int c = index / (width * height);
const int c_im = index / (width * height);
int kernel_extent_w = (kernel_w - 1) * dilation_w + 1;
int kernel_extent_h = (kernel_h - 1) * dilation_h + 1;
// compute the start and end of the output
// compute the start and end of the output
int w_col_start = (w < patch_w) ? 0 : (w - patch_w) / stride_w + 1;
const int w_col_start =
int w_col_end = min(w / stride_w + 1, width_col);
(w_im < kernel_extent_w) ? 0 : (w_im - kernel_extent_w) / stride_w + 1;
int h_col_start = (h < patch_h) ? 0 : (h - patch_h) / stride_h + 1;
const int w_col_end = min(w_im / stride_w + 1, width_col);
int h_col_end = min(h / stride_h + 1, height_col);
const int h_col_start =
/*
(h_im < kernel_extent_h) ? 0 : (h_im - kernel_extent_h) / stride_h + 1;
for (int h_col = h_col_start; h_col < h_col_end; ++h_col) {
const int h_col_end = min(h_im / stride_h + 1, height_col);
for (int w_col = w_col_start; w_col < w_col_end; ++w_col) {
// TODO: use LCM of stride and dilation to avoid unnecessary loops
// the col location: [c * width * height + h_out, w_out]
for (int h_col = h_col_start; h_col < h_col_end; h_col += 1) {
int c_col = c * patch_h * patch_w + (h - h_col * stride_h) * ksize
for (int w_col = w_col_start; w_col < w_col_end; w_col += 1) {
+ (w - w_col * stride_w);
int h_k = (h_im - h_col * stride_h);
val += data_col[(c_col * height_col + h_col) * width_col + w_col];
int w_k = (w_im - w_col * stride_w);
if (h_k % dilation_h == 0 && w_k % dilation_w == 0) {
h_k /= dilation_h;
w_k /= dilation_w;
int data_col_index = (((c_im * kernel_h + h_k) * kernel_w + w_k) *
height_col + h_col) * width_col + w_col;
val += data_col[data_col_index];
}
}
}
}
}
*/
data_im[index] = val;
// equivalent implementation
}
}
__global__ void col2im_kernel(const int n, const float* data_col,
const int height, const int width, const int channels,
const int kernel_h, const int kernel_w,
const int pad_h, const int pad_w,
const int stride_h, const int stride_w,
const int height_col, const int width_col,
float* data_im) {
CUDA_KERNEL_LOOP(index, n) {
float val = 0;
const int w_im = index % width + pad_w;
const int h_im = (index / width) % height + pad_h;
const int c_im = index / (width * height);
// compute the start and end of the output
const int w_col_start =
(w_im < kernel_w) ? 0 : (w_im - kernel_w) / stride_w + 1;
const int w_col_end = min(w_im / stride_w + 1, width_col);
const int h_col_start =
(h_im < kernel_h) ? 0 : (h_im - kernel_h) / stride_h + 1;
const int h_col_end = min(h_im / stride_h + 1, height_col);
// equivalent implementation, no dilation
int offset =
int offset =
(c * patch_h * patch_w + h * patch_w + w
) * height_col * width_col;
(c_im * kernel_h * kernel_w + h_im * kernel_w + w_im
) * height_col * width_col;
int coeff_h_col = (1 - stride_h *
patch
_w * height_col) * width_col;
int coeff_h_col = (1 - stride_h *
kernel
_w * height_col) * width_col;
int coeff_w_col = (1 - stride_w * height_col * width_col);
int coeff_w_col = (1 - stride_w * height_col * width_col);
for (int h_col = h_col_start; h_col < h_col_end; ++h_col) {
for (int h_col = h_col_start; h_col < h_col_end; ++h_col) {
for (int w_col = w_col_start; w_col < w_col_end; ++w_col) {
for (int w_col = w_col_start; w_col < w_col_end; ++w_col) {
...
@@ -142,18 +221,30 @@ __global__ void col2im_kernel(const int n, const float* data_col,
...
@@ -142,18 +221,30 @@ __global__ void col2im_kernel(const int n, const float* data_col,
void col2im(const float* data_col, const int channels,
void col2im(const float* data_col, const int channels,
const int height, const int width, const int patch_h, const int patch_w,
const int height, const int width, const int patch_h, const int patch_w,
const int dilation_h, const int dilation_w,
const int pad_h, const int pad_w, const int stride_h,
const int pad_h, const int pad_w, const int stride_h,
const int stride_w, float* data_im) {
const int stride_w, float* data_im) {
int height_col = (height + 2 * pad_h - patch_h) / stride_h + 1;
int dil_patch_h = (patch_h - 1) * dilation_h + 1;
int width_col = (width + 2 * pad_w - patch_w) / stride_w + 1;
int dil_patch_w = (patch_w - 1) * dilation_w + 1;
int height_col = (height + 2 * pad_h - dil_patch_h) / stride_h + 1;
int width_col = (width + 2 * pad_w - dil_patch_w) / stride_w + 1;
int num_kernels = channels * height * width;
int num_kernels = channels * height * width;
// To avoid involving atomic operations, we will launch one kernel per
// To avoid involving atomic operations, we will launch one kernel per
// bottom dimension, and then in the kernel add up the top dimensions.
// bottom dimension, and then in the kernel add up the top dimensions.
col2im_kernel<<<GET_BLOCKS(num_kernels),
if(dilation_h != 1 || dilation_w != 1){
dilated_col2im_kernel<<<GET_BLOCKS(num_kernels),
CUDA_NUM_THREADS>>>(
num_kernels, data_col, height, width, channels, patch_h, patch_w,
dilation_h, dilation_w, pad_h, pad_w, stride_h, stride_w,
height_col, width_col, data_im);
}
else{
col2im_kernel<<<GET_BLOCKS(num_kernels),
CUDA_NUM_THREADS>>>(
CUDA_NUM_THREADS>>>(
num_kernels, data_col, height, width, channels, patch_h, patch_w,
num_kernels, data_col, height, width, channels, patch_h, patch_w,
pad_h, pad_w, stride_h, stride_w,
pad_h, pad_w, stride_h, stride_w,
height_col, width_col, data_im);
height_col, width_col, data_im);
}
}
}
...
@@ -167,6 +258,8 @@ CudaNdarray* corrMM(CudaNdarray *const bottom,
...
@@ -167,6 +258,8 @@ CudaNdarray* corrMM(CudaNdarray *const bottom,
const int direction,
const int direction,
const int dH = 1,
const int dH = 1,
const int dW = 1,
const int dW = 1,
const int dilH = 1,
const int dilW = 1,
const int padH = 0,
const int padH = 0,
const int padW = 0)
const int padW = 0)
{
{
...
@@ -236,9 +329,12 @@ CudaNdarray* corrMM(CudaNdarray *const bottom,
...
@@ -236,9 +329,12 @@ CudaNdarray* corrMM(CudaNdarray *const bottom,
"GpuCorrMM images and kernel must have the same stack size\n");
"GpuCorrMM images and kernel must have the same stack size\n");
return NULL;
return NULL;
}
}
// implicit dilated filter
const int dil_kH = (kH - 1) * dilH + 1;
const int dil_kW = (kW - 1) * dilW + 1;
// top: (batchSize, nFilters, topHeight, topWidth)
// top: (batchSize, nFilters, topHeight, topWidth)
const int topHeight = (bottomHeight + 2*padH - kH) / dH + 1;
const int topHeight = (bottomHeight + 2*padH -
dil_
kH) / dH + 1;
const int topWidth = (bottomWidth + 2*padW - kW) / dW + 1;
const int topWidth = (bottomWidth + 2*padW -
dil_
kW) / dW + 1;
if (batchSize != CudaNdarray_HOST_DIMS(top)[0] ||
if (batchSize != CudaNdarray_HOST_DIMS(top)[0] ||
nFilters != CudaNdarray_HOST_DIMS(top)[1] ||
nFilters != CudaNdarray_HOST_DIMS(top)[1] ||
topHeight != CudaNdarray_HOST_DIMS(top)[2] ||
topHeight != CudaNdarray_HOST_DIMS(top)[2] ||
...
@@ -286,7 +382,8 @@ CudaNdarray* corrMM(CudaNdarray *const bottom,
...
@@ -286,7 +382,8 @@ CudaNdarray* corrMM(CudaNdarray *const bottom,
for (int n = 0; n < batchSize; n++) {
for (int n = 0; n < batchSize; n++) {
// First, im2col
// First, im2col
im2col(bottom->devdata + n * bottom_stride, nChannels, bottomHeight,
im2col(bottom->devdata + n * bottom_stride, nChannels, bottomHeight,
bottomWidth, kH, kW, padH, padW, dH, dW, col->devdata);
bottomWidth, kH, kW, dilH, dilW,
padH, padW, dH, dW, col->devdata);
cudaError_t err = cudaGetLastError();
cudaError_t err = cudaGetLastError();
if (err != cudaSuccess) {
if (err != cudaSuccess) {
PyErr_Format(PyExc_RuntimeError,
PyErr_Format(PyExc_RuntimeError,
...
@@ -353,7 +450,8 @@ CudaNdarray* corrMM(CudaNdarray *const bottom,
...
@@ -353,7 +450,8 @@ CudaNdarray* corrMM(CudaNdarray *const bottom,
for (int n = 0; n < batchSize; n++) {
for (int n = 0; n < batchSize; n++) {
// First, im2col
// First, im2col
im2col(bottom->devdata + n * bottom_stride, nChannels, bottomHeight,
im2col(bottom->devdata + n * bottom_stride, nChannels, bottomHeight,
bottomWidth, kH, kW, padH, padW, dH, dW, col->devdata);
bottomWidth, kH, kW, dilH, dilW,
padH, padW, dH, dW, col->devdata);
cudaError_t err = cudaGetLastError();
cudaError_t err = cudaGetLastError();
if (err != cudaSuccess) {
if (err != cudaSuccess) {
PyErr_Format(PyExc_RuntimeError,
PyErr_Format(PyExc_RuntimeError,
...
@@ -438,7 +536,8 @@ CudaNdarray* corrMM(CudaNdarray *const bottom,
...
@@ -438,7 +536,8 @@ CudaNdarray* corrMM(CudaNdarray *const bottom,
}
}
// col2im back to the data
// col2im back to the data
col2im(col->devdata, nChannels, bottomHeight, bottomWidth,
col2im(col->devdata, nChannels, bottomHeight, bottomWidth,
kH, kW, padH, padW, dH, dW, bottom->devdata + n * bottom_stride);
kH, kW, dilH, dilW, padH, padW,
dH, dW, bottom->devdata + n * bottom_stride);
cudaError_t err = cudaGetLastError();
cudaError_t err = cudaGetLastError();
if (err != cudaSuccess) {
if (err != cudaSuccess) {
PyErr_Format(PyExc_RuntimeError,
PyErr_Format(PyExc_RuntimeError,
...
...
theano/sandbox/cuda/dnn.py
浏览文件 @
a668c6c5
...
@@ -3116,6 +3116,8 @@ def local_abstractconv_cudnn(node):
...
@@ -3116,6 +3116,8 @@ def local_abstractconv_cudnn(node):
AbstractConv2d_gradWeights
,
AbstractConv2d_gradWeights
,
AbstractConv2d_gradInputs
))):
AbstractConv2d_gradInputs
))):
return
None
return
None
if
(
node
.
op
.
filter_dilation
!=
(
1
,
1
)):
return
None
inp1
=
node
.
inputs
[
0
]
inp1
=
node
.
inputs
[
0
]
inp2
=
node
.
inputs
[
1
]
inp2
=
node
.
inputs
[
1
]
...
@@ -3123,6 +3125,7 @@ def local_abstractconv_cudnn(node):
...
@@ -3123,6 +3125,7 @@ def local_abstractconv_cudnn(node):
if
(
not
isinstance
(
inp1
.
type
,
CudaNdarrayType
)
or
if
(
not
isinstance
(
inp1
.
type
,
CudaNdarrayType
)
or
not
isinstance
(
inp2
.
type
,
CudaNdarrayType
)):
not
isinstance
(
inp2
.
type
,
CudaNdarrayType
)):
return
None
return
None
if
not
dnn_available
():
if
not
dnn_available
():
return
None
return
None
...
...
theano/sandbox/cuda/opt.py
浏览文件 @
a668c6c5
...
@@ -1622,7 +1622,8 @@ def local_conv_gemm(node):
...
@@ -1622,7 +1622,8 @@ def local_conv_gemm(node):
# because we are not allowed to replace a CudaNdarray with
# because we are not allowed to replace a CudaNdarray with
# a DimShuffle instance in a graph optimization)
# a DimShuffle instance in a graph optimization)
rval
=
theano
.
sandbox
.
cuda
.
as_cuda_ndarray_variable
(
rval
=
theano
.
sandbox
.
cuda
.
as_cuda_ndarray_variable
(
GpuCorrMM_gradWeights
(
border_mode
,
subsample
)(
GpuCorrMM_gradWeights
(
border_mode
,
subsample
)(
gpu_contiguous
(
img
.
dimshuffle
(
1
,
0
,
2
,
3
)),
gpu_contiguous
(
img
.
dimshuffle
(
1
,
0
,
2
,
3
)),
gpu_contiguous
(
kern
.
dimshuffle
(
1
,
0
,
2
,
3
))
gpu_contiguous
(
kern
.
dimshuffle
(
1
,
0
,
2
,
3
))
)
.
dimshuffle
(
1
,
0
,
2
,
3
))
)
.
dimshuffle
(
1
,
0
,
2
,
3
))
...
@@ -2769,28 +2770,33 @@ def local_abstractconv_gemm(node):
...
@@ -2769,28 +2770,33 @@ def local_abstractconv_gemm(node):
border_mode
=
node
.
op
.
border_mode
border_mode
=
node
.
op
.
border_mode
subsample
=
node
.
op
.
subsample
subsample
=
node
.
op
.
subsample
if
(
border_mode
==
'full'
)
and
(
subsample
==
(
1
,
1
)):
filter_dilation
=
node
.
op
.
filter_dilation
if
((
border_mode
==
'full'
)
and
(
subsample
==
(
1
,
1
))):
if
not
node
.
op
.
filter_flip
:
if
not
node
.
op
.
filter_flip
:
kern
=
kern
[:,
:,
::
-
1
,
::
-
1
]
kern
=
kern
[:,
:,
::
-
1
,
::
-
1
]
# need to dimshuffle the kernel for full convolution
# need to dimshuffle the kernel for full convolution
kern
=
kern
.
dimshuffle
(
1
,
0
,
2
,
3
)
kern
=
kern
.
dimshuffle
(
1
,
0
,
2
,
3
)
# call GpuCorrMM_gradInputs
# call GpuCorrMM_gradInputs
rval
=
GpuCorrMM_gradInputs
(
'valid'
,
subsample
)(
rval
=
GpuCorrMM_gradInputs
(
'valid'
,
subsample
,
filter_dilation
)(
gpu_contiguous
(
kern
),
gpu_contiguous
(
img
))
gpu_contiguous
(
kern
),
gpu_contiguous
(
img
))
else
:
else
:
# need to flip the kernel if necessary
# need to flip the kernel if necessary
if
node
.
op
.
filter_flip
:
if
node
.
op
.
filter_flip
:
kern
=
kern
[:,
:,
::
-
1
,
::
-
1
]
kern
=
kern
[:,
:,
::
-
1
,
::
-
1
]
# By default use GpuCorrMM
# By default use GpuCorrMM
rval
=
GpuCorrMM
(
border_mode
,
subsample
)(
gpu_contiguous
(
img
),
rval
=
GpuCorrMM
(
border_mode
,
gpu_contiguous
(
kern
))
subsample
,
filter_dilation
)(
gpu_contiguous
(
img
),
gpu_contiguous
(
kern
))
# call GpuCorrMM_gradWeights if good
# call GpuCorrMM_gradWeights if good
# (the latter is faster if batchsize * kernelHeight * kernelWidth
# (the latter is faster if batchsize * kernelHeight * kernelWidth
# is larger than inputChannels * outputHeight * outputWidth.
# is larger than inputChannels * outputHeight * outputWidth.
# GpuConv does not always store information on the batchsize and
# GpuConv does not always store information on the batchsize and
# channels, though, so we only use what information we have.)
# channels, though, so we only use what information we have.)
if
((
subsample
==
(
1
,
1
))
and
if
((
subsample
==
(
1
,
1
))
and
(
filter_dilation
==
(
1
,
1
))
and
(
node
.
op
.
imshp
is
not
None
)
and
(
node
.
op
.
imshp
is
not
None
)
and
(
None
not
in
node
.
op
.
imshp
[
-
2
:])
and
(
None
not
in
node
.
op
.
imshp
[
-
2
:])
and
(
node
.
op
.
kshp
is
not
None
)
and
(
node
.
op
.
kshp
is
not
None
)
and
...
@@ -2810,7 +2816,9 @@ def local_abstractconv_gemm(node):
...
@@ -2810,7 +2816,9 @@ def local_abstractconv_gemm(node):
# because we are not allowed to replace a CudaNdarray with
# because we are not allowed to replace a CudaNdarray with
# a DimShuffle instance in a graph optimization)
# a DimShuffle instance in a graph optimization)
rval
=
theano
.
sandbox
.
cuda
.
as_cuda_ndarray_variable
(
rval
=
theano
.
sandbox
.
cuda
.
as_cuda_ndarray_variable
(
GpuCorrMM_gradWeights
(
border_mode
,
subsample
)(
GpuCorrMM_gradWeights
(
border_mode
,
subsample
,
filter_dilation
)(
gpu_contiguous
(
img
.
dimshuffle
(
1
,
0
,
2
,
3
)),
gpu_contiguous
(
img
.
dimshuffle
(
1
,
0
,
2
,
3
)),
gpu_contiguous
(
kern
.
dimshuffle
(
1
,
0
,
2
,
3
))
gpu_contiguous
(
kern
.
dimshuffle
(
1
,
0
,
2
,
3
))
)
.
dimshuffle
(
1
,
0
,
2
,
3
))
)
.
dimshuffle
(
1
,
0
,
2
,
3
))
...
@@ -2827,7 +2835,8 @@ def local_abstractconv_gradweight_gemm(node):
...
@@ -2827,7 +2835,8 @@ def local_abstractconv_gradweight_gemm(node):
return
None
return
None
rval
=
GpuCorrMM_gradWeights
(
border_mode
=
node
.
op
.
border_mode
,
rval
=
GpuCorrMM_gradWeights
(
border_mode
=
node
.
op
.
border_mode
,
subsample
=
node
.
op
.
subsample
)(
subsample
=
node
.
op
.
subsample
,
filter_dilation
=
node
.
op
.
filter_dilation
)(
gpu_contiguous
(
img
),
gpu_contiguous
(
topgrad
),
shape
)
gpu_contiguous
(
img
),
gpu_contiguous
(
topgrad
),
shape
)
if
node
.
op
.
filter_flip
:
if
node
.
op
.
filter_flip
:
rval
=
rval
[:,
:,
::
-
1
,
::
-
1
]
rval
=
rval
[:,
:,
::
-
1
,
::
-
1
]
...
@@ -2849,7 +2858,8 @@ def local_abstractconv_gradinputs_gemm(node):
...
@@ -2849,7 +2858,8 @@ def local_abstractconv_gradinputs_gemm(node):
kern
=
kern
[:,
:,
::
-
1
,
::
-
1
]
kern
=
kern
[:,
:,
::
-
1
,
::
-
1
]
rval
=
GpuCorrMM_gradInputs
(
border_mode
=
node
.
op
.
border_mode
,
rval
=
GpuCorrMM_gradInputs
(
border_mode
=
node
.
op
.
border_mode
,
subsample
=
node
.
op
.
subsample
)(
subsample
=
node
.
op
.
subsample
,
filter_dilation
=
node
.
op
.
filter_dilation
)(
gpu_contiguous
(
kern
),
gpu_contiguous
(
topgrad
),
shape
)
gpu_contiguous
(
kern
),
gpu_contiguous
(
topgrad
),
shape
)
return
[
rval
]
return
[
rval
]
...
@@ -2870,10 +2880,12 @@ conv_groupopt.register('local_abstractconv_dnn',
...
@@ -2870,10 +2880,12 @@ conv_groupopt.register('local_abstractconv_dnn',
conv_groupopt
.
register
(
'local_abstractconv_gemm'
,
local_abstractconv_gemm
,
30
,
conv_groupopt
.
register
(
'local_abstractconv_gemm'
,
local_abstractconv_gemm
,
30
,
'conv_gemm'
,
'conv_gemm'
,
'gpu'
,
'fast_compile'
,
'fast_run'
)
'gpu'
,
'fast_compile'
,
'fast_run'
)
conv_groupopt
.
register
(
'local_abstractconv_gradweight_gemm'
,
conv_groupopt
.
register
(
'local_abstractconv_gradweight_gemm'
,
local_abstractconv_gradweight_gemm
,
30
,
local_abstractconv_gradweight_gemm
,
30
,
'conv_gemm'
,
'conv_gemm'
,
'gpu'
,
'fast_compile'
,
'fast_run'
)
'gpu'
,
'fast_compile'
,
'fast_run'
)
conv_groupopt
.
register
(
'local_abstractconv_gradinputs_gemm'
,
conv_groupopt
.
register
(
'local_abstractconv_gradinputs_gemm'
,
local_abstractconv_gradinputs_gemm
,
30
,
local_abstractconv_gradinputs_gemm
,
30
,
'conv_gemm'
,
'conv_gemm'
,
...
...
theano/sandbox/cuda/tests/test_abstractconv.py
浏览文件 @
a668c6c5
...
@@ -29,25 +29,30 @@ class TestDnnConv2d(test_abstract_conv.BaseTestConv2d):
...
@@ -29,25 +29,30 @@ class TestDnnConv2d(test_abstract_conv.BaseTestConv2d):
self
.
provide_shape
=
[
False
]
self
.
provide_shape
=
[
False
]
self
.
shared
=
gpu_shared
self
.
shared
=
gpu_shared
def
tcase
(
self
,
i
,
f
,
s
,
b
,
flip
,
provide_shape
):
def
tcase
(
self
,
i
,
f
,
s
,
b
,
flip
,
provide_shape
,
fd
=
(
1
,
1
)):
if
fd
!=
(
1
,
1
):
raise
SkipTest
(
"No dilation implementation for cuDNN ConvOp."
)
if
not
dnn_available
():
if
not
dnn_available
():
raise
SkipTest
(
cuda
.
dnn
.
dnn_available
.
msg
)
raise
SkipTest
(
cuda
.
dnn
.
dnn_available
.
msg
)
mode
=
mode_with_gpu
mode
=
mode_with_gpu
o
=
self
.
get_output_shape
(
i
,
f
,
s
,
b
)
o
=
self
.
get_output_shape
(
i
,
f
,
s
,
b
,
fd
)
self
.
run_fwd
(
inputs_shape
=
i
,
filters_shape
=
f
,
subsample
=
s
,
self
.
run_fwd
(
inputs_shape
=
i
,
filters_shape
=
f
,
subsample
=
s
,
verify_grad
=
True
,
mode
=
mode
,
verify_grad
=
True
,
mode
=
mode
,
provide_shape
=
provide_shape
,
border_mode
=
b
,
provide_shape
=
provide_shape
,
border_mode
=
b
,
filter_flip
=
flip
,
target_op
=
GpuDnnConv
)
filter_flip
=
flip
,
target_op
=
GpuDnnConv
,
filter_dilation
=
fd
)
self
.
run_gradweight
(
inputs_shape
=
i
,
filters_shape
=
f
,
self
.
run_gradweight
(
inputs_shape
=
i
,
filters_shape
=
f
,
output_shape
=
o
,
subsample
=
s
,
output_shape
=
o
,
subsample
=
s
,
verify_grad
=
True
,
mode
=
mode
,
verify_grad
=
True
,
mode
=
mode
,
provide_shape
=
provide_shape
,
border_mode
=
b
,
provide_shape
=
provide_shape
,
border_mode
=
b
,
filter_flip
=
flip
,
target_op
=
GpuDnnConvGradW
)
filter_flip
=
flip
,
target_op
=
GpuDnnConvGradW
,
filter_dilation
=
fd
)
self
.
run_gradinput
(
inputs_shape
=
i
,
filters_shape
=
f
,
self
.
run_gradinput
(
inputs_shape
=
i
,
filters_shape
=
f
,
output_shape
=
o
,
subsample
=
s
,
output_shape
=
o
,
subsample
=
s
,
verify_grad
=
True
,
mode
=
mode
,
verify_grad
=
True
,
mode
=
mode
,
provide_shape
=
provide_shape
,
border_mode
=
b
,
provide_shape
=
provide_shape
,
border_mode
=
b
,
filter_flip
=
flip
,
target_op
=
GpuDnnConvGradI
)
filter_flip
=
flip
,
target_op
=
GpuDnnConvGradI
,
filter_dilation
=
fd
)
class
TestCorrMMConv2d
(
test_abstract_conv
.
BaseTestConv2d
):
class
TestCorrMMConv2d
(
test_abstract_conv
.
BaseTestConv2d
):
...
@@ -56,28 +61,30 @@ class TestCorrMMConv2d(test_abstract_conv.BaseTestConv2d):
...
@@ -56,28 +61,30 @@ class TestCorrMMConv2d(test_abstract_conv.BaseTestConv2d):
self
.
shared
=
gpu_shared
self
.
shared
=
gpu_shared
self
.
mode
=
mode_with_gpu
.
excluding
(
'cudnn'
)
self
.
mode
=
mode_with_gpu
.
excluding
(
'cudnn'
)
def
tcase
(
self
,
i
,
f
,
s
,
b
,
flip
,
provide_shape
):
def
tcase
(
self
,
i
,
f
,
s
,
b
,
flip
,
provide_shape
,
fd
=
(
1
,
1
)
):
mode
=
self
.
mode
mode
=
self
.
mode
o
=
self
.
get_output_shape
(
i
,
f
,
s
,
b
)
o
=
self
.
get_output_shape
(
i
,
f
,
s
,
b
,
fd
)
self
.
run_fwd
(
inputs_shape
=
i
,
filters_shape
=
f
,
subsample
=
s
,
self
.
run_fwd
(
inputs_shape
=
i
,
filters_shape
=
f
,
verify_grad
=
True
,
mode
=
mode
,
subsample
=
s
,
verify_grad
=
True
,
mode
=
mode
,
provide_shape
=
provide_shape
,
border_mode
=
b
,
provide_shape
=
provide_shape
,
border_mode
=
b
,
filter_flip
=
flip
,
filter_flip
=
flip
,
target_op
=
(
GpuCorrMM
,
target_op
=
(
GpuCorrMM
,
GpuCorrMM_gradWeights
,
GpuCorrMM_gradWeights
,
GpuCorrMM_gradInputs
)
,
GpuCorrMM_gradInputs
)
)
filter_dilation
=
fd
)
self
.
run_gradweight
(
inputs_shape
=
i
,
filters_shape
=
f
,
self
.
run_gradweight
(
inputs_shape
=
i
,
filters_shape
=
f
,
output_shape
=
o
,
subsample
=
s
,
output_shape
=
o
,
subsample
=
s
,
verify_grad
=
True
,
mode
=
mode
,
verify_grad
=
True
,
mode
=
mode
,
provide_shape
=
provide_shape
,
border_mode
=
b
,
provide_shape
=
provide_shape
,
border_mode
=
b
,
filter_flip
=
flip
,
filter_flip
=
flip
,
target_op
=
GpuCorrMM_gradWeights
)
target_op
=
GpuCorrMM_gradWeights
,
filter_dilation
=
fd
)
self
.
run_gradinput
(
inputs_shape
=
i
,
filters_shape
=
f
,
self
.
run_gradinput
(
inputs_shape
=
i
,
filters_shape
=
f
,
output_shape
=
o
,
subsample
=
s
,
output_shape
=
o
,
subsample
=
s
,
verify_grad
=
True
,
mode
=
mode
,
verify_grad
=
True
,
mode
=
mode
,
provide_shape
=
provide_shape
,
border_mode
=
b
,
provide_shape
=
provide_shape
,
border_mode
=
b
,
filter_flip
=
flip
,
filter_flip
=
flip
,
target_op
=
GpuCorrMM_gradInputs
)
target_op
=
GpuCorrMM_gradInputs
,
filter_dilation
=
fd
)
class
TestDnnConvTypes
(
test_abstract_conv
.
TestConvTypes
):
class
TestDnnConvTypes
(
test_abstract_conv
.
TestConvTypes
):
...
...
theano/tensor/nnet/__init__.py
浏览文件 @
a668c6c5
...
@@ -35,7 +35,7 @@ from .abstract_conv import conv2d as abstract_conv2d
...
@@ -35,7 +35,7 @@ from .abstract_conv import conv2d as abstract_conv2d
def
conv2d
(
input
,
filters
,
input_shape
=
None
,
filter_shape
=
None
,
def
conv2d
(
input
,
filters
,
input_shape
=
None
,
filter_shape
=
None
,
border_mode
=
'valid'
,
subsample
=
(
1
,
1
),
filter_flip
=
True
,
border_mode
=
'valid'
,
subsample
=
(
1
,
1
),
filter_flip
=
True
,
image_shape
=
None
,
**
kwargs
):
image_shape
=
None
,
filter_dilation
=
(
1
,
1
),
**
kwargs
):
"""
"""
This function will build the symbolic graph for convolving a mini-batch of a
This function will build the symbolic graph for convolving a mini-batch of a
stack of 2D inputs with a set of 2D filters. The implementation is modelled
stack of 2D inputs with a set of 2D filters. The implementation is modelled
...
@@ -95,6 +95,10 @@ def conv2d(input, filters, input_shape=None, filter_shape=None,
...
@@ -95,6 +95,10 @@ def conv2d(input, filters, input_shape=None, filter_shape=None,
image_shape: None, tuple/list of len 4 of int or Constant variable
image_shape: None, tuple/list of len 4 of int or Constant variable
Deprecated alias for input_shape.
Deprecated alias for input_shape.
filter_dilation: tuple of len 2
Factor by which to subsample (stride) the input.
Also called dilation elsewhere.
kwargs: Any other keyword arguments are accepted for backwards
kwargs: Any other keyword arguments are accepted for backwards
compatibility, but will be ignored.
compatibility, but will be ignored.
...
@@ -140,4 +144,5 @@ def conv2d(input, filters, input_shape=None, filter_shape=None,
...
@@ -140,4 +144,5 @@ def conv2d(input, filters, input_shape=None, filter_shape=None,
" be provided at the same time."
)
" be provided at the same time."
)
return
abstract_conv2d
(
input
,
filters
,
input_shape
,
filter_shape
,
return
abstract_conv2d
(
input
,
filters
,
input_shape
,
filter_shape
,
border_mode
,
subsample
,
filter_flip
)
border_mode
,
subsample
,
filter_flip
,
filter_dilation
)
theano/tensor/nnet/abstract_conv.py
浏览文件 @
a668c6c5
...
@@ -32,7 +32,8 @@ _logger = logging.getLogger("theano.tensor.nnet.abstract_conv")
...
@@ -32,7 +32,8 @@ _logger = logging.getLogger("theano.tensor.nnet.abstract_conv")
def
get_conv_output_shape
(
image_shape
,
kernel_shape
,
def
get_conv_output_shape
(
image_shape
,
kernel_shape
,
border_mode
,
subsample
):
border_mode
,
subsample
,
filter_dilation
=
(
1
,
1
)):
"""
"""
This function computes the output shape of a convolution operation.
This function computes the output shape of a convolution operation.
...
@@ -53,6 +54,8 @@ def get_conv_output_shape(image_shape, kernel_shape,
...
@@ -53,6 +54,8 @@ def get_conv_output_shape(image_shape, kernel_shape,
subsample: tuple of int (symbolic or numeric). Its two or three elements
subsample: tuple of int (symbolic or numeric). Its two or three elements
respectively correspond to the subsampling on height and width (and
respectively correspond to the subsampling on height and width (and
possibly depth) axis.
possibly depth) axis.
filter_dilation: tuple of int (symbolic or numeric). Its two elements
correspond respectively to the dilation on height and width axis.
Returns
Returns
-------
-------
...
@@ -65,17 +68,19 @@ def get_conv_output_shape(image_shape, kernel_shape,
...
@@ -65,17 +68,19 @@ def get_conv_output_shape(image_shape, kernel_shape,
nkern
,
kshp
=
kernel_shape
[
0
],
kernel_shape
[
2
:]
nkern
,
kshp
=
kernel_shape
[
0
],
kernel_shape
[
2
:]
if
isinstance
(
border_mode
,
tuple
):
if
isinstance
(
border_mode
,
tuple
):
out_shp
=
tuple
(
get_conv_shape_1axis
(
out_shp
=
tuple
(
get_conv_shape_1axis
(
imshp
[
i
],
kshp
[
i
],
border_mode
[
i
],
subsample
[
i
])
imshp
[
i
],
kshp
[
i
],
border_mode
[
i
],
for
i
in
range
(
len
(
subsample
)))
subsample
[
i
],
filter_dilation
[
i
])
for
i
in
range
(
len
(
subsample
)))
else
:
else
:
out_shp
=
tuple
(
get_conv_shape_1axis
(
out_shp
=
tuple
(
get_conv_shape_1axis
(
imshp
[
i
],
kshp
[
i
],
border_mode
,
subsample
[
i
])
imshp
[
i
],
kshp
[
i
],
border_mode
,
for
i
in
range
(
len
(
subsample
)))
subsample
[
i
],
filter_dilation
[
i
])
for
i
in
range
(
len
(
subsample
)))
return
(
bsize
,
nkern
)
+
out_shp
return
(
bsize
,
nkern
)
+
out_shp
def
get_conv_shape_1axis
(
image_shape
,
kernel_shape
,
# filter dilation set by default to 1
border_mode
,
subsample
):
# for compatibility with other tests.
def
get_conv_shape_1axis
(
image_shape
,
kernel_shape
,
border_mode
,
subsample
,
dilation
=
1
):
"""
"""
This function computes the output shape of a convolution operation.
This function computes the output shape of a convolution operation.
...
@@ -90,6 +95,8 @@ def get_conv_shape_1axis(image_shape, kernel_shape,
...
@@ -90,6 +95,8 @@ def get_conv_shape_1axis(image_shape, kernel_shape,
the padding on the considered axis.
the padding on the considered axis.
subsample: int. It must correspond to the subsampling on the
subsample: int. It must correspond to the subsampling on the
considered axis.
considered axis.
dilation: int. It must correspond to the dilation on the
considered axis.
Returns
Returns
-------
-------
...
@@ -97,19 +104,22 @@ def get_conv_shape_1axis(image_shape, kernel_shape,
...
@@ -97,19 +104,22 @@ def get_conv_shape_1axis(image_shape, kernel_shape,
considered axis. None if undefined.
considered axis. None if undefined.
"""
"""
if
None
in
[
image_shape
,
kernel_shape
,
border_mode
,
subsample
]:
if
None
in
[
image_shape
,
kernel_shape
,
border_mode
,
subsample
,
dilation
]:
return
None
return
None
# Implicit dilated kernel shape
dil_kernel_shape
=
(
kernel_shape
-
1
)
*
dilation
+
1
if
border_mode
==
"half"
:
if
border_mode
==
"half"
:
pad
=
kernel_shape
//
2
pad
=
dil_
kernel_shape
//
2
elif
border_mode
==
"full"
:
elif
border_mode
==
"full"
:
pad
=
kernel_shape
-
1
pad
=
dil_
kernel_shape
-
1
elif
border_mode
==
"valid"
:
elif
border_mode
==
"valid"
:
pad
=
0
pad
=
0
else
:
else
:
pad
=
border_mode
pad
=
border_mode
if
pad
<
0
:
if
pad
<
0
:
raise
ValueError
(
"border_mode must be >= 0"
)
raise
ValueError
(
"border_mode must be >= 0"
)
out_shp
=
(
image_shape
+
2
*
pad
-
kernel_shape
)
//
subsample
+
1
out_shp
=
(
image_shape
+
2
*
pad
-
dil_
kernel_shape
)
//
subsample
+
1
return
out_shp
return
out_shp
...
@@ -120,7 +130,8 @@ def conv2d(input,
...
@@ -120,7 +130,8 @@ def conv2d(input,
filter_shape
=
None
,
filter_shape
=
None
,
border_mode
=
'valid'
,
border_mode
=
'valid'
,
subsample
=
(
1
,
1
),
subsample
=
(
1
,
1
),
filter_flip
=
True
):
filter_flip
=
True
,
filter_dilation
=
(
1
,
1
)):
"""This function will build the symbolic graph for convolving a mini-batch of a
"""This function will build the symbolic graph for convolving a mini-batch of a
stack of 2D inputs with a set of 2D filters. The implementation is modelled
stack of 2D inputs with a set of 2D filters. The implementation is modelled
after Convolutional Neural Networks (CNN).
after Convolutional Neural Networks (CNN).
...
@@ -134,7 +145,8 @@ def conv2d(input,
...
@@ -134,7 +145,8 @@ def conv2d(input,
kshp
=
filter_shape
,
kshp
=
filter_shape
,
border_mode
=
border_mode
,
border_mode
=
border_mode
,
subsample
=
subsample
,
subsample
=
subsample
,
filter_flip
=
filter_flip
)
filter_flip
=
filter_flip
,
filter_dilation
=
filter_dilation
)
return
conv_op
(
input
,
filters
)
return
conv_op
(
input
,
filters
)
...
@@ -144,7 +156,8 @@ def conv2d_grad_wrt_inputs(output_grad,
...
@@ -144,7 +156,8 @@ def conv2d_grad_wrt_inputs(output_grad,
filter_shape
=
None
,
filter_shape
=
None
,
border_mode
=
'valid'
,
border_mode
=
'valid'
,
subsample
=
(
1
,
1
),
subsample
=
(
1
,
1
),
filter_flip
=
True
):
filter_flip
=
True
,
filter_dilation
=
(
1
,
1
)):
"""Compute conv output gradient w.r.t its inputs
"""Compute conv output gradient w.r.t its inputs
This function builds the symbolic graph for getting the
This function builds the symbolic graph for getting the
...
@@ -214,6 +227,9 @@ def conv2d_grad_wrt_inputs(output_grad,
...
@@ -214,6 +227,9 @@ def conv2d_grad_wrt_inputs(output_grad,
referred to as a convolution, and this is the default. If
referred to as a convolution, and this is the default. If
``False``, the filters are not flipped and the operation is
``False``, the filters are not flipped and the operation is
referred to as a cross-correlation.
referred to as a cross-correlation.
filter_dilation : tuple of len 2
The filter dilation used in the forward pass.
Also known as input striding.
Returns
Returns
-------
-------
...
@@ -263,7 +279,8 @@ def conv2d_grad_wrt_inputs(output_grad,
...
@@ -263,7 +279,8 @@ def conv2d_grad_wrt_inputs(output_grad,
kshp
=
filter_shape
,
kshp
=
filter_shape
,
border_mode
=
border_mode
,
border_mode
=
border_mode
,
subsample
=
subsample
,
subsample
=
subsample
,
filter_flip
=
filter_flip
)
filter_flip
=
filter_flip
,
filter_dilation
=
filter_dilation
)
return
grad_input_op
(
filters
,
output_grad
,
input_shape
[
-
2
:])
return
grad_input_op
(
filters
,
output_grad
,
input_shape
[
-
2
:])
...
@@ -274,7 +291,8 @@ def conv2d_grad_wrt_weights(input,
...
@@ -274,7 +291,8 @@ def conv2d_grad_wrt_weights(input,
input_shape
=
None
,
input_shape
=
None
,
border_mode
=
'valid'
,
border_mode
=
'valid'
,
subsample
=
(
1
,
1
),
subsample
=
(
1
,
1
),
filter_flip
=
True
):
filter_flip
=
True
,
filter_dilation
=
(
1
,
1
)):
"""Compute conv output gradient w.r.t its weights
"""Compute conv output gradient w.r.t its weights
This function will build the symbolic graph for getting the
This function will build the symbolic graph for getting the
...
@@ -327,7 +345,6 @@ def conv2d_grad_wrt_weights(input,
...
@@ -327,7 +345,6 @@ def conv2d_grad_wrt_weights(input,
``(int1, int2)``
``(int1, int2)``
pad input with a symmetric border of ``int1`` rows and
pad input with a symmetric border of ``int1`` rows and
``int2`` columns, then perform a valid convolution.
``int2`` columns, then perform a valid convolution.
subsample : tuple of len 2
subsample : tuple of len 2
The subsampling used in the forward pass of the convolutional
The subsampling used in the forward pass of the convolutional
operation. Also called strides elsewhere.
operation. Also called strides elsewhere.
...
@@ -337,6 +354,9 @@ def conv2d_grad_wrt_weights(input,
...
@@ -337,6 +354,9 @@ def conv2d_grad_wrt_weights(input,
referred to as a convolution, and this is the default. If
referred to as a convolution, and this is the default. If
``False``, the filters are not flipped and the operation is
``False``, the filters are not flipped and the operation is
referred to as a cross-correlation.
referred to as a cross-correlation.
filter_dilation : tuple of len 2
The filter dilation used in the forward pass.
Also known as input striding.
Returns
Returns
-------
-------
...
@@ -386,7 +406,8 @@ def conv2d_grad_wrt_weights(input,
...
@@ -386,7 +406,8 @@ def conv2d_grad_wrt_weights(input,
kshp
=
numerical_filter_shape
,
kshp
=
numerical_filter_shape
,
border_mode
=
border_mode
,
border_mode
=
border_mode
,
subsample
=
subsample
,
subsample
=
subsample
,
filter_flip
=
filter_flip
)
filter_flip
=
filter_flip
,
filter_dilation
=
filter_dilation
)
return
gradWeight_op
(
input
,
output_grad
,
filter_shape
[:
-
2
])
return
gradWeight_op
(
input
,
output_grad
,
filter_shape
[:
-
2
])
...
@@ -541,7 +562,8 @@ def bilinear_upsampling(input,
...
@@ -541,7 +562,8 @@ def bilinear_upsampling(input,
filter_shape
=
(
1
,
1
,
None
,
1
),
filter_shape
=
(
1
,
1
,
None
,
1
),
border_mode
=
(
pad
,
0
),
border_mode
=
(
pad
,
0
),
subsample
=
(
ratio
,
1
),
subsample
=
(
ratio
,
1
),
filter_flip
=
True
)
filter_flip
=
True
,
filter_dilation
=
(
1
,
1
))
# upsampling cols
# upsampling cols
upsampled_mat
=
conv2d_grad_wrt_inputs
(
output_grad
=
upsampled_row
,
upsampled_mat
=
conv2d_grad_wrt_inputs
(
output_grad
=
upsampled_row
,
filters
=
kern
[
np
.
newaxis
,
filters
=
kern
[
np
.
newaxis
,
...
@@ -553,7 +575,8 @@ def bilinear_upsampling(input,
...
@@ -553,7 +575,8 @@ def bilinear_upsampling(input,
filter_shape
=
(
1
,
1
,
1
,
None
),
filter_shape
=
(
1
,
1
,
1
,
None
),
border_mode
=
(
0
,
pad
),
border_mode
=
(
0
,
pad
),
subsample
=
(
1
,
ratio
),
subsample
=
(
1
,
ratio
),
filter_flip
=
True
)
filter_flip
=
True
,
filter_dilation
=
(
1
,
1
))
else
:
else
:
kern
=
bilinear_kernel_2D
(
ratio
=
ratio
,
normalize
=
True
)
kern
=
bilinear_kernel_2D
(
ratio
=
ratio
,
normalize
=
True
)
upsampled_mat
=
conv2d_grad_wrt_inputs
(
output_grad
=
concat_mat
,
upsampled_mat
=
conv2d_grad_wrt_inputs
(
output_grad
=
concat_mat
,
...
@@ -565,7 +588,8 @@ def bilinear_upsampling(input,
...
@@ -565,7 +588,8 @@ def bilinear_upsampling(input,
filter_shape
=
(
1
,
1
,
None
,
None
),
filter_shape
=
(
1
,
1
,
None
,
None
),
border_mode
=
(
pad
,
pad
),
border_mode
=
(
pad
,
pad
),
subsample
=
(
ratio
,
ratio
),
subsample
=
(
ratio
,
ratio
),
filter_flip
=
True
)
filter_flip
=
True
,
filter_dilation
=
(
1
,
1
))
return
upsampled_mat
.
reshape
((
input
.
shape
[
0
],
input
.
shape
[
1
],
return
upsampled_mat
.
reshape
((
input
.
shape
[
0
],
input
.
shape
[
1
],
row
*
ratio
,
col
*
ratio
))
row
*
ratio
,
col
*
ratio
))
...
@@ -620,14 +644,18 @@ class BaseAbstractConv2d(Op):
...
@@ -620,14 +644,18 @@ class BaseAbstractConv2d(Op):
are not flipped and the operation is referred to as a
are not flipped and the operation is referred to as a
cross-correlation.
cross-correlation.
filter_dilation: tuple of len 2
Factor by which to subsample (stride) the input.
Also called dilation factor.
"""
"""
check_broadcast
=
False
check_broadcast
=
False
__props__
=
(
'border_mode'
,
'subsample'
,
'filter_flip'
,
'imshp'
,
'kshp'
)
__props__
=
(
'border_mode'
,
'subsample'
,
'filter_flip'
,
'imshp'
,
'kshp'
,
'filter_dilation'
)
def
__init__
(
self
,
def
__init__
(
self
,
imshp
=
None
,
kshp
=
None
,
imshp
=
None
,
kshp
=
None
,
border_mode
=
"valid"
,
border_mode
=
"valid"
,
subsample
=
(
1
,
1
)
,
subsample
=
(
1
,
1
),
filter_flip
=
True
,
filter_
flip
=
True
):
filter_
dilation
=
(
1
,
1
)
):
if
isinstance
(
border_mode
,
integer_types
):
if
isinstance
(
border_mode
,
integer_types
):
border_mode
=
(
border_mode
,
border_mode
)
border_mode
=
(
border_mode
,
border_mode
)
...
@@ -673,6 +701,9 @@ class BaseAbstractConv2d(Op):
...
@@ -673,6 +701,9 @@ class BaseAbstractConv2d(Op):
if
len
(
subsample
)
!=
2
:
if
len
(
subsample
)
!=
2
:
raise
ValueError
(
"subsample must have two elements"
)
raise
ValueError
(
"subsample must have two elements"
)
self
.
subsample
=
tuple
(
subsample
)
self
.
subsample
=
tuple
(
subsample
)
if
len
(
filter_dilation
)
!=
2
:
raise
ValueError
(
"filter_dilation must have two elements"
)
self
.
filter_dilation
=
tuple
(
filter_dilation
)
def
flops
(
self
,
inp
,
outp
):
def
flops
(
self
,
inp
,
outp
):
""" Useful with the hack in profilemode to print the MFlops"""
""" Useful with the hack in profilemode to print the MFlops"""
...
@@ -694,7 +725,7 @@ class BaseAbstractConv2d(Op):
...
@@ -694,7 +725,7 @@ class BaseAbstractConv2d(Op):
# This may change in the future.
# This may change in the future.
return
False
return
False
def
conv2d
(
self
,
img
,
kern
,
mode
=
"valid"
):
def
conv2d
(
self
,
img
,
kern
,
mode
=
"valid"
,
dilation
=
(
1
,
1
)
):
"""
"""
Basic slow python implementation for DebugMode
Basic slow python implementation for DebugMode
"""
"""
...
@@ -708,8 +739,16 @@ class BaseAbstractConv2d(Op):
...
@@ -708,8 +739,16 @@ class BaseAbstractConv2d(Op):
'invalid mode {}, which must be either '
'invalid mode {}, which must be either '
'"valid" or "full"'
.
format
(
mode
))
'"valid" or "full"'
.
format
(
mode
))
out_shape
=
get_conv_output_shape
(
img
.
shape
,
kern
.
shape
,
mode
,
[
1
,
1
])
out_shape
=
get_conv_output_shape
(
img
.
shape
,
kern
.
shape
,
mode
,
[
1
,
1
],
dilation
)
out
=
numpy
.
zeros
(
out_shape
,
dtype
=
img
.
dtype
)
out
=
numpy
.
zeros
(
out_shape
,
dtype
=
img
.
dtype
)
dil_kern_shp
=
kern
.
shape
[:
-
2
]
+
((
kern
.
shape
[
-
2
]
-
1
)
*
dilation
[
0
]
+
1
,
(
kern
.
shape
[
-
1
]
-
1
)
*
dilation
[
1
]
+
1
)
dilated_kern
=
numpy
.
zeros
(
dil_kern_shp
,
dtype
=
kern
.
dtype
)
dilated_kern
[:,
:,
::
dilation
[
0
],
::
dilation
[
1
]]
=
kern
val
=
_valfrommode
(
mode
)
val
=
_valfrommode
(
mode
)
bval
=
_bvalfromboundary
(
'fill'
)
bval
=
_bvalfromboundary
(
'fill'
)
...
@@ -720,7 +759,7 @@ class BaseAbstractConv2d(Op):
...
@@ -720,7 +759,7 @@ class BaseAbstractConv2d(Op):
for
im0
in
xrange
(
img
.
shape
[
1
]):
for
im0
in
xrange
(
img
.
shape
[
1
]):
# some cast generates a warning here
# some cast generates a warning here
out
[
b
,
n
,
...
]
+=
_convolve2d
(
img
[
b
,
im0
,
...
],
out
[
b
,
n
,
...
]
+=
_convolve2d
(
img
[
b
,
im0
,
...
],
kern
[
n
,
im0
,
...
],
dilated_
kern
[
n
,
im0
,
...
],
1
,
val
,
bval
,
0
)
1
,
val
,
bval
,
0
)
return
out
return
out
...
@@ -736,10 +775,11 @@ class AbstractConv2d(BaseAbstractConv2d):
...
@@ -736,10 +775,11 @@ class AbstractConv2d(BaseAbstractConv2d):
kshp
=
None
,
kshp
=
None
,
border_mode
=
"valid"
,
border_mode
=
"valid"
,
subsample
=
(
1
,
1
),
subsample
=
(
1
,
1
),
filter_flip
=
True
):
filter_flip
=
True
,
super
(
AbstractConv2d
,
self
)
.
__init__
(
imshp
,
kshp
,
filter_dilation
=
(
1
,
1
)):
border_mode
,
subsample
,
super
(
AbstractConv2d
,
self
)
.
__init__
(
imshp
,
kshp
,
border_mode
,
filter_flip
)
subsample
,
filter_flip
,
filter_dilation
)
def
make_node
(
self
,
img
,
kern
):
def
make_node
(
self
,
img
,
kern
):
# Make sure both inputs are Variables with the same Type
# Make sure both inputs are Variables with the same Type
...
@@ -766,6 +806,8 @@ class AbstractConv2d(BaseAbstractConv2d):
...
@@ -766,6 +806,8 @@ class AbstractConv2d(BaseAbstractConv2d):
img
,
kern
=
inp
img
,
kern
=
inp
img
=
numpy
.
asarray
(
img
)
img
=
numpy
.
asarray
(
img
)
kern
=
numpy
.
asarray
(
kern
)
kern
=
numpy
.
asarray
(
kern
)
dil_kernshp
=
((
kern
.
shape
[
2
]
-
1
)
*
self
.
filter_dilation
[
0
]
+
1
,
(
kern
.
shape
[
3
]
-
1
)
*
self
.
filter_dilation
[
1
]
+
1
)
o
,
=
out_
o
,
=
out_
mode
=
self
.
border_mode
mode
=
self
.
border_mode
...
@@ -777,9 +819,9 @@ class AbstractConv2d(BaseAbstractConv2d):
...
@@ -777,9 +819,9 @@ class AbstractConv2d(BaseAbstractConv2d):
' integers'
.
format
(
mode
))
' integers'
.
format
(
mode
))
if
mode
==
"full"
:
if
mode
==
"full"
:
mode
=
(
kern
.
shape
[
2
]
-
1
,
kern
.
shape
[
3
]
-
1
)
mode
=
(
dil_kernshp
[
0
]
-
1
,
dil_kernshp
[
1
]
-
1
)
elif
mode
==
"half"
:
elif
mode
==
"half"
:
mode
=
(
kern
.
shape
[
2
]
//
2
,
kern
.
shape
[
3
]
//
2
)
mode
=
(
dil_kernshp
[
0
]
//
2
,
dil_kernshp
[
1
]
//
2
)
if
isinstance
(
mode
,
tuple
):
if
isinstance
(
mode
,
tuple
):
pad_h
,
pad_w
=
map
(
int
,
mode
)
pad_h
,
pad_w
=
map
(
int
,
mode
)
mode
=
"valid"
mode
=
"valid"
...
@@ -790,7 +832,7 @@ class AbstractConv2d(BaseAbstractConv2d):
...
@@ -790,7 +832,7 @@ class AbstractConv2d(BaseAbstractConv2d):
img
=
new_img
img
=
new_img
if
not
self
.
filter_flip
:
if
not
self
.
filter_flip
:
kern
=
kern
[:,
:,
::
-
1
,
::
-
1
]
kern
=
kern
[:,
:,
::
-
1
,
::
-
1
]
conv_out
=
self
.
conv2d
(
img
,
kern
,
mode
=
"valid"
)
conv_out
=
self
.
conv2d
(
img
,
kern
,
mode
=
"valid"
,
dilation
=
self
.
filter_dilation
)
conv_out
=
conv_out
[:,
:,
::
self
.
subsample
[
0
],
::
self
.
subsample
[
1
]]
conv_out
=
conv_out
[:,
:,
::
self
.
subsample
[
0
],
::
self
.
subsample
[
1
]]
o
[
0
]
=
node
.
outputs
[
0
]
.
type
.
filter
(
conv_out
)
o
[
0
]
=
node
.
outputs
[
0
]
.
type
.
filter
(
conv_out
)
...
@@ -812,12 +854,14 @@ class AbstractConv2d(BaseAbstractConv2d):
...
@@ -812,12 +854,14 @@ class AbstractConv2d(BaseAbstractConv2d):
d_bottom
=
AbstractConv2d_gradInputs
(
self
.
imshp
,
self
.
kshp
,
d_bottom
=
AbstractConv2d_gradInputs
(
self
.
imshp
,
self
.
kshp
,
self
.
border_mode
,
self
.
border_mode
,
self
.
subsample
,
self
.
subsample
,
self
.
filter_flip
)(
self
.
filter_flip
,
self
.
filter_dilation
)(
weights
,
top
,
bottom
.
shape
[
-
2
:])
weights
,
top
,
bottom
.
shape
[
-
2
:])
d_weights
=
AbstractConv2d_gradWeights
(
self
.
imshp
,
self
.
kshp
,
d_weights
=
AbstractConv2d_gradWeights
(
self
.
imshp
,
self
.
kshp
,
self
.
border_mode
,
self
.
border_mode
,
self
.
subsample
,
self
.
subsample
,
self
.
filter_flip
)(
self
.
filter_flip
,
self
.
filter_dilation
)(
bottom
,
top
,
weights
.
shape
[
-
2
:])
bottom
,
top
,
weights
.
shape
[
-
2
:])
...
@@ -844,7 +888,7 @@ class AbstractConv2d(BaseAbstractConv2d):
...
@@ -844,7 +888,7 @@ class AbstractConv2d(BaseAbstractConv2d):
kshp
=
[
kshp
[
i
]
if
self
.
kshp
[
i
]
is
None
else
self
.
kshp
[
i
]
kshp
=
[
kshp
[
i
]
if
self
.
kshp
[
i
]
is
None
else
self
.
kshp
[
i
]
for
i
in
range
(
4
)]
for
i
in
range
(
4
)]
res
=
get_conv_output_shape
(
imshp
,
kshp
,
self
.
border_mode
,
res
=
get_conv_output_shape
(
imshp
,
kshp
,
self
.
border_mode
,
self
.
subsample
)
self
.
subsample
,
self
.
filter_dilation
)
return
[
res
]
return
[
res
]
...
@@ -863,11 +907,13 @@ class AbstractConv2d_gradWeights(BaseAbstractConv2d):
...
@@ -863,11 +907,13 @@ class AbstractConv2d_gradWeights(BaseAbstractConv2d):
kshp
=
None
,
kshp
=
None
,
border_mode
=
"valid"
,
border_mode
=
"valid"
,
subsample
=
(
1
,
1
),
subsample
=
(
1
,
1
),
filter_flip
=
True
):
filter_flip
=
True
,
filter_dilation
=
(
1
,
1
)):
super
(
AbstractConv2d_gradWeights
,
self
)
.
__init__
(
imshp
,
kshp
,
super
(
AbstractConv2d_gradWeights
,
self
)
.
__init__
(
imshp
,
kshp
,
border_mode
,
border_mode
,
subsample
,
subsample
,
filter_flip
)
filter_flip
,
filter_dilation
)
# Update shape/height_width
# Update shape/height_width
def
make_node
(
self
,
img
,
topgrad
,
shape
):
def
make_node
(
self
,
img
,
topgrad
,
shape
):
...
@@ -943,15 +989,16 @@ class AbstractConv2d_gradWeights(BaseAbstractConv2d):
...
@@ -943,15 +989,16 @@ class AbstractConv2d_gradWeights(BaseAbstractConv2d):
d_bottom
=
AbstractConv2d_gradInputs
(
self
.
imshp
,
self
.
kshp
,
d_bottom
=
AbstractConv2d_gradInputs
(
self
.
imshp
,
self
.
kshp
,
self
.
border_mode
,
self
.
border_mode
,
self
.
subsample
,
self
.
subsample
,
self
.
filter_flip
)(
self
.
filter_flip
,
weights
,
self
.
filter_dilation
)(
weights
,
top
,
top
,
bottom
.
shape
[
-
2
:])
bottom
.
shape
[
-
2
:])
d_top
=
AbstractConv2d
(
self
.
imshp
,
d_top
=
AbstractConv2d
(
self
.
imshp
,
self
.
kshp
,
self
.
kshp
,
self
.
border_mode
,
self
.
border_mode
,
self
.
subsample
,
self
.
subsample
,
self
.
filter_flip
)(
bottom
,
weights
)
self
.
filter_flip
,
self
.
filter_dilation
)(
bottom
,
weights
)
# Make sure that the broadcastable pattern of the inputs is used
# Make sure that the broadcastable pattern of the inputs is used
# for the gradients, even if the grad opts are not able to infer
# for the gradients, even if the grad opts are not able to infer
# that the dimensions are broadcastable.
# that the dimensions are broadcastable.
...
@@ -998,11 +1045,13 @@ class AbstractConv2d_gradInputs(BaseAbstractConv2d):
...
@@ -998,11 +1045,13 @@ class AbstractConv2d_gradInputs(BaseAbstractConv2d):
kshp
=
None
,
kshp
=
None
,
border_mode
=
"valid"
,
border_mode
=
"valid"
,
subsample
=
(
1
,
1
),
subsample
=
(
1
,
1
),
filter_flip
=
True
):
filter_flip
=
True
,
filter_dilation
=
(
1
,
1
)):
super
(
AbstractConv2d_gradInputs
,
self
)
.
__init__
(
imshp
,
kshp
,
super
(
AbstractConv2d_gradInputs
,
self
)
.
__init__
(
imshp
,
kshp
,
border_mode
,
border_mode
,
subsample
,
subsample
,
filter_flip
)
filter_flip
,
filter_dilation
)
# Update shape/height_width
# Update shape/height_width
def
make_node
(
self
,
kern
,
topgrad
,
shape
):
def
make_node
(
self
,
kern
,
topgrad
,
shape
):
...
@@ -1070,12 +1119,15 @@ class AbstractConv2d_gradInputs(BaseAbstractConv2d):
...
@@ -1070,12 +1119,15 @@ class AbstractConv2d_gradInputs(BaseAbstractConv2d):
bottom
,
=
grads
bottom
,
=
grads
d_weights
=
AbstractConv2d_gradWeights
(
self
.
imshp
,
self
.
kshp
,
d_weights
=
AbstractConv2d_gradWeights
(
self
.
imshp
,
self
.
kshp
,
self
.
border_mode
,
self
.
border_mode
,
self
.
subsample
)(
self
.
subsample
,
bottom
,
top
,
self
.
filter_flip
,
weights
.
shape
[
-
2
:])
self
.
filter_dilation
)(
bottom
,
top
,
weights
.
shape
[
-
2
:])
d_top
=
AbstractConv2d
(
self
.
imshp
,
self
.
kshp
,
d_top
=
AbstractConv2d
(
self
.
imshp
,
self
.
kshp
,
self
.
border_mode
,
self
.
subsample
)(
self
.
border_mode
,
bottom
,
weights
)
self
.
subsample
,
self
.
filter_flip
,
self
.
filter_dilation
)(
bottom
,
weights
)
# Make sure that the broadcastable pattern of the inputs is used
# Make sure that the broadcastable pattern of the inputs is used
# for the gradients, even if the grad opts are not able to infer
# for the gradients, even if the grad opts are not able to infer
# that the dimensions are broadcastable.
# that the dimensions are broadcastable.
...
...
theano/tensor/nnet/corr.py
浏览文件 @
a668c6c5
...
@@ -27,12 +27,14 @@ class BaseCorrMM(gof.Op):
...
@@ -27,12 +27,14 @@ class BaseCorrMM(gof.Op):
or a pair of integers
or a pair of integers
subsample
subsample
Perform subsampling of the output (default: (1, 1)).
Perform subsampling of the output (default: (1, 1)).
filter_dilation
Perform dilated correlation (default: (1,1))
"""
"""
check_broadcast
=
False
check_broadcast
=
False
__props__
=
(
'border_mode'
,
'subsample'
)
__props__
=
(
'border_mode'
,
'subsample'
,
'filter_dilation'
)
def
__init__
(
self
,
border_mode
=
"valid"
,
subsample
=
(
1
,
1
)):
def
__init__
(
self
,
border_mode
=
"valid"
,
subsample
=
(
1
,
1
),
filter_dilation
=
(
1
,
1
)):
if
isinstance
(
border_mode
,
integer_types
):
if
isinstance
(
border_mode
,
integer_types
):
if
border_mode
<
0
:
if
border_mode
<
0
:
raise
ValueError
(
raise
ValueError
(
...
@@ -55,7 +57,10 @@ class BaseCorrMM(gof.Op):
...
@@ -55,7 +57,10 @@ class BaseCorrMM(gof.Op):
self
.
border_mode
=
border_mode
self
.
border_mode
=
border_mode
if
len
(
subsample
)
!=
2
:
if
len
(
subsample
)
!=
2
:
raise
ValueError
(
"subsample must have two elements"
)
raise
ValueError
(
"subsample must have two elements"
)
if
len
(
filter_dilation
)
!=
2
:
raise
ValueError
(
"filter_dilation must have two elements"
)
self
.
subsample
=
tuple
(
subsample
)
self
.
subsample
=
tuple
(
subsample
)
self
.
filter_dilation
=
tuple
(
filter_dilation
)
@property
@property
def
pad
(
self
):
def
pad
(
self
):
...
@@ -64,10 +69,11 @@ class BaseCorrMM(gof.Op):
...
@@ -64,10 +69,11 @@ class BaseCorrMM(gof.Op):
return
(
0
,
0
)
return
(
0
,
0
)
def
__str__
(
self
):
def
__str__
(
self
):
return
'
%
s{
%
s,
%
s}'
%
(
return
'
%
s{
%
s,
%
s
,
%
s
}'
%
(
self
.
__class__
.
__name__
,
self
.
__class__
.
__name__
,
self
.
border_mode
,
self
.
border_mode
,
str
(
self
.
subsample
))
str
(
self
.
subsample
),
str
(
self
.
filter_dilation
))
def
c_support_code
(
self
):
def
c_support_code
(
self
):
return
blas_header_text
()
return
blas_header_text
()
...
@@ -89,7 +95,7 @@ class BaseCorrMM(gof.Op):
...
@@ -89,7 +95,7 @@ class BaseCorrMM(gof.Op):
def
c_code_cache_version
(
self
):
def
c_code_cache_version
(
self
):
# raise this whenever modifying any of the support_code_files
# raise this whenever modifying any of the support_code_files
return
(
1
,
1
)
return
(
1
,
2
)
def
c_support_code_apply
(
self
,
node
,
nodename
):
def
c_support_code_apply
(
self
,
node
,
nodename
):
# REMEMBER TO RAISE c_code_cache_version when changing any of
# REMEMBER TO RAISE c_code_cache_version when changing any of
...
@@ -155,6 +161,7 @@ class BaseCorrMM(gof.Op):
...
@@ -155,6 +161,7 @@ class BaseCorrMM(gof.Op):
if
not
theano
.
config
.
blas
.
ldflags
:
if
not
theano
.
config
.
blas
.
ldflags
:
raise
NotImplementedError
(
"C code for CorrMM* classes need a blas library."
)
raise
NotImplementedError
(
"C code for CorrMM* classes need a blas library."
)
dH
,
dW
=
self
.
subsample
dH
,
dW
=
self
.
subsample
dilH
,
dilW
=
self
.
filter_dilation
if
self
.
border_mode
==
"half"
:
if
self
.
border_mode
==
"half"
:
padH
=
padW
=
-
1
padH
=
padW
=
-
1
elif
self
.
border_mode
==
"full"
:
elif
self
.
border_mode
==
"full"
:
...
@@ -201,6 +208,8 @@ class BaseCorrMM(gof.Op):
...
@@ -201,6 +208,8 @@ class BaseCorrMM(gof.Op):
// Optional args
// Optional args
int dH =
%(dH)
s;
int dH =
%(dH)
s;
int dW =
%(dW)
s;
int dW =
%(dW)
s;
int dilH =
%(dilH)
s;
int dilW =
%(dilW)
s;
int padH =
%(padH)
s;
int padH =
%(padH)
s;
int padW =
%(padW)
s;
int padW =
%(padW)
s;
...
@@ -224,39 +233,43 @@ class BaseCorrMM(gof.Op):
...
@@ -224,39 +233,43 @@ class BaseCorrMM(gof.Op):
}
}
else if (padH == -2) {
else if (padH == -2) {
// vertical full padding, we can infer the kernel height
// vertical full padding, we can infer the kernel height
kH =
2 - PyArray_DIMS(bottom)[2] + (PyArray_DIMS(top)[2] - 1) * dH
;
kH =
(2 - PyArray_DIMS(bottom)[2] + (PyArray_DIMS(top)[2] - 1) * dH - 1)/ dilH + 1
;
}
}
else {
else {
// explicit padding, we can infer the kernel height
// explicit padding, we can infer the kernel height
kH =
PyArray_DIMS(bottom)[2] + 2*padH - (PyArray_DIMS(top)[2] - 1) * dH
;
kH =
(PyArray_DIMS(bottom)[2] + 2*padH - (PyArray_DIMS(top)[2] - 1) * dH - 1) / dilH +1
;
}
}
if ((dW != 1) || (padW == -1)) {
if ((dW != 1) || (padW == -1)) {
kW =
%(width)
s;
kW =
%(width)
s;
}
}
else if (padW == -2) {
else if (padW == -2) {
kW =
2 - PyArray_DIMS(bottom)[3] + (PyArray_DIMS(top)[3] - 1) * dW
;
kW =
(2 - PyArray_DIMS(bottom)[3] + (PyArray_DIMS(top)[3] - 1) * dW - 1) / dilW + 1
;
}
}
else {
else {
kW =
PyArray_DIMS(bottom)[3] + 2*padW - (PyArray_DIMS(top)[3] - 1) * dW
;
kW =
(PyArray_DIMS(bottom)[3] + 2*padW - (PyArray_DIMS(top)[3] - 1) * dW - 1) / dilW + 1
;
}
}
}
}
// Implicit dilated kernel size
int dil_kH = (kH - 1) * dilH + 1;
int dil_kW = (kW - 1) * dilW + 1;
// Auto-padding if requested
// Auto-padding if requested
if (padH == -1) { // vertical half padding
if (padH == -1) { // vertical half padding
padH = kH / 2;
padH =
dil_
kH / 2;
}
}
else if (padH == -2) { // vertical full padding
else if (padH == -2) { // vertical full padding
padH = kH - 1;
padH =
dil_
kH - 1;
}
}
else if (padH < 0) {
else if (padH < 0) {
PyErr_SetString(PyExc_ValueError, "BaseCorrMM: padH must be >= -2");
PyErr_SetString(PyExc_ValueError, "BaseCorrMM: padH must be >= -2");
%(fail)
s
%(fail)
s
}
}
if (padW == -1) { // horizontal half padding
if (padW == -1) { // horizontal half padding
padW = kW / 2;
padW =
dil_
kW / 2;
}
}
else if (padW == -2) { // horizontal full padding
else if (padW == -2) { // horizontal full padding
padW = kW - 1;
padW =
dil_
kW - 1;
}
}
else if (padW < 0) {
else if (padW < 0) {
PyErr_SetString(PyExc_ValueError, "BaseCorrMM: padW must be >= -2");
PyErr_SetString(PyExc_ValueError, "BaseCorrMM: padW must be >= -2");
...
@@ -268,15 +281,15 @@ class BaseCorrMM(gof.Op):
...
@@ -268,15 +281,15 @@ class BaseCorrMM(gof.Op):
switch(direction) {
switch(direction) {
case 0: // forward pass
case 0: // forward pass
// output is top: (batchsize, num_filters, height, width)
// output is top: (batchsize, num_filters, height, width)
// height and width: top = (bottom + 2*pad -
weight
) / sample + 1
// height and width: top = (bottom + 2*pad -
((weight-1)*dil + 1)
) / sample + 1
out_dim[0] = (npy_intp)PyArray_DIMS(bottom)[0];
out_dim[0] = (npy_intp)PyArray_DIMS(bottom)[0];
out_dim[1] = (npy_intp)PyArray_DIMS(weights)[0];
out_dim[1] = (npy_intp)PyArray_DIMS(weights)[0];
out_dim[2] = (npy_intp)((PyArray_DIMS(bottom)[2] + 2*padH -
PyArray_DIMS(weights)[2]
) / dH + 1);
out_dim[2] = (npy_intp)((PyArray_DIMS(bottom)[2] + 2*padH -
((PyArray_DIMS(weights)[2]-1)*dilH + 1)
) / dH + 1);
out_dim[3] = (npy_intp)((PyArray_DIMS(bottom)[3] + 2*padW -
PyArray_DIMS(weights)[3]
) / dW + 1);
out_dim[3] = (npy_intp)((PyArray_DIMS(bottom)[3] + 2*padW -
((PyArray_DIMS(weights)[3]-1)*dilW + 1)
) / dW + 1);
break;
break;
case 1: // backprop wrt. weights
case 1: // backprop wrt. weights
// output is weights: (num_filters, num_channels, height, width)
// output is weights: (num_filters, num_channels, height, width)
// height and width: weights =
bottom + 2*pad - (top - 1) * sample
// height and width: weights =
(bottom + 2*pad - (top - 1) * sample - 1) / dil + 1
out_dim[0] = (npy_intp)PyArray_DIMS(top)[1];
out_dim[0] = (npy_intp)PyArray_DIMS(top)[1];
out_dim[1] = (npy_intp)PyArray_DIMS(bottom)[1];
out_dim[1] = (npy_intp)PyArray_DIMS(bottom)[1];
out_dim[2] = (npy_intp)kH; // already inferred further above
out_dim[2] = (npy_intp)kH; // already inferred further above
...
@@ -284,11 +297,11 @@ class BaseCorrMM(gof.Op):
...
@@ -284,11 +297,11 @@ class BaseCorrMM(gof.Op):
break;
break;
case 2: // backprop wrt. inputs
case 2: // backprop wrt. inputs
// output is bottom: (batchsize, num_channels, height, width)
// output is bottom: (batchsize, num_channels, height, width)
// height and width: bottom = (top - 1) * sample +
weights
- 2*pad
// height and width: bottom = (top - 1) * sample +
(weights-1)*dil + 1
- 2*pad
out_dim[0] = (npy_intp)PyArray_DIMS(top)[0];
out_dim[0] = (npy_intp)PyArray_DIMS(top)[0];
out_dim[1] = (npy_intp)PyArray_DIMS(weights)[1];
out_dim[1] = (npy_intp)PyArray_DIMS(weights)[1];
out_dim[2] = (npy_intp)((dH != 1) ?
%(height)
s : (PyArray_DIMS(top)[2] - 1) * dH +
PyArray_DIMS(weights)[2]
- 2*padH);
out_dim[2] = (npy_intp)((dH != 1) ?
%(height)
s : (PyArray_DIMS(top)[2] - 1) * dH +
(PyArray_DIMS(weights)[2]-1)*dilH + 1
- 2*padH);
out_dim[3] = (npy_intp)((dW != 1) ?
%(width)
s : (PyArray_DIMS(top)[3] - 1) * dW +
PyArray_DIMS(weights)[3]
- 2*padW);
out_dim[3] = (npy_intp)((dW != 1) ?
%(width)
s : (PyArray_DIMS(top)[3] - 1) * dW +
(PyArray_DIMS(weights)[3]-1)*dilW + 1
- 2*padW);
break;
break;
default:
default:
PyErr_SetString(PyExc_ValueError, "BaseCorrMM: direction must be 0, 1, or 2
\\
n");
PyErr_SetString(PyExc_ValueError, "BaseCorrMM: direction must be 0, 1, or 2
\\
n");
...
@@ -326,7 +339,7 @@ class BaseCorrMM(gof.Op):
...
@@ -326,7 +339,7 @@ class BaseCorrMM(gof.Op):
}
}
// Call corrMM code
// Call corrMM code
out2 = corrMM(
%(bottom)
s,
%(weights)
s,
%(top)
s, direction, dH, dW, padH, padW);
out2 = corrMM(
%(bottom)
s,
%(weights)
s,
%(top)
s, direction, dH, dW,
dilH, dilW,
padH, padW);
if (out2==NULL){
if (out2==NULL){
%(fail)
s
%(fail)
s
}
}
...
@@ -357,10 +370,15 @@ class CorrMM(BaseCorrMM):
...
@@ -357,10 +370,15 @@ class CorrMM(BaseCorrMM):
`(sv, sh)` is equivalent to `CorrMM(...)(...)[:,:,::sv, ::sh]`,
`(sv, sh)` is equivalent to `CorrMM(...)(...)[:,:,::sv, ::sh]`,
but faster.
but faster.
Set to `(1, 1)` to disable subsampling.
Set to `(1, 1)` to disable subsampling.
filter_dilation
The filter dilation operation applied to each input image.
Should be a tuple with 2 elements.
Set to `(1, 1)` to disable filter dilation.
"""
"""
def
__init__
(
self
,
border_mode
=
"valid"
,
subsample
=
(
1
,
1
)):
def
__init__
(
self
,
border_mode
=
"valid"
,
subsample
=
(
1
,
1
),
super
(
CorrMM
,
self
)
.
__init__
(
border_mode
,
subsample
)
filter_dilation
=
(
1
,
1
)):
super
(
CorrMM
,
self
)
.
__init__
(
border_mode
,
subsample
,
filter_dilation
)
def
make_node
(
self
,
img
,
kern
):
def
make_node
(
self
,
img
,
kern
):
img
=
as_tensor_variable
(
img
)
img
=
as_tensor_variable
(
img
)
...
@@ -382,7 +400,8 @@ class CorrMM(BaseCorrMM):
...
@@ -382,7 +400,8 @@ class CorrMM(BaseCorrMM):
imshp
,
imshp
,
kshp
,
kshp
,
self
.
border_mode
,
self
.
border_mode
,
self
.
subsample
)
self
.
subsample
,
self
.
filter_dilation
)
return
[
res
]
return
[
res
]
def
c_code
(
self
,
node
,
nodename
,
inp
,
out_
,
sub
):
def
c_code
(
self
,
node
,
nodename
,
inp
,
out_
,
sub
):
...
@@ -395,11 +414,13 @@ class CorrMM(BaseCorrMM):
...
@@ -395,11 +414,13 @@ class CorrMM(BaseCorrMM):
bottom
,
weights
=
inp
bottom
,
weights
=
inp
top
,
=
grads
top
,
=
grads
d_bottom
=
CorrMM_gradInputs
(
self
.
border_mode
,
d_bottom
=
CorrMM_gradInputs
(
self
.
border_mode
,
self
.
subsample
)(
weights
,
top
,
self
.
subsample
,
bottom
.
shape
[
-
2
:])
self
.
filter_dilation
)(
weights
,
top
,
bottom
.
shape
[
-
2
:])
d_weights
=
CorrMM_gradWeights
(
self
.
border_mode
,
d_weights
=
CorrMM_gradWeights
(
self
.
border_mode
,
self
.
subsample
)(
bottom
,
top
,
self
.
subsample
,
weights
.
shape
[
-
2
:])
self
.
filter_dilation
)(
bottom
,
top
,
weights
.
shape
[
-
2
:])
return
d_bottom
,
d_weights
return
d_bottom
,
d_weights
...
@@ -415,8 +436,11 @@ class CorrMM_gradWeights(BaseCorrMM):
...
@@ -415,8 +436,11 @@ class CorrMM_gradWeights(BaseCorrMM):
"""
"""
def
__init__
(
self
,
border_mode
=
"valid"
,
subsample
=
(
1
,
1
)):
def
__init__
(
self
,
border_mode
=
"valid"
,
subsample
=
(
1
,
1
),
super
(
CorrMM_gradWeights
,
self
)
.
__init__
(
border_mode
,
subsample
)
filter_dilation
=
(
1
,
1
)):
super
(
CorrMM_gradWeights
,
self
)
.
__init__
(
border_mode
,
subsample
,
filter_dilation
)
def
make_node
(
self
,
img
,
topgrad
,
shape
=
None
):
def
make_node
(
self
,
img
,
topgrad
,
shape
=
None
):
img
=
as_tensor_variable
(
img
)
img
=
as_tensor_variable
(
img
)
...
@@ -485,10 +509,12 @@ class CorrMM_gradWeights(BaseCorrMM):
...
@@ -485,10 +509,12 @@ class CorrMM_gradWeights(BaseCorrMM):
bottom
,
top
=
inp
[:
2
]
bottom
,
top
=
inp
[:
2
]
weights
,
=
grads
weights
,
=
grads
d_bottom
=
CorrMM_gradInputs
(
self
.
border_mode
,
d_bottom
=
CorrMM_gradInputs
(
self
.
border_mode
,
self
.
subsample
)(
weights
,
top
,
self
.
subsample
,
bottom
.
shape
[
-
2
:])
self
.
filter_dilation
)(
weights
,
top
,
bottom
.
shape
[
-
2
:])
d_top
=
CorrMM
(
self
.
border_mode
,
d_top
=
CorrMM
(
self
.
border_mode
,
self
.
subsample
)(
bottom
,
weights
)
self
.
subsample
,
self
.
filter_dilation
)(
bottom
,
weights
)
d_height_width
=
((
theano
.
gradient
.
DisconnectedType
()(),)
*
2
d_height_width
=
((
theano
.
gradient
.
DisconnectedType
()(),)
*
2
if
len
(
inp
)
==
4
else
())
if
len
(
inp
)
==
4
else
())
return
(
d_bottom
,
d_top
)
+
d_height_width
return
(
d_bottom
,
d_top
)
+
d_height_width
...
@@ -512,8 +538,10 @@ class CorrMM_gradInputs(BaseCorrMM):
...
@@ -512,8 +538,10 @@ class CorrMM_gradInputs(BaseCorrMM):
"""
"""
def
__init__
(
self
,
border_mode
=
"valid"
,
subsample
=
(
1
,
1
)):
def
__init__
(
self
,
border_mode
=
"valid"
,
subsample
=
(
1
,
1
),
filter_dilation
=
(
1
,
1
)):
super
(
CorrMM_gradInputs
,
self
)
.
__init__
(
border_mode
,
subsample
)
super
(
CorrMM_gradInputs
,
self
)
.
__init__
(
border_mode
,
subsample
,
filter_dilation
)
def
make_node
(
self
,
kern
,
topgrad
,
shape
=
None
):
def
make_node
(
self
,
kern
,
topgrad
,
shape
=
None
):
kern
=
as_tensor_variable
(
kern
)
kern
=
as_tensor_variable
(
kern
)
...
@@ -586,11 +614,13 @@ class CorrMM_gradInputs(BaseCorrMM):
...
@@ -586,11 +614,13 @@ class CorrMM_gradInputs(BaseCorrMM):
weights
,
top
=
inp
[:
2
]
weights
,
top
=
inp
[:
2
]
bottom
,
=
grads
bottom
,
=
grads
d_weights
=
CorrMM_gradWeights
(
self
.
border_mode
,
d_weights
=
CorrMM_gradWeights
(
self
.
border_mode
,
self
.
subsample
)(
bottom
,
self
.
subsample
,
top
,
self
.
filter_dilation
)(
bottom
,
weights
.
shape
[
-
2
:])
top
,
weights
.
shape
[
-
2
:])
d_top
=
CorrMM
(
self
.
border_mode
,
d_top
=
CorrMM
(
self
.
border_mode
,
self
.
subsample
)(
bottom
,
weights
)
self
.
subsample
,
self
.
filter_dilation
)(
bottom
,
weights
)
d_height_width
=
((
theano
.
gradient
.
DisconnectedType
()(),)
*
d_height_width
=
((
theano
.
gradient
.
DisconnectedType
()(),)
*
2
if
len
(
inp
)
==
4
else
())
2
if
len
(
inp
)
==
4
else
())
return
(
d_weights
,
d_top
)
+
d_height_width
return
(
d_weights
,
d_top
)
+
d_height_width
...
...
theano/tensor/nnet/corr_gemm.c
浏览文件 @
a668c6c5
...
@@ -6,13 +6,13 @@ Copyright (c) 2014, The Regents of the University of California (Regents)
...
@@ -6,13 +6,13 @@ Copyright (c) 2014, The Regents of the University of California (Regents)
All rights reserved.
All rights reserved.
Redistribution and use in source and binary forms, with or without
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
modification, are permitted provided that the following conditions are met:
1. Redistributions of source code must retain the above copyright notice, this
1. Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.
list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright notice,
2. Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
...
@@ -31,20 +31,24 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
...
@@ -31,20 +31,24 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
// Loops for fast unfold + copy
// Loops for fast unfold + copy
void
im2col
(
const
%
(
float_type
)
s
*
data_im
,
const
int
channels
,
void
im2col
(
const
%
(
float_type
)
s
*
data_im
,
const
int
channels
,
const
int
height
,
const
int
width
,
const
int
kernel_h
,
const
int
kernel_w
,
const
int
height
,
const
int
width
,
const
int
kernel_h
,
const
int
kernel_w
,
const
int
dilation_h
,
const
int
dilation_w
,
const
int
pad_h
,
const
int
pad_w
,
const
int
pad_h
,
const
int
pad_w
,
const
int
stride_h
,
const
int
stride_w
,
const
int
stride_h
,
const
int
stride_w
,
%
(
float_type
)
s
*
data_col
)
{
%
(
float_type
)
s
*
data_col
)
{
int
height_col
=
(
height
+
2
*
pad_h
-
kernel_h
)
/
stride_h
+
1
;
// Implicit dilated kernel size
int
width_col
=
(
width
+
2
*
pad_w
-
kernel_w
)
/
stride_w
+
1
;
int
dil_kernel_h
=
(
kernel_h
-
1
)
*
dilation_h
+
1
;
int
dil_kernel_w
=
(
kernel_w
-
1
)
*
dilation_w
+
1
;
int
height_col
=
(
height
+
2
*
pad_h
-
dil_kernel_h
)
/
stride_h
+
1
;
int
width_col
=
(
width
+
2
*
pad_w
-
dil_kernel_w
)
/
stride_w
+
1
;
int
channels_col
=
channels
*
kernel_h
*
kernel_w
;
int
channels_col
=
channels
*
kernel_h
*
kernel_w
;
for
(
int
c
=
0
;
c
<
channels_col
;
++
c
)
{
for
(
int
c
=
0
;
c
<
channels_col
;
++
c
)
{
int
w_offset
=
c
%%
kernel_w
;
int
w_offset
=
c
%%
kernel_w
;
int
h_offset
=
(
c
/
kernel_w
)
%%
kernel_h
;
int
h_offset
=
(
c
/
kernel_w
)
%%
kernel_h
;
int
c_im
=
c
/
kernel_h
/
kernel_w
;
int
c_im
=
c
/
kernel_h
/
kernel_w
;
for
(
int
h
=
0
;
h
<
height_col
;
++
h
)
{
for
(
int
h
=
0
;
h
<
height_col
;
++
h
)
{
int
h_pad
=
h
*
stride_h
-
pad_h
+
h_offset
*
dilation_h
;
for
(
int
w
=
0
;
w
<
width_col
;
++
w
)
{
for
(
int
w
=
0
;
w
<
width_col
;
++
w
)
{
int
h_pad
=
h
*
stride_h
-
pad_h
+
h_offset
;
int
w_pad
=
w
*
stride_w
-
pad_w
+
w_offset
*
dilation_w
;
int
w_pad
=
w
*
stride_w
-
pad_w
+
w_offset
;
if
(
h_pad
>=
0
&&
h_pad
<
height
&&
w_pad
>=
0
&&
w_pad
<
width
)
if
(
h_pad
>=
0
&&
h_pad
<
height
&&
w_pad
>=
0
&&
w_pad
<
width
)
data_col
[(
npy_intp
)(
c
*
height_col
+
h
)
*
width_col
+
w
]
=
data_col
[(
npy_intp
)(
c
*
height_col
+
h
)
*
width_col
+
w
]
=
data_im
[(
npy_intp
)(
c_im
*
height
+
h_pad
)
*
width
+
w_pad
];
data_im
[(
npy_intp
)(
c_im
*
height
+
h_pad
)
*
width
+
w_pad
];
...
@@ -60,10 +64,14 @@ void im2col(const %(float_type)s* data_im, const int channels,
...
@@ -60,10 +64,14 @@ void im2col(const %(float_type)s* data_im, const int channels,
// accumulated into data_im.
// accumulated into data_im.
void
col2im
(
const
%
(
float_type
)
s
*
data_col
,
const
int
channels
,
void
col2im
(
const
%
(
float_type
)
s
*
data_col
,
const
int
channels
,
const
int
height
,
const
int
width
,
const
int
patch_h
,
const
int
patch_w
,
const
int
height
,
const
int
width
,
const
int
patch_h
,
const
int
patch_w
,
const
int
dilation_h
,
const
int
dilation_w
,
const
int
pad_h
,
const
int
pad_w
,
const
int
stride_h
,
const
int
pad_h
,
const
int
pad_w
,
const
int
stride_h
,
const
int
stride_w
,
%
(
float_type
)
s
*
data_im
)
{
const
int
stride_w
,
%
(
float_type
)
s
*
data_im
)
{
int
height_col
=
(
height
+
2
*
pad_h
-
patch_h
)
/
stride_h
+
1
;
// Implicit dilated patch
int
width_col
=
(
width
+
2
*
pad_w
-
patch_w
)
/
stride_w
+
1
;
int
dil_patch_h
=
(
patch_h
-
1
)
*
dilation_h
+
1
;
int
dil_patch_w
=
(
patch_w
-
1
)
*
dilation_w
+
1
;
int
height_col
=
(
height
+
2
*
pad_h
-
dil_patch_h
)
/
stride_h
+
1
;
int
width_col
=
(
width
+
2
*
pad_w
-
dil_patch_w
)
/
stride_w
+
1
;
int
num_kernels
=
channels
*
height
*
width
;
int
num_kernels
=
channels
*
height
*
width
;
int
channels_col
=
channels
*
patch_h
*
patch_w
;
int
channels_col
=
channels
*
patch_h
*
patch_w
;
for
(
int
c
=
0
;
c
<
channels_col
;
++
c
)
{
for
(
int
c
=
0
;
c
<
channels_col
;
++
c
)
{
...
@@ -71,9 +79,9 @@ void col2im(const %(float_type)s* data_col, const int channels,
...
@@ -71,9 +79,9 @@ void col2im(const %(float_type)s* data_col, const int channels,
int
h_offset
=
(
c
/
patch_w
)
%%
patch_h
;
int
h_offset
=
(
c
/
patch_w
)
%%
patch_h
;
int
c_im
=
c
/
patch_h
/
patch_w
;
int
c_im
=
c
/
patch_h
/
patch_w
;
for
(
int
h
=
0
;
h
<
height_col
;
++
h
)
{
for
(
int
h
=
0
;
h
<
height_col
;
++
h
)
{
int
h_pad
=
h
*
stride_h
-
pad_h
+
h_offset
*
dilation_h
;
for
(
int
w
=
0
;
w
<
width_col
;
++
w
)
{
for
(
int
w
=
0
;
w
<
width_col
;
++
w
)
{
int
h_pad
=
h
*
stride_h
-
pad_h
+
h_offset
;
int
w_pad
=
w
*
stride_w
-
pad_w
+
w_offset
*
dilation_w
;
int
w_pad
=
w
*
stride_w
-
pad_w
+
w_offset
;
if
(
h_pad
>=
0
&&
h_pad
<
height
&&
w_pad
>=
0
&&
w_pad
<
width
)
if
(
h_pad
>=
0
&&
h_pad
<
height
&&
w_pad
>=
0
&&
w_pad
<
width
)
data_im
[(
npy_intp
)(
c_im
*
height
+
h_pad
)
*
width
+
w_pad
]
+=
data_im
[(
npy_intp
)(
c_im
*
height
+
h_pad
)
*
width
+
w_pad
]
+=
data_col
[(
npy_intp
)(
c
*
height_col
+
h
)
*
width_col
+
w
];
data_col
[(
npy_intp
)(
c
*
height_col
+
h
)
*
width_col
+
w
];
...
@@ -91,13 +99,15 @@ void col2im(const %(float_type)s* data_col, const int channels,
...
@@ -91,13 +99,15 @@ void col2im(const %(float_type)s* data_col, const int channels,
// CPU version author: Jesse Livezey
// CPU version author: Jesse Livezey
// CPU version adapted from GPU version
// CPU version adapted from GPU version
PyArrayObject
*
corrMM
(
PyArrayObject
*
bottom
,
PyArrayObject
*
corrMM
(
PyArrayObject
*
bottom
,
PyArrayObject
*
weight
,
PyArrayObject
*
weight
,
PyArrayObject
*
top
,
PyArrayObject
*
top
,
const
int
direction
,
const
int
direction
,
const
int
dH
=
1
,
const
int
dH
=
1
,
const
int
dW
=
1
,
const
int
dW
=
1
,
const
int
padH
=
0
,
const
int
dilH
=
1
,
const
int
padW
=
0
)
const
int
dilW
=
1
,
const
int
padH
=
0
,
const
int
padW
=
0
)
{
{
if
(
PyArray_NDIM
(
bottom
)
!=
4
)
if
(
PyArray_NDIM
(
bottom
)
!=
4
)
{
{
...
@@ -109,7 +119,7 @@ PyArrayObject* corrMM(PyArrayObject* bottom,
...
@@ -109,7 +119,7 @@ PyArrayObject* corrMM(PyArrayObject* bottom,
PyErr_SetString
(
PyExc_ValueError
,
"CorrMM received bottom with wrong type."
);
PyErr_SetString
(
PyExc_ValueError
,
"CorrMM received bottom with wrong type."
);
return
NULL
;
return
NULL
;
}
}
if
(
PyArray_NDIM
(
weight
)
!=
4
)
if
(
PyArray_NDIM
(
weight
)
!=
4
)
{
{
PyErr_SetString
(
PyExc_ValueError
,
"CorrMM requires weight of 4D"
);
PyErr_SetString
(
PyExc_ValueError
,
"CorrMM requires weight of 4D"
);
...
@@ -151,9 +161,12 @@ PyArrayObject* corrMM(PyArrayObject* bottom,
...
@@ -151,9 +161,12 @@ PyArrayObject* corrMM(PyArrayObject* bottom,
"CorrMM images and kernel must have the same stack size
\n
"
);
"CorrMM images and kernel must have the same stack size
\n
"
);
return
NULL
;
return
NULL
;
}
}
// implicit dilated filter
const
int
dil_kH
=
(
kH
-
1
)
*
dilH
+
1
;
const
int
dil_kW
=
(
kW
-
1
)
*
dilW
+
1
;
// top: (batchSize, nFilters, topHeight, topWidth)
// top: (batchSize, nFilters, topHeight, topWidth)
const
int
topHeight
=
(
bottomHeight
+
2
*
padH
-
kH
)
/
dH
+
1
;
const
int
topHeight
=
(
bottomHeight
+
2
*
padH
-
dil_
kH
)
/
dH
+
1
;
const
int
topWidth
=
(
bottomWidth
+
2
*
padW
-
kW
)
/
dW
+
1
;
const
int
topWidth
=
(
bottomWidth
+
2
*
padW
-
dil_
kW
)
/
dW
+
1
;
if
(
batchSize
!=
PyArray_DIMS
(
top
)[
0
]
||
if
(
batchSize
!=
PyArray_DIMS
(
top
)[
0
]
||
nFilters
!=
PyArray_DIMS
(
top
)[
1
]
||
nFilters
!=
PyArray_DIMS
(
top
)[
1
]
||
topHeight
!=
PyArray_DIMS
(
top
)[
2
]
||
topHeight
!=
PyArray_DIMS
(
top
)[
2
]
||
...
@@ -176,9 +189,9 @@ PyArrayObject* corrMM(PyArrayObject* bottom,
...
@@ -176,9 +189,9 @@ PyArrayObject* corrMM(PyArrayObject* bottom,
col_dim
[
0
]
=
(
npy_intp
)(
nChannels
*
kW
*
kH
);
col_dim
[
0
]
=
(
npy_intp
)(
nChannels
*
kW
*
kH
);
col_dim
[
1
]
=
(
npy_intp
)(
topHeight
*
topWidth
);
col_dim
[
1
]
=
(
npy_intp
)(
topHeight
*
topWidth
);
PyArrayObject
*
col
=
(
PyArrayObject
*
)
PyArray_EMPTY
(
2
,
PyArrayObject
*
col
=
(
PyArrayObject
*
)
PyArray_EMPTY
(
2
,
col_dim
,
col_dim
,
PyArray_TYPE
(
top
),
PyArray_TYPE
(
top
),
0
);
0
);
if
(
NULL
==
col
)
if
(
NULL
==
col
)
{
{
PyErr_Format
(
PyExc_RuntimeError
,
PyErr_Format
(
PyExc_RuntimeError
,
...
@@ -206,7 +219,8 @@ PyArrayObject* corrMM(PyArrayObject* bottom,
...
@@ -206,7 +219,8 @@ PyArrayObject* corrMM(PyArrayObject* bottom,
for
(
int
n
=
0
;
n
<
batchSize
;
n
++
)
{
for
(
int
n
=
0
;
n
<
batchSize
;
n
++
)
{
// First, im2col
// First, im2col
im2col
((
%
(
float_type
)
s
*
)
PyArray_DATA
(
bottom
)
+
n
*
bottom_stride
,
nChannels
,
bottomHeight
,
im2col
((
%
(
float_type
)
s
*
)
PyArray_DATA
(
bottom
)
+
n
*
bottom_stride
,
nChannels
,
bottomHeight
,
bottomWidth
,
kH
,
kW
,
padH
,
padW
,
dH
,
dW
,
(
%
(
float_type
)
s
*
)
PyArray_DATA
(
col
));
bottomWidth
,
kH
,
kW
,
dilH
,
dilW
,
padH
,
padW
,
dH
,
dW
,
(
%
(
float_type
)
s
*
)
PyArray_DATA
(
col
));
// Second, gemm
// Second, gemm
%
(
gemm
)
s
(
&
NTrans
,
&
NTrans
,
%
(
gemm
)
s
(
&
NTrans
,
&
NTrans
,
&
N_
,
&
M_
,
&
K_
,
&
N_
,
&
M_
,
&
K_
,
...
@@ -255,7 +269,8 @@ PyArrayObject* corrMM(PyArrayObject* bottom,
...
@@ -255,7 +269,8 @@ PyArrayObject* corrMM(PyArrayObject* bottom,
for
(
int
n
=
0
;
n
<
batchSize
;
n
++
)
{
for
(
int
n
=
0
;
n
<
batchSize
;
n
++
)
{
// First, im2col
// First, im2col
im2col
((
%
(
float_type
)
s
*
)
PyArray_DATA
(
bottom
)
+
n
*
bottom_stride
,
nChannels
,
bottomHeight
,
im2col
((
%
(
float_type
)
s
*
)
PyArray_DATA
(
bottom
)
+
n
*
bottom_stride
,
nChannels
,
bottomHeight
,
bottomWidth
,
kH
,
kW
,
padH
,
padW
,
dH
,
dW
,
(
%
(
float_type
)
s
*
)
PyArray_DATA
(
col
));
bottomWidth
,
kH
,
kW
,
dilH
,
dilW
,
padH
,
padW
,
dH
,
dW
,
(
%
(
float_type
)
s
*
)
PyArray_DATA
(
col
));
// Second, gemm
// Second, gemm
// Note that we accumulate into weight. We do so by setting beta = 0
// Note that we accumulate into weight. We do so by setting beta = 0
// for the first iteration and beta = 1 for subsequent ones. (This
// for the first iteration and beta = 1 for subsequent ones. (This
...
@@ -299,7 +314,7 @@ PyArrayObject* corrMM(PyArrayObject* bottom,
...
@@ -299,7 +314,7 @@ PyArrayObject* corrMM(PyArrayObject* bottom,
}
}
else
if
(
direction
==
2
)
{
// backprop wrt. inputs
else
if
(
direction
==
2
)
{
// backprop wrt. inputs
output
=
bottom
;
output
=
bottom
;
// bottom is set to zero here rather than inside of col2im
// bottom is set to zero here rather than inside of col2im
PyArray_FILLWBYTE
(
bottom
,
0
);
PyArray_FILLWBYTE
(
bottom
,
0
);
// full convolution: gemm, then col2im
// full convolution: gemm, then col2im
// Iterate over batch
// Iterate over batch
...
@@ -314,7 +329,8 @@ PyArrayObject* corrMM(PyArrayObject* bottom,
...
@@ -314,7 +329,8 @@ PyArrayObject* corrMM(PyArrayObject* bottom,
(
%
(
float_type
)
s
*
)
PyArray_DATA
(
col
),
&
N_
);
(
%
(
float_type
)
s
*
)
PyArray_DATA
(
col
),
&
N_
);
// col2im back to the data
// col2im back to the data
col2im
((
%
(
float_type
)
s
*
)
PyArray_DATA
(
col
),
nChannels
,
bottomHeight
,
bottomWidth
,
col2im
((
%
(
float_type
)
s
*
)
PyArray_DATA
(
col
),
nChannels
,
bottomHeight
,
bottomWidth
,
kH
,
kW
,
padH
,
padW
,
dH
,
dW
,
(
%
(
float_type
)
s
*
)
PyArray_DATA
(
bottom
)
+
n
*
bottom_stride
);
kH
,
kW
,
dilH
,
dilW
,
padH
,
padW
,
dH
,
dW
,
(
%
(
float_type
)
s
*
)
PyArray_DATA
(
bottom
)
+
n
*
bottom_stride
);
}
}
/*
/*
// Original caffe code for comparison
// Original caffe code for comparison
...
...
theano/tensor/nnet/opt.py
浏览文件 @
a668c6c5
...
@@ -79,7 +79,8 @@ def local_abstractconv_gemm(node):
...
@@ -79,7 +79,8 @@ def local_abstractconv_gemm(node):
if
node
.
op
.
filter_flip
:
if
node
.
op
.
filter_flip
:
kern
=
kern
[:,
:,
::
-
1
,
::
-
1
]
kern
=
kern
[:,
:,
::
-
1
,
::
-
1
]
rval
=
CorrMM
(
border_mode
=
node
.
op
.
border_mode
,
rval
=
CorrMM
(
border_mode
=
node
.
op
.
border_mode
,
subsample
=
node
.
op
.
subsample
)(
img
,
kern
)
subsample
=
node
.
op
.
subsample
,
filter_dilation
=
node
.
op
.
filter_dilation
)(
img
,
kern
)
copy_stack_trace
(
node
.
outputs
[
0
],
rval
)
copy_stack_trace
(
node
.
outputs
[
0
],
rval
)
return
[
rval
]
return
[
rval
]
...
@@ -97,7 +98,8 @@ def local_abstractconv_gradweight_gemm(node):
...
@@ -97,7 +98,8 @@ def local_abstractconv_gradweight_gemm(node):
return
None
return
None
rval
=
CorrMM_gradWeights
(
border_mode
=
node
.
op
.
border_mode
,
rval
=
CorrMM_gradWeights
(
border_mode
=
node
.
op
.
border_mode
,
subsample
=
node
.
op
.
subsample
)(
img
,
topgrad
,
shape
)
subsample
=
node
.
op
.
subsample
,
filter_dilation
=
node
.
op
.
filter_dilation
)(
img
,
topgrad
,
shape
)
copy_stack_trace
(
node
.
outputs
[
0
],
rval
)
copy_stack_trace
(
node
.
outputs
[
0
],
rval
)
# need to flip the kernel if necessary
# need to flip the kernel if necessary
...
@@ -124,8 +126,9 @@ def local_abstractconv_gradinputs_gemm(node):
...
@@ -124,8 +126,9 @@ def local_abstractconv_gradinputs_gemm(node):
if
node
.
op
.
filter_flip
:
if
node
.
op
.
filter_flip
:
kern
=
kern
[:,
:,
::
-
1
,
::
-
1
]
kern
=
kern
[:,
:,
::
-
1
,
::
-
1
]
rval
=
CorrMM_gradInputs
(
border_mode
=
node
.
op
.
border_mode
,
rval
=
CorrMM_gradInputs
(
border_mode
=
node
.
op
.
border_mode
,
subsample
=
node
.
op
.
subsample
)(
kern
,
topgrad
,
subsample
=
node
.
op
.
subsample
,
shape
)
filter_dilation
=
node
.
op
.
filter_dilation
)(
kern
,
topgrad
,
shape
)
copy_stack_trace
(
node
.
outputs
[
0
],
rval
)
copy_stack_trace
(
node
.
outputs
[
0
],
rval
)
return
[
rval
]
return
[
rval
]
...
@@ -221,7 +224,9 @@ def local_conv2d_gradweight_cpu(node):
...
@@ -221,7 +224,9 @@ def local_conv2d_gradweight_cpu(node):
assert
len
(
op_imshp
)
==
4
and
len
(
op_kshp
)
==
4
assert
len
(
op_imshp
)
==
4
and
len
(
op_kshp
)
==
4
outshp
=
get_conv_output_shape
(
op_imshp
,
op_kshp
,
outshp
=
get_conv_output_shape
(
op_imshp
,
op_kshp
,
node
.
op
.
border_mode
,
node
.
op
.
subsample
)[
2
:]
node
.
op
.
border_mode
,
node
.
op
.
subsample
,
node
.
op
.
filter_dilation
)[
2
:]
fulloutshp
=
get_conv_output_shape
(
op_imshp
,
op_kshp
,
fulloutshp
=
get_conv_output_shape
(
op_imshp
,
op_kshp
,
node
.
op
.
border_mode
,
(
1
,
1
))[
2
:]
node
.
op
.
border_mode
,
(
1
,
1
))[
2
:]
...
@@ -334,7 +339,9 @@ def local_conv2d_gradinputs_cpu(node):
...
@@ -334,7 +339,9 @@ def local_conv2d_gradinputs_cpu(node):
filters
=
filters
[:,
:,
::
-
1
,
::
-
1
]
filters
=
filters
[:,
:,
::
-
1
,
::
-
1
]
outshp
=
get_conv_output_shape
(
op_imshp
,
op_kshp
,
outshp
=
get_conv_output_shape
(
op_imshp
,
op_kshp
,
node
.
op
.
border_mode
,
node
.
op
.
subsample
)[
2
:]
node
.
op
.
border_mode
,
node
.
op
.
subsample
,
node
.
op
.
filter_dilation
)[
2
:]
fulloutshp
=
get_conv_output_shape
(
op_imshp
,
op_kshp
,
fulloutshp
=
get_conv_output_shape
(
op_imshp
,
op_kshp
,
node
.
op
.
border_mode
,
(
1
,
1
))[
2
:]
node
.
op
.
border_mode
,
(
1
,
1
))[
2
:]
...
...
theano/tensor/nnet/tests/test_abstract_conv.py
浏览文件 @
a668c6c5
...
@@ -4,7 +4,6 @@ import numpy
...
@@ -4,7 +4,6 @@ import numpy
import
numpy
as
np
import
numpy
as
np
from
nose.plugins.skip
import
SkipTest
from
nose.plugins.skip
import
SkipTest
from
nose.tools
import
assert_raises
from
nose.tools
import
assert_raises
import
theano
import
theano
from
theano
import
tensor
from
theano
import
tensor
from
theano.gof.opt
import
check_stack_trace
from
theano.gof.opt
import
check_stack_trace
...
@@ -24,46 +23,57 @@ from theano.tensor.nnet.ConvGrad3D import ConvGrad3D
...
@@ -24,46 +23,57 @@ from theano.tensor.nnet.ConvGrad3D import ConvGrad3D
from
theano.tensor.nnet.ConvTransp3D
import
ConvTransp3D
from
theano.tensor.nnet.ConvTransp3D
import
ConvTransp3D
def
conv_corr
(
inputs
,
filters
,
border_mode
=
"valid"
,
subsample
=
(
1
,
1
),
def
conv_corr
(
inputs
,
filters
,
border_mode
=
"valid"
,
conv_mode
=
'conv'
):
subsample
=
(
1
,
1
),
conv_mode
=
'conv'
,
filter_dilation
=
(
1
,
1
)):
if
conv_mode
==
'conv'
:
if
conv_mode
==
'conv'
:
filters
=
filters
[:,
:,
::
-
1
,
::
-
1
]
filters
=
filters
[:,
:,
::
-
1
,
::
-
1
]
return
corr
.
CorrMM
(
border_mode
,
subsample
)(
inputs
,
filters
)
return
corr
.
CorrMM
(
border_mode
,
subsample
,
filter_dilation
)(
inputs
,
filters
)
def
conv_corr_gw
(
inputs
,
topgrad
,
filters_shape
,
border_mode
=
"valid"
,
subsample
=
(
1
,
1
),
conv_mode
=
'conv'
):
rval
=
corr
.
CorrMM_gradWeights
(
border_mode
,
subsample
)(
inputs
,
topgrad
,
def
conv_corr_gw
(
inputs
,
topgrad
,
filters_shape
,
filters_shape
[
2
:])
border_mode
=
"valid"
,
subsample
=
(
1
,
1
),
conv_mode
=
'conv'
,
filter_dilation
=
(
1
,
1
)):
rval
=
corr
.
CorrMM_gradWeights
(
border_mode
,
subsample
,
filter_dilation
)(
inputs
,
topgrad
,
filters_shape
[
2
:])
if
conv_mode
==
'conv'
:
if
conv_mode
==
'conv'
:
rval
=
rval
[:,
:,
::
-
1
,
::
-
1
]
rval
=
rval
[:,
:,
::
-
1
,
::
-
1
]
return
rval
return
rval
def
conv_corr_gi
(
filters
,
topgrad
,
inputs_shape
,
border_mode
=
"valid"
,
def
conv_corr_gi
(
filters
,
topgrad
,
inputs_shape
,
subsample
=
(
1
,
1
),
conv_mode
=
'conv'
):
border_mode
=
"valid"
,
subsample
=
(
1
,
1
),
conv_mode
=
'conv'
,
filter_dilation
=
(
1
,
1
)):
if
conv_mode
==
'conv'
:
if
conv_mode
==
'conv'
:
filters
=
filters
[:,
:,
::
-
1
,
::
-
1
]
filters
=
filters
[:,
:,
::
-
1
,
::
-
1
]
return
corr
.
CorrMM_gradInputs
(
border_mode
,
subsample
)(
filters
,
topgrad
,
return
corr
.
CorrMM_gradInputs
(
border_mode
,
inputs_shape
[
2
:])
subsample
,
filter_dilation
)(
filters
,
topgrad
,
inputs_shape
[
2
:])
class
TestGetConvOutShape
(
unittest
.
TestCase
):
class
TestGetConvOutShape
(
unittest
.
TestCase
):
def
test_basic
(
self
):
def
test_basic
(
self
):
image_shape
,
kernel_shape
=
(
3
,
2
,
8
,
9
),
(
4
,
2
,
5
,
6
)
image_shape
,
kernel_shape
=
(
3
,
2
,
12
,
9
),
(
4
,
2
,
5
,
6
)
sub_sample
=
(
1
,
2
)
sub_sample
=
(
1
,
2
)
filter_dilation
=
(
2
,
1
)
test1_params
=
get_conv_output_shape
(
test1_params
=
get_conv_output_shape
(
image_shape
,
kernel_shape
,
'valid'
,
sub_sample
)
image_shape
,
kernel_shape
,
'valid'
,
sub_sample
,
filter_dilation
)
test2_params
=
get_conv_output_shape
(
test2_params
=
get_conv_output_shape
(
image_shape
,
kernel_shape
,
'half'
,
sub_sample
)
image_shape
,
kernel_shape
,
'half'
,
sub_sample
,
filter_dilation
)
test3_params
=
get_conv_output_shape
(
test3_params
=
get_conv_output_shape
(
image_shape
,
kernel_shape
,
'full'
,
sub_sample
)
image_shape
,
kernel_shape
,
'full'
,
sub_sample
,
filter_dilation
)
test4_params
=
get_conv_output_shape
(
test4_params
=
get_conv_output_shape
(
image_shape
,
kernel_shape
,
(
1
,
2
),
sub_sample
)
image_shape
,
kernel_shape
,
(
1
,
2
),
sub_sample
,
filter_dilation
)
self
.
assertTrue
(
test1_params
==
(
3
,
4
,
4
,
2
))
self
.
assertTrue
(
test1_params
==
(
3
,
4
,
4
,
2
))
self
.
assertTrue
(
test2_params
==
(
3
,
4
,
8
,
5
))
self
.
assertTrue
(
test2_params
==
(
3
,
4
,
12
,
5
))
self
.
assertTrue
(
test3_params
==
(
3
,
4
,
12
,
7
))
self
.
assertTrue
(
test3_params
==
(
3
,
4
,
20
,
7
))
self
.
assertTrue
(
test4_params
==
(
3
,
4
,
6
,
4
))
self
.
assertTrue
(
test4_params
==
(
3
,
4
,
6
,
4
))
...
@@ -71,35 +81,41 @@ class BaseTestConv2d(unittest.TestCase):
...
@@ -71,35 +81,41 @@ class BaseTestConv2d(unittest.TestCase):
def
setUp
(
self
):
def
setUp
(
self
):
if
theano
.
config
.
blas
.
ldflags
==
''
:
if
theano
.
config
.
blas
.
ldflags
==
''
:
raise
SkipTest
(
"BLAS required for reference"
)
raise
SkipTest
(
"BLAS required for reference"
)
self
.
inputs_shapes
=
[(
8
,
1
,
12
,
12
),
(
8
,
1
,
18
,
18
),
(
2
,
1
,
4
,
4
),
self
.
inputs_shapes
=
[(
8
,
1
,
6
,
6
),
(
8
,
1
,
8
,
8
),
(
2
,
1
,
7
,
7
),
(
6
,
1
,
10
,
11
),
(
2
,
1
,
6
,
5
),
(
1
,
5
,
9
,
9
)]
(
6
,
1
,
10
,
11
),
(
2
,
1
,
6
,
5
),
(
1
,
5
,
9
,
9
)]
self
.
filters_shapes
=
[(
5
,
1
,
2
,
2
),
(
4
,
1
,
3
,
3
),
(
2
,
1
,
3
,
3
),
self
.
filters_shapes
=
[(
5
,
1
,
2
,
2
),
(
4
,
1
,
3
,
3
),
(
2
,
1
,
3
,
3
),
(
1
,
1
,
2
,
5
),
(
4
,
1
,
2
,
2
),
(
4
,
5
,
2
,
2
)]
(
1
,
1
,
2
,
3
),
(
4
,
1
,
1
,
3
),
(
4
,
5
,
3
,
2
)]
self
.
subsamples
=
[(
1
,
1
),
(
2
,
2
),
(
2
,
4
)]
self
.
subsamples
=
[(
1
,
1
),
(
2
,
2
),
(
2
,
4
)]
self
.
filters_dilations
=
[(
1
,
1
),
(
1
,
2
),
(
2
,
1
)]
self
.
border_modes
=
[
"valid"
,
"full"
,
(
0
,
0
),
(
1
,
1
),
(
5
,
5
),
(
5
,
2
)]
self
.
border_modes
=
[
"valid"
,
"full"
,
(
0
,
0
),
(
1
,
1
),
(
5
,
5
),
(
5
,
2
)]
self
.
filter_flip
=
[
True
,
False
]
self
.
filter_flip
=
[
True
,
False
]
self
.
provide_shape
=
[
True
,
False
]
self
.
provide_shape
=
[
True
,
False
]
self
.
shared
=
theano
.
compile
.
shared
self
.
shared
=
theano
.
compile
.
shared
def
get_output_shape
(
self
,
inputs_shape
,
filters_shape
,
subsample
,
def
get_output_shape
(
self
,
inputs_shape
,
filters_shape
,
border_mode
):
subsample
,
border_mode
,
filter_dilation
):
dil_filters
=
((
filters_shape
[
2
]
-
1
)
*
filter_dilation
[
0
]
+
1
,
(
filters_shape
[
3
]
-
1
)
*
filter_dilation
[
1
]
+
1
)
if
border_mode
==
"valid"
:
if
border_mode
==
"valid"
:
border_mode
=
(
0
,
0
)
border_mode
=
(
0
,
0
)
if
border_mode
==
"full"
:
if
border_mode
==
"full"
:
border_mode
=
(
filters_shape
[
2
]
-
1
,
filters_shape
[
3
]
-
1
)
border_mode
=
(
dil_filters
[
0
]
-
1
,
dil_filters
[
1
]
-
1
)
batch_size
=
inputs_shape
[
0
]
batch_size
=
inputs_shape
[
0
]
num_filters
=
filters_shape
[
0
]
num_filters
=
filters_shape
[
0
]
return
((
batch_size
,
num_filters
,)
+
return
((
batch_size
,
num_filters
,)
+
tuple
(
None
if
i
is
None
or
k
is
None
tuple
(
None
if
i
is
None
or
k
is
None
else
((
i
+
2
*
pad
-
k
)
//
d
+
1
)
else
((
i
+
2
*
pad
-
((
k
-
1
)
*
fd
+
1
))
//
d
+
1
)
for
i
,
k
,
d
,
pad
in
zip
(
inputs_shape
[
2
:],
for
i
,
k
,
d
,
pad
,
fd
in
zip
(
inputs_shape
[
2
:],
filters_shape
[
2
:],
filters_shape
[
2
:],
subsample
,
border_mode
)))
subsample
,
border_mode
,
filter_dilation
)))
def
run_fwd
(
self
,
inputs_shape
,
filters_shape
,
ref
=
conv_corr
,
def
run_fwd
(
self
,
inputs_shape
,
filters_shape
,
ref
=
conv_corr
,
subsample
=
(
1
,
1
),
verify_grad
=
True
,
mode
=
None
,
subsample
=
(
1
,
1
),
verify_grad
=
True
,
mode
=
None
,
border_mode
=
'valid'
,
filter_flip
=
True
,
provide_shape
=
False
,
border_mode
=
'valid'
,
filter_flip
=
True
,
target_op
=
None
,
check_trace
=
False
):
provide_shape
=
False
,
target_op
=
None
,
check_trace
=
False
,
filter_dilation
=
(
1
,
1
)):
inputs_val
=
numpy
.
random
.
random
(
inputs_shape
)
.
astype
(
'float32'
)
inputs_val
=
numpy
.
random
.
random
(
inputs_shape
)
.
astype
(
'float32'
)
filters_val
=
numpy
.
random
.
random
(
filters_shape
)
.
astype
(
'float32'
)
filters_val
=
numpy
.
random
.
random
(
filters_shape
)
.
astype
(
'float32'
)
...
@@ -120,13 +136,15 @@ class BaseTestConv2d(unittest.TestCase):
...
@@ -120,13 +136,15 @@ class BaseTestConv2d(unittest.TestCase):
c_ref
=
ref
(
inputs
,
filters
,
c_ref
=
ref
(
inputs
,
filters
,
border_mode
=
border_mode
,
border_mode
=
border_mode
,
subsample
=
subsample
,
subsample
=
subsample
,
conv_mode
=
conv_mode
)
conv_mode
=
conv_mode
,
filter_dilation
=
filter_dilation
)
c
=
conv
.
conv2d
(
inputs
,
filters
,
c
=
conv
.
conv2d
(
inputs
,
filters
,
border_mode
=
border_mode
,
border_mode
=
border_mode
,
subsample
=
subsample
,
subsample
=
subsample
,
filter_flip
=
filter_flip
,
filter_flip
=
filter_flip
,
input_shape
=
imshp
,
input_shape
=
imshp
,
filter_shape
=
kshp
)
filter_shape
=
kshp
,
filter_dilation
=
filter_dilation
)
f_ref
=
theano
.
function
([],
c_ref
,
mode
=
'FAST_RUN'
)
f_ref
=
theano
.
function
([],
c_ref
,
mode
=
'FAST_RUN'
)
f
=
theano
.
function
([],
c
,
mode
=
mode
)
f
=
theano
.
function
([],
c
,
mode
=
mode
)
...
@@ -143,15 +161,17 @@ class BaseTestConv2d(unittest.TestCase):
...
@@ -143,15 +161,17 @@ class BaseTestConv2d(unittest.TestCase):
if
verify_grad
:
if
verify_grad
:
utt
.
verify_grad
(
conv
.
AbstractConv2d
(
border_mode
=
border_mode
,
utt
.
verify_grad
(
conv
.
AbstractConv2d
(
border_mode
=
border_mode
,
imshp
=
imshp
,
kshp
=
kshp
,
imshp
=
imshp
,
kshp
=
kshp
,
subsample
=
subsample
),
subsample
=
subsample
,
filter_dilation
=
filter_dilation
),
[
inputs_val
,
filters_val
],
[
inputs_val
,
filters_val
],
mode
=
mode
)
mode
=
mode
)
def
run_gradweight
(
self
,
inputs_shape
,
filters_shape
,
output_shape
,
def
run_gradweight
(
self
,
inputs_shape
,
filters_shape
,
output_shape
,
ref
=
conv_corr_gw
,
subsample
=
(
1
,
1
),
filter_flip
=
True
,
ref
=
conv_corr_gw
,
subsample
=
(
1
,
1
),
verify_grad
=
True
,
mode
=
None
,
border_mode
=
'valid'
,
filter_flip
=
True
,
verify_grad
=
True
,
mode
=
None
,
provide_shape
=
False
,
target_op
=
None
,
check_trace
=
False
):
border_mode
=
'valid'
,
provide_shape
=
False
,
target_op
=
None
,
check_trace
=
False
,
filter_dilation
=
(
1
,
1
)):
inputs_val
=
numpy
.
random
.
random
(
inputs_shape
)
.
astype
(
'float32'
)
inputs_val
=
numpy
.
random
.
random
(
inputs_shape
)
.
astype
(
'float32'
)
output_val
=
numpy
.
random
.
random
(
output_shape
)
.
astype
(
'float32'
)
output_val
=
numpy
.
random
.
random
(
output_shape
)
.
astype
(
'float32'
)
...
@@ -171,13 +191,15 @@ class BaseTestConv2d(unittest.TestCase):
...
@@ -171,13 +191,15 @@ class BaseTestConv2d(unittest.TestCase):
c
=
conv
.
AbstractConv2d_gradWeights
(
border_mode
=
border_mode
,
c
=
conv
.
AbstractConv2d_gradWeights
(
border_mode
=
border_mode
,
filter_flip
=
filter_flip
,
filter_flip
=
filter_flip
,
subsample
=
subsample
,
subsample
=
subsample
,
imshp
=
imshp
,
kshp
=
kshp
)
imshp
=
imshp
,
kshp
=
kshp
,
filter_dilation
=
filter_dilation
)
c
=
c
(
inputs
,
output
,
filters_shape
[
-
2
:])
c
=
c
(
inputs
,
output
,
filters_shape
[
-
2
:])
c_ref
=
ref
(
inputs
,
output
,
c_ref
=
ref
(
inputs
,
output
,
filters_shape
,
filters_shape
,
border_mode
=
border_mode
,
border_mode
=
border_mode
,
subsample
=
subsample
,
subsample
=
subsample
,
conv_mode
=
conv_mode
)
conv_mode
=
conv_mode
,
filter_dilation
=
filter_dilation
)
f
=
theano
.
function
([],
c
,
mode
=
mode
)
f
=
theano
.
function
([],
c
,
mode
=
mode
)
f_ref
=
theano
.
function
([],
c_ref
,
mode
=
'FAST_RUN'
)
f_ref
=
theano
.
function
([],
c_ref
,
mode
=
'FAST_RUN'
)
...
@@ -193,7 +215,8 @@ class BaseTestConv2d(unittest.TestCase):
...
@@ -193,7 +215,8 @@ class BaseTestConv2d(unittest.TestCase):
def
abstract_conv2d_gradweight
(
inputs_val
,
output_val
):
def
abstract_conv2d_gradweight
(
inputs_val
,
output_val
):
conv_op
=
conv
.
AbstractConv2d_gradWeights
(
border_mode
=
border_mode
,
conv_op
=
conv
.
AbstractConv2d_gradWeights
(
border_mode
=
border_mode
,
subsample
=
subsample
)
subsample
=
subsample
,
filter_dilation
=
filter_dilation
)
return
conv_op
(
inputs_val
,
output_val
,
filters_shape
[
-
2
:])
return
conv_op
(
inputs_val
,
output_val
,
filters_shape
[
-
2
:])
if
verify_grad
:
if
verify_grad
:
...
@@ -204,8 +227,8 @@ class BaseTestConv2d(unittest.TestCase):
...
@@ -204,8 +227,8 @@ class BaseTestConv2d(unittest.TestCase):
def
run_gradinput
(
self
,
inputs_shape
,
filters_shape
,
output_shape
,
def
run_gradinput
(
self
,
inputs_shape
,
filters_shape
,
output_shape
,
ref
=
conv_corr_gi
,
subsample
=
(
1
,
1
),
filter_flip
=
True
,
ref
=
conv_corr_gi
,
subsample
=
(
1
,
1
),
filter_flip
=
True
,
verify_grad
=
True
,
mode
=
None
,
border_mode
=
'valid'
,
verify_grad
=
True
,
mode
=
None
,
border_mode
=
'valid'
,
provide_shape
=
False
,
target_op
=
None
,
check_trace
=
False
):
provide_shape
=
False
,
target_op
=
None
,
check_trace
=
False
,
filter_dilation
=
(
1
,
1
)):
output_val
=
numpy
.
random
.
random
(
output_shape
)
.
astype
(
'float32'
)
output_val
=
numpy
.
random
.
random
(
output_shape
)
.
astype
(
'float32'
)
filters_val
=
numpy
.
random
.
random
(
filters_shape
)
.
astype
(
'float32'
)
filters_val
=
numpy
.
random
.
random
(
filters_shape
)
.
astype
(
'float32'
)
output
=
self
.
shared
(
output_val
)
output
=
self
.
shared
(
output_val
)
...
@@ -224,11 +247,12 @@ class BaseTestConv2d(unittest.TestCase):
...
@@ -224,11 +247,12 @@ class BaseTestConv2d(unittest.TestCase):
c
=
conv
.
AbstractConv2d_gradInputs
(
border_mode
=
border_mode
,
c
=
conv
.
AbstractConv2d_gradInputs
(
border_mode
=
border_mode
,
subsample
=
subsample
,
subsample
=
subsample
,
filter_flip
=
filter_flip
,
filter_flip
=
filter_flip
,
imshp
=
imshp
,
kshp
=
kshp
)
imshp
=
imshp
,
kshp
=
kshp
,
filter_dilation
=
filter_dilation
)
c
=
c
(
filters
,
output
,
inputs_shape
[
-
2
:])
c
=
c
(
filters
,
output
,
inputs_shape
[
-
2
:])
c_ref
=
ref
(
filters
,
output
,
inputs_shape
,
c_ref
=
ref
(
filters
,
output
,
inputs_shape
,
border_mode
=
border_mode
,
subsample
=
subsample
,
border_mode
=
border_mode
,
subsample
=
subsample
,
conv_mode
=
conv_mode
)
conv_mode
=
conv_mode
,
filter_dilation
=
filter_dilation
)
f
=
theano
.
function
([],
c
,
mode
=
mode
)
f
=
theano
.
function
([],
c
,
mode
=
mode
)
f_ref
=
theano
.
function
([],
c_ref
,
mode
=
'FAST_RUN'
)
f_ref
=
theano
.
function
([],
c_ref
,
mode
=
'FAST_RUN'
)
...
@@ -244,7 +268,8 @@ class BaseTestConv2d(unittest.TestCase):
...
@@ -244,7 +268,8 @@ class BaseTestConv2d(unittest.TestCase):
def
abstract_conv2d_gradinputs
(
filters_val
,
output_val
):
def
abstract_conv2d_gradinputs
(
filters_val
,
output_val
):
conv_op
=
conv
.
AbstractConv2d_gradInputs
(
border_mode
=
border_mode
,
conv_op
=
conv
.
AbstractConv2d_gradInputs
(
border_mode
=
border_mode
,
subsample
=
subsample
)
subsample
=
subsample
,
filter_dilation
=
filter_dilation
)
return
conv_op
(
filters_val
,
output_val
,
inputs_shape
[
-
2
:])
return
conv_op
(
filters_val
,
output_val
,
inputs_shape
[
-
2
:])
if
verify_grad
:
if
verify_grad
:
...
@@ -266,15 +291,18 @@ class BaseTestConv2d(unittest.TestCase):
...
@@ -266,15 +291,18 @@ class BaseTestConv2d(unittest.TestCase):
self
.
tcase
(
i
,
f
,
ds
,
db
,
dflip
,
provide_shape
)
self
.
tcase
(
i
,
f
,
ds
,
db
,
dflip
,
provide_shape
)
except
SkipTest
as
e
:
except
SkipTest
as
e
:
skipped
=
e
skipped
=
e
for
s
in
self
.
subsamples
:
for
fd
in
self
.
filters_dilations
:
for
b
in
self
.
border_modes
:
for
s
in
self
.
subsamples
:
try
:
for
b
in
self
.
border_modes
:
self
.
tcase
(
i
,
f
,
s
,
db
,
dflip
,
dprovide_shape
)
try
:
except
SkipTest
as
e
:
self
.
tcase
(
i
,
f
,
s
,
db
,
dflip
,
skipped
=
e
dprovide_shape
,
fd
)
except
SkipTest
as
e
:
skipped
=
e
for
flip
in
self
.
filter_flip
:
for
flip
in
self
.
filter_flip
:
try
:
try
:
self
.
tcase
(
i
,
f
,
ds
,
db
,
flip
,
dprovide_shape
)
self
.
tcase
(
i
,
f
,
ds
,
db
,
flip
,
dprovide_shape
)
except
SkipTest
as
e
:
except
SkipTest
as
e
:
skipped
=
e
skipped
=
e
if
skipped
:
if
skipped
:
...
@@ -287,26 +315,27 @@ class TestCorrConv2d(BaseTestConv2d):
...
@@ -287,26 +315,27 @@ class TestCorrConv2d(BaseTestConv2d):
raise
SkipTest
()
raise
SkipTest
()
return
super
(
TestCorrConv2d
,
self
)
.
setUp
()
return
super
(
TestCorrConv2d
,
self
)
.
setUp
()
def
tcase
(
self
,
i
,
f
,
s
,
b
,
flip
,
provide_shape
):
def
tcase
(
self
,
i
,
f
,
s
,
b
,
flip
,
provide_shape
,
fd
=
(
1
,
1
)
):
o
=
self
.
get_output_shape
(
i
,
f
,
s
,
b
)
o
=
self
.
get_output_shape
(
i
,
f
,
s
,
b
,
fd
)
if
(
not
theano
.
config
.
blas
.
ldflags
or
if
(
not
theano
.
config
.
blas
.
ldflags
or
not
theano
.
config
.
cxx
or
not
theano
.
config
.
cxx
or
theano
.
config
.
mode
==
"FAST_COMPILE"
):
theano
.
config
.
mode
==
"FAST_COMPILE"
):
raise
SkipTest
(
"Need blas to test conv2d"
)
raise
SkipTest
(
"Need blas to test conv2d"
)
self
.
run_fwd
(
inputs_shape
=
i
,
filters_shape
=
f
,
subsample
=
s
,
self
.
run_fwd
(
inputs_shape
=
i
,
filters_shape
=
f
,
subsample
=
s
,
verify_grad
=
True
,
provide_shape
=
provide_shape
,
verify_grad
=
True
,
provide_shape
=
provide_shape
,
border_mode
=
b
,
filter_flip
=
flip
,
target_op
=
CorrMM
,
border_mode
=
b
,
filter_flip
=
flip
,
check_trace
=
True
)
target_op
=
CorrMM
,
check_trace
=
True
,
filter_dilation
=
fd
)
self
.
run_gradweight
(
inputs_shape
=
i
,
filters_shape
=
f
,
self
.
run_gradweight
(
inputs_shape
=
i
,
filters_shape
=
f
,
output_shape
=
o
,
subsample
=
s
,
verify_grad
=
True
,
output_shape
=
o
,
subsample
=
s
,
verify_grad
=
True
,
provide_shape
=
provide_shape
,
border_mode
=
b
,
provide_shape
=
provide_shape
,
border_mode
=
b
,
filter_flip
=
flip
,
target_op
=
CorrMM_gradWeights
,
filter_flip
=
flip
,
target_op
=
CorrMM_gradWeights
,
check_trace
=
True
)
check_trace
=
True
,
filter_dilation
=
fd
)
self
.
run_gradinput
(
inputs_shape
=
i
,
filters_shape
=
f
,
self
.
run_gradinput
(
inputs_shape
=
i
,
filters_shape
=
f
,
output_shape
=
o
,
subsample
=
s
,
verify_grad
=
True
,
output_shape
=
o
,
subsample
=
s
,
verify_grad
=
True
,
provide_shape
=
provide_shape
,
border_mode
=
b
,
provide_shape
=
provide_shape
,
border_mode
=
b
,
filter_flip
=
flip
,
target_op
=
CorrMM_gradInputs
,
filter_flip
=
flip
,
target_op
=
CorrMM_gradInputs
,
check_trace
=
True
)
check_trace
=
True
,
filter_dilation
=
fd
)
class
TestCpuConv2d
(
BaseTestConv2d
):
class
TestCpuConv2d
(
BaseTestConv2d
):
...
@@ -319,9 +348,11 @@ class TestCpuConv2d(BaseTestConv2d):
...
@@ -319,9 +348,11 @@ class TestCpuConv2d(BaseTestConv2d):
def
tearDown
(
self
):
def
tearDown
(
self
):
theano
.
config
.
on_opt_error
=
self
.
opt_err
theano
.
config
.
on_opt_error
=
self
.
opt_err
def
tcase
(
self
,
i
,
f
,
s
,
b
,
flip
,
provide_shape
):
def
tcase
(
self
,
i
,
f
,
s
,
b
,
flip
,
provide_shape
,
fd
=
(
1
,
1
)):
if
fd
!=
(
1
,
1
):
raise
SkipTest
(
"No dilation implementation for basic cpu ConvOp."
)
mode
=
self
.
mode
mode
=
self
.
mode
o
=
self
.
get_output_shape
(
i
,
f
,
s
,
b
)
o
=
self
.
get_output_shape
(
i
,
f
,
s
,
b
,
fd
)
fwd_OK
=
True
fwd_OK
=
True
gradweight_OK
=
True
gradweight_OK
=
True
gradinput_OK
=
True
gradinput_OK
=
True
...
@@ -347,11 +378,12 @@ class TestCpuConv2d(BaseTestConv2d):
...
@@ -347,11 +378,12 @@ class TestCpuConv2d(BaseTestConv2d):
if
fwd_OK
:
if
fwd_OK
:
if
not
theano
.
config
.
blas
.
ldflags
:
if
not
theano
.
config
.
blas
.
ldflags
:
raise
SkipTest
(
"Need blas to test conv2d"
)
raise
SkipTest
(
"Need blas to test conv2d"
)
self
.
run_fwd
(
inputs_shape
=
i
,
filters_shape
=
f
,
subsample
=
s
,
self
.
run_fwd
(
inputs_shape
=
i
,
filters_shape
=
f
,
verify_grad
=
(
gradweight_OK
and
gradinput_OK
),
subsample
=
s
,
verify_grad
=
(
gradweight_OK
and
gradinput_OK
),
mode
=
mode
,
provide_shape
=
provide_shape
,
mode
=
mode
,
provide_shape
=
provide_shape
,
border_mode
=
b
,
filter_flip
=
flip
,
target_op
=
ConvOp
,
border_mode
=
b
,
filter_flip
=
flip
,
target_op
=
ConvOp
,
check_trace
=
True
)
check_trace
=
True
,
filter_dilation
=
fd
)
else
:
else
:
self
.
assertRaises
(
AssertionError
,
self
.
assertRaises
(
AssertionError
,
self
.
run_fwd
,
self
.
run_fwd
,
...
@@ -363,7 +395,8 @@ class TestCpuConv2d(BaseTestConv2d):
...
@@ -363,7 +395,8 @@ class TestCpuConv2d(BaseTestConv2d):
provide_shape
=
provide_shape
,
provide_shape
=
provide_shape
,
border_mode
=
b
,
border_mode
=
b
,
filter_flip
=
flip
,
filter_flip
=
flip
,
check_trace
=
True
)
check_trace
=
True
,
filter_dilation
=
fd
)
if
gradweight_OK
:
if
gradweight_OK
:
if
not
theano
.
config
.
blas
.
ldflags
:
if
not
theano
.
config
.
blas
.
ldflags
:
...
@@ -374,7 +407,8 @@ class TestCpuConv2d(BaseTestConv2d):
...
@@ -374,7 +407,8 @@ class TestCpuConv2d(BaseTestConv2d):
provide_shape
=
provide_shape
,
border_mode
=
b
,
provide_shape
=
provide_shape
,
border_mode
=
b
,
filter_flip
=
flip
,
filter_flip
=
flip
,
target_op
=
(
ConvOp
,
ConvGrad3D
),
target_op
=
(
ConvOp
,
ConvGrad3D
),
check_trace
=
True
)
check_trace
=
True
,
filter_dilation
=
fd
)
else
:
else
:
self
.
assertRaises
(
AssertionError
,
self
.
assertRaises
(
AssertionError
,
self
.
run_gradweight
,
self
.
run_gradweight
,
...
@@ -387,7 +421,8 @@ class TestCpuConv2d(BaseTestConv2d):
...
@@ -387,7 +421,8 @@ class TestCpuConv2d(BaseTestConv2d):
provide_shape
=
provide_shape
,
provide_shape
=
provide_shape
,
border_mode
=
b
,
border_mode
=
b
,
filter_flip
=
flip
,
filter_flip
=
flip
,
check_trace
=
True
)
check_trace
=
True
,
filter_dilation
=
fd
)
if
gradinput_OK
:
if
gradinput_OK
:
if
not
theano
.
config
.
blas
.
ldflags
:
if
not
theano
.
config
.
blas
.
ldflags
:
...
@@ -398,7 +433,8 @@ class TestCpuConv2d(BaseTestConv2d):
...
@@ -398,7 +433,8 @@ class TestCpuConv2d(BaseTestConv2d):
provide_shape
=
provide_shape
,
border_mode
=
b
,
provide_shape
=
provide_shape
,
border_mode
=
b
,
filter_flip
=
flip
,
filter_flip
=
flip
,
target_op
=
(
ConvOp
,
ConvTransp3D
),
target_op
=
(
ConvOp
,
ConvTransp3D
),
check_trace
=
True
)
check_trace
=
True
,
filter_dilation
=
fd
)
else
:
else
:
self
.
assertRaises
(
AssertionError
,
self
.
assertRaises
(
AssertionError
,
self
.
run_gradinput
,
self
.
run_gradinput
,
...
@@ -411,7 +447,8 @@ class TestCpuConv2d(BaseTestConv2d):
...
@@ -411,7 +447,8 @@ class TestCpuConv2d(BaseTestConv2d):
provide_shape
=
provide_shape
,
provide_shape
=
provide_shape
,
border_mode
=
b
,
border_mode
=
b
,
filter_flip
=
flip
,
filter_flip
=
flip
,
check_trace
=
True
)
check_trace
=
True
,
filter_dilation
=
fd
)
def
test_constant_shapes
():
def
test_constant_shapes
():
...
...
theano/tensor/nnet/tests/test_corr.py
浏览文件 @
a668c6c5
...
@@ -32,8 +32,8 @@ class TestCorr2D(utt.InferShapeTester):
...
@@ -32,8 +32,8 @@ class TestCorr2D(utt.InferShapeTester):
def
validate
(
self
,
image_shape
,
filter_shape
,
def
validate
(
self
,
image_shape
,
filter_shape
,
border_mode
=
'valid'
,
subsample
=
(
1
,
1
),
border_mode
=
'valid'
,
subsample
=
(
1
,
1
),
input
=
None
,
filters
=
None
,
input
=
None
,
filters
=
None
,
verify_grad
=
True
,
verify_grad
=
True
,
non_contiguous
=
False
):
non_contiguous
=
False
,
filter_dilation
=
(
1
,
1
)
):
"""
"""
:param image_shape: The constant shape info passed to corrMM.
:param image_shape: The constant shape info passed to corrMM.
:param filter_shape: The constant shape info passed to corrMM.
:param filter_shape: The constant shape info passed to corrMM.
...
@@ -55,7 +55,8 @@ class TestCorr2D(utt.InferShapeTester):
...
@@ -55,7 +55,8 @@ class TestCorr2D(utt.InferShapeTester):
# define theano graph and function
# define theano graph and function
input
.
name
=
'input'
input
.
name
=
'input'
filters
.
name
=
'filters'
filters
.
name
=
'filters'
rval
=
corr
.
CorrMM
(
border_mode
,
subsample
)(
input
,
filters
)
rval
=
corr
.
CorrMM
(
border_mode
,
subsample
,
filter_dilation
)(
input
,
filters
)
rval
.
name
=
'corr_output'
rval
.
name
=
'corr_output'
return
rval
return
rval
...
@@ -86,20 +87,22 @@ class TestCorr2D(utt.InferShapeTester):
...
@@ -86,20 +87,22 @@ class TestCorr2D(utt.InferShapeTester):
orig_image_data
=
image_data
orig_image_data
=
image_data
img_shape2d
=
numpy
.
array
(
N_image_shape
[
-
2
:])
img_shape2d
=
numpy
.
array
(
N_image_shape
[
-
2
:])
fil_shape2d
=
numpy
.
array
(
N_filter_shape
[
-
2
:])
fil_shape2d
=
numpy
.
array
(
N_filter_shape
[
-
2
:])
dil_shape2d
=
numpy
.
array
(
filter_dilation
)
dil_fil_shape2d
=
(
fil_shape2d
-
1
)
*
dil_shape2d
+
1
subsample2d
=
numpy
.
array
(
subsample
)
subsample2d
=
numpy
.
array
(
subsample
)
if
border_mode
==
'full'
:
if
border_mode
==
'full'
:
padHW
=
(
fil_shape2d
-
1
)
padHW
=
(
dil_
fil_shape2d
-
1
)
elif
border_mode
==
'valid'
:
elif
border_mode
==
'valid'
:
padHW
=
numpy
.
array
([
0
,
0
])
padHW
=
numpy
.
array
([
0
,
0
])
elif
border_mode
==
'half'
:
elif
border_mode
==
'half'
:
padHW
=
numpy
.
floor
(
fil_shape2d
/
2
)
.
astype
(
'int32'
)
padHW
=
numpy
.
floor
(
dil_
fil_shape2d
/
2
)
.
astype
(
'int32'
)
elif
isinstance
(
border_mode
,
tuple
):
elif
isinstance
(
border_mode
,
tuple
):
padHW
=
numpy
.
array
(
border_mode
)
padHW
=
numpy
.
array
(
border_mode
)
elif
isinstance
(
border_mode
,
integer_types
):
elif
isinstance
(
border_mode
,
integer_types
):
padHW
=
numpy
.
array
([
border_mode
,
border_mode
])
padHW
=
numpy
.
array
([
border_mode
,
border_mode
])
else
:
else
:
raise
NotImplementedError
(
'Unsupported border_mode {}'
.
format
(
border_mode
))
raise
NotImplementedError
(
'Unsupported border_mode {}'
.
format
(
border_mode
))
out_shape2d
=
numpy
.
floor
((
img_shape2d
+
2
*
(
padHW
)
-
fil_shape2d
)
/
subsample2d
)
+
1
out_shape2d
=
numpy
.
floor
((
img_shape2d
+
2
*
(
padHW
)
-
dil_
fil_shape2d
)
/
subsample2d
)
+
1
# avoid numpy deprecation
# avoid numpy deprecation
out_shape2d
=
out_shape2d
.
astype
(
'int32'
)
out_shape2d
=
out_shape2d
.
astype
(
'int32'
)
out_shape
=
(
N_image_shape
[
0
],
N_filter_shape
[
0
])
+
tuple
(
out_shape2d
)
out_shape
=
(
N_image_shape
[
0
],
N_filter_shape
[
0
])
+
tuple
(
out_shape2d
)
...
@@ -124,8 +127,8 @@ class TestCorr2D(utt.InferShapeTester):
...
@@ -124,8 +127,8 @@ class TestCorr2D(utt.InferShapeTester):
for
col
in
range
(
ref_output
.
shape
[
3
]):
for
col
in
range
(
ref_output
.
shape
[
3
]):
icol
=
col
*
subsample
[
1
]
# image col
icol
=
col
*
subsample
[
1
]
# image col
ref_output
[
bb
,
nn
,
row
,
col
]
+=
(
image2d
[
ref_output
[
bb
,
nn
,
row
,
col
]
+=
(
image2d
[
irow
:
irow
+
N_filter_shape
[
2
],
irow
:
irow
+
dil_fil_shape2d
[
0
]:
filter_dilation
[
0
],
icol
:
icol
+
N_filter_shape
[
3
]]
*
filter2d
[::
-
1
,
::
-
1
]
icol
:
icol
+
dil_fil_shape2d
[
1
]:
filter_dilation
[
1
]]
*
filter2d
[::
-
1
,
::
-
1
]
)
.
sum
()
)
.
sum
()
self
.
assertTrue
(
_allclose
(
theano_output
,
ref_output
))
self
.
assertTrue
(
_allclose
(
theano_output
,
ref_output
))
...
@@ -186,6 +189,28 @@ class TestCorr2D(utt.InferShapeTester):
...
@@ -186,6 +189,28 @@ class TestCorr2D(utt.InferShapeTester):
self
.
validate
((
1
,
1
,
6
,
6
),
(
1
,
1
,
3
,
3
),
1
,
subsample
=
(
3
,
3
))
self
.
validate
((
1
,
1
,
6
,
6
),
(
1
,
1
,
3
,
3
),
1
,
subsample
=
(
3
,
3
))
def
test_filter_dilation
(
self
):
"""
Tests correlation where filter dilation != (1,1)
"""
self
.
validate
((
3
,
2
,
7
,
5
),
(
5
,
2
,
2
,
3
),
'valid'
,
filter_dilation
=
(
2
,
2
))
self
.
validate
((
3
,
2
,
14
,
10
),
(
5
,
2
,
2
,
3
),
'valid'
,
filter_dilation
=
(
3
,
1
))
self
.
validate
((
1
,
1
,
14
,
14
),
(
1
,
1
,
3
,
3
),
'valid'
,
filter_dilation
=
(
2
,
3
))
self
.
validate
((
3
,
2
,
7
,
5
),
(
5
,
2
,
2
,
3
),
'full'
,
filter_dilation
=
(
2
,
2
))
self
.
validate
((
3
,
2
,
7
,
5
),
(
5
,
2
,
2
,
3
),
'full'
,
filter_dilation
=
(
3
,
1
))
self
.
validate
((
1
,
1
,
6
,
6
),
(
1
,
1
,
3
,
3
),
'full'
,
filter_dilation
=
(
2
,
3
))
self
.
validate
((
3
,
2
,
7
,
5
),
(
5
,
2
,
2
,
3
),
'half'
,
filter_dilation
=
(
2
,
2
))
self
.
validate
((
3
,
2
,
7
,
5
),
(
5
,
2
,
2
,
3
),
'half'
,
filter_dilation
=
(
3
,
1
))
self
.
validate
((
1
,
1
,
6
,
6
),
(
1
,
1
,
3
,
3
),
'half'
,
filter_dilation
=
(
2
,
3
))
self
.
validate
((
3
,
2
,
7
,
5
),
(
5
,
2
,
2
,
3
),
(
1
,
1
),
filter_dilation
=
(
2
,
2
))
self
.
validate
((
3
,
2
,
7
,
5
),
(
5
,
2
,
2
,
3
),
(
2
,
1
),
filter_dilation
=
(
2
,
1
))
self
.
validate
((
1
,
1
,
6
,
6
),
(
1
,
1
,
3
,
3
),
(
1
,
2
),
filter_dilation
=
(
1
,
2
))
self
.
validate
((
1
,
1
,
6
,
6
),
(
1
,
1
,
3
,
3
),
1
,
subsample
=
(
3
,
3
),
filter_dilation
=
(
2
,
2
))
@attr
(
'slow'
)
@attr
(
'slow'
)
def
test_shape_Constant_tensor
(
self
):
def
test_shape_Constant_tensor
(
self
):
"""
"""
...
...
编写
预览
Markdown
格式
0%
重试
或
添加新文件
添加附件
取消
您添加了
0
人
到此讨论。请谨慎行事。
请先完成此评论的编辑!
取消
请
注册
或者
登录
后发表评论