Skip to content
项目
群组
代码片段
帮助
当前项目
正在载入...
登录 / 注册
切换导航面板
P
pytensor
项目
项目
详情
活动
周期分析
仓库
仓库
文件
提交
分支
标签
贡献者
图表
比较
统计图
议题
0
议题
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
CI / CD
CI / CD
流水线
作业
日程
统计图
Wiki
Wiki
代码片段
代码片段
成员
成员
折叠边栏
关闭边栏
活动
图像
聊天
创建新问题
作业
提交
问题看板
Open sidebar
testgroup
pytensor
Commits
a668c6c5
提交
a668c6c5
authored
7月 01, 2016
作者:
Pascal Lamblin
提交者:
GitHub
7月 01, 2016
浏览文件
操作
浏览文件
下载
差异文件
Merge pull request #4587 from niasla/dilated_convolution
Implementation of 2D dilated convolution/correlation.
上级
d78f44f6
2dcf3753
隐藏空白字符变更
内嵌
并排
正在显示
13 个修改的文件
包含
651 行增加
和
320 行删除
+651
-320
dnn.py
theano/gpuarray/dnn.py
+3
-0
blas.py
theano/sandbox/cuda/blas.py
+79
-44
corr_gemm.cu
theano/sandbox/cuda/corr_gemm.cu
+145
-46
dnn.py
theano/sandbox/cuda/dnn.py
+3
-0
opt.py
theano/sandbox/cuda/opt.py
+21
-9
test_abstractconv.py
theano/sandbox/cuda/tests/test_abstractconv.py
+22
-15
__init__.py
theano/tensor/nnet/__init__.py
+7
-2
abstract_conv.py
theano/tensor/nnet/abstract_conv.py
+104
-52
corr.py
theano/tensor/nnet/corr.py
+70
-40
corr_gemm.c
theano/tensor/nnet/corr_gemm.c
+44
-28
opt.py
theano/tensor/nnet/opt.py
+13
-6
test_abstract_conv.py
theano/tensor/nnet/tests/test_abstract_conv.py
+107
-70
test_corr.py
theano/tensor/nnet/tests/test_corr.py
+33
-8
没有找到文件。
theano/gpuarray/dnn.py
浏览文件 @
a668c6c5
...
...
@@ -1393,6 +1393,9 @@ def local_abstractconv_cudnn(node):
inp1
=
node
.
inputs
[
0
]
inp2
=
node
.
inputs
[
1
]
if
(
node
.
op
.
filter_dilation
!=
(
1
,
1
)):
return
None
if
not
isinstance
(
inp1
.
type
,
GpuArrayType
):
return
None
...
...
theano/sandbox/cuda/blas.py
浏览文件 @
a668c6c5
...
...
@@ -855,15 +855,17 @@ class BaseGpuCorrMM(GpuOp):
or a pair of integers
subsample
Perform subsampling of the output (default: (1, 1)).
filter_dilation
Perform subsampling of the input, also known as dilation (default: (1, 1)).
pad
*deprecated*, now you should always use border_mode.
"""
check_broadcast
=
False
__props__
=
(
'border_mode'
,
'subsample'
)
__props__
=
(
'border_mode'
,
'subsample'
,
'filter_dilation'
)
def
__init__
(
self
,
border_mode
=
"valid"
,
subsample
=
(
1
,
1
),
pad
=
(
0
,
0
)):
def
__init__
(
self
,
border_mode
=
"valid"
,
subsample
=
(
1
,
1
),
filter_dilation
=
(
1
,
1
),
pad
=
(
0
,
0
)):
if
pad
!=
(
0
,
0
):
_logger
.
warning
(
'do not use pad for BaseGpuCorrMM; please set padding in '
...
...
@@ -885,7 +887,10 @@ class BaseGpuCorrMM(GpuOp):
self
.
border_mode
=
border_mode
if
len
(
subsample
)
!=
2
:
raise
ValueError
(
"subsample must have two elements"
)
self
.
subsample
=
subsample
if
len
(
filter_dilation
)
!=
2
:
raise
ValueError
(
"filter_dilation must have two elements"
)
self
.
subsample
=
tuple
(
subsample
)
self
.
filter_dilation
=
tuple
(
filter_dilation
)
@property
def
pad
(
self
):
...
...
@@ -894,10 +899,11 @@ class BaseGpuCorrMM(GpuOp):
return
(
0
,
0
)
def
__str__
(
self
):
return
'
%
s{
%
s,
%
s}'
%
(
return
'
%
s{
%
s,
%
s
,
%
s
}'
%
(
self
.
__class__
.
__name__
,
self
.
border_mode
,
str
(
self
.
subsample
))
str
(
self
.
subsample
),
str
(
self
.
filter_dilation
))
def
flops
(
self
,
inp
,
outp
):
"""
...
...
@@ -922,7 +928,7 @@ class BaseGpuCorrMM(GpuOp):
def
c_code_cache_version
(
self
):
# raise this whenever modifying any of the support_code_files
return
(
0
,
2
4
)
return
(
0
,
2
6
)
def
c_support_code_apply
(
self
,
node
,
nodename
):
# REMEMBER TO RAISE c_code_cache_version when changing any of
...
...
@@ -976,6 +982,7 @@ class BaseGpuCorrMM(GpuOp):
"""
dH
,
dW
=
self
.
subsample
dilH
,
dilW
=
self
.
filter_dilation
if
self
.
border_mode
==
"half"
:
padH
=
padW
=
-
1
elif
self
.
border_mode
==
"full"
:
...
...
@@ -1022,6 +1029,8 @@ class BaseGpuCorrMM(GpuOp):
// Optional args
int dH =
%(dH)
s;
int dW =
%(dW)
s;
int dilH =
%(dilH)
s;
int dilW =
%(dilW)
s;
int padH =
%(padH)
s;
int padW =
%(padW)
s;
...
...
@@ -1045,39 +1054,43 @@ class BaseGpuCorrMM(GpuOp):
}
else if (padH == -2) {
// vertical full padding, we can infer the kernel height
kH =
2 - CudaNdarray_HOST_DIMS(bottom)[2] + (CudaNdarray_HOST_DIMS(top)[2] - 1) * dH
;
kH =
(2 - CudaNdarray_HOST_DIMS(bottom)[2] + (CudaNdarray_HOST_DIMS(top)[2] - 1)*dH - 1) / dilH + 1
;
}
else {
// explicit padding, we can infer the kernel height
kH =
CudaNdarray_HOST_DIMS(bottom)[2] + 2*padH - (CudaNdarray_HOST_DIMS(top)[2] - 1) * dH
;
kH =
(CudaNdarray_HOST_DIMS(bottom)[2] + 2*padH - (CudaNdarray_HOST_DIMS(top)[2] - 1)*dH - 1) / dilH + 1
;
}
if ((dW != 1) || (padW == -1)) {
kW =
%(width)
s;
}
else if (padW == -2) {
kW =
2 - CudaNdarray_HOST_DIMS(bottom)[3] + (CudaNdarray_HOST_DIMS(top)[3] - 1) * dW
;
kW =
(2 - CudaNdarray_HOST_DIMS(bottom)[3] + (CudaNdarray_HOST_DIMS(top)[3] - 1) * dW - 1) / dilW + 1
;
}
else {
kW =
CudaNdarray_HOST_DIMS(bottom)[3] + 2*padW - (CudaNdarray_HOST_DIMS(top)[3] - 1) * dW
;
kW =
(CudaNdarray_HOST_DIMS(bottom)[3] + 2*padW - (CudaNdarray_HOST_DIMS(top)[3] - 1) * dW - 1) / dilW + 1
;
}
}
// Implicit dilated kernel size
int dil_kH = (kH - 1) * dilH + 1;
int dil_kW = (kW - 1) * dilW + 1;
// Auto-padding if requested
if (padH == -1) { // vertical half padding
padH = kH / 2;
padH =
dil_
kH / 2;
}
else if (padH == -2) { // vertical full padding
padH = kH - 1;
padH =
dil_
kH - 1;
}
else if (padH < 0) {
PyErr_SetString(PyExc_ValueError, "BaseGpuCorrMM: padH must be >= -2");
%(fail)
s
}
if (padW == -1) { // horizontal half padding
padW = kW / 2;
padW =
dil_
kW / 2;
}
else if (padW == -2) { // horizontal full padding
padW = kW - 1;
padW =
dil_
kW - 1;
}
else if (padW < 0) {
PyErr_SetString(PyExc_ValueError, "BaseGpuCorrMM: padW must be >= -2");
...
...
@@ -1089,15 +1102,15 @@ class BaseGpuCorrMM(GpuOp):
switch(direction) {
case 0: // forward pass
// output is top: (batchsize, num_filters, height, width)
// height and width: top = (bottom + 2*pad -
weight
) / sample + 1
// height and width: top = (bottom + 2*pad -
((weight-1)*dil + 1)
) / sample + 1
out_dim[0] = CudaNdarray_HOST_DIMS(bottom)[0];
out_dim[1] = CudaNdarray_HOST_DIMS(weights)[0];
out_dim[2] = (CudaNdarray_HOST_DIMS(bottom)[2] + 2*padH -
CudaNdarray_HOST_DIMS(weights)[2]
) / dH + 1;
out_dim[3] = (CudaNdarray_HOST_DIMS(bottom)[3] + 2*padW -
CudaNdarray_HOST_DIMS(weights)[3]
) / dW + 1;
out_dim[2] = (CudaNdarray_HOST_DIMS(bottom)[2] + 2*padH -
((CudaNdarray_HOST_DIMS(weights)[2]-1)*dilH + 1)
) / dH + 1;
out_dim[3] = (CudaNdarray_HOST_DIMS(bottom)[3] + 2*padW -
((CudaNdarray_HOST_DIMS(weights)[3]-1)*dilW + 1)
) / dW + 1;
break;
case 1: // backprop wrt. weights
// output is weights: (num_filters, num_channels, height, width)
// height and width: weights =
bottom + 2*pad - (top - 1) * sample
// height and width: weights =
(bottom + 2*pad - (top - 1) * sample - 1) / dil + 1
out_dim[0] = CudaNdarray_HOST_DIMS(top)[1];
out_dim[1] = CudaNdarray_HOST_DIMS(bottom)[1];
out_dim[2] = kH; // already inferred further above
...
...
@@ -1105,11 +1118,11 @@ class BaseGpuCorrMM(GpuOp):
break;
case 2: // backprop wrt. inputs
// output is bottom: (batchsize, num_channels, height, width)
// height and width: bottom = (top - 1) * sample +
weights
- 2*pad
// height and width: bottom = (top - 1) * sample +
(weights-1)*dil + 1
- 2*pad
out_dim[0] = CudaNdarray_HOST_DIMS(top)[0];
out_dim[1] = CudaNdarray_HOST_DIMS(weights)[1];
out_dim[2] = (dH != 1) ?
%(height)
s : (CudaNdarray_HOST_DIMS(top)[2] - 1) * dH +
CudaNdarray_HOST_DIMS(weights)[2]
- 2*padH;
out_dim[3] = (dW != 1) ?
%(width)
s : (CudaNdarray_HOST_DIMS(top)[3] - 1) * dW +
CudaNdarray_HOST_DIMS(weights)[3]
- 2*padW;
out_dim[2] = (dH != 1) ?
%(height)
s : (CudaNdarray_HOST_DIMS(top)[2] - 1) * dH +
(CudaNdarray_HOST_DIMS(weights)[2]-1)*dilH + 1
- 2*padH;
out_dim[3] = (dW != 1) ?
%(width)
s : (CudaNdarray_HOST_DIMS(top)[3] - 1) * dW +
(CudaNdarray_HOST_DIMS(weights)[3]-1)*dilW + 1
- 2*padW;
break;
default:
PyErr_SetString(PyExc_ValueError, "BaseGpuCorrMM: direction must be 0, 1, or 2
\\
n");
...
...
@@ -1137,7 +1150,7 @@ class BaseGpuCorrMM(GpuOp):
}
// Call CUDA code
out2 = corrMM(
%(bottom)
s,
%(weights)
s,
%(top)
s, direction, dH, dW, padH, padW);
out2 = corrMM(
%(bottom)
s,
%(weights)
s,
%(top)
s, direction, dH, dW,
dilH, dilW,
padH, padW);
if (out2==NULL){
%(fail)
s
}
...
...
@@ -1168,6 +1181,10 @@ class GpuCorrMM(BaseGpuCorrMM):
`(sv, sh)` is equivalent to `GpuCorrMM(...)(...)[:,:,::sv, ::sh]`,
but faster.
Set to `(1, 1)` to disable subsampling.
filter_dilation
The filter dilation operation applied to each input image.
Should be a tuple with 2 elements.
Set to `(1, 1)` to disable filter dilation.
pad
Deprecated alias for `border_mode`.
...
...
@@ -1198,8 +1215,10 @@ class GpuCorrMM(BaseGpuCorrMM):
"""
def
__init__
(
self
,
border_mode
=
"valid"
,
subsample
=
(
1
,
1
),
filter_dilation
=
(
1
,
1
),
pad
=
(
0
,
0
)):
super
(
GpuCorrMM
,
self
)
.
__init__
(
border_mode
,
subsample
,
pad
)
super
(
GpuCorrMM
,
self
)
.
__init__
(
border_mode
,
subsample
,
filter_dilation
,
pad
)
def
make_node
(
self
,
img
,
kern
):
img
=
as_cuda_ndarray_variable
(
img
)
...
...
@@ -1223,9 +1242,13 @@ class GpuCorrMM(BaseGpuCorrMM):
bottom
,
weights
=
inp
top
,
=
grads
top
=
gpu_contiguous
(
top
)
d_bottom
=
GpuCorrMM_gradInputs
(
self
.
border_mode
,
self
.
subsample
)(
d_bottom
=
GpuCorrMM_gradInputs
(
self
.
border_mode
,
self
.
subsample
,
self
.
filter_dilation
)(
weights
,
top
,
bottom
.
shape
[
-
2
:])
d_weights
=
GpuCorrMM_gradWeights
(
self
.
border_mode
,
self
.
subsample
)(
d_weights
=
GpuCorrMM_gradWeights
(
self
.
border_mode
,
self
.
subsample
,
self
.
filter_dilation
)(
bottom
,
top
,
weights
.
shape
[
-
2
:])
return
d_bottom
,
d_weights
...
...
@@ -1243,8 +1266,12 @@ class GpuCorrMM_gradWeights(BaseGpuCorrMM):
def
__init__
(
self
,
border_mode
=
"valid"
,
subsample
=
(
1
,
1
),
filter_dilation
=
(
1
,
1
),
pad
=
(
0
,
0
)):
super
(
GpuCorrMM_gradWeights
,
self
)
.
__init__
(
border_mode
,
subsample
,
pad
)
super
(
GpuCorrMM_gradWeights
,
self
)
.
__init__
(
border_mode
,
subsample
,
filter_dilation
,
pad
)
def
make_node
(
self
,
img
,
topgrad
,
shape
=
None
):
img
=
as_cuda_ndarray_variable
(
img
)
...
...
@@ -1278,12 +1305,13 @@ class GpuCorrMM_gradWeights(BaseGpuCorrMM):
bottom
,
top
=
inp
[:
2
]
weights
,
=
grads
weights
=
gpu_contiguous
(
weights
)
d_bottom
=
GpuCorrMM_gradInputs
(
self
.
border_mode
,
self
.
subsample
)(
weights
,
top
,
bottom
.
shape
[
-
2
:])
d_bottom
=
GpuCorrMM_gradInputs
(
self
.
border_mode
,
self
.
subsample
,
self
.
filter_dilation
)(
weights
,
top
,
bottom
.
shape
[
-
2
:])
d_top
=
GpuCorrMM
(
self
.
border_mode
,
self
.
subsample
)(
bottom
,
weights
)
self
.
border_mode
,
self
.
subsample
,
self
.
filter_dilation
)(
bottom
,
weights
)
d_height_width
=
(
theano
.
gradient
.
DisconnectedType
()(),
)
*
2
if
len
(
inp
)
==
4
else
()
...
...
@@ -1309,8 +1337,10 @@ class GpuCorrMM_gradInputs(BaseGpuCorrMM):
def
__init__
(
self
,
border_mode
=
"valid"
,
subsample
=
(
1
,
1
),
filter_dilation
=
(
1
,
1
),
pad
=
(
0
,
0
)):
super
(
GpuCorrMM_gradInputs
,
self
)
.
__init__
(
border_mode
,
subsample
,
pad
)
super
(
GpuCorrMM_gradInputs
,
self
)
.
__init__
(
border_mode
,
subsample
,
filter_dilation
,
pad
)
def
make_node
(
self
,
kern
,
topgrad
,
shape
=
None
):
kern
=
as_cuda_ndarray_variable
(
kern
)
...
...
@@ -1341,11 +1371,14 @@ class GpuCorrMM_gradInputs(BaseGpuCorrMM):
weights
,
top
=
inp
[:
2
]
bottom
,
=
grads
bottom
=
gpu_contiguous
(
bottom
)
d_weights
=
GpuCorrMM_gradWeights
(
self
.
border_mode
,
self
.
subsample
)(
bottom
,
top
,
weights
.
shape
[
-
2
:])
d_top
=
GpuCorrMM
(
self
.
border_mode
,
self
.
subsample
)(
bottom
,
weights
)
d_weights
=
GpuCorrMM_gradWeights
(
self
.
border_mode
,
self
.
subsample
,
self
.
filter_dilation
)(
bottom
,
top
,
weights
.
shape
[
-
2
:])
d_top
=
GpuCorrMM
(
self
.
border_mode
,
self
.
subsample
,
self
.
filter_dilation
)(
bottom
,
weights
)
d_height_width
=
(
theano
.
gradient
.
DisconnectedType
()(),
)
*
2
if
len
(
inp
)
==
4
else
()
...
...
@@ -1871,12 +1904,14 @@ class GpuCorr3dMM_gradInputs(BaseGpuCorr3dMM):
weights
,
top
=
inp
[:
2
]
bottom
,
=
grads
bottom
=
gpu_contiguous
(
bottom
)
d_weights
=
GpuCorr3dMM_gradWeights
(
self
.
border_mode
,
self
.
subsample
,
self
.
pad
)(
bottom
,
top
,
weights
.
shape
[
-
3
:])
d_top
=
GpuCorr3dMM
(
self
.
border_mode
,
self
.
subsample
,
self
.
pad
)(
bottom
,
weights
)
d_weights
=
GpuCorr3dMM_gradWeights
(
self
.
border_mode
,
self
.
subsample
,
self
.
pad
)(
bottom
,
top
,
weights
.
shape
[
-
3
:])
d_top
=
GpuCorr3dMM
(
self
.
border_mode
,
self
.
subsample
,
self
.
pad
)(
bottom
,
weights
)
d_height_width_depth
=
(
theano
.
gradient
.
DisconnectedType
()(),)
\
*
3
if
len
(
inp
)
==
5
else
()
return
(
d_weights
,
d_top
)
+
d_height_width_depth
...
...
theano/sandbox/cuda/corr_gemm.cu
浏览文件 @
a668c6c5
...
...
@@ -52,6 +52,39 @@ inline int GET_BLOCKS(const int N) {
// (borrowed from Caffe: https://github.com/BVLC/caffe/blob/master/src/caffe/util/im2col.cu)
// Kernels for fast unfold + copy
// CUDA kernel for the case of dilation.
//
// Unfolds one single-channel image patch per work item into the column
// buffer, sampling the image with a dilated (spaced-out) kernel footprint.
//
// Parameters:
//   n                      number of work items iterated by CUDA_KERNEL_LOOP;
//                          each item is one (channel, output row, output col)
//   data_im                input image data, indexed as
//                          (c_im * height + h) * width + w
//   height, width          spatial size of the input image
//   kernel_h, kernel_w     number of kernel taps along each axis (undilated)
//   dilation_h, dilation_w spacing, in image pixels, between consecutive taps
//   pad_h, pad_w           implicit zero padding on each border
//   stride_h, stride_w     step between consecutive output positions
//   height_col, width_col  spatial size of the output (column) grid
//   data_col               output column buffer, indexed as
//                          (row * height_col + h_col) * width_col + w_col
__global__ void dilated_im2col_kernel(const int n, const float* data_im,
const int height, const int width, const int kernel_h, const int kernel_w,
const int dilation_h, const int dilation_w,
const int pad_h, const int pad_w,
const int stride_h, const int stride_w,
const int height_col, const int width_col,
float* data_col) {
// CUDA_KERNEL_LOOP is a macro defined elsewhere in this file; it yields the
// work-item indices (< n) that this thread is responsible for.
CUDA_KERNEL_LOOP(index, n) {
// Decompose the flat index into (c_im, h_col, w_col), w_col fastest.
const int h_index = index / width_col;
const int h_col = h_index % height_col;
const int w_col = index % width_col;
const int c_im = h_index / height_col;
// First column-buffer row belonging to this input channel: every channel
// owns kernel_h * kernel_w consecutive rows, one per kernel tap.
const int c_col = c_im * kernel_h * kernel_w;
// Image coordinates of the first (top-left) kernel tap; these are negative
// when the tap falls inside the padding.
const int h_offset = h_col * stride_h - pad_h;
const int w_offset = w_col * stride_w - pad_w;
// Destination: row c_col, position (h_col, w_col) of the column buffer.
float* data_col_ptr = data_col;
data_col_ptr += (c_col * height_col + h_col) * width_col + w_col;
// Source: image element at (c_im, h_offset, w_offset). With padding this
// address can lie outside the image, so it is only dereferenced below after
// the bounds check succeeds.
const float* data_im_ptr = data_im;
data_im_ptr += (c_im * height + h_offset) * width + w_offset;
for (int i = 0; i < kernel_h; ++i) {
for (int j = 0; j < kernel_w; ++j) {
// Image coordinates of tap (i, j), spaced by the dilation factors.
int h_im = h_offset + i * dilation_h;
int w_im = w_offset + j * dilation_w;
// Copy the pixel if the tap is inside the image, otherwise write the
// zero-padding value.
*data_col_ptr =
(h_im >= 0 && w_im >= 0 && h_im < height && w_im < width) ?
data_im_ptr[i * dilation_h * width + j * dilation_w] : 0;
// Advance to the next kernel tap's row in the column buffer.
data_col_ptr += height_col * width_col;
}
}
}
}
__global__ void im2col_kernel(const int n, const float* data_im,
const int height, const int width, const int kernel_h, const int kernel_w,
const int pad_h, const int pad_w,
...
...
@@ -59,23 +92,24 @@ __global__ void im2col_kernel(const int n, const float* data_im,
const int height_col, const int width_col,
float* data_col) {
CUDA_KERNEL_LOOP(index, n) {
int w_out = index %
width_col;
int h_index = index / width
_col;
int h_out = h_index % height
_col;
int channel_in
= h_index / height_col;
int channel_out = channel_in
* kernel_h * kernel_w;
int h_in = h_out
* stride_h - pad_h;
int w_in = w_out
* stride_w - pad_w;
const int h_index = index /
width_col;
const int h_col = h_index % height
_col;
const int w_col = index % width
_col;
const int c_im
= h_index / height_col;
const int c_col = c_im
* kernel_h * kernel_w;
const int h_offset = h_col
* stride_h - pad_h;
const int w_offset = w_col
* stride_w - pad_w;
float* data_col_ptr = data_col;
data_col_ptr += (c
hannel_out * height_col + h_out) * width_col + w_out
;
data_col_ptr += (c
_col * height_col + h_col) * width_col + w_col
;
const float* data_im_ptr = data_im;
data_im_ptr += (c
hannel_in * height + h_in) * width + w_in
;
data_im_ptr += (c
_im * height + h_offset) * width + w_offset
;
for (int i = 0; i < kernel_h; ++i) {
for (int j = 0; j < kernel_w; ++j) {
int h = h_in + i;
int w = w_in + j;
*data_col_ptr = (h >= 0 && w >= 0 && h < height && w < width) ?
data_im_ptr[i * width + j] : 0;
int h_im = h_offset + i ;
int w_im = w_offset + j ;
*data_col_ptr =
(h_im >= 0 && w_im >= 0 && h_im < height && w_im < width) ?
data_im_ptr[i * width + j] : 0;
data_col_ptr += height_col * width_col;
}
}
...
...
@@ -84,52 +118,97 @@ __global__ void im2col_kernel(const int n, const float* data_im,
void im2col(const float* data_im, const int channels,
const int height, const int width, const int kernel_h, const int kernel_w,
const int dilation_h, const int dilation_w,
const int pad_h, const int pad_w,
const int stride_h, const int stride_w,
float* data_col) {
// We are going to launch channels * height_col * width_col kernels, each
// kernel responsible for copying a single-channel grid.
int height_col = (height + 2 * pad_h - kernel_h) / stride_h + 1;
int width_col = (width + 2 * pad_w - kernel_w) / stride_w + 1;
int dil_kernel_h = (kernel_h - 1) * dilation_h + 1;
int dil_kernel_w = (kernel_w - 1) * dilation_w + 1;
int height_col = (height + 2 * pad_h - dil_kernel_h) / stride_h + 1;
int width_col = (width + 2 * pad_w - dil_kernel_w) / stride_w + 1;
int num_kernels = channels * height_col * width_col;
im2col_kernel<<<GET_BLOCKS(num_kernels),
if(dilation_h != 1 || dilation_w != 1){
dilated_im2col_kernel<<<GET_BLOCKS(num_kernels),
CUDA_NUM_THREADS>>>(
num_kernels, data_im, height, width, kernel_h, kernel_w,
pad_h,
pad_w, stride_h, stride_w, height_col,
num_kernels, data_im, height, width, kernel_h, kernel_w,
dilation_h, dilation_w, pad_h,
pad_w, stride_h, stride_w, height_col,
width_col, data_col);
}
else{
im2col_kernel<<<GET_BLOCKS(num_kernels),
CUDA_NUM_THREADS>>>(
num_kernels, data_im, height, width, kernel_h, kernel_w,
pad_h, pad_w, stride_h, stride_w, height_col,
width_col, data_col);
}
}
__global__ void col2im_kernel(const int n, const float* data_col,
// CUDA kernel for the case of dilation
__global__ void dilated_col2im_kernel(const int n, const float* data_col,
const int height, const int width, const int channels,
const int patch_h, const int patch_w,
const int kernel_h, const int kernel_w,
const int dilation_h, const int dilation_w,
const int pad_h, const int pad_w,
const int stride_h, const int stride_w,
const int height_col, const int width_col,
float* data_im) {
CUDA_KERNEL_LOOP(index, n) {
float val = 0;
int w = index % width + pad_w;
int h = (index / width) % height + pad_h;
int c = index / (width * height);
const int w_im = index % width + pad_w;
const int h_im = (index / width) % height + pad_h;
const int c_im = index / (width * height);
int kernel_extent_w = (kernel_w - 1) * dilation_w + 1;
int kernel_extent_h = (kernel_h - 1) * dilation_h + 1;
// compute the start and end of the output
int w_col_start = (w < patch_w) ? 0 : (w - patch_w) / stride_w + 1;
int w_col_end = min(w / stride_w + 1, width_col);
int h_col_start = (h < patch_h) ? 0 : (h - patch_h) / stride_h + 1;
int h_col_end = min(h / stride_h + 1, height_col);
/*
for (int h_col = h_col_start; h_col < h_col_end; ++h_col) {
for (int w_col = w_col_start; w_col < w_col_end; ++w_col) {
// the col location: [c * width * height + h_out, w_out]
int c_col = c * patch_h * patch_w + (h - h_col * stride_h) * ksize
+ (w - w_col * stride_w);
val += data_col[(c_col * height_col + h_col) * width_col + w_col];
const int w_col_start =
(w_im < kernel_extent_w) ? 0 : (w_im - kernel_extent_w) / stride_w + 1;
const int w_col_end = min(w_im / stride_w + 1, width_col);
const int h_col_start =
(h_im < kernel_extent_h) ? 0 : (h_im - kernel_extent_h) / stride_h + 1;
const int h_col_end = min(h_im / stride_h + 1, height_col);
// TODO: use LCM of stride and dilation to avoid unnecessary loops
for (int h_col = h_col_start; h_col < h_col_end; h_col += 1) {
for (int w_col = w_col_start; w_col < w_col_end; w_col += 1) {
int h_k = (h_im - h_col * stride_h);
int w_k = (w_im - w_col * stride_w);
if (h_k % dilation_h == 0 && w_k % dilation_w == 0) {
h_k /= dilation_h;
w_k /= dilation_w;
int data_col_index = (((c_im * kernel_h + h_k) * kernel_w + w_k) *
height_col + h_col) * width_col + w_col;
val += data_col[data_col_index];
}
}
}
*/
// equivalent implementation
data_im[index] = val;
}
}
__global__ void col2im_kernel(const int n, const float* data_col,
const int height, const int width, const int channels,
const int kernel_h, const int kernel_w,
const int pad_h, const int pad_w,
const int stride_h, const int stride_w,
const int height_col, const int width_col,
float* data_im) {
CUDA_KERNEL_LOOP(index, n) {
float val = 0;
const int w_im = index % width + pad_w;
const int h_im = (index / width) % height + pad_h;
const int c_im = index / (width * height);
// compute the start and end of the output
const int w_col_start =
(w_im < kernel_w) ? 0 : (w_im - kernel_w) / stride_w + 1;
const int w_col_end = min(w_im / stride_w + 1, width_col);
const int h_col_start =
(h_im < kernel_h) ? 0 : (h_im - kernel_h) / stride_h + 1;
const int h_col_end = min(h_im / stride_h + 1, height_col);
// equivalent implementation, no dilation
int offset =
(c * patch_h * patch_w + h * patch_w + w
) * height_col * width_col;
int coeff_h_col = (1 - stride_h *
patch
_w * height_col) * width_col;
(c_im * kernel_h * kernel_w + h_im * kernel_w + w_im
) * height_col * width_col;
int coeff_h_col = (1 - stride_h *
kernel
_w * height_col) * width_col;
int coeff_w_col = (1 - stride_w * height_col * width_col);
for (int h_col = h_col_start; h_col < h_col_end; ++h_col) {
for (int w_col = w_col_start; w_col < w_col_end; ++w_col) {
...
...
@@ -142,18 +221,30 @@ __global__ void col2im_kernel(const int n, const float* data_col,
void col2im(const float* data_col, const int channels,
const int height, const int width, const int patch_h, const int patch_w,
const int dilation_h, const int dilation_w,
const int pad_h, const int pad_w, const int stride_h,
const int stride_w, float* data_im) {
int height_col = (height + 2 * pad_h - patch_h) / stride_h + 1;
int width_col = (width + 2 * pad_w - patch_w) / stride_w + 1;
int dil_patch_h = (patch_h - 1) * dilation_h + 1;
int dil_patch_w = (patch_w - 1) * dilation_w + 1;
int height_col = (height + 2 * pad_h - dil_patch_h) / stride_h + 1;
int width_col = (width + 2 * pad_w - dil_patch_w) / stride_w + 1;
int num_kernels = channels * height * width;
// To avoid involving atomic operations, we will launch one kernel per
// bottom dimension, and then in the kernel add up the top dimensions.
col2im_kernel<<<GET_BLOCKS(num_kernels),
if(dilation_h != 1 || dilation_w != 1){
dilated_col2im_kernel<<<GET_BLOCKS(num_kernels),
CUDA_NUM_THREADS>>>(
num_kernels, data_col, height, width, channels, patch_h, patch_w,
dilation_h, dilation_w, pad_h, pad_w, stride_h, stride_w,
height_col, width_col, data_im);
}
else{
col2im_kernel<<<GET_BLOCKS(num_kernels),
CUDA_NUM_THREADS>>>(
num_kernels, data_col, height, width, channels, patch_h, patch_w,
pad_h, pad_w, stride_h, stride_w,
height_col, width_col, data_im);
}
}
...
...
@@ -167,6 +258,8 @@ CudaNdarray* corrMM(CudaNdarray *const bottom,
const int direction,
const int dH = 1,
const int dW = 1,
const int dilH = 1,
const int dilW = 1,
const int padH = 0,
const int padW = 0)
{
...
...
@@ -236,9 +329,12 @@ CudaNdarray* corrMM(CudaNdarray *const bottom,
"GpuCorrMM images and kernel must have the same stack size\n");
return NULL;
}
// implicit dilated filter
const int dil_kH = (kH - 1) * dilH + 1;
const int dil_kW = (kW - 1) * dilW + 1;
// top: (batchSize, nFilters, topHeight, topWidth)
const int topHeight = (bottomHeight + 2*padH - kH) / dH + 1;
const int topWidth = (bottomWidth + 2*padW - kW) / dW + 1;
const int topHeight = (bottomHeight + 2*padH -
dil_
kH) / dH + 1;
const int topWidth = (bottomWidth + 2*padW -
dil_
kW) / dW + 1;
if (batchSize != CudaNdarray_HOST_DIMS(top)[0] ||
nFilters != CudaNdarray_HOST_DIMS(top)[1] ||
topHeight != CudaNdarray_HOST_DIMS(top)[2] ||
...
...
@@ -286,7 +382,8 @@ CudaNdarray* corrMM(CudaNdarray *const bottom,
for (int n = 0; n < batchSize; n++) {
// First, im2col
im2col(bottom->devdata + n * bottom_stride, nChannels, bottomHeight,
bottomWidth, kH, kW, padH, padW, dH, dW, col->devdata);
bottomWidth, kH, kW, dilH, dilW,
padH, padW, dH, dW, col->devdata);
cudaError_t err = cudaGetLastError();
if (err != cudaSuccess) {
PyErr_Format(PyExc_RuntimeError,
...
...
@@ -353,7 +450,8 @@ CudaNdarray* corrMM(CudaNdarray *const bottom,
for (int n = 0; n < batchSize; n++) {
// First, im2col
im2col(bottom->devdata + n * bottom_stride, nChannels, bottomHeight,
bottomWidth, kH, kW, padH, padW, dH, dW, col->devdata);
bottomWidth, kH, kW, dilH, dilW,
padH, padW, dH, dW, col->devdata);
cudaError_t err = cudaGetLastError();
if (err != cudaSuccess) {
PyErr_Format(PyExc_RuntimeError,
...
...
@@ -438,7 +536,8 @@ CudaNdarray* corrMM(CudaNdarray *const bottom,
}
// col2im back to the data
col2im(col->devdata, nChannels, bottomHeight, bottomWidth,
kH, kW, padH, padW, dH, dW, bottom->devdata + n * bottom_stride);
kH, kW, dilH, dilW, padH, padW,
dH, dW, bottom->devdata + n * bottom_stride);
cudaError_t err = cudaGetLastError();
if (err != cudaSuccess) {
PyErr_Format(PyExc_RuntimeError,
...
...
theano/sandbox/cuda/dnn.py
浏览文件 @
a668c6c5
...
...
@@ -3116,6 +3116,8 @@ def local_abstractconv_cudnn(node):
AbstractConv2d_gradWeights
,
AbstractConv2d_gradInputs
))):
return
None
if
(
node
.
op
.
filter_dilation
!=
(
1
,
1
)):
return
None
inp1
=
node
.
inputs
[
0
]
inp2
=
node
.
inputs
[
1
]
...
...
@@ -3123,6 +3125,7 @@ def local_abstractconv_cudnn(node):
if
(
not
isinstance
(
inp1
.
type
,
CudaNdarrayType
)
or
not
isinstance
(
inp2
.
type
,
CudaNdarrayType
)):
return
None
if
not
dnn_available
():
return
None
...
...
theano/sandbox/cuda/opt.py
浏览文件 @
a668c6c5
...
...
@@ -1622,7 +1622,8 @@ def local_conv_gemm(node):
# because we are not allowed to replace a CudaNdarray with
# a DimShuffle instance in a graph optimization)
rval
=
theano
.
sandbox
.
cuda
.
as_cuda_ndarray_variable
(
GpuCorrMM_gradWeights
(
border_mode
,
subsample
)(
GpuCorrMM_gradWeights
(
border_mode
,
subsample
)(
gpu_contiguous
(
img
.
dimshuffle
(
1
,
0
,
2
,
3
)),
gpu_contiguous
(
kern
.
dimshuffle
(
1
,
0
,
2
,
3
))
)
.
dimshuffle
(
1
,
0
,
2
,
3
))
...
...
@@ -2769,28 +2770,33 @@ def local_abstractconv_gemm(node):
border_mode
=
node
.
op
.
border_mode
subsample
=
node
.
op
.
subsample
if
(
border_mode
==
'full'
)
and
(
subsample
==
(
1
,
1
)):
filter_dilation
=
node
.
op
.
filter_dilation
if
((
border_mode
==
'full'
)
and
(
subsample
==
(
1
,
1
))):
if
not
node
.
op
.
filter_flip
:
kern
=
kern
[:,
:,
::
-
1
,
::
-
1
]
# need to dimshuffle the kernel for full convolution
kern
=
kern
.
dimshuffle
(
1
,
0
,
2
,
3
)
# call GpuCorrMM_gradInputs
rval
=
GpuCorrMM_gradInputs
(
'valid'
,
subsample
)(
rval
=
GpuCorrMM_gradInputs
(
'valid'
,
subsample
,
filter_dilation
)(
gpu_contiguous
(
kern
),
gpu_contiguous
(
img
))
else
:
# need to flip the kernel if necessary
if
node
.
op
.
filter_flip
:
kern
=
kern
[:,
:,
::
-
1
,
::
-
1
]
# By default use GpuCorrMM
rval
=
GpuCorrMM
(
border_mode
,
subsample
)(
gpu_contiguous
(
img
),
gpu_contiguous
(
kern
))
rval
=
GpuCorrMM
(
border_mode
,
subsample
,
filter_dilation
)(
gpu_contiguous
(
img
),
gpu_contiguous
(
kern
))
# call GpuCorrMM_gradWeights if good
# (the latter is faster if batchsize * kernelHeight * kernelWidth
# is larger than inputChannels * outputHeight * outputWidth.
# GpuConv does not always store information on the batchsize and
# channels, though, so we only use what information we have.)
if
((
subsample
==
(
1
,
1
))
and
if
((
subsample
==
(
1
,
1
))
and
(
filter_dilation
==
(
1
,
1
))
and
(
node
.
op
.
imshp
is
not
None
)
and
(
None
not
in
node
.
op
.
imshp
[
-
2
:])
and
(
node
.
op
.
kshp
is
not
None
)
and
...
...
@@ -2810,7 +2816,9 @@ def local_abstractconv_gemm(node):
# because we are not allowed to replace a CudaNdarray with
# a DimShuffle instance in a graph optimization)
rval
=
theano
.
sandbox
.
cuda
.
as_cuda_ndarray_variable
(
GpuCorrMM_gradWeights
(
border_mode
,
subsample
)(
GpuCorrMM_gradWeights
(
border_mode
,
subsample
,
filter_dilation
)(
gpu_contiguous
(
img
.
dimshuffle
(
1
,
0
,
2
,
3
)),
gpu_contiguous
(
kern
.
dimshuffle
(
1
,
0
,
2
,
3
))
)
.
dimshuffle
(
1
,
0
,
2
,
3
))
...
...
@@ -2827,7 +2835,8 @@ def local_abstractconv_gradweight_gemm(node):
return
None
rval
=
GpuCorrMM_gradWeights
(
border_mode
=
node
.
op
.
border_mode
,
subsample
=
node
.
op
.
subsample
)(
subsample
=
node
.
op
.
subsample
,
filter_dilation
=
node
.
op
.
filter_dilation
)(
gpu_contiguous
(
img
),
gpu_contiguous
(
topgrad
),
shape
)
if
node
.
op
.
filter_flip
:
rval
=
rval
[:,
:,
::
-
1
,
::
-
1
]
...
...
@@ -2849,7 +2858,8 @@ def local_abstractconv_gradinputs_gemm(node):
kern
=
kern
[:,
:,
::
-
1
,
::
-
1
]
rval
=
GpuCorrMM_gradInputs
(
border_mode
=
node
.
op
.
border_mode
,
subsample
=
node
.
op
.
subsample
)(
subsample
=
node
.
op
.
subsample
,
filter_dilation
=
node
.
op
.
filter_dilation
)(
gpu_contiguous
(
kern
),
gpu_contiguous
(
topgrad
),
shape
)
return
[
rval
]
...
...
@@ -2870,10 +2880,12 @@ conv_groupopt.register('local_abstractconv_dnn',
conv_groupopt
.
register
(
'local_abstractconv_gemm'
,
local_abstractconv_gemm
,
30
,
'conv_gemm'
,
'gpu'
,
'fast_compile'
,
'fast_run'
)
conv_groupopt
.
register
(
'local_abstractconv_gradweight_gemm'
,
local_abstractconv_gradweight_gemm
,
30
,
'conv_gemm'
,
'gpu'
,
'fast_compile'
,
'fast_run'
)
conv_groupopt
.
register
(
'local_abstractconv_gradinputs_gemm'
,
local_abstractconv_gradinputs_gemm
,
30
,
'conv_gemm'
,
...
...
theano/sandbox/cuda/tests/test_abstractconv.py
浏览文件 @
a668c6c5
...
...
@@ -29,25 +29,30 @@ class TestDnnConv2d(test_abstract_conv.BaseTestConv2d):
self
.
provide_shape
=
[
False
]
self
.
shared
=
gpu_shared
def
tcase
(
self
,
i
,
f
,
s
,
b
,
flip
,
provide_shape
):
def
tcase
(
self
,
i
,
f
,
s
,
b
,
flip
,
provide_shape
,
fd
=
(
1
,
1
)):
if
fd
!=
(
1
,
1
):
raise
SkipTest
(
"No dilation implementation for cuDNN ConvOp."
)
if
not
dnn_available
():
raise
SkipTest
(
cuda
.
dnn
.
dnn_available
.
msg
)
mode
=
mode_with_gpu
o
=
self
.
get_output_shape
(
i
,
f
,
s
,
b
)
o
=
self
.
get_output_shape
(
i
,
f
,
s
,
b
,
fd
)
self
.
run_fwd
(
inputs_shape
=
i
,
filters_shape
=
f
,
subsample
=
s
,
verify_grad
=
True
,
mode
=
mode
,
provide_shape
=
provide_shape
,
border_mode
=
b
,
filter_flip
=
flip
,
target_op
=
GpuDnnConv
)
filter_flip
=
flip
,
target_op
=
GpuDnnConv
,
filter_dilation
=
fd
)
self
.
run_gradweight
(
inputs_shape
=
i
,
filters_shape
=
f
,
output_shape
=
o
,
subsample
=
s
,
verify_grad
=
True
,
mode
=
mode
,
provide_shape
=
provide_shape
,
border_mode
=
b
,
filter_flip
=
flip
,
target_op
=
GpuDnnConvGradW
)
filter_flip
=
flip
,
target_op
=
GpuDnnConvGradW
,
filter_dilation
=
fd
)
self
.
run_gradinput
(
inputs_shape
=
i
,
filters_shape
=
f
,
output_shape
=
o
,
subsample
=
s
,
verify_grad
=
True
,
mode
=
mode
,
provide_shape
=
provide_shape
,
border_mode
=
b
,
filter_flip
=
flip
,
target_op
=
GpuDnnConvGradI
)
filter_flip
=
flip
,
target_op
=
GpuDnnConvGradI
,
filter_dilation
=
fd
)
class
TestCorrMMConv2d
(
test_abstract_conv
.
BaseTestConv2d
):
...
...
@@ -56,28 +61,30 @@ class TestCorrMMConv2d(test_abstract_conv.BaseTestConv2d):
self
.
shared
=
gpu_shared
self
.
mode
=
mode_with_gpu
.
excluding
(
'cudnn'
)
def
tcase
(
self
,
i
,
f
,
s
,
b
,
flip
,
provide_shape
):
def
tcase
(
self
,
i
,
f
,
s
,
b
,
flip
,
provide_shape
,
fd
=
(
1
,
1
)
):
mode
=
self
.
mode
o
=
self
.
get_output_shape
(
i
,
f
,
s
,
b
)
self
.
run_fwd
(
inputs_shape
=
i
,
filters_shape
=
f
,
subsample
=
s
,
verify_grad
=
True
,
mode
=
mode
,
o
=
self
.
get_output_shape
(
i
,
f
,
s
,
b
,
fd
)
self
.
run_fwd
(
inputs_shape
=
i
,
filters_shape
=
f
,
subsample
=
s
,
verify_grad
=
True
,
mode
=
mode
,
provide_shape
=
provide_shape
,
border_mode
=
b
,
filter_flip
=
flip
,
target_op
=
(
GpuCorrMM
,
GpuCorrMM_gradWeights
,
GpuCorrMM_gradInputs
)
)
filter_flip
=
flip
,
target_op
=
(
GpuCorrMM
,
GpuCorrMM_gradWeights
,
GpuCorrMM_gradInputs
)
,
filter_dilation
=
fd
)
self
.
run_gradweight
(
inputs_shape
=
i
,
filters_shape
=
f
,
output_shape
=
o
,
subsample
=
s
,
verify_grad
=
True
,
mode
=
mode
,
provide_shape
=
provide_shape
,
border_mode
=
b
,
filter_flip
=
flip
,
target_op
=
GpuCorrMM_gradWeights
)
target_op
=
GpuCorrMM_gradWeights
,
filter_dilation
=
fd
)
self
.
run_gradinput
(
inputs_shape
=
i
,
filters_shape
=
f
,
output_shape
=
o
,
subsample
=
s
,
verify_grad
=
True
,
mode
=
mode
,
provide_shape
=
provide_shape
,
border_mode
=
b
,
filter_flip
=
flip
,
target_op
=
GpuCorrMM_gradInputs
)
target_op
=
GpuCorrMM_gradInputs
,
filter_dilation
=
fd
)
class
TestDnnConvTypes
(
test_abstract_conv
.
TestConvTypes
):
...
...
theano/tensor/nnet/__init__.py
浏览文件 @
a668c6c5
...
...
@@ -35,7 +35,7 @@ from .abstract_conv import conv2d as abstract_conv2d
def
conv2d
(
input
,
filters
,
input_shape
=
None
,
filter_shape
=
None
,
border_mode
=
'valid'
,
subsample
=
(
1
,
1
),
filter_flip
=
True
,
image_shape
=
None
,
**
kwargs
):
image_shape
=
None
,
filter_dilation
=
(
1
,
1
),
**
kwargs
):
"""
This function will build the symbolic graph for convolving a mini-batch of a
stack of 2D inputs with a set of 2D filters. The implementation is modelled
...
...
@@ -95,6 +95,10 @@ def conv2d(input, filters, input_shape=None, filter_shape=None,
image_shape: None, tuple/list of len 4 of int or Constant variable
Deprecated alias for input_shape.
filter_dilation: tuple of len 2
Factor by which to subsample (stride) the input.
Also called dilation elsewhere.
kwargs: Any other keyword arguments are accepted for backwards
compatibility, but will be ignored.
...
...
@@ -140,4 +144,5 @@ def conv2d(input, filters, input_shape=None, filter_shape=None,
" be provided at the same time."
)
return
abstract_conv2d
(
input
,
filters
,
input_shape
,
filter_shape
,
border_mode
,
subsample
,
filter_flip
)
border_mode
,
subsample
,
filter_flip
,
filter_dilation
)
theano/tensor/nnet/abstract_conv.py
浏览文件 @
a668c6c5
...
...
@@ -32,7 +32,8 @@ _logger = logging.getLogger("theano.tensor.nnet.abstract_conv")
def
get_conv_output_shape
(
image_shape
,
kernel_shape
,
border_mode
,
subsample
):
border_mode
,
subsample
,
filter_dilation
=
(
1
,
1
)):
"""
This function computes the output shape of a convolution operation.
...
...
@@ -53,6 +54,8 @@ def get_conv_output_shape(image_shape, kernel_shape,
subsample: tuple of int (symbolic or numeric). Its two or three elements
respectively correspond to the subsampling on the height and width (and
possibly depth) axes.
filter_dilation: tuple of int (symbolic or numeric). Its two elements
correspond respectively to the dilation on height and width axis.
Returns
-------
...
...
@@ -65,17 +68,19 @@ def get_conv_output_shape(image_shape, kernel_shape,
nkern
,
kshp
=
kernel_shape
[
0
],
kernel_shape
[
2
:]
if
isinstance
(
border_mode
,
tuple
):
out_shp
=
tuple
(
get_conv_shape_1axis
(
imshp
[
i
],
kshp
[
i
],
border_mode
[
i
],
subsample
[
i
])
for
i
in
range
(
len
(
subsample
)))
imshp
[
i
],
kshp
[
i
],
border_mode
[
i
],
subsample
[
i
],
filter_dilation
[
i
])
for
i
in
range
(
len
(
subsample
)))
else
:
out_shp
=
tuple
(
get_conv_shape_1axis
(
imshp
[
i
],
kshp
[
i
],
border_mode
,
subsample
[
i
])
for
i
in
range
(
len
(
subsample
)))
imshp
[
i
],
kshp
[
i
],
border_mode
,
subsample
[
i
],
filter_dilation
[
i
])
for
i
in
range
(
len
(
subsample
)))
return
(
bsize
,
nkern
)
+
out_shp
def
get_conv_shape_1axis
(
image_shape
,
kernel_shape
,
border_mode
,
subsample
):
# filter dilation set by default to 1
# for compatibility with other tests.
def
get_conv_shape_1axis
(
image_shape
,
kernel_shape
,
border_mode
,
subsample
,
dilation
=
1
):
"""
This function computes the output shape of a convolution operation.
...
...
@@ -90,6 +95,8 @@ def get_conv_shape_1axis(image_shape, kernel_shape,
the padding on the considered axis.
subsample: int. It must correspond to the subsampling on the
considered axis.
dilation: int. It must correspond to the dilation on the
considered axis.
Returns
-------
...
...
@@ -97,19 +104,22 @@ def get_conv_shape_1axis(image_shape, kernel_shape,
considered axis. None if undefined.
"""
if
None
in
[
image_shape
,
kernel_shape
,
border_mode
,
subsample
]:
if
None
in
[
image_shape
,
kernel_shape
,
border_mode
,
subsample
,
dilation
]:
return
None
# Implicit dilated kernel shape
dil_kernel_shape
=
(
kernel_shape
-
1
)
*
dilation
+
1
if
border_mode
==
"half"
:
pad
=
kernel_shape
//
2
pad
=
dil_
kernel_shape
//
2
elif
border_mode
==
"full"
:
pad
=
kernel_shape
-
1
pad
=
dil_
kernel_shape
-
1
elif
border_mode
==
"valid"
:
pad
=
0
else
:
pad
=
border_mode
if
pad
<
0
:
raise
ValueError
(
"border_mode must be >= 0"
)
out_shp
=
(
image_shape
+
2
*
pad
-
kernel_shape
)
//
subsample
+
1
out_shp
=
(
image_shape
+
2
*
pad
-
dil_
kernel_shape
)
//
subsample
+
1
return
out_shp
...
...
@@ -120,7 +130,8 @@ def conv2d(input,
filter_shape
=
None
,
border_mode
=
'valid'
,
subsample
=
(
1
,
1
),
filter_flip
=
True
):
filter_flip
=
True
,
filter_dilation
=
(
1
,
1
)):
"""This function will build the symbolic graph for convolving a mini-batch of a
stack of 2D inputs with a set of 2D filters. The implementation is modelled
after Convolutional Neural Networks (CNN).
...
...
@@ -134,7 +145,8 @@ def conv2d(input,
kshp
=
filter_shape
,
border_mode
=
border_mode
,
subsample
=
subsample
,
filter_flip
=
filter_flip
)
filter_flip
=
filter_flip
,
filter_dilation
=
filter_dilation
)
return
conv_op
(
input
,
filters
)
...
...
@@ -144,7 +156,8 @@ def conv2d_grad_wrt_inputs(output_grad,
filter_shape
=
None
,
border_mode
=
'valid'
,
subsample
=
(
1
,
1
),
filter_flip
=
True
):
filter_flip
=
True
,
filter_dilation
=
(
1
,
1
)):
"""Compute conv output gradient w.r.t its inputs
This function builds the symbolic graph for getting the
...
...
@@ -214,6 +227,9 @@ def conv2d_grad_wrt_inputs(output_grad,
referred to as a convolution, and this is the default. If
``False``, the filters are not flipped and the operation is
referred to as a cross-correlation.
filter_dilation : tuple of len 2
The filter dilation used in the forward pass.
Also known as input striding.
Returns
-------
...
...
@@ -263,7 +279,8 @@ def conv2d_grad_wrt_inputs(output_grad,
kshp
=
filter_shape
,
border_mode
=
border_mode
,
subsample
=
subsample
,
filter_flip
=
filter_flip
)
filter_flip
=
filter_flip
,
filter_dilation
=
filter_dilation
)
return
grad_input_op
(
filters
,
output_grad
,
input_shape
[
-
2
:])
...
...
@@ -274,7 +291,8 @@ def conv2d_grad_wrt_weights(input,
input_shape
=
None
,
border_mode
=
'valid'
,
subsample
=
(
1
,
1
),
filter_flip
=
True
):
filter_flip
=
True
,
filter_dilation
=
(
1
,
1
)):
"""Compute conv output gradient w.r.t its weights
This function will build the symbolic graph for getting the
...
...
@@ -327,7 +345,6 @@ def conv2d_grad_wrt_weights(input,
``(int1, int2)``
pad input with a symmetric border of ``int1`` rows and
``int2`` columns, then perform a valid convolution.
subsample : tuple of len 2
The subsampling used in the forward pass of the convolutional
operation. Also called strides elsewhere.
...
...
@@ -337,6 +354,9 @@ def conv2d_grad_wrt_weights(input,
referred to as a convolution, and this is the default. If
``False``, the filters are not flipped and the operation is
referred to as a cross-correlation.
filter_dilation : tuple of len 2
The filter dilation used in the forward pass.
Also known as input striding.
Returns
-------
...
...
@@ -386,7 +406,8 @@ def conv2d_grad_wrt_weights(input,
kshp
=
numerical_filter_shape
,
border_mode
=
border_mode
,
subsample
=
subsample
,
filter_flip
=
filter_flip
)
filter_flip
=
filter_flip
,
filter_dilation
=
filter_dilation
)
return
gradWeight_op
(
input
,
output_grad
,
filter_shape
[:
-
2
])
...
...
@@ -541,7 +562,8 @@ def bilinear_upsampling(input,
filter_shape
=
(
1
,
1
,
None
,
1
),
border_mode
=
(
pad
,
0
),
subsample
=
(
ratio
,
1
),
filter_flip
=
True
)
filter_flip
=
True
,
filter_dilation
=
(
1
,
1
))
# upsampling cols
upsampled_mat
=
conv2d_grad_wrt_inputs
(
output_grad
=
upsampled_row
,
filters
=
kern
[
np
.
newaxis
,
...
...
@@ -553,7 +575,8 @@ def bilinear_upsampling(input,
filter_shape
=
(
1
,
1
,
1
,
None
),
border_mode
=
(
0
,
pad
),
subsample
=
(
1
,
ratio
),
filter_flip
=
True
)
filter_flip
=
True
,
filter_dilation
=
(
1
,
1
))
else
:
kern
=
bilinear_kernel_2D
(
ratio
=
ratio
,
normalize
=
True
)
upsampled_mat
=
conv2d_grad_wrt_inputs
(
output_grad
=
concat_mat
,
...
...
@@ -565,7 +588,8 @@ def bilinear_upsampling(input,
filter_shape
=
(
1
,
1
,
None
,
None
),
border_mode
=
(
pad
,
pad
),
subsample
=
(
ratio
,
ratio
),
filter_flip
=
True
)
filter_flip
=
True
,
filter_dilation
=
(
1
,
1
))
return
upsampled_mat
.
reshape
((
input
.
shape
[
0
],
input
.
shape
[
1
],
row
*
ratio
,
col
*
ratio
))
...
...
@@ -620,14 +644,18 @@ class BaseAbstractConv2d(Op):
are not flipped and the operation is referred to as a
cross-correlation.
filter_dilation: tuple of len 2
Factor by which to subsample (stride) the input.
Also called dilation factor.
"""
check_broadcast
=
False
__props__
=
(
'border_mode'
,
'subsample'
,
'filter_flip'
,
'imshp'
,
'kshp'
)
__props__
=
(
'border_mode'
,
'subsample'
,
'filter_flip'
,
'imshp'
,
'kshp'
,
'filter_dilation'
)
def
__init__
(
self
,
imshp
=
None
,
kshp
=
None
,
border_mode
=
"valid"
,
subsample
=
(
1
,
1
)
,
filter_
flip
=
True
):
imshp
=
None
,
kshp
=
None
,
border_mode
=
"valid"
,
subsample
=
(
1
,
1
),
filter_flip
=
True
,
filter_
dilation
=
(
1
,
1
)
):
if
isinstance
(
border_mode
,
integer_types
):
border_mode
=
(
border_mode
,
border_mode
)
...
...
@@ -673,6 +701,9 @@ class BaseAbstractConv2d(Op):
if
len
(
subsample
)
!=
2
:
raise
ValueError
(
"subsample must have two elements"
)
self
.
subsample
=
tuple
(
subsample
)
if
len
(
filter_dilation
)
!=
2
:
raise
ValueError
(
"filter_dilation must have two elements"
)
self
.
filter_dilation
=
tuple
(
filter_dilation
)
def
flops
(
self
,
inp
,
outp
):
""" Useful with the hack in profilemode to print the MFlops"""
...
...
@@ -694,7 +725,7 @@ class BaseAbstractConv2d(Op):
# This may change in the future.
return
False
def
conv2d
(
self
,
img
,
kern
,
mode
=
"valid"
):
def
conv2d
(
self
,
img
,
kern
,
mode
=
"valid"
,
dilation
=
(
1
,
1
)
):
"""
Basic slow Python implementation for DebugMode
"""
...
...
@@ -708,8 +739,16 @@ class BaseAbstractConv2d(Op):
'invalid mode {}, which must be either '
'"valid" or "full"'
.
format
(
mode
))
out_shape
=
get_conv_output_shape
(
img
.
shape
,
kern
.
shape
,
mode
,
[
1
,
1
])
out_shape
=
get_conv_output_shape
(
img
.
shape
,
kern
.
shape
,
mode
,
[
1
,
1
],
dilation
)
out
=
numpy
.
zeros
(
out_shape
,
dtype
=
img
.
dtype
)
dil_kern_shp
=
kern
.
shape
[:
-
2
]
+
((
kern
.
shape
[
-
2
]
-
1
)
*
dilation
[
0
]
+
1
,
(
kern
.
shape
[
-
1
]
-
1
)
*
dilation
[
1
]
+
1
)
dilated_kern
=
numpy
.
zeros
(
dil_kern_shp
,
dtype
=
kern
.
dtype
)
dilated_kern
[:,
:,
::
dilation
[
0
],
::
dilation
[
1
]]
=
kern
val
=
_valfrommode
(
mode
)
bval
=
_bvalfromboundary
(
'fill'
)
...
...
@@ -720,7 +759,7 @@ class BaseAbstractConv2d(Op):
for
im0
in
xrange
(
img
.
shape
[
1
]):
# some cast generates a warning here
out
[
b
,
n
,
...
]
+=
_convolve2d
(
img
[
b
,
im0
,
...
],
kern
[
n
,
im0
,
...
],
dilated_
kern
[
n
,
im0
,
...
],
1
,
val
,
bval
,
0
)
return
out
...
...
@@ -736,10 +775,11 @@ class AbstractConv2d(BaseAbstractConv2d):
kshp
=
None
,
border_mode
=
"valid"
,
subsample
=
(
1
,
1
),
filter_flip
=
True
):
super
(
AbstractConv2d
,
self
)
.
__init__
(
imshp
,
kshp
,
border_mode
,
subsample
,
filter_flip
)
filter_flip
=
True
,
filter_dilation
=
(
1
,
1
)):
super
(
AbstractConv2d
,
self
)
.
__init__
(
imshp
,
kshp
,
border_mode
,
subsample
,
filter_flip
,
filter_dilation
)
def
make_node
(
self
,
img
,
kern
):
# Make sure both inputs are Variables with the same Type
...
...
@@ -766,6 +806,8 @@ class AbstractConv2d(BaseAbstractConv2d):
img
,
kern
=
inp
img
=
numpy
.
asarray
(
img
)
kern
=
numpy
.
asarray
(
kern
)
dil_kernshp
=
((
kern
.
shape
[
2
]
-
1
)
*
self
.
filter_dilation
[
0
]
+
1
,
(
kern
.
shape
[
3
]
-
1
)
*
self
.
filter_dilation
[
1
]
+
1
)
o
,
=
out_
mode
=
self
.
border_mode
...
...
@@ -777,9 +819,9 @@ class AbstractConv2d(BaseAbstractConv2d):
' integers'
.
format
(
mode
))
if
mode
==
"full"
:
mode
=
(
kern
.
shape
[
2
]
-
1
,
kern
.
shape
[
3
]
-
1
)
mode
=
(
dil_kernshp
[
0
]
-
1
,
dil_kernshp
[
1
]
-
1
)
elif
mode
==
"half"
:
mode
=
(
kern
.
shape
[
2
]
//
2
,
kern
.
shape
[
3
]
//
2
)
mode
=
(
dil_kernshp
[
0
]
//
2
,
dil_kernshp
[
1
]
//
2
)
if
isinstance
(
mode
,
tuple
):
pad_h
,
pad_w
=
map
(
int
,
mode
)
mode
=
"valid"
...
...
@@ -790,7 +832,7 @@ class AbstractConv2d(BaseAbstractConv2d):
img
=
new_img
if
not
self
.
filter_flip
:
kern
=
kern
[:,
:,
::
-
1
,
::
-
1
]
conv_out
=
self
.
conv2d
(
img
,
kern
,
mode
=
"valid"
)
conv_out
=
self
.
conv2d
(
img
,
kern
,
mode
=
"valid"
,
dilation
=
self
.
filter_dilation
)
conv_out
=
conv_out
[:,
:,
::
self
.
subsample
[
0
],
::
self
.
subsample
[
1
]]
o
[
0
]
=
node
.
outputs
[
0
]
.
type
.
filter
(
conv_out
)
...
...
@@ -812,12 +854,14 @@ class AbstractConv2d(BaseAbstractConv2d):
d_bottom
=
AbstractConv2d_gradInputs
(
self
.
imshp
,
self
.
kshp
,
self
.
border_mode
,
self
.
subsample
,
self
.
filter_flip
)(
self
.
filter_flip
,
self
.
filter_dilation
)(
weights
,
top
,
bottom
.
shape
[
-
2
:])
d_weights
=
AbstractConv2d_gradWeights
(
self
.
imshp
,
self
.
kshp
,
self
.
border_mode
,
self
.
subsample
,
self
.
filter_flip
)(
self
.
filter_flip
,
self
.
filter_dilation
)(
bottom
,
top
,
weights
.
shape
[
-
2
:])
...
...
@@ -844,7 +888,7 @@ class AbstractConv2d(BaseAbstractConv2d):
kshp
=
[
kshp
[
i
]
if
self
.
kshp
[
i
]
is
None
else
self
.
kshp
[
i
]
for
i
in
range
(
4
)]
res
=
get_conv_output_shape
(
imshp
,
kshp
,
self
.
border_mode
,
self
.
subsample
)
self
.
subsample
,
self
.
filter_dilation
)
return
[
res
]
...
...
@@ -863,11 +907,13 @@ class AbstractConv2d_gradWeights(BaseAbstractConv2d):
kshp
=
None
,
border_mode
=
"valid"
,
subsample
=
(
1
,
1
),
filter_flip
=
True
):
filter_flip
=
True
,
filter_dilation
=
(
1
,
1
)):
super
(
AbstractConv2d_gradWeights
,
self
)
.
__init__
(
imshp
,
kshp
,
border_mode
,
subsample
,
filter_flip
)
filter_flip
,
filter_dilation
)
# Update shape/height_width
def
make_node
(
self
,
img
,
topgrad
,
shape
):
...
...
@@ -943,15 +989,16 @@ class AbstractConv2d_gradWeights(BaseAbstractConv2d):
d_bottom
=
AbstractConv2d_gradInputs
(
self
.
imshp
,
self
.
kshp
,
self
.
border_mode
,
self
.
subsample
,
self
.
filter_flip
)(
weights
,
top
,
bottom
.
shape
[
-
2
:])
self
.
filter_flip
,
self
.
filter_dilation
)(
weights
,
top
,
bottom
.
shape
[
-
2
:])
d_top
=
AbstractConv2d
(
self
.
imshp
,
self
.
kshp
,
self
.
border_mode
,
self
.
subsample
,
self
.
filter_flip
)(
bottom
,
weights
)
self
.
filter_flip
,
self
.
filter_dilation
)(
bottom
,
weights
)
# Make sure that the broadcastable pattern of the inputs is used
# for the gradients, even if the grad opts are not able to infer
# that the dimensions are broadcastable.
...
...
@@ -998,11 +1045,13 @@ class AbstractConv2d_gradInputs(BaseAbstractConv2d):
kshp
=
None
,
border_mode
=
"valid"
,
subsample
=
(
1
,
1
),
filter_flip
=
True
):
filter_flip
=
True
,
filter_dilation
=
(
1
,
1
)):
super
(
AbstractConv2d_gradInputs
,
self
)
.
__init__
(
imshp
,
kshp
,
border_mode
,
subsample
,
filter_flip
)
filter_flip
,
filter_dilation
)
# Update shape/height_width
def
make_node
(
self
,
kern
,
topgrad
,
shape
):
...
...
@@ -1070,12 +1119,15 @@ class AbstractConv2d_gradInputs(BaseAbstractConv2d):
bottom
,
=
grads
d_weights
=
AbstractConv2d_gradWeights
(
self
.
imshp
,
self
.
kshp
,
self
.
border_mode
,
self
.
subsample
)(
bottom
,
top
,
weights
.
shape
[
-
2
:])
self
.
subsample
,
self
.
filter_flip
,
self
.
filter_dilation
)(
bottom
,
top
,
weights
.
shape
[
-
2
:])
d_top
=
AbstractConv2d
(
self
.
imshp
,
self
.
kshp
,
self
.
border_mode
,
self
.
subsample
)(
bottom
,
weights
)
self
.
border_mode
,
self
.
subsample
,
self
.
filter_flip
,
self
.
filter_dilation
)(
bottom
,
weights
)
# Make sure that the broadcastable pattern of the inputs is used
# for the gradients, even if the grad opts are not able to infer
# that the dimensions are broadcastable.
...
...
theano/tensor/nnet/corr.py
浏览文件 @
a668c6c5
...
...
@@ -27,12 +27,14 @@ class BaseCorrMM(gof.Op):
or a pair of integers
subsample
Perform subsampling of the output (default: (1, 1)).
filter_dilation
Perform dilated correlation (default: (1,1))
"""
check_broadcast
=
False
__props__
=
(
'border_mode'
,
'subsample'
)
__props__
=
(
'border_mode'
,
'subsample'
,
'filter_dilation'
)
def
__init__
(
self
,
border_mode
=
"valid"
,
subsample
=
(
1
,
1
)):
def
__init__
(
self
,
border_mode
=
"valid"
,
subsample
=
(
1
,
1
),
filter_dilation
=
(
1
,
1
)):
if
isinstance
(
border_mode
,
integer_types
):
if
border_mode
<
0
:
raise
ValueError
(
...
...
@@ -55,7 +57,10 @@ class BaseCorrMM(gof.Op):
self
.
border_mode
=
border_mode
if
len
(
subsample
)
!=
2
:
raise
ValueError
(
"subsample must have two elements"
)
if
len
(
filter_dilation
)
!=
2
:
raise
ValueError
(
"filter_dilation must have two elements"
)
self
.
subsample
=
tuple
(
subsample
)
self
.
filter_dilation
=
tuple
(
filter_dilation
)
@property
def
pad
(
self
):
...
...
@@ -64,10 +69,11 @@ class BaseCorrMM(gof.Op):
return
(
0
,
0
)
def
__str__
(
self
):
return
'
%
s{
%
s,
%
s}'
%
(
return
'
%
s{
%
s,
%
s
,
%
s
}'
%
(
self
.
__class__
.
__name__
,
self
.
border_mode
,
str
(
self
.
subsample
))
str
(
self
.
subsample
),
str
(
self
.
filter_dilation
))
def
c_support_code
(
self
):
return
blas_header_text
()
...
...
@@ -89,7 +95,7 @@ class BaseCorrMM(gof.Op):
def
c_code_cache_version
(
self
):
# raise this whenever modifying any of the support_code_files
return
(
1
,
1
)
return
(
1
,
2
)
def
c_support_code_apply
(
self
,
node
,
nodename
):
# REMEMBER TO RAISE c_code_cache_version when changing any of
...
...
@@ -155,6 +161,7 @@ class BaseCorrMM(gof.Op):
if
not
theano
.
config
.
blas
.
ldflags
:
raise
NotImplementedError
(
"C code for CorrMM* classes need a blas library."
)
dH
,
dW
=
self
.
subsample
dilH
,
dilW
=
self
.
filter_dilation
if
self
.
border_mode
==
"half"
:
padH
=
padW
=
-
1
elif
self
.
border_mode
==
"full"
:
...
...
@@ -201,6 +208,8 @@ class BaseCorrMM(gof.Op):
// Optional args
int dH =
%(dH)
s;
int dW =
%(dW)
s;
int dilH =
%(dilH)
s;
int dilW =
%(dilW)
s;
int padH =
%(padH)
s;
int padW =
%(padW)
s;
...
...
@@ -224,39 +233,43 @@ class BaseCorrMM(gof.Op):
}
else if (padH == -2) {
// vertical full padding, we can infer the kernel height
kH =
2 - PyArray_DIMS(bottom)[2] + (PyArray_DIMS(top)[2] - 1) * dH
;
kH =
(2 - PyArray_DIMS(bottom)[2] + (PyArray_DIMS(top)[2] - 1) * dH - 1)/ dilH + 1
;
}
else {
// explicit padding, we can infer the kernel height
kH =
PyArray_DIMS(bottom)[2] + 2*padH - (PyArray_DIMS(top)[2] - 1) * dH
;
kH =
(PyArray_DIMS(bottom)[2] + 2*padH - (PyArray_DIMS(top)[2] - 1) * dH - 1) / dilH +1
;
}
if ((dW != 1) || (padW == -1)) {
kW =
%(width)
s;
}
else if (padW == -2) {
kW =
2 - PyArray_DIMS(bottom)[3] + (PyArray_DIMS(top)[3] - 1) * dW
;
kW =
(2 - PyArray_DIMS(bottom)[3] + (PyArray_DIMS(top)[3] - 1) * dW - 1) / dilW + 1
;
}
else {
kW =
PyArray_DIMS(bottom)[3] + 2*padW - (PyArray_DIMS(top)[3] - 1) * dW
;
kW =
(PyArray_DIMS(bottom)[3] + 2*padW - (PyArray_DIMS(top)[3] - 1) * dW - 1) / dilW + 1
;
}
}
// Implicit dilated kernel size
int dil_kH = (kH - 1) * dilH + 1;
int dil_kW = (kW - 1) * dilW + 1;
// Auto-padding if requested
if (padH == -1) { // vertical half padding
padH = kH / 2;
padH =
dil_
kH / 2;
}
else if (padH == -2) { // vertical full padding
padH = kH - 1;
padH =
dil_
kH - 1;
}
else if (padH < 0) {
PyErr_SetString(PyExc_ValueError, "BaseCorrMM: padH must be >= -2");
%(fail)
s
}
if (padW == -1) { // horizontal half padding
padW = kW / 2;
padW =
dil_
kW / 2;
}
else if (padW == -2) { // horizontal full padding
padW = kW - 1;
padW =
dil_
kW - 1;
}
else if (padW < 0) {
PyErr_SetString(PyExc_ValueError, "BaseCorrMM: padW must be >= -2");
...
...
@@ -268,15 +281,15 @@ class BaseCorrMM(gof.Op):
switch(direction) {
case 0: // forward pass
// output is top: (batchsize, num_filters, height, width)
// height and width: top = (bottom + 2*pad -
weight
) / sample + 1
// height and width: top = (bottom + 2*pad -
((weight-1)*dil + 1)
) / sample + 1
out_dim[0] = (npy_intp)PyArray_DIMS(bottom)[0];
out_dim[1] = (npy_intp)PyArray_DIMS(weights)[0];
out_dim[2] = (npy_intp)((PyArray_DIMS(bottom)[2] + 2*padH -
PyArray_DIMS(weights)[2]
) / dH + 1);
out_dim[3] = (npy_intp)((PyArray_DIMS(bottom)[3] + 2*padW -
PyArray_DIMS(weights)[3]
) / dW + 1);
out_dim[2] = (npy_intp)((PyArray_DIMS(bottom)[2] + 2*padH -
((PyArray_DIMS(weights)[2]-1)*dilH + 1)
) / dH + 1);
out_dim[3] = (npy_intp)((PyArray_DIMS(bottom)[3] + 2*padW -
((PyArray_DIMS(weights)[3]-1)*dilW + 1)
) / dW + 1);
break;
case 1: // backprop wrt. weights
// output is weights: (num_filters, num_channels, height, width)
// height and width: weights =
bottom + 2*pad - (top - 1) * sample
// height and width: weights =
(bottom + 2*pad - (top - 1) * sample - 1) / dil + 1
out_dim[0] = (npy_intp)PyArray_DIMS(top)[1];
out_dim[1] = (npy_intp)PyArray_DIMS(bottom)[1];
out_dim[2] = (npy_intp)kH; // already inferred further above
...
...
@@ -284,11 +297,11 @@ class BaseCorrMM(gof.Op):
break;
case 2: // backprop wrt. inputs
// output is bottom: (batchsize, num_channels, height, width)
// height and width: bottom = (top - 1) * sample +
weights
- 2*pad
// height and width: bottom = (top - 1) * sample +
(weights-1)*dil + 1
- 2*pad
out_dim[0] = (npy_intp)PyArray_DIMS(top)[0];
out_dim[1] = (npy_intp)PyArray_DIMS(weights)[1];
out_dim[2] = (npy_intp)((dH != 1) ?
%(height)
s : (PyArray_DIMS(top)[2] - 1) * dH +
PyArray_DIMS(weights)[2]
- 2*padH);
out_dim[3] = (npy_intp)((dW != 1) ?
%(width)
s : (PyArray_DIMS(top)[3] - 1) * dW +
PyArray_DIMS(weights)[3]
- 2*padW);
out_dim[2] = (npy_intp)((dH != 1) ?
%(height)
s : (PyArray_DIMS(top)[2] - 1) * dH +
(PyArray_DIMS(weights)[2]-1)*dilH + 1
- 2*padH);
out_dim[3] = (npy_intp)((dW != 1) ?
%(width)
s : (PyArray_DIMS(top)[3] - 1) * dW +
(PyArray_DIMS(weights)[3]-1)*dilW + 1
- 2*padW);
break;
default:
PyErr_SetString(PyExc_ValueError, "BaseCorrMM: direction must be 0, 1, or 2
\\
n");
...
...
@@ -326,7 +339,7 @@ class BaseCorrMM(gof.Op):
}
// Call corrMM code
out2 = corrMM(
%(bottom)
s,
%(weights)
s,
%(top)
s, direction, dH, dW, padH, padW);
out2 = corrMM(
%(bottom)
s,
%(weights)
s,
%(top)
s, direction, dH, dW,
dilH, dilW,
padH, padW);
if (out2==NULL){
%(fail)
s
}
...
...
@@ -357,10 +370,15 @@ class CorrMM(BaseCorrMM):
`(sv, sh)` is equivalent to `CorrMM(...)(...)[:,:,::sv, ::sh]`,
but faster.
Set to `(1, 1)` to disable subsampling.
filter_dilation
The filter dilation operation applied to each input image.
Should be a tuple with 2 elements.
Set to `(1, 1)` to disable filter dilation.
"""
def
__init__
(
self
,
border_mode
=
"valid"
,
subsample
=
(
1
,
1
)):
super
(
CorrMM
,
self
)
.
__init__
(
border_mode
,
subsample
)
def
__init__
(
self
,
border_mode
=
"valid"
,
subsample
=
(
1
,
1
),
filter_dilation
=
(
1
,
1
)):
super
(
CorrMM
,
self
)
.
__init__
(
border_mode
,
subsample
,
filter_dilation
)
def
make_node
(
self
,
img
,
kern
):
img
=
as_tensor_variable
(
img
)
...
...
@@ -382,7 +400,8 @@ class CorrMM(BaseCorrMM):
imshp
,
kshp
,
self
.
border_mode
,
self
.
subsample
)
self
.
subsample
,
self
.
filter_dilation
)
return
[
res
]
def
c_code
(
self
,
node
,
nodename
,
inp
,
out_
,
sub
):
...
...
@@ -395,11 +414,13 @@ class CorrMM(BaseCorrMM):
bottom
,
weights
=
inp
top
,
=
grads
d_bottom
=
CorrMM_gradInputs
(
self
.
border_mode
,
self
.
subsample
)(
weights
,
top
,
bottom
.
shape
[
-
2
:])
self
.
subsample
,
self
.
filter_dilation
)(
weights
,
top
,
bottom
.
shape
[
-
2
:])
d_weights
=
CorrMM_gradWeights
(
self
.
border_mode
,
self
.
subsample
)(
bottom
,
top
,
weights
.
shape
[
-
2
:])
self
.
subsample
,
self
.
filter_dilation
)(
bottom
,
top
,
weights
.
shape
[
-
2
:])
return
d_bottom
,
d_weights
...
...
@@ -415,8 +436,11 @@ class CorrMM_gradWeights(BaseCorrMM):
"""
def
__init__
(
self
,
border_mode
=
"valid"
,
subsample
=
(
1
,
1
)):
super
(
CorrMM_gradWeights
,
self
)
.
__init__
(
border_mode
,
subsample
)
def
__init__
(
self
,
border_mode
=
"valid"
,
subsample
=
(
1
,
1
),
filter_dilation
=
(
1
,
1
)):
super
(
CorrMM_gradWeights
,
self
)
.
__init__
(
border_mode
,
subsample
,
filter_dilation
)
def
make_node
(
self
,
img
,
topgrad
,
shape
=
None
):
img
=
as_tensor_variable
(
img
)
...
...
@@ -485,10 +509,12 @@ class CorrMM_gradWeights(BaseCorrMM):
bottom
,
top
=
inp
[:
2
]
weights
,
=
grads
d_bottom
=
CorrMM_gradInputs
(
self
.
border_mode
,
self
.
subsample
)(
weights
,
top
,
bottom
.
shape
[
-
2
:])
self
.
subsample
,
self
.
filter_dilation
)(
weights
,
top
,
bottom
.
shape
[
-
2
:])
d_top
=
CorrMM
(
self
.
border_mode
,
self
.
subsample
)(
bottom
,
weights
)
self
.
subsample
,
self
.
filter_dilation
)(
bottom
,
weights
)
d_height_width
=
((
theano
.
gradient
.
DisconnectedType
()(),)
*
2
if
len
(
inp
)
==
4
else
())
return
(
d_bottom
,
d_top
)
+
d_height_width
...
...
@@ -512,8 +538,10 @@ class CorrMM_gradInputs(BaseCorrMM):
"""
def
__init__
(
self
,
border_mode
=
"valid"
,
subsample
=
(
1
,
1
)):
super
(
CorrMM_gradInputs
,
self
)
.
__init__
(
border_mode
,
subsample
)
def
__init__
(
self
,
border_mode
=
"valid"
,
subsample
=
(
1
,
1
),
filter_dilation
=
(
1
,
1
)):
super
(
CorrMM_gradInputs
,
self
)
.
__init__
(
border_mode
,
subsample
,
filter_dilation
)
def
make_node
(
self
,
kern
,
topgrad
,
shape
=
None
):
kern
=
as_tensor_variable
(
kern
)
...
...
@@ -586,11 +614,13 @@ class CorrMM_gradInputs(BaseCorrMM):
weights
,
top
=
inp
[:
2
]
bottom
,
=
grads
d_weights
=
CorrMM_gradWeights
(
self
.
border_mode
,
self
.
subsample
)(
bottom
,
top
,
weights
.
shape
[
-
2
:])
self
.
subsample
,
self
.
filter_dilation
)(
bottom
,
top
,
weights
.
shape
[
-
2
:])
d_top
=
CorrMM
(
self
.
border_mode
,
self
.
subsample
)(
bottom
,
weights
)
self
.
subsample
,
self
.
filter_dilation
)(
bottom
,
weights
)
d_height_width
=
((
theano
.
gradient
.
DisconnectedType
()(),)
*
2
if
len
(
inp
)
==
4
else
())
return
(
d_weights
,
d_top
)
+
d_height_width
...
...
theano/tensor/nnet/corr_gemm.c
浏览文件 @
a668c6c5
...
...
@@ -6,13 +6,13 @@ Copyright (c) 2014, The Regents of the University of California (Regents)
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
modification, are permitted provided that the following conditions are met:
1. Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.
list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
...
...
@@ -31,20 +31,24 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
// Loops for fast unfold + copy
void
im2col
(
const
%
(
float_type
)
s
*
data_im
,
const
int
channels
,
const
int
height
,
const
int
width
,
const
int
kernel_h
,
const
int
kernel_w
,
const
int
dilation_h
,
const
int
dilation_w
,
const
int
pad_h
,
const
int
pad_w
,
const
int
stride_h
,
const
int
stride_w
,
%
(
float_type
)
s
*
data_col
)
{
int
height_col
=
(
height
+
2
*
pad_h
-
kernel_h
)
/
stride_h
+
1
;
int
width_col
=
(
width
+
2
*
pad_w
-
kernel_w
)
/
stride_w
+
1
;
// Implicit dilated kernel size
int
dil_kernel_h
=
(
kernel_h
-
1
)
*
dilation_h
+
1
;
int
dil_kernel_w
=
(
kernel_w
-
1
)
*
dilation_w
+
1
;
int
height_col
=
(
height
+
2
*
pad_h
-
dil_kernel_h
)
/
stride_h
+
1
;
int
width_col
=
(
width
+
2
*
pad_w
-
dil_kernel_w
)
/
stride_w
+
1
;
int
channels_col
=
channels
*
kernel_h
*
kernel_w
;
for
(
int
c
=
0
;
c
<
channels_col
;
++
c
)
{
int
w_offset
=
c
%%
kernel_w
;
int
h_offset
=
(
c
/
kernel_w
)
%%
kernel_h
;
int
c_im
=
c
/
kernel_h
/
kernel_w
;
for
(
int
h
=
0
;
h
<
height_col
;
++
h
)
{
int
h_pad
=
h
*
stride_h
-
pad_h
+
h_offset
*
dilation_h
;
for
(
int
w
=
0
;
w
<
width_col
;
++
w
)
{
int
h_pad
=
h
*
stride_h
-
pad_h
+
h_offset
;
int
w_pad
=
w
*
stride_w
-
pad_w
+
w_offset
;
int
w_pad
=
w
*
stride_w
-
pad_w
+
w_offset
*
dilation_w
;
if
(
h_pad
>=
0
&&
h_pad
<
height
&&
w_pad
>=
0
&&
w_pad
<
width
)
data_col
[(
npy_intp
)(
c
*
height_col
+
h
)
*
width_col
+
w
]
=
data_im
[(
npy_intp
)(
c_im
*
height
+
h_pad
)
*
width
+
w_pad
];
...
...
@@ -60,10 +64,14 @@ void im2col(const %(float_type)s* data_im, const int channels,
// accumulated into data_im.
void
col2im
(
const
%
(
float_type
)
s
*
data_col
,
const
int
channels
,
const
int
height
,
const
int
width
,
const
int
patch_h
,
const
int
patch_w
,
const
int
dilation_h
,
const
int
dilation_w
,
const
int
pad_h
,
const
int
pad_w
,
const
int
stride_h
,
const
int
stride_w
,
%
(
float_type
)
s
*
data_im
)
{
int
height_col
=
(
height
+
2
*
pad_h
-
patch_h
)
/
stride_h
+
1
;
int
width_col
=
(
width
+
2
*
pad_w
-
patch_w
)
/
stride_w
+
1
;
// Implicit dilated patch
int
dil_patch_h
=
(
patch_h
-
1
)
*
dilation_h
+
1
;
int
dil_patch_w
=
(
patch_w
-
1
)
*
dilation_w
+
1
;
int
height_col
=
(
height
+
2
*
pad_h
-
dil_patch_h
)
/
stride_h
+
1
;
int
width_col
=
(
width
+
2
*
pad_w
-
dil_patch_w
)
/
stride_w
+
1
;
int
num_kernels
=
channels
*
height
*
width
;
int
channels_col
=
channels
*
patch_h
*
patch_w
;
for
(
int
c
=
0
;
c
<
channels_col
;
++
c
)
{
...
...
@@ -71,9 +79,9 @@ void col2im(const %(float_type)s* data_col, const int channels,
int
h_offset
=
(
c
/
patch_w
)
%%
patch_h
;
int
c_im
=
c
/
patch_h
/
patch_w
;
for
(
int
h
=
0
;
h
<
height_col
;
++
h
)
{
int
h_pad
=
h
*
stride_h
-
pad_h
+
h_offset
*
dilation_h
;
for
(
int
w
=
0
;
w
<
width_col
;
++
w
)
{
int
h_pad
=
h
*
stride_h
-
pad_h
+
h_offset
;
int
w_pad
=
w
*
stride_w
-
pad_w
+
w_offset
;
int
w_pad
=
w
*
stride_w
-
pad_w
+
w_offset
*
dilation_w
;
if
(
h_pad
>=
0
&&
h_pad
<
height
&&
w_pad
>=
0
&&
w_pad
<
width
)
data_im
[(
npy_intp
)(
c_im
*
height
+
h_pad
)
*
width
+
w_pad
]
+=
data_col
[(
npy_intp
)(
c
*
height_col
+
h
)
*
width_col
+
w
];
...
...
@@ -91,13 +99,15 @@ void col2im(const %(float_type)s* data_col, const int channels,
// CPU version author: Jesse Livezey
// CPU version adapted from GPU version
PyArrayObject
*
corrMM
(
PyArrayObject
*
bottom
,
PyArrayObject
*
weight
,
PyArrayObject
*
top
,
const
int
direction
,
const
int
dH
=
1
,
const
int
dW
=
1
,
const
int
padH
=
0
,
const
int
padW
=
0
)
PyArrayObject
*
weight
,
PyArrayObject
*
top
,
const
int
direction
,
const
int
dH
=
1
,
const
int
dW
=
1
,
const
int
dilH
=
1
,
const
int
dilW
=
1
,
const
int
padH
=
0
,
const
int
padW
=
0
)
{
if
(
PyArray_NDIM
(
bottom
)
!=
4
)
{
...
...
@@ -109,7 +119,7 @@ PyArrayObject* corrMM(PyArrayObject* bottom,
PyErr_SetString
(
PyExc_ValueError
,
"CorrMM received bottom with wrong type."
);
return
NULL
;
}
if
(
PyArray_NDIM
(
weight
)
!=
4
)
{
PyErr_SetString
(
PyExc_ValueError
,
"CorrMM requires weight of 4D"
);
...
...
@@ -151,9 +161,12 @@ PyArrayObject* corrMM(PyArrayObject* bottom,
"CorrMM images and kernel must have the same stack size
\n
"
);
return
NULL
;
}
// implicit dilated filter
const
int
dil_kH
=
(
kH
-
1
)
*
dilH
+
1
;
const
int
dil_kW
=
(
kW
-
1
)
*
dilW
+
1
;
// top: (batchSize, nFilters, topHeight, topWidth)
const
int
topHeight
=
(
bottomHeight
+
2
*
padH
-
kH
)
/
dH
+
1
;
const
int
topWidth
=
(
bottomWidth
+
2
*
padW
-
kW
)
/
dW
+
1
;
const
int
topHeight
=
(
bottomHeight
+
2
*
padH
-
dil_
kH
)
/
dH
+
1
;
const
int
topWidth
=
(
bottomWidth
+
2
*
padW
-
dil_
kW
)
/
dW
+
1
;
if
(
batchSize
!=
PyArray_DIMS
(
top
)[
0
]
||
nFilters
!=
PyArray_DIMS
(
top
)[
1
]
||
topHeight
!=
PyArray_DIMS
(
top
)[
2
]
||
...
...
@@ -176,9 +189,9 @@ PyArrayObject* corrMM(PyArrayObject* bottom,
col_dim
[
0
]
=
(
npy_intp
)(
nChannels
*
kW
*
kH
);
col_dim
[
1
]
=
(
npy_intp
)(
topHeight
*
topWidth
);
PyArrayObject
*
col
=
(
PyArrayObject
*
)
PyArray_EMPTY
(
2
,
col_dim
,
PyArray_TYPE
(
top
),
0
);
col_dim
,
PyArray_TYPE
(
top
),
0
);
if
(
NULL
==
col
)
{
PyErr_Format
(
PyExc_RuntimeError
,
...
...
@@ -206,7 +219,8 @@ PyArrayObject* corrMM(PyArrayObject* bottom,
for
(
int
n
=
0
;
n
<
batchSize
;
n
++
)
{
// First, im2col
im2col
((
%
(
float_type
)
s
*
)
PyArray_DATA
(
bottom
)
+
n
*
bottom_stride
,
nChannels
,
bottomHeight
,
bottomWidth
,
kH
,
kW
,
padH
,
padW
,
dH
,
dW
,
(
%
(
float_type
)
s
*
)
PyArray_DATA
(
col
));
bottomWidth
,
kH
,
kW
,
dilH
,
dilW
,
padH
,
padW
,
dH
,
dW
,
(
%
(
float_type
)
s
*
)
PyArray_DATA
(
col
));
// Second, gemm
%
(
gemm
)
s
(
&
NTrans
,
&
NTrans
,
&
N_
,
&
M_
,
&
K_
,
...
...
@@ -255,7 +269,8 @@ PyArrayObject* corrMM(PyArrayObject* bottom,
for
(
int
n
=
0
;
n
<
batchSize
;
n
++
)
{
// First, im2col
im2col
((
%
(
float_type
)
s
*
)
PyArray_DATA
(
bottom
)
+
n
*
bottom_stride
,
nChannels
,
bottomHeight
,
bottomWidth
,
kH
,
kW
,
padH
,
padW
,
dH
,
dW
,
(
%
(
float_type
)
s
*
)
PyArray_DATA
(
col
));
bottomWidth
,
kH
,
kW
,
dilH
,
dilW
,
padH
,
padW
,
dH
,
dW
,
(
%
(
float_type
)
s
*
)
PyArray_DATA
(
col
));
// Second, gemm
// Note that we accumulate into weight. We do so by setting beta = 0
// for the first iteration and beta = 1 for subsequent ones. (This
...
...
@@ -299,7 +314,7 @@ PyArrayObject* corrMM(PyArrayObject* bottom,
}
else
if
(
direction
==
2
)
{
// backprop wrt. inputs
output
=
bottom
;
// bottom is set to zero here rather than inside of col2im
// bottom is set to zero here rather than inside of col2im
PyArray_FILLWBYTE
(
bottom
,
0
);
// full convolution: gemm, then col2im
// Iterate over batch
...
...
@@ -314,7 +329,8 @@ PyArrayObject* corrMM(PyArrayObject* bottom,
(
%
(
float_type
)
s
*
)
PyArray_DATA
(
col
),
&
N_
);
// col2im back to the data
col2im
((
%
(
float_type
)
s
*
)
PyArray_DATA
(
col
),
nChannels
,
bottomHeight
,
bottomWidth
,
kH
,
kW
,
padH
,
padW
,
dH
,
dW
,
(
%
(
float_type
)
s
*
)
PyArray_DATA
(
bottom
)
+
n
*
bottom_stride
);
kH
,
kW
,
dilH
,
dilW
,
padH
,
padW
,
dH
,
dW
,
(
%
(
float_type
)
s
*
)
PyArray_DATA
(
bottom
)
+
n
*
bottom_stride
);
}
/*
// Original caffe code for comparison
...
...
theano/tensor/nnet/opt.py
浏览文件 @
a668c6c5
...
...
@@ -79,7 +79,8 @@ def local_abstractconv_gemm(node):
if
node
.
op
.
filter_flip
:
kern
=
kern
[:,
:,
::
-
1
,
::
-
1
]
rval
=
CorrMM
(
border_mode
=
node
.
op
.
border_mode
,
subsample
=
node
.
op
.
subsample
)(
img
,
kern
)
subsample
=
node
.
op
.
subsample
,
filter_dilation
=
node
.
op
.
filter_dilation
)(
img
,
kern
)
copy_stack_trace
(
node
.
outputs
[
0
],
rval
)
return
[
rval
]
...
...
@@ -97,7 +98,8 @@ def local_abstractconv_gradweight_gemm(node):
return
None
rval
=
CorrMM_gradWeights
(
border_mode
=
node
.
op
.
border_mode
,
subsample
=
node
.
op
.
subsample
)(
img
,
topgrad
,
shape
)
subsample
=
node
.
op
.
subsample
,
filter_dilation
=
node
.
op
.
filter_dilation
)(
img
,
topgrad
,
shape
)
copy_stack_trace
(
node
.
outputs
[
0
],
rval
)
# need to flip the kernel if necessary
...
...
@@ -124,8 +126,9 @@ def local_abstractconv_gradinputs_gemm(node):
if
node
.
op
.
filter_flip
:
kern
=
kern
[:,
:,
::
-
1
,
::
-
1
]
rval
=
CorrMM_gradInputs
(
border_mode
=
node
.
op
.
border_mode
,
subsample
=
node
.
op
.
subsample
)(
kern
,
topgrad
,
shape
)
subsample
=
node
.
op
.
subsample
,
filter_dilation
=
node
.
op
.
filter_dilation
)(
kern
,
topgrad
,
shape
)
copy_stack_trace
(
node
.
outputs
[
0
],
rval
)
return
[
rval
]
...
...
@@ -221,7 +224,9 @@ def local_conv2d_gradweight_cpu(node):
assert
len
(
op_imshp
)
==
4
and
len
(
op_kshp
)
==
4
outshp
=
get_conv_output_shape
(
op_imshp
,
op_kshp
,
node
.
op
.
border_mode
,
node
.
op
.
subsample
)[
2
:]
node
.
op
.
border_mode
,
node
.
op
.
subsample
,
node
.
op
.
filter_dilation
)[
2
:]
fulloutshp
=
get_conv_output_shape
(
op_imshp
,
op_kshp
,
node
.
op
.
border_mode
,
(
1
,
1
))[
2
:]
...
...
@@ -334,7 +339,9 @@ def local_conv2d_gradinputs_cpu(node):
filters
=
filters
[:,
:,
::
-
1
,
::
-
1
]
outshp
=
get_conv_output_shape
(
op_imshp
,
op_kshp
,
node
.
op
.
border_mode
,
node
.
op
.
subsample
)[
2
:]
node
.
op
.
border_mode
,
node
.
op
.
subsample
,
node
.
op
.
filter_dilation
)[
2
:]
fulloutshp
=
get_conv_output_shape
(
op_imshp
,
op_kshp
,
node
.
op
.
border_mode
,
(
1
,
1
))[
2
:]
...
...
theano/tensor/nnet/tests/test_abstract_conv.py
浏览文件 @
a668c6c5
...
...
@@ -4,7 +4,6 @@ import numpy
import
numpy
as
np
from
nose.plugins.skip
import
SkipTest
from
nose.tools
import
assert_raises
import
theano
from
theano
import
tensor
from
theano.gof.opt
import
check_stack_trace
...
...
@@ -24,46 +23,57 @@ from theano.tensor.nnet.ConvGrad3D import ConvGrad3D
from
theano.tensor.nnet.ConvTransp3D
import
ConvTransp3D
def
conv_corr
(
inputs
,
filters
,
border_mode
=
"valid"
,
subsample
=
(
1
,
1
),
conv_mode
=
'conv'
):
def
conv_corr
(
inputs
,
filters
,
border_mode
=
"valid"
,
subsample
=
(
1
,
1
),
conv_mode
=
'conv'
,
filter_dilation
=
(
1
,
1
)):
if
conv_mode
==
'conv'
:
filters
=
filters
[:,
:,
::
-
1
,
::
-
1
]
return
corr
.
CorrMM
(
border_mode
,
subsample
)(
inputs
,
filters
)
def
conv_corr_gw
(
inputs
,
topgrad
,
filters_shape
,
border_mode
=
"valid"
,
subsample
=
(
1
,
1
),
conv_mode
=
'conv'
):
rval
=
corr
.
CorrMM_gradWeights
(
border_mode
,
subsample
)(
inputs
,
topgrad
,
filters_shape
[
2
:])
return
corr
.
CorrMM
(
border_mode
,
subsample
,
filter_dilation
)(
inputs
,
filters
)
def
conv_corr_gw
(
inputs
,
topgrad
,
filters_shape
,
border_mode
=
"valid"
,
subsample
=
(
1
,
1
),
conv_mode
=
'conv'
,
filter_dilation
=
(
1
,
1
)):
rval
=
corr
.
CorrMM_gradWeights
(
border_mode
,
subsample
,
filter_dilation
)(
inputs
,
topgrad
,
filters_shape
[
2
:])
if
conv_mode
==
'conv'
:
rval
=
rval
[:,
:,
::
-
1
,
::
-
1
]
return
rval
def
conv_corr_gi
(
filters
,
topgrad
,
inputs_shape
,
border_mode
=
"valid"
,
subsample
=
(
1
,
1
),
conv_mode
=
'conv'
):
def
conv_corr_gi
(
filters
,
topgrad
,
inputs_shape
,
border_mode
=
"valid"
,
subsample
=
(
1
,
1
),
conv_mode
=
'conv'
,
filter_dilation
=
(
1
,
1
)):
if
conv_mode
==
'conv'
:
filters
=
filters
[:,
:,
::
-
1
,
::
-
1
]
return
corr
.
CorrMM_gradInputs
(
border_mode
,
subsample
)(
filters
,
topgrad
,
inputs_shape
[
2
:])
return
corr
.
CorrMM_gradInputs
(
border_mode
,
subsample
,
filter_dilation
)(
filters
,
topgrad
,
inputs_shape
[
2
:])
class
TestGetConvOutShape
(
unittest
.
TestCase
):
def
test_basic
(
self
):
image_shape
,
kernel_shape
=
(
3
,
2
,
8
,
9
),
(
4
,
2
,
5
,
6
)
image_shape
,
kernel_shape
=
(
3
,
2
,
12
,
9
),
(
4
,
2
,
5
,
6
)
sub_sample
=
(
1
,
2
)
filter_dilation
=
(
2
,
1
)
test1_params
=
get_conv_output_shape
(
image_shape
,
kernel_shape
,
'valid'
,
sub_sample
)
image_shape
,
kernel_shape
,
'valid'
,
sub_sample
,
filter_dilation
)
test2_params
=
get_conv_output_shape
(
image_shape
,
kernel_shape
,
'half'
,
sub_sample
)
image_shape
,
kernel_shape
,
'half'
,
sub_sample
,
filter_dilation
)
test3_params
=
get_conv_output_shape
(
image_shape
,
kernel_shape
,
'full'
,
sub_sample
)
image_shape
,
kernel_shape
,
'full'
,
sub_sample
,
filter_dilation
)
test4_params
=
get_conv_output_shape
(
image_shape
,
kernel_shape
,
(
1
,
2
),
sub_sample
)
image_shape
,
kernel_shape
,
(
1
,
2
),
sub_sample
,
filter_dilation
)
self
.
assertTrue
(
test1_params
==
(
3
,
4
,
4
,
2
))
self
.
assertTrue
(
test2_params
==
(
3
,
4
,
8
,
5
))
self
.
assertTrue
(
test3_params
==
(
3
,
4
,
12
,
7
))
self
.
assertTrue
(
test2_params
==
(
3
,
4
,
12
,
5
))
self
.
assertTrue
(
test3_params
==
(
3
,
4
,
20
,
7
))
self
.
assertTrue
(
test4_params
==
(
3
,
4
,
6
,
4
))
...
...
@@ -71,35 +81,41 @@ class BaseTestConv2d(unittest.TestCase):
def
setUp
(
self
):
if
theano
.
config
.
blas
.
ldflags
==
''
:
raise
SkipTest
(
"BLAS required for reference"
)
self
.
inputs_shapes
=
[(
8
,
1
,
12
,
12
),
(
8
,
1
,
18
,
18
),
(
2
,
1
,
4
,
4
),
self
.
inputs_shapes
=
[(
8
,
1
,
6
,
6
),
(
8
,
1
,
8
,
8
),
(
2
,
1
,
7
,
7
),
(
6
,
1
,
10
,
11
),
(
2
,
1
,
6
,
5
),
(
1
,
5
,
9
,
9
)]
self
.
filters_shapes
=
[(
5
,
1
,
2
,
2
),
(
4
,
1
,
3
,
3
),
(
2
,
1
,
3
,
3
),
(
1
,
1
,
2
,
5
),
(
4
,
1
,
2
,
2
),
(
4
,
5
,
2
,
2
)]
(
1
,
1
,
2
,
3
),
(
4
,
1
,
1
,
3
),
(
4
,
5
,
3
,
2
)]
self
.
subsamples
=
[(
1
,
1
),
(
2
,
2
),
(
2
,
4
)]
self
.
filters_dilations
=
[(
1
,
1
),
(
1
,
2
),
(
2
,
1
)]
self
.
border_modes
=
[
"valid"
,
"full"
,
(
0
,
0
),
(
1
,
1
),
(
5
,
5
),
(
5
,
2
)]
self
.
filter_flip
=
[
True
,
False
]
self
.
provide_shape
=
[
True
,
False
]
self
.
shared
=
theano
.
compile
.
shared
def
get_output_shape
(
self
,
inputs_shape
,
filters_shape
,
subsample
,
border_mode
):
def
get_output_shape
(
self
,
inputs_shape
,
filters_shape
,
subsample
,
border_mode
,
filter_dilation
):
dil_filters
=
((
filters_shape
[
2
]
-
1
)
*
filter_dilation
[
0
]
+
1
,
(
filters_shape
[
3
]
-
1
)
*
filter_dilation
[
1
]
+
1
)
if
border_mode
==
"valid"
:
border_mode
=
(
0
,
0
)
if
border_mode
==
"full"
:
border_mode
=
(
filters_shape
[
2
]
-
1
,
filters_shape
[
3
]
-
1
)
border_mode
=
(
dil_filters
[
0
]
-
1
,
dil_filters
[
1
]
-
1
)
batch_size
=
inputs_shape
[
0
]
num_filters
=
filters_shape
[
0
]
return
((
batch_size
,
num_filters
,)
+
tuple
(
None
if
i
is
None
or
k
is
None
else
((
i
+
2
*
pad
-
k
)
//
d
+
1
)
for
i
,
k
,
d
,
pad
in
zip
(
inputs_shape
[
2
:],
filters_shape
[
2
:],
subsample
,
border_mode
)))
else
((
i
+
2
*
pad
-
((
k
-
1
)
*
fd
+
1
))
//
d
+
1
)
for
i
,
k
,
d
,
pad
,
fd
in
zip
(
inputs_shape
[
2
:],
filters_shape
[
2
:],
subsample
,
border_mode
,
filter_dilation
)))
def
run_fwd
(
self
,
inputs_shape
,
filters_shape
,
ref
=
conv_corr
,
subsample
=
(
1
,
1
),
verify_grad
=
True
,
mode
=
None
,
border_mode
=
'valid'
,
filter_flip
=
True
,
provide_shape
=
False
,
target_op
=
None
,
check_trace
=
False
):
border_mode
=
'valid'
,
filter_flip
=
True
,
provide_shape
=
False
,
target_op
=
None
,
check_trace
=
False
,
filter_dilation
=
(
1
,
1
)):
inputs_val
=
numpy
.
random
.
random
(
inputs_shape
)
.
astype
(
'float32'
)
filters_val
=
numpy
.
random
.
random
(
filters_shape
)
.
astype
(
'float32'
)
...
...
@@ -120,13 +136,15 @@ class BaseTestConv2d(unittest.TestCase):
c_ref
=
ref
(
inputs
,
filters
,
border_mode
=
border_mode
,
subsample
=
subsample
,
conv_mode
=
conv_mode
)
conv_mode
=
conv_mode
,
filter_dilation
=
filter_dilation
)
c
=
conv
.
conv2d
(
inputs
,
filters
,
border_mode
=
border_mode
,
subsample
=
subsample
,
filter_flip
=
filter_flip
,
input_shape
=
imshp
,
filter_shape
=
kshp
)
filter_shape
=
kshp
,
filter_dilation
=
filter_dilation
)
f_ref
=
theano
.
function
([],
c_ref
,
mode
=
'FAST_RUN'
)
f
=
theano
.
function
([],
c
,
mode
=
mode
)
...
...
@@ -143,15 +161,17 @@ class BaseTestConv2d(unittest.TestCase):
if
verify_grad
:
utt
.
verify_grad
(
conv
.
AbstractConv2d
(
border_mode
=
border_mode
,
imshp
=
imshp
,
kshp
=
kshp
,
subsample
=
subsample
),
subsample
=
subsample
,
filter_dilation
=
filter_dilation
),
[
inputs_val
,
filters_val
],
mode
=
mode
)
def
run_gradweight
(
self
,
inputs_shape
,
filters_shape
,
output_shape
,
ref
=
conv_corr_gw
,
subsample
=
(
1
,
1
),
filter_flip
=
True
,
verify_grad
=
True
,
mode
=
None
,
border_mode
=
'valid'
,
provide_shape
=
False
,
target_op
=
None
,
check_trace
=
False
):
ref
=
conv_corr_gw
,
subsample
=
(
1
,
1
),
filter_flip
=
True
,
verify_grad
=
True
,
mode
=
None
,
border_mode
=
'valid'
,
provide_shape
=
False
,
target_op
=
None
,
check_trace
=
False
,
filter_dilation
=
(
1
,
1
)):
inputs_val
=
numpy
.
random
.
random
(
inputs_shape
)
.
astype
(
'float32'
)
output_val
=
numpy
.
random
.
random
(
output_shape
)
.
astype
(
'float32'
)
...
...
@@ -171,13 +191,15 @@ class BaseTestConv2d(unittest.TestCase):
c
=
conv
.
AbstractConv2d_gradWeights
(
border_mode
=
border_mode
,
filter_flip
=
filter_flip
,
subsample
=
subsample
,
imshp
=
imshp
,
kshp
=
kshp
)
imshp
=
imshp
,
kshp
=
kshp
,
filter_dilation
=
filter_dilation
)
c
=
c
(
inputs
,
output
,
filters_shape
[
-
2
:])
c_ref
=
ref
(
inputs
,
output
,
filters_shape
,
border_mode
=
border_mode
,
subsample
=
subsample
,
conv_mode
=
conv_mode
)
conv_mode
=
conv_mode
,
filter_dilation
=
filter_dilation
)
f
=
theano
.
function
([],
c
,
mode
=
mode
)
f_ref
=
theano
.
function
([],
c_ref
,
mode
=
'FAST_RUN'
)
...
...
@@ -193,7 +215,8 @@ class BaseTestConv2d(unittest.TestCase):
def
abstract_conv2d_gradweight
(
inputs_val
,
output_val
):
conv_op
=
conv
.
AbstractConv2d_gradWeights
(
border_mode
=
border_mode
,
subsample
=
subsample
)
subsample
=
subsample
,
filter_dilation
=
filter_dilation
)
return
conv_op
(
inputs_val
,
output_val
,
filters_shape
[
-
2
:])
if
verify_grad
:
...
...
@@ -204,8 +227,8 @@ class BaseTestConv2d(unittest.TestCase):
def
run_gradinput
(
self
,
inputs_shape
,
filters_shape
,
output_shape
,
ref
=
conv_corr_gi
,
subsample
=
(
1
,
1
),
filter_flip
=
True
,
verify_grad
=
True
,
mode
=
None
,
border_mode
=
'valid'
,
provide_shape
=
False
,
target_op
=
None
,
check_trace
=
False
):
provide_shape
=
False
,
target_op
=
None
,
check_trace
=
False
,
filter_dilation
=
(
1
,
1
)):
output_val
=
numpy
.
random
.
random
(
output_shape
)
.
astype
(
'float32'
)
filters_val
=
numpy
.
random
.
random
(
filters_shape
)
.
astype
(
'float32'
)
output
=
self
.
shared
(
output_val
)
...
...
@@ -224,11 +247,12 @@ class BaseTestConv2d(unittest.TestCase):
c
=
conv
.
AbstractConv2d_gradInputs
(
border_mode
=
border_mode
,
subsample
=
subsample
,
filter_flip
=
filter_flip
,
imshp
=
imshp
,
kshp
=
kshp
)
imshp
=
imshp
,
kshp
=
kshp
,
filter_dilation
=
filter_dilation
)
c
=
c
(
filters
,
output
,
inputs_shape
[
-
2
:])
c_ref
=
ref
(
filters
,
output
,
inputs_shape
,
border_mode
=
border_mode
,
subsample
=
subsample
,
conv_mode
=
conv_mode
)
conv_mode
=
conv_mode
,
filter_dilation
=
filter_dilation
)
f
=
theano
.
function
([],
c
,
mode
=
mode
)
f_ref
=
theano
.
function
([],
c_ref
,
mode
=
'FAST_RUN'
)
...
...
@@ -244,7 +268,8 @@ class BaseTestConv2d(unittest.TestCase):
def
abstract_conv2d_gradinputs
(
filters_val
,
output_val
):
conv_op
=
conv
.
AbstractConv2d_gradInputs
(
border_mode
=
border_mode
,
subsample
=
subsample
)
subsample
=
subsample
,
filter_dilation
=
filter_dilation
)
return
conv_op
(
filters_val
,
output_val
,
inputs_shape
[
-
2
:])
if
verify_grad
:
...
...
@@ -266,15 +291,18 @@ class BaseTestConv2d(unittest.TestCase):
self
.
tcase
(
i
,
f
,
ds
,
db
,
dflip
,
provide_shape
)
except
SkipTest
as
e
:
skipped
=
e
for
s
in
self
.
subsamples
:
for
b
in
self
.
border_modes
:
try
:
self
.
tcase
(
i
,
f
,
s
,
db
,
dflip
,
dprovide_shape
)
except
SkipTest
as
e
:
skipped
=
e
for
fd
in
self
.
filters_dilations
:
for
s
in
self
.
subsamples
:
for
b
in
self
.
border_modes
:
try
:
self
.
tcase
(
i
,
f
,
s
,
db
,
dflip
,
dprovide_shape
,
fd
)
except
SkipTest
as
e
:
skipped
=
e
for
flip
in
self
.
filter_flip
:
try
:
self
.
tcase
(
i
,
f
,
ds
,
db
,
flip
,
dprovide_shape
)
self
.
tcase
(
i
,
f
,
ds
,
db
,
flip
,
dprovide_shape
)
except
SkipTest
as
e
:
skipped
=
e
if
skipped
:
...
...
@@ -287,26 +315,27 @@ class TestCorrConv2d(BaseTestConv2d):
raise
SkipTest
()
return
super
(
TestCorrConv2d
,
self
)
.
setUp
()
def
tcase
(
self
,
i
,
f
,
s
,
b
,
flip
,
provide_shape
):
o
=
self
.
get_output_shape
(
i
,
f
,
s
,
b
)
def
tcase
(
self
,
i
,
f
,
s
,
b
,
flip
,
provide_shape
,
fd
=
(
1
,
1
)
):
o
=
self
.
get_output_shape
(
i
,
f
,
s
,
b
,
fd
)
if
(
not
theano
.
config
.
blas
.
ldflags
or
not
theano
.
config
.
cxx
or
theano
.
config
.
mode
==
"FAST_COMPILE"
):
raise
SkipTest
(
"Need blas to test conv2d"
)
self
.
run_fwd
(
inputs_shape
=
i
,
filters_shape
=
f
,
subsample
=
s
,
verify_grad
=
True
,
provide_shape
=
provide_shape
,
border_mode
=
b
,
filter_flip
=
flip
,
target_op
=
CorrMM
,
check_trace
=
True
)
border_mode
=
b
,
filter_flip
=
flip
,
target_op
=
CorrMM
,
check_trace
=
True
,
filter_dilation
=
fd
)
self
.
run_gradweight
(
inputs_shape
=
i
,
filters_shape
=
f
,
output_shape
=
o
,
subsample
=
s
,
verify_grad
=
True
,
provide_shape
=
provide_shape
,
border_mode
=
b
,
filter_flip
=
flip
,
target_op
=
CorrMM_gradWeights
,
check_trace
=
True
)
check_trace
=
True
,
filter_dilation
=
fd
)
self
.
run_gradinput
(
inputs_shape
=
i
,
filters_shape
=
f
,
output_shape
=
o
,
subsample
=
s
,
verify_grad
=
True
,
provide_shape
=
provide_shape
,
border_mode
=
b
,
filter_flip
=
flip
,
target_op
=
CorrMM_gradInputs
,
check_trace
=
True
)
check_trace
=
True
,
filter_dilation
=
fd
)
class
TestCpuConv2d
(
BaseTestConv2d
):
...
...
@@ -319,9 +348,11 @@ class TestCpuConv2d(BaseTestConv2d):
def
tearDown
(
self
):
theano
.
config
.
on_opt_error
=
self
.
opt_err
def
tcase
(
self
,
i
,
f
,
s
,
b
,
flip
,
provide_shape
):
def
tcase
(
self
,
i
,
f
,
s
,
b
,
flip
,
provide_shape
,
fd
=
(
1
,
1
)):
if
fd
!=
(
1
,
1
):
raise
SkipTest
(
"No dilation implementation for basic cpu ConvOp."
)
mode
=
self
.
mode
o
=
self
.
get_output_shape
(
i
,
f
,
s
,
b
)
o
=
self
.
get_output_shape
(
i
,
f
,
s
,
b
,
fd
)
fwd_OK
=
True
gradweight_OK
=
True
gradinput_OK
=
True
...
...
@@ -347,11 +378,12 @@ class TestCpuConv2d(BaseTestConv2d):
if
fwd_OK
:
if
not
theano
.
config
.
blas
.
ldflags
:
raise
SkipTest
(
"Need blas to test conv2d"
)
self
.
run_fwd
(
inputs_shape
=
i
,
filters_shape
=
f
,
subsample
=
s
,
verify_grad
=
(
gradweight_OK
and
gradinput_OK
),
self
.
run_fwd
(
inputs_shape
=
i
,
filters_shape
=
f
,
subsample
=
s
,
verify_grad
=
(
gradweight_OK
and
gradinput_OK
),
mode
=
mode
,
provide_shape
=
provide_shape
,
border_mode
=
b
,
filter_flip
=
flip
,
target_op
=
ConvOp
,
check_trace
=
True
)
check_trace
=
True
,
filter_dilation
=
fd
)
else
:
self
.
assertRaises
(
AssertionError
,
self
.
run_fwd
,
...
...
@@ -363,7 +395,8 @@ class TestCpuConv2d(BaseTestConv2d):
provide_shape
=
provide_shape
,
border_mode
=
b
,
filter_flip
=
flip
,
check_trace
=
True
)
check_trace
=
True
,
filter_dilation
=
fd
)
if
gradweight_OK
:
if
not
theano
.
config
.
blas
.
ldflags
:
...
...
@@ -374,7 +407,8 @@ class TestCpuConv2d(BaseTestConv2d):
provide_shape
=
provide_shape
,
border_mode
=
b
,
filter_flip
=
flip
,
target_op
=
(
ConvOp
,
ConvGrad3D
),
check_trace
=
True
)
check_trace
=
True
,
filter_dilation
=
fd
)
else
:
self
.
assertRaises
(
AssertionError
,
self
.
run_gradweight
,
...
...
@@ -387,7 +421,8 @@ class TestCpuConv2d(BaseTestConv2d):
provide_shape
=
provide_shape
,
border_mode
=
b
,
filter_flip
=
flip
,
check_trace
=
True
)
check_trace
=
True
,
filter_dilation
=
fd
)
if
gradinput_OK
:
if
not
theano
.
config
.
blas
.
ldflags
:
...
...
@@ -398,7 +433,8 @@ class TestCpuConv2d(BaseTestConv2d):
provide_shape
=
provide_shape
,
border_mode
=
b
,
filter_flip
=
flip
,
target_op
=
(
ConvOp
,
ConvTransp3D
),
check_trace
=
True
)
check_trace
=
True
,
filter_dilation
=
fd
)
else
:
self
.
assertRaises
(
AssertionError
,
self
.
run_gradinput
,
...
...
@@ -411,7 +447,8 @@ class TestCpuConv2d(BaseTestConv2d):
provide_shape
=
provide_shape
,
border_mode
=
b
,
filter_flip
=
flip
,
check_trace
=
True
)
check_trace
=
True
,
filter_dilation
=
fd
)
def
test_constant_shapes
():
...
...
theano/tensor/nnet/tests/test_corr.py
浏览文件 @
a668c6c5
...
...
@@ -32,8 +32,8 @@ class TestCorr2D(utt.InferShapeTester):
def
validate
(
self
,
image_shape
,
filter_shape
,
border_mode
=
'valid'
,
subsample
=
(
1
,
1
),
input
=
None
,
filters
=
None
,
verify_grad
=
True
,
non_contiguous
=
False
):
input
=
None
,
filters
=
None
,
verify_grad
=
True
,
non_contiguous
=
False
,
filter_dilation
=
(
1
,
1
)
):
"""
:param image_shape: The constant shape info passed to corrMM.
:param filter_shape: The constant shape info passed to corrMM.
...
...
@@ -55,7 +55,8 @@ class TestCorr2D(utt.InferShapeTester):
# define theano graph and function
input
.
name
=
'input'
filters
.
name
=
'filters'
rval
=
corr
.
CorrMM
(
border_mode
,
subsample
)(
input
,
filters
)
rval
=
corr
.
CorrMM
(
border_mode
,
subsample
,
filter_dilation
)(
input
,
filters
)
rval
.
name
=
'corr_output'
return
rval
...
...
@@ -86,20 +87,22 @@ class TestCorr2D(utt.InferShapeTester):
orig_image_data
=
image_data
img_shape2d
=
numpy
.
array
(
N_image_shape
[
-
2
:])
fil_shape2d
=
numpy
.
array
(
N_filter_shape
[
-
2
:])
dil_shape2d
=
numpy
.
array
(
filter_dilation
)
dil_fil_shape2d
=
(
fil_shape2d
-
1
)
*
dil_shape2d
+
1
subsample2d
=
numpy
.
array
(
subsample
)
if
border_mode
==
'full'
:
padHW
=
(
fil_shape2d
-
1
)
padHW
=
(
dil_
fil_shape2d
-
1
)
elif
border_mode
==
'valid'
:
padHW
=
numpy
.
array
([
0
,
0
])
elif
border_mode
==
'half'
:
padHW
=
numpy
.
floor
(
fil_shape2d
/
2
)
.
astype
(
'int32'
)
padHW
=
numpy
.
floor
(
dil_
fil_shape2d
/
2
)
.
astype
(
'int32'
)
elif
isinstance
(
border_mode
,
tuple
):
padHW
=
numpy
.
array
(
border_mode
)
elif
isinstance
(
border_mode
,
integer_types
):
padHW
=
numpy
.
array
([
border_mode
,
border_mode
])
else
:
raise
NotImplementedError
(
'Unsupported border_mode {}'
.
format
(
border_mode
))
out_shape2d
=
numpy
.
floor
((
img_shape2d
+
2
*
(
padHW
)
-
fil_shape2d
)
/
subsample2d
)
+
1
out_shape2d
=
numpy
.
floor
((
img_shape2d
+
2
*
(
padHW
)
-
dil_
fil_shape2d
)
/
subsample2d
)
+
1
# avoid numpy deprecation
out_shape2d
=
out_shape2d
.
astype
(
'int32'
)
out_shape
=
(
N_image_shape
[
0
],
N_filter_shape
[
0
])
+
tuple
(
out_shape2d
)
...
...
@@ -124,8 +127,8 @@ class TestCorr2D(utt.InferShapeTester):
for
col
in
range
(
ref_output
.
shape
[
3
]):
icol
=
col
*
subsample
[
1
]
# image col
ref_output
[
bb
,
nn
,
row
,
col
]
+=
(
image2d
[
irow
:
irow
+
N_filter_shape
[
2
],
icol
:
icol
+
N_filter_shape
[
3
]]
*
filter2d
[::
-
1
,
::
-
1
]
irow
:
irow
+
dil_fil_shape2d
[
0
]:
filter_dilation
[
0
],
icol
:
icol
+
dil_fil_shape2d
[
1
]:
filter_dilation
[
1
]]
*
filter2d
[::
-
1
,
::
-
1
]
)
.
sum
()
self
.
assertTrue
(
_allclose
(
theano_output
,
ref_output
))
...
...
@@ -186,6 +189,28 @@ class TestCorr2D(utt.InferShapeTester):
self
.
validate
((
1
,
1
,
6
,
6
),
(
1
,
1
,
3
,
3
),
1
,
subsample
=
(
3
,
3
))
def
test_filter_dilation
(
self
):
"""
Tests correlation where filter dilation != (1,1)
"""
self
.
validate
((
3
,
2
,
7
,
5
),
(
5
,
2
,
2
,
3
),
'valid'
,
filter_dilation
=
(
2
,
2
))
self
.
validate
((
3
,
2
,
14
,
10
),
(
5
,
2
,
2
,
3
),
'valid'
,
filter_dilation
=
(
3
,
1
))
self
.
validate
((
1
,
1
,
14
,
14
),
(
1
,
1
,
3
,
3
),
'valid'
,
filter_dilation
=
(
2
,
3
))
self
.
validate
((
3
,
2
,
7
,
5
),
(
5
,
2
,
2
,
3
),
'full'
,
filter_dilation
=
(
2
,
2
))
self
.
validate
((
3
,
2
,
7
,
5
),
(
5
,
2
,
2
,
3
),
'full'
,
filter_dilation
=
(
3
,
1
))
self
.
validate
((
1
,
1
,
6
,
6
),
(
1
,
1
,
3
,
3
),
'full'
,
filter_dilation
=
(
2
,
3
))
self
.
validate
((
3
,
2
,
7
,
5
),
(
5
,
2
,
2
,
3
),
'half'
,
filter_dilation
=
(
2
,
2
))
self
.
validate
((
3
,
2
,
7
,
5
),
(
5
,
2
,
2
,
3
),
'half'
,
filter_dilation
=
(
3
,
1
))
self
.
validate
((
1
,
1
,
6
,
6
),
(
1
,
1
,
3
,
3
),
'half'
,
filter_dilation
=
(
2
,
3
))
self
.
validate
((
3
,
2
,
7
,
5
),
(
5
,
2
,
2
,
3
),
(
1
,
1
),
filter_dilation
=
(
2
,
2
))
self
.
validate
((
3
,
2
,
7
,
5
),
(
5
,
2
,
2
,
3
),
(
2
,
1
),
filter_dilation
=
(
2
,
1
))
self
.
validate
((
1
,
1
,
6
,
6
),
(
1
,
1
,
3
,
3
),
(
1
,
2
),
filter_dilation
=
(
1
,
2
))
self
.
validate
((
1
,
1
,
6
,
6
),
(
1
,
1
,
3
,
3
),
1
,
subsample
=
(
3
,
3
),
filter_dilation
=
(
2
,
2
))
@attr
(
'slow'
)
def
test_shape_Constant_tensor
(
self
):
"""
...
...
编写
预览
Markdown
格式
0%
重试
或
添加新文件
添加附件
取消
您添加了
0
人
到此讨论。请谨慎行事。
请先完成此评论的编辑!
取消
请
注册
或者
登录
后发表评论