Skip to content
项目
群组
代码片段
帮助
当前项目
正在载入...
登录 / 注册
切换导航面板
P
pytensor
项目
项目
详情
活动
周期分析
仓库
仓库
文件
提交
分支
标签
贡献者
图表
比较
统计图
议题
0
议题
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
CI / CD
CI / CD
流水线
作业
日程
统计图
Wiki
Wiki
代码片段
代码片段
成员
成员
折叠边栏
关闭边栏
活动
图像
聊天
创建新问题
作业
提交
问题看板
Open sidebar
testgroup
pytensor
Commits
c2e14ce1
提交
c2e14ce1
authored
7月 17, 2017
作者:
abergeron
提交者:
GitHub
7月 17, 2017
浏览文件
操作
浏览文件
下载
差异文件
Merge pull request #5991 from affanv14/group
Implement Grouped Convolutions
上级
110729fb
99758e6d
全部展开
隐藏空白字符变更
内嵌
并排
正在显示
17 个修改的文件
包含
487 行增加
和
144 行删除
+487
-144
blas.py
theano/gpuarray/blas.py
+0
-0
corr_gemm.c
theano/gpuarray/corr_gemm.c
+42
-31
dnn.py
theano/gpuarray/dnn.py
+0
-0
dnn_base.c
theano/gpuarray/dnn_base.c
+11
-4
dnn_fwd.c
theano/gpuarray/dnn_fwd.c
+12
-7
dnn_gi.c
theano/gpuarray/dnn_gi.c
+19
-13
dnn_gw.c
theano/gpuarray/dnn_gw.c
+21
-13
opt.py
theano/gpuarray/opt.py
+9
-5
test_dnn.py
theano/gpuarray/tests/test_dnn.py
+35
-0
test_gemmcorr.py
theano/gpuarray/tests/test_gemmcorr.py
+13
-0
__init__.py
theano/tensor/nnet/__init__.py
+13
-4
abstract_conv.py
theano/tensor/nnet/abstract_conv.py
+0
-0
corr.py
theano/tensor/nnet/corr.py
+47
-22
corr_gemm.c
theano/tensor/nnet/corr_gemm.c
+52
-41
opt.py
theano/tensor/nnet/opt.py
+14
-4
test_abstract_conv.py
theano/tensor/nnet/tests/test_abstract_conv.py
+155
-0
test_corr.py
theano/tensor/nnet/tests/test_corr.py
+44
-0
没有找到文件。
theano/gpuarray/blas.py
浏览文件 @
c2e14ce1
差异被折叠。
点击展开。
theano/gpuarray/corr_gemm.c
浏览文件 @
c2e14ce1
...
...
@@ -348,7 +348,8 @@ PyGpuArrayObject* corrMM(PyGpuArrayObject *const bottom,
const
size_t
dilH
=
1
,
const
size_t
dilW
=
1
,
const
size_t
padH
=
0
,
const
size_t
padW
=
0
)
const
size_t
padW
=
0
,
const
size_t
numgroups
=
1
)
{
if
(
PyGpuArray_NDIM
(
bottom
)
!=
4
)
{
...
...
@@ -411,7 +412,7 @@ PyGpuArrayObject* corrMM(PyGpuArrayObject *const bottom,
const
size_t
nFilters
=
PyGpuArray_DIMS
(
weight
)[
0
];
const
size_t
kH
=
PyGpuArray_DIMS
(
weight
)[
2
];
const
size_t
kW
=
PyGpuArray_DIMS
(
weight
)[
3
];
if
(
nChannels
!=
PyGpuArray_DIMS
(
weight
)[
1
]
)
{
if
(
nChannels
!=
(
PyGpuArray_DIMS
(
weight
)[
1
]
*
numgroups
)
)
{
PyErr_SetString
(
PyExc_ValueError
,
"GpuCorrMM images and kernel must have the same stack size
\n
"
);
return
NULL
;
...
...
@@ -469,11 +470,15 @@ PyGpuArrayObject* corrMM(PyGpuArrayObject *const bottom,
}
// Define some useful variables
const
size_t
bottom_stride
=
PyGpuArray_STRIDES
(
bottom
)[
0
]
/
gpuarray_get_elsize
(
bottom
->
ga
.
typecode
);
const
size_t
top_stride
=
PyGpuArray_STRIDES
(
top
)[
0
]
/
gpuarray_get_elsize
(
top
->
ga
.
typecode
);
const
size_t
K_
=
col_dim
[
0
];
const
size_t
batch_bottom_stride
=
PyGpuArray_STRIDES
(
bottom
)[
0
]
/
gpuarray_get_elsize
(
bottom
->
ga
.
typecode
);
const
size_t
batch_top_stride
=
PyGpuArray_STRIDES
(
top
)[
0
]
/
gpuarray_get_elsize
(
top
->
ga
.
typecode
);
const
size_t
group_bottom_stride
=
(
PyGpuArray_STRIDES
(
bottom
)[
1
]
*
nChannels
/
numgroups
)
/
gpuarray_get_elsize
(
bottom
->
ga
.
typecode
);
const
size_t
group_top_stride
=
(
PyGpuArray_STRIDES
(
top
)[
1
]
*
nFilters
/
numgroups
)
/
gpuarray_get_elsize
(
top
->
ga
.
typecode
);
const
size_t
group_weight_stride
=
(
PyGpuArray_STRIDES
(
weight
)[
0
]
*
nFilters
/
numgroups
)
/
gpuarray_get_elsize
(
weight
->
ga
.
typecode
);
const
size_t
K_
=
col_dim
[
0
]
/
numgroups
;
const
size_t
N_
=
col_dim
[
1
];
const
size_t
M_
=
nFilters
;
const
size_t
group_col_stride
=
(
K_
*
N_
);
const
size_t
M_
=
nFilters
/
numgroups
;
PyGpuArrayObject
*
output
;
if
(
direction
==
0
)
{
// forward pass
...
...
@@ -493,21 +498,23 @@ PyGpuArrayObject* corrMM(PyGpuArrayObject *const bottom,
// Iterate over batch
for
(
size_t
n
=
0
;
n
<
batchSize
;
n
++
)
{
// First, im2col
err
=
im2col
(
&
bottom
->
ga
,
n
*
bottom_stride
,
nChannels
,
bottomHeight
,
bottomWidth
,
kH
,
kW
,
dilH
,
dilW
,
padH
,
padW
,
dH
,
dW
,
&
col
->
ga
);
err
=
im2col
(
&
bottom
->
ga
,
n
*
batch_
bottom_stride
,
nChannels
,
bottomHeight
,
bottomWidth
,
kH
,
kW
,
dilH
,
dilW
,
padH
,
padW
,
dH
,
dW
,
&
col
->
ga
);
if
(
err
!=
GA_NO_ERROR
)
{
Py_DECREF
(
col
);
return
NULL
;
}
// Second, gemm
err
=
rgemm
(
cb_fortran
,
cb_no_trans
,
cb_no_trans
,
N_
,
M_
,
K_
,
1
,
&
col
->
ga
,
0
,
N_
,
&
weight
->
ga
,
0
,
K_
,
0
,
&
top
->
ga
,
n
*
top_stride
,
N_
);
for
(
size_t
g
=
0
;
g
<
numgroups
;
g
++
){
err
=
rgemm
(
cb_fortran
,
cb_no_trans
,
cb_no_trans
,
N_
,
M_
,
K_
,
1
,
&
col
->
ga
,
g
*
group_col_stride
,
N_
,
&
weight
->
ga
,
g
*
group_weight_stride
,
K_
,
0
,
&
top
->
ga
,
n
*
batch_top_stride
+
g
*
group_top_stride
,
N_
);
}
if
(
err
!=
GA_NO_ERROR
)
{
PyErr_Format
(
PyExc_RuntimeError
,
"GpuCorrMM forward encountered an error running gemm: %d"
,
err
);
...
...
@@ -533,7 +540,7 @@ PyGpuArrayObject* corrMM(PyGpuArrayObject *const bottom,
// Iterate over batch
for
(
size_t
n
=
0
;
n
<
batchSize
;
n
++
)
{
// First, im2col
err
=
im2col
(
&
bottom
->
ga
,
n
*
bottom_stride
,
err
=
im2col
(
&
bottom
->
ga
,
n
*
b
atch_b
ottom_stride
,
nChannels
,
bottomHeight
,
bottomWidth
,
kH
,
kW
,
dilH
,
dilW
,
padH
,
padW
,
dH
,
dW
,
&
col
->
ga
);
...
...
@@ -545,12 +552,14 @@ PyGpuArrayObject* corrMM(PyGpuArrayObject *const bottom,
// Note that we accumulate into weight. We do so by setting beta = 0
// for the first iteration and beta = 1 for subsequent ones. (This
// is faster than setting weight to all zeros before the loop.)
err
=
rgemm
(
cb_fortran
,
cb_trans
,
cb_no_trans
,
K_
,
M_
,
N_
,
1
,
&
col
->
ga
,
0
,
N_
,
&
top
->
ga
,
n
*
top_stride
,
N_
,
(
n
==
0
)
?
0
:
1
,
&
weight
->
ga
,
0
,
K_
);
for
(
size_t
g
=
0
;
g
<
numgroups
;
g
++
){
err
=
rgemm
(
cb_fortran
,
cb_trans
,
cb_no_trans
,
K_
,
M_
,
N_
,
1
,
&
col
->
ga
,
g
*
group_col_stride
,
N_
,
&
top
->
ga
,
n
*
batch_top_stride
+
g
*
group_top_stride
,
N_
,
(
n
==
0
)
?
0
:
1
,
&
weight
->
ga
,
g
*
group_weight_stride
,
K_
);
}
if
(
err
!=
GA_NO_ERROR
)
{
PyErr_Format
(
PyExc_RuntimeError
,
"GpuCorrMM grad weights encountered an error running gemm: %d"
,
err
);
...
...
@@ -575,13 +584,15 @@ PyGpuArrayObject* corrMM(PyGpuArrayObject *const bottom,
// full convolution: gemm, then col2im
// Iterate over batch
for
(
size_t
n
=
0
;
n
<
batchSize
;
n
++
)
{
// gemm into columns
err
=
rgemm
(
cb_fortran
,
cb_no_trans
,
cb_trans
,
N_
,
K_
,
M_
,
1
,
&
top
->
ga
,
n
*
top_stride
,
N_
,
&
weight
->
ga
,
0
,
K_
,
0
,
&
col
->
ga
,
0
,
N_
);
// gemm into columns
for
(
size_t
g
=
0
;
g
<
numgroups
;
g
++
){
err
=
rgemm
(
cb_fortran
,
cb_no_trans
,
cb_trans
,
N_
,
K_
,
M_
,
1
,
&
top
->
ga
,
n
*
batch_top_stride
+
g
*
group_top_stride
,
N_
,
&
weight
->
ga
,
g
*
group_weight_stride
,
K_
,
0
,
&
col
->
ga
,
g
*
group_col_stride
,
N_
);
}
if
(
err
!=
GA_NO_ERROR
)
{
PyErr_Format
(
PyExc_RuntimeError
,
"GpuCorrMM grad inputs encountered an error running gemm: %d"
,
err
);
...
...
@@ -591,7 +602,7 @@ PyGpuArrayObject* corrMM(PyGpuArrayObject *const bottom,
// col2im back to the data
err
=
col2im
(
&
col
->
ga
,
nChannels
,
bottomHeight
,
bottomWidth
,
kH
,
kW
,
dilH
,
dilW
,
padH
,
padW
,
dH
,
dW
,
&
bottom
->
ga
,
n
*
bottom_stride
);
dH
,
dW
,
&
bottom
->
ga
,
n
*
b
atch_b
ottom_stride
);
if
(
err
!=
GA_NO_ERROR
)
{
Py_DECREF
(
col
);
return
NULL
;
...
...
theano/gpuarray/dnn.py
浏览文件 @
c2e14ce1
差异被折叠。
点击展开。
theano/gpuarray/dnn_base.c
浏览文件 @
c2e14ce1
#section support_code
static
int
c_set_tensor
Nd
(
PyGpuArrayObject
*
var
,
cudnnTensorDescriptor_t
desc
)
{
c_set_tensor
_for_conv
(
PyGpuArrayObject
*
var
,
cudnnTensorDescriptor_t
desc
,
size_t
groups
)
{
cudnnDataType_t
dt
;
size_t
ds
;
switch
(
var
->
ga
.
typecode
)
{
...
...
@@ -42,7 +42,8 @@ c_set_tensorNd(PyGpuArrayObject *var, cudnnTensorDescriptor_t desc) {
strs
[
i
]
=
1
;
dims
[
i
]
=
1
;
}
//only for grouped convolution i.e when groups > 1
dims
[
1
]
=
dims
[
1
]
/
groups
;
cudnnStatus_t
err
=
cudnnSetTensorNdDescriptor
(
desc
,
dt
,
nd
<
3
?
3
:
nd
,
dims
,
strs
);
if
(
err
!=
CUDNN_STATUS_SUCCESS
)
{
...
...
@@ -54,6 +55,11 @@ c_set_tensorNd(PyGpuArrayObject *var, cudnnTensorDescriptor_t desc) {
return
0
;
}
static
int
c_set_tensorNd
(
PyGpuArrayObject
*
var
,
cudnnTensorDescriptor_t
desc
)
{
return
c_set_tensor_for_conv
(
var
,
desc
,
1
);
}
static
int
c_make_tensorNd
(
PyGpuArrayObject
*
var
,
cudnnTensorDescriptor_t
*
desc
)
{
cudnnStatus_t
err
;
err
=
cudnnCreateTensorDescriptor
(
desc
);
...
...
@@ -71,7 +77,7 @@ static int c_make_tensorNd(PyGpuArrayObject *var, cudnnTensorDescriptor_t *desc)
}
static
int
c_set_filter
(
PyGpuArrayObject
*
var
,
cudnnFilterDescriptor_t
desc
)
{
c_set_filter
(
PyGpuArrayObject
*
var
,
cudnnFilterDescriptor_t
desc
,
size_t
groups
)
{
cudnnDataType_t
dt
;
cudnnStatus_t
err
;
...
...
@@ -111,6 +117,7 @@ c_set_filter(PyGpuArrayObject *var, cudnnFilterDescriptor_t desc) {
/* Filters can't be less than 3d so we pad */
for
(
unsigned
int
i
=
nd
;
i
<
3
;
i
++
)
dims
[
i
]
=
1
;
dims
[
0
]
=
dims
[
0
]
/
groups
;
if
(
nd
<
3
)
nd
=
3
;
...
...
@@ -135,7 +142,7 @@ static int c_make_filter(PyGpuArrayObject *var, cudnnFilterDescriptor_t *desc) {
cudnnGetErrorString
(
err
));
return
-
1
;
}
if
(
c_set_filter
(
var
,
*
desc
)
!=
0
)
{
if
(
c_set_filter
(
var
,
*
desc
,
1
)
!=
0
)
{
cudnnDestroyFilterDescriptor
(
*
desc
);
return
-
1
;
}
...
...
theano/gpuarray/dnn_fwd.c
浏览文件 @
c2e14ce1
...
...
@@ -29,7 +29,7 @@ APPLY_SPECIFIC(conv_fwd)(PyGpuArrayObject *input, PyGpuArrayObject *kerns,
float
af
=
alpha
,
bf
=
beta
;
cudnnStatus_t
err
=
CUDNN_STATUS_SUCCESS
;
if
(
PyGpuArray_DIMS
(
input
)[
1
]
!=
PyGpuArray_DIMS
(
kerns
)[
1
])
{
if
(
PyGpuArray_DIMS
(
input
)[
1
]
!=
PyGpuArray_DIMS
(
kerns
)[
1
]
*
params
->
num_groups
)
{
PyErr_SetString
(
PyExc_ValueError
,
"images and kernel must have the same stack size"
);
return
1
;
...
...
@@ -72,12 +72,15 @@ APPLY_SPECIFIC(conv_fwd)(PyGpuArrayObject *input, PyGpuArrayObject *kerns,
return
0
;
}
if
(
c_set_tensor
Nd
(
input
,
APPLY_SPECIFIC
(
input
)
)
==
-
1
)
if
(
c_set_tensor
_for_conv
(
input
,
APPLY_SPECIFIC
(
input
),
params
->
num_groups
)
==
-
1
)
return
1
;
if
(
c_set_filter
(
kerns
,
APPLY_SPECIFIC
(
kerns
))
==
-
1
)
if
(
c_set_filter
(
kerns
,
APPLY_SPECIFIC
(
kerns
)
,
params
->
num_groups
)
==
-
1
)
return
1
;
if
(
c_set_tensor
Nd
(
*
output
,
APPLY_SPECIFIC
(
output
)
)
==
-
1
)
if
(
c_set_tensor
_for_conv
(
*
output
,
APPLY_SPECIFIC
(
output
),
params
->
num_groups
)
==
-
1
)
return
1
;
size_t
input_offset
=
PyGpuArray_STRIDE
(
input
,
0
)
/
params
->
num_groups
;
size_t
kern_offset
=
PyGpuArray_STRIDE
(
kerns
,
0
)
*
PyGpuArray_DIM
(
kerns
,
0
)
/
params
->
num_groups
;
size_t
output_offset
=
PyGpuArray_STRIDE
(
*
output
,
0
)
/
params
->
num_groups
;
cudnnConvolutionFwdAlgo_t
algo
=
params
->
conv_algo
;
...
...
@@ -281,15 +284,17 @@ APPLY_SPECIFIC(conv_fwd)(PyGpuArrayObject *input, PyGpuArrayObject *kerns,
cuda_wait
(
kerns
->
ga
.
data
,
GPUARRAY_CUDA_WAIT_READ
);
cuda_wait
((
*
output
)
->
ga
.
data
,
GPUARRAY_CUDA_WAIT_WRITE
);
for
(
int
g
=
0
;
g
<
params
->
num_groups
;
g
++
)
{
err
=
cudnnConvolutionForward
(
params
->
handle
,
alpha_p
,
APPLY_SPECIFIC
(
input
),
PyGpuArray_DEV_DATA
(
input
),
APPLY_SPECIFIC
(
kerns
),
PyGpuArray_DEV_DATA
(
kerns
),
APPLY_SPECIFIC
(
input
),
PyGpuArray_DEV_DATA
(
input
)
+
input_offset
*
g
,
APPLY_SPECIFIC
(
kerns
),
PyGpuArray_DEV_DATA
(
kerns
)
+
kern_offset
*
g
,
desc
,
algo
,
worksize
==
0
?
NULL
:
*
(
void
**
)
workspace
,
worksize
,
beta_p
,
APPLY_SPECIFIC
(
output
),
PyGpuArray_DEV_DATA
(
*
output
));
APPLY_SPECIFIC
(
output
),
PyGpuArray_DEV_DATA
(
*
output
)
+
output_offset
*
g
);
}
if
(
worksize
!=
0
)
gpudata_release
(
workspace
);
...
...
theano/gpuarray/dnn_gi.c
浏览文件 @
c2e14ce1
...
...
@@ -28,7 +28,7 @@ APPLY_SPECIFIC(conv_gi)(PyGpuArrayObject *kerns, PyGpuArrayObject *output,
float
af
=
alpha
,
bf
=
beta
;
cudnnStatus_t
err
=
CUDNN_STATUS_SUCCESS
;
if
(
PyGpuArray_DIMS
(
im
)[
1
]
!=
PyGpuArray_DIMS
(
kerns
)[
1
])
{
if
(
PyGpuArray_DIMS
(
im
)[
1
]
!=
PyGpuArray_DIMS
(
kerns
)[
1
]
*
params
->
num_groups
)
{
PyErr_SetString
(
PyExc_ValueError
,
"images and kernel must have the same "
"stack size"
);
return
1
;
...
...
@@ -71,12 +71,15 @@ APPLY_SPECIFIC(conv_gi)(PyGpuArrayObject *kerns, PyGpuArrayObject *output,
return
0
;
}
if
(
c_set_tensor
Nd
(
output
,
APPLY_SPECIFIC
(
output
)
)
==
-
1
)
if
(
c_set_tensor
_for_conv
(
output
,
APPLY_SPECIFIC
(
output
),
params
->
num_groups
)
==
-
1
)
return
1
;
if
(
c_set_filter
(
kerns
,
APPLY_SPECIFIC
(
kerns
))
==
-
1
)
if
(
c_set_filter
(
kerns
,
APPLY_SPECIFIC
(
kerns
)
,
params
->
num_groups
)
==
-
1
)
return
1
;
if
(
c_set_tensor
Nd
(
*
input
,
APPLY_SPECIFIC
(
input
)
)
==
-
1
)
if
(
c_set_tensor
_for_conv
(
*
input
,
APPLY_SPECIFIC
(
input
),
params
->
num_groups
)
==
-
1
)
return
1
;
size_t
input_offset
=
PyGpuArray_STRIDE
(
*
input
,
0
)
/
params
->
num_groups
;
size_t
kern_offset
=
PyGpuArray_STRIDE
(
kerns
,
0
)
*
PyGpuArray_DIM
(
kerns
,
0
)
/
params
->
num_groups
;
size_t
output_offset
=
PyGpuArray_STRIDE
(
output
,
0
)
/
params
->
num_groups
;
cudnnConvolutionBwdDataAlgo_t
algo
=
params
->
conv_algo
;
...
...
@@ -93,7 +96,7 @@ APPLY_SPECIFIC(conv_gi)(PyGpuArrayObject *kerns, PyGpuArrayObject *output,
}
if
(
PyGpuArray_NDIM
(
im
)
==
4
)
{
if
((
PyGpuArray_DIMS
(
output
)[
0
]
!=
expected_output_dims
[
0
])
||
(
PyGpuArray_DIMS
(
output
)[
1
]
!=
expected_output_dims
[
1
])
||
(
PyGpuArray_DIMS
(
output
)[
1
]
/
params
->
num_groups
!=
expected_output_dims
[
1
])
||
(
PyGpuArray_DIMS
(
output
)[
2
]
!=
expected_output_dims
[
2
])
||
(
PyGpuArray_DIMS
(
output
)[
3
]
!=
expected_output_dims
[
3
]))
{
PyErr_Format
(
PyExc_ValueError
,
"impossible convolution output dim: expected %ldx%ldx%ldx%ld"
...
...
@@ -286,14 +289,17 @@ APPLY_SPECIFIC(conv_gi)(PyGpuArrayObject *kerns, PyGpuArrayObject *output,
cuda_wait
(
output
->
ga
.
data
,
GPUARRAY_CUDA_WAIT_READ
);
cuda_wait
((
*
input
)
->
ga
.
data
,
GPUARRAY_CUDA_WAIT_WRITE
);
err
=
cudnnConvolutionBackwardData
(
params
->
handle
,
alpha_p
,
APPLY_SPECIFIC
(
kerns
),
PyGpuArray_DEV_DATA
(
kerns
),
APPLY_SPECIFIC
(
output
),
PyGpuArray_DEV_DATA
(
output
),
desc
,
algo
,
worksize
==
0
?
NULL
:
*
(
void
**
)
workspace
,
worksize
,
beta_p
,
APPLY_SPECIFIC
(
input
),
PyGpuArray_DEV_DATA
(
*
input
));
for
(
int
g
=
0
;
g
<
params
->
num_groups
;
g
++
)
{
err
=
cudnnConvolutionBackwardData
(
params
->
handle
,
alpha_p
,
APPLY_SPECIFIC
(
kerns
),
PyGpuArray_DEV_DATA
(
kerns
)
+
kern_offset
*
g
,
APPLY_SPECIFIC
(
output
),
PyGpuArray_DEV_DATA
(
output
)
+
output_offset
*
g
,
desc
,
algo
,
worksize
==
0
?
NULL
:
*
(
void
**
)
workspace
,
worksize
,
beta_p
,
APPLY_SPECIFIC
(
input
),
PyGpuArray_DEV_DATA
(
*
input
)
+
input_offset
*
g
);
}
if
(
worksize
!=
0
)
gpudata_release
(
workspace
);
...
...
theano/gpuarray/dnn_gw.c
浏览文件 @
c2e14ce1
...
...
@@ -28,7 +28,7 @@ APPLY_SPECIFIC(conv_gw)(PyGpuArrayObject *input, PyGpuArrayObject *output,
float
af
=
alpha
,
bf
=
beta
;
cudnnStatus_t
err
=
CUDNN_STATUS_SUCCESS
;
if
(
PyGpuArray_DIMS
(
input
)[
1
]
!=
PyGpuArray_DIMS
(
km
)[
1
])
{
if
(
PyGpuArray_DIMS
(
input
)[
1
]
!=
PyGpuArray_DIMS
(
km
)[
1
]
*
params
->
num_groups
)
{
PyErr_SetString
(
PyExc_ValueError
,
"GpuDnnConv images and kernel must have the same stack size"
);
return
1
;
...
...
@@ -71,13 +71,17 @@ APPLY_SPECIFIC(conv_gw)(PyGpuArrayObject *input, PyGpuArrayObject *output,
return
0
;
}
if
(
c_set_tensor
Nd
(
input
,
APPLY_SPECIFIC
(
input
)
)
==
-
1
)
if
(
c_set_tensor
_for_conv
(
input
,
APPLY_SPECIFIC
(
input
),
params
->
num_groups
)
==
-
1
)
return
1
;
if
(
c_set_tensor
Nd
(
output
,
APPLY_SPECIFIC
(
output
)
)
==
-
1
)
if
(
c_set_tensor
_for_conv
(
output
,
APPLY_SPECIFIC
(
output
),
params
->
num_groups
)
==
-
1
)
return
1
;
if
(
c_set_filter
(
*
kerns
,
APPLY_SPECIFIC
(
kerns
))
==
-
1
)
if
(
c_set_filter
(
*
kerns
,
APPLY_SPECIFIC
(
kerns
)
,
params
->
num_groups
)
==
-
1
)
return
1
;
size_t
input_offset
=
PyGpuArray_STRIDE
(
input
,
0
)
/
params
->
num_groups
;
size_t
kern_offset
=
PyGpuArray_STRIDE
(
*
kerns
,
0
)
*
PyGpuArray_DIM
(
*
kerns
,
0
)
/
params
->
num_groups
;
size_t
output_offset
=
PyGpuArray_STRIDE
(
output
,
0
)
/
params
->
num_groups
;
cudnnConvolutionBwdFilterAlgo_t
algo
=
params
->
conv_algo
;
cuda_enter
(
c
->
ctx
);
...
...
@@ -93,7 +97,7 @@ APPLY_SPECIFIC(conv_gw)(PyGpuArrayObject *input, PyGpuArrayObject *output,
}
if
(
PyGpuArray_NDIM
(
input
)
==
4
)
{
if
((
PyGpuArray_DIMS
(
output
)[
0
]
!=
expected_output_dims
[
0
])
||
(
PyGpuArray_DIMS
(
output
)[
1
]
!=
expected_output_dims
[
1
])
||
(
PyGpuArray_DIMS
(
output
)[
1
]
/
params
->
num_groups
!=
expected_output_dims
[
1
])
||
(
PyGpuArray_DIMS
(
output
)[
2
]
!=
expected_output_dims
[
2
])
||
(
PyGpuArray_DIMS
(
output
)[
3
]
!=
expected_output_dims
[
3
]))
{
PyErr_Format
(
PyExc_ValueError
,
"impossible convolution output dim: expected %ldx%ldx%dx%ld"
...
...
@@ -273,14 +277,18 @@ APPLY_SPECIFIC(conv_gw)(PyGpuArrayObject *input, PyGpuArrayObject *output,
cuda_wait
(
output
->
ga
.
data
,
GPUARRAY_CUDA_WAIT_READ
);
cuda_wait
((
*
kerns
)
->
ga
.
data
,
GPUARRAY_CUDA_WAIT_WRITE
);
err
=
cudnnConvolutionBackwardFilter
(
params
->
handle
,
alpha_p
,
APPLY_SPECIFIC
(
input
),
PyGpuArray_DEV_DATA
(
input
),
APPLY_SPECIFIC
(
output
),
PyGpuArray_DEV_DATA
(
output
),
desc
,
algo
,
worksize
==
0
?
NULL
:
*
(
void
**
)
workspace
,
worksize
,
beta_p
,
APPLY_SPECIFIC
(
kerns
),
PyGpuArray_DEV_DATA
(
*
kerns
));
for
(
int
g
=
0
;
g
<
params
->
num_groups
;
g
++
)
{
err
=
cudnnConvolutionBackwardFilter
(
params
->
handle
,
alpha_p
,
APPLY_SPECIFIC
(
input
),
PyGpuArray_DEV_DATA
(
input
)
+
input_offset
*
g
,
APPLY_SPECIFIC
(
output
),
PyGpuArray_DEV_DATA
(
output
)
+
output_offset
*
g
,
desc
,
algo
,
worksize
==
0
?
NULL
:
*
(
void
**
)
workspace
,
worksize
,
beta_p
,
APPLY_SPECIFIC
(
kerns
),
PyGpuArray_DEV_DATA
(
*
kerns
)
+
kern_offset
*
g
);
}
if
(
worksize
!=
0
)
gpudata_release
(
workspace
);
...
...
theano/gpuarray/opt.py
浏览文件 @
c2e14ce1
...
...
@@ -1533,7 +1533,8 @@ def local_abstractconv_gemm(node):
border_mode
=
node
.
op
.
border_mode
subsample
=
node
.
op
.
subsample
filter_dilation
=
node
.
op
.
filter_dilation
if
((
border_mode
==
'full'
)
and
(
subsample
==
(
1
,
1
))):
if
((
border_mode
==
'full'
)
and
(
subsample
==
(
1
,
1
))
and
node
.
op
.
num_groups
==
1
):
if
not
node
.
op
.
filter_flip
:
kern
=
kern
[:,
:,
::
-
1
,
::
-
1
]
# need to dimshuffle the kernel for full convolution
...
...
@@ -1550,8 +1551,9 @@ def local_abstractconv_gemm(node):
# By default use GpuCorrMM
rval
=
GpuCorrMM
(
border_mode
,
subsample
,
filter_dilation
)(
gpu_contiguous
(
img
),
gpu_contiguous
(
kern
))
filter_dilation
,
node
.
op
.
num_groups
)(
gpu_contiguous
(
img
),
gpu_contiguous
(
kern
))
# call GpuCorrMM_gradWeights if good
# (the latter is faster if batchsize * kernelHeight * kernelWidth
...
...
@@ -1669,7 +1671,8 @@ def local_abstractconv_gradweights_gemm(node):
rval
=
GpuCorrMM_gradWeights
(
border_mode
=
node
.
op
.
border_mode
,
subsample
=
node
.
op
.
subsample
,
filter_dilation
=
node
.
op
.
filter_dilation
)(
filter_dilation
=
node
.
op
.
filter_dilation
,
num_groups
=
node
.
op
.
num_groups
)(
gpu_contiguous
(
img
),
gpu_contiguous
(
topgrad
),
shape
)
if
node
.
op
.
filter_flip
:
rval
=
rval
[:,
:,
::
-
1
,
::
-
1
]
...
...
@@ -1713,7 +1716,8 @@ def local_abstractconv_gradinputs_gemm(node):
rval
=
GpuCorrMM_gradInputs
(
border_mode
=
node
.
op
.
border_mode
,
subsample
=
node
.
op
.
subsample
,
filter_dilation
=
node
.
op
.
filter_dilation
)(
filter_dilation
=
node
.
op
.
filter_dilation
,
num_groups
=
node
.
op
.
num_groups
)(
gpu_contiguous
(
kern
),
gpu_contiguous
(
topgrad
),
shape
)
return
[
rval
]
...
...
theano/gpuarray/tests/test_dnn.py
浏览文件 @
c2e14ce1
...
...
@@ -25,6 +25,7 @@ from . import test_nnet
from
.rnn_support
import
Model
,
GRU
,
LSTM
,
WrapperLayer
from
theano.configdefaults
import
SUPPORTED_DNN_CONV_ALGO_FWD
from
theano.tensor.nnet.tests.test_abstract_conv
import
Grouped_conv_noOptim
try
:
import
pygpu
...
...
@@ -2263,3 +2264,37 @@ def test_dnn_rnn_lstm_grad_c():
(
i
+
1
)
*
len
(
cudnn_grads_layer
)]
for
j
,
g
in
enumerate
(
cudnn_grads_layer
):
utt
.
assert_allclose
(
ref_grads_layer
[
j
],
g
)
def
dconv2d
(
border_mode
,
subsample
,
filter_dilation
,
num_groups
):
def
dconv
(
img
,
kern
):
return
dnn
.
dnn_conv
(
img
,
kern
,
border_mode
=
border_mode
,
subsample
=
subsample
,
dilation
=
filter_dilation
,
conv_mode
=
'conv'
,
direction_hint
=
'forward'
,
workmem
=
None
,
algo
=
None
,
precision
=
None
,
num_groups
=
num_groups
)
return
dconv
def
dconv2dw
(
border_mode
,
subsample
,
filter_dilation
,
num_groups
):
def
dconvw
(
img
,
topgrad
,
kshp
):
return
dnn
.
dnn_gradweight
(
img
,
topgrad
,
kshp
,
border_mode
=
border_mode
,
subsample
=
subsample
,
dilation
=
filter_dilation
,
conv_mode
=
'conv'
,
precision
=
None
,
algo
=
None
,
num_groups
=
num_groups
)
return
dconvw
def
dconv2di
(
border_mode
,
subsample
,
filter_dilation
,
num_groups
):
def
dconvi
(
kern
,
topgrad
,
imshp
):
return
dnn
.
dnn_gradinput
(
kern
,
topgrad
,
imshp
,
border_mode
=
border_mode
,
subsample
=
subsample
,
dilation
=
filter_dilation
,
conv_mode
=
'conv'
,
precision
=
None
,
algo
=
None
,
num_groups
=
num_groups
)
return
dconvi
class
Cudnn_grouped_conv
(
Grouped_conv_noOptim
):
mode
=
mode_with_gpu
conv2d
=
staticmethod
(
dconv2d
)
conv2d_gradw
=
staticmethod
(
dconv2dw
)
conv2d_gradi
=
staticmethod
(
dconv2di
)
conv2d_op
=
dnn
.
GpuDnnConv
conv2d_gradw_op
=
dnn
.
GpuDnnConvGradW
conv2d_gradi_op
=
dnn
.
GpuDnnConvGradI
flip_filter
=
False
is_dnn
=
True
theano/gpuarray/tests/test_gemmcorr.py
浏览文件 @
c2e14ce1
...
...
@@ -11,6 +11,7 @@ from theano.tensor.nnet.corr import CorrMM, CorrMM_gradWeights, CorrMM_gradInput
from
..type
import
gpuarray_shared_constructor
from
..blas
import
GpuCorrMM
,
GpuCorrMM_gradWeights
,
GpuCorrMM_gradInputs
from
.config
import
mode_with_gpu
,
mode_without_gpu
,
ref_cast
from
theano.tensor.nnet.tests.test_abstract_conv
import
Grouped_conv_noOptim
class
TestCorrMM
(
unittest
.
TestCase
):
...
...
@@ -219,3 +220,15 @@ class TestCorrMM(unittest.TestCase):
verify_grad
=
False
)
self
.
run_gradinput
(
inputs_shape
=
(
1
,
1024
,
3
,
1
),
filters_shape
=
(
1
,
1
,
1
,
1024
))
class
TestGroupGpuCorr2d
(
Grouped_conv_noOptim
):
mode
=
theano
.
compile
.
get_mode
(
"FAST_RUN"
)
conv2d
=
GpuCorrMM
conv2d_gradw
=
GpuCorrMM_gradWeights
conv2d_gradi
=
GpuCorrMM_gradInputs
conv2d_op
=
GpuCorrMM
conv2d_gradw_op
=
GpuCorrMM_gradWeights
conv2d_gradi_op
=
GpuCorrMM_gradInputs
flip_filter
=
True
is_dnn
=
False
theano/tensor/nnet/__init__.py
浏览文件 @
c2e14ce1
...
...
@@ -39,7 +39,7 @@ from .abstract_conv import conv3d
def
conv2d
(
input
,
filters
,
input_shape
=
None
,
filter_shape
=
None
,
border_mode
=
'valid'
,
subsample
=
(
1
,
1
),
filter_flip
=
True
,
image_shape
=
None
,
filter_dilation
=
(
1
,
1
),
**
kwargs
):
image_shape
=
None
,
filter_dilation
=
(
1
,
1
),
num_groups
=
1
,
**
kwargs
):
"""
This function will build the symbolic graph for convolving a mini-batch of a
stack of 2D inputs with a set of 2D filters. The implementation is modelled
...
...
@@ -103,6 +103,10 @@ def conv2d(input, filters, input_shape=None, filter_shape=None,
Factor by which to subsample (stride) the input.
Also called dilation elsewhere.
num_groups : int
Divides the image, kernel and output tensors into num_groups
separate groups. Each which carry out convolutions separately
kwargs: Any other keyword arguments are accepted for backwards
compatibility, but will be ignored.
...
...
@@ -152,12 +156,12 @@ def conv2d(input, filters, input_shape=None, filter_shape=None,
return
abstract_conv2d
(
input
,
filters
,
input_shape
,
filter_shape
,
border_mode
,
subsample
,
filter_flip
,
filter_dilation
)
filter_dilation
,
num_groups
)
def
conv2d_transpose
(
input
,
filters
,
output_shape
,
filter_shape
=
None
,
border_mode
=
'valid'
,
input_dilation
=
(
1
,
1
),
filter_flip
=
True
,
filter_dilation
=
(
1
,
1
)):
filter_flip
=
True
,
filter_dilation
=
(
1
,
1
)
,
num_groups
=
1
):
"""
This function will build the symbolic graph for applying a transposed
convolution over a mini-batch of a stack of 2D inputs with a set of 2D
...
...
@@ -209,6 +213,10 @@ def conv2d_transpose(input, filters, output_shape, filter_shape=None,
Factor by which to subsample (stride) the input.
Also called dilation elsewhere.
num_groups : int
Divides the image, kernel and output tensors into num_groups
separate groups. Each which carry out convolutions separately
Returns
-------
Symbolic 4D tensor
...
...
@@ -235,4 +243,5 @@ def conv2d_transpose(input, filters, output_shape, filter_shape=None,
border_mode
=
border_mode
,
subsample
=
input_dilation
,
filter_flip
=
filter_flip
,
filter_dilation
=
filter_dilation
)
filter_dilation
=
filter_dilation
,
num_groups
=
num_groups
)
theano/tensor/nnet/abstract_conv.py
浏览文件 @
c2e14ce1
差异被折叠。
点击展开。
theano/tensor/nnet/corr.py
浏览文件 @
c2e14ce1
...
...
@@ -40,9 +40,11 @@ class BaseCorrMM(gof.OpenMPOp):
Perform subsampling of the output (default: (1, 1)).
filter_dilation
Perform dilated correlation (default: (1,1))
num_groups
Perform grouped convolutions (default: 1)
"""
check_broadcast
=
False
__props__
=
(
'border_mode'
,
'subsample'
,
'filter_dilation'
)
__props__
=
(
'border_mode'
,
'subsample'
,
'filter_dilation'
,
'num_groups'
)
_direction
=
None
...
...
@@ -51,10 +53,11 @@ class BaseCorrMM(gof.OpenMPOp):
(
'DIRECTION_BACKPROP_INPUTS'
,
'backprop inputs'
)),
# 2
dH
=
int64
,
dW
=
int64
,
dilH
=
int64
,
dilW
=
int64
,
padH
=
int64
,
padW
=
int64
)
padH
=
int64
,
padW
=
int64
,
num_groups
=
int64
)
def
__init__
(
self
,
border_mode
=
"valid"
,
subsample
=
(
1
,
1
),
filter_dilation
=
(
1
,
1
),
openmp
=
None
):
filter_dilation
=
(
1
,
1
),
num_groups
=
1
,
openmp
=
None
):
super
(
BaseCorrMM
,
self
)
.
__init__
(
openmp
=
openmp
)
if
isinstance
(
border_mode
,
integer_types
):
if
border_mode
<
0
:
...
...
@@ -97,6 +100,9 @@ class BaseCorrMM(gof.OpenMPOp):
if
self
.
_direction
not
in
[
"forward"
,
"backprop weights"
,
"backprop inputs"
]:
raise
ValueError
(
"_direction must be one of 'forward', "
"'backprop weights', 'backprop inputs'"
)
if
num_groups
<
1
:
raise
ValueError
(
"Number of groups should be greater than 0"
)
self
.
num_groups
=
num_groups
@property
def
pad
(
self
):
...
...
@@ -124,11 +130,12 @@ class BaseCorrMM(gof.OpenMPOp):
padW
=
property
(
lambda
self
:
self
.
pad
[
1
])
def
__str__
(
self
):
return
'
%
s{
%
s,
%
s,
%
s}'
%
(
return
'
%
s{
%
s,
%
s,
%
s
,
%
s
}'
%
(
self
.
__class__
.
__name__
,
self
.
border_mode
,
str
(
self
.
subsample
),
str
(
self
.
filter_dilation
))
str
(
self
.
filter_dilation
),
str
(
self
.
num_groups
))
@staticmethod
def
as_common_dtype
(
in1
,
in2
):
...
...
@@ -138,6 +145,11 @@ class BaseCorrMM(gof.OpenMPOp):
dtype
=
theano
.
scalar
.
upcast
(
in1
.
dtype
,
in2
.
dtype
)
return
in1
.
astype
(
dtype
),
in2
.
astype
(
dtype
)
def
__setstate__
(
self
,
d
):
self
.
__dict__
.
update
(
d
)
if
not
hasattr
(
self
,
'num_groups'
):
self
.
num_groups
=
1
def
c_support_code
(
self
):
ccodes
=
blas_headers
.
blas_header_text
()
if
self
.
blas_type
==
'openblas'
:
...
...
@@ -167,7 +179,7 @@ class BaseCorrMM(gof.OpenMPOp):
def
c_code_cache_version
(
self
):
# raise this whenever modifying any of the support_code_files
return
(
6
,
self
.
openmp
,
blas_header_version
())
return
(
7
,
self
.
openmp
,
blas_header_version
())
def
c_support_code_apply
(
self
,
node
,
nodename
):
# REMEMBER TO RAISE c_code_cache_version when changing any of
...
...
@@ -274,6 +286,7 @@ class BaseCorrMM(gof.OpenMPOp):
int dilW =
%(params)
s->dilW;
int padH =
%(params)
s->padH;
int padW =
%(params)
s->padW;
int numgroups =
%(params)
s->num_groups;
PyArrayObject * bottom =
%(bottom)
s;
PyArrayObject * weights =
%(weights)
s;
...
...
@@ -386,7 +399,7 @@ class BaseCorrMM(gof.OpenMPOp):
// output is weights: (num_filters, num_channels, height, width)
// height and width: weights = (bottom + 2*pad - (top - 1) * sample - 1) / dil + 1
out_dim[0] = (npy_intp)PyArray_DIMS(top)[1];
out_dim[1] = (npy_intp)PyArray_DIMS(bottom)[1];
out_dim[1] = (npy_intp)PyArray_DIMS(bottom)[1]
/ numgroups
;
out_dim[2] = (npy_intp)kH; // already inferred further above
out_dim[3] = (npy_intp)kW; // how convenient
if (out_dim[0] < 0 || out_dim[1] < 0 || out_dim[2] <= 0 || out_dim[3] <= 0)
...
...
@@ -409,7 +422,7 @@ class BaseCorrMM(gof.OpenMPOp):
// output is bottom: (batchsize, num_channels, height, width)
// height and width: bottom = (top - 1) * sample + (weights-1)*dil + 1 - 2*pad
out_dim[0] = (npy_intp)PyArray_DIMS(top)[0];
out_dim[1] = (npy_intp)PyArray_DIMS(weights)[1];
out_dim[1] = (npy_intp)PyArray_DIMS(weights)[1]
* numgroups
;
out_dim[2] = (npy_intp)((
%(height)
s != -1) ?
%(height)
s : (PyArray_DIMS(top)[2] - 1) * dH + (PyArray_DIMS(weights)[2]-1)*dilH + 1 - 2*padH);
out_dim[3] = (npy_intp)((
%(width)
s != -1) ?
%(width)
s : (PyArray_DIMS(top)[3] - 1) * dW + (PyArray_DIMS(weights)[3]-1)*dilW + 1 - 2*padW);
if (out_dim[0] < 0 || out_dim[1] < 0 || out_dim[2] <= 0 || out_dim[3] <= 0)
...
...
@@ -465,7 +478,7 @@ class BaseCorrMM(gof.OpenMPOp):
}
// Call corrMM code
out2 = corrMM(
%(bottom)
s,
%(weights)
s,
%(top)
s, direction, dH, dW, dilH, dilW, padH, padW);
out2 = corrMM(
%(bottom)
s,
%(weights)
s,
%(top)
s, direction, dH, dW, dilH, dilW, padH, padW
, numgroups
);
if (out2==NULL){
%(fail)
s
}
...
...
@@ -541,12 +554,14 @@ class CorrMM(BaseCorrMM):
top
,
=
grads
d_bottom
=
CorrMM_gradInputs
(
self
.
border_mode
,
self
.
subsample
,
self
.
filter_dilation
)(
weights
,
top
,
bottom
.
shape
[
-
2
:])
self
.
filter_dilation
,
self
.
num_groups
)(
weights
,
top
,
bottom
.
shape
[
-
2
:])
d_weights
=
CorrMM_gradWeights
(
self
.
border_mode
,
self
.
subsample
,
self
.
filter_dilation
)(
bottom
,
top
,
weights
.
shape
[
-
2
:])
self
.
filter_dilation
,
self
.
num_groups
)(
bottom
,
top
,
weights
.
shape
[
-
2
:])
return
d_bottom
,
d_weights
...
...
@@ -600,6 +615,7 @@ class CorrMM_gradWeights(BaseCorrMM):
imshp
=
input_shape
[
0
]
topshp
=
input_shape
[
1
]
ssize
,
imshp
=
imshp
[
1
],
list
(
imshp
[
2
:])
ssize
=
ssize
//
self
.
num_groups
nkern
,
topshp
=
topshp
[
1
],
list
(
topshp
[
2
:])
height_width
=
node
.
inputs
[
-
2
:]
if
((
dH
!=
1
)
or
(
padH
==
-
1
)):
...
...
@@ -632,11 +648,13 @@ class CorrMM_gradWeights(BaseCorrMM):
weights
,
=
grads
d_bottom
=
CorrMM_gradInputs
(
self
.
border_mode
,
self
.
subsample
,
self
.
filter_dilation
)(
weights
,
top
,
bottom
.
shape
[
-
2
:])
self
.
filter_dilation
,
self
.
num_groups
)(
weights
,
top
,
bottom
.
shape
[
-
2
:])
d_top
=
CorrMM
(
self
.
border_mode
,
self
.
subsample
,
self
.
filter_dilation
)(
bottom
,
weights
)
self
.
filter_dilation
,
self
.
num_groups
)(
bottom
,
weights
)
d_height_width
=
((
theano
.
gradient
.
DisconnectedType
()(),)
*
2
if
len
(
inp
)
==
4
else
())
return
(
d_bottom
,
d_top
)
+
d_height_width
...
...
@@ -678,8 +696,12 @@ class CorrMM_gradInputs(BaseCorrMM):
height_width
=
[
as_tensor_variable
(
shape
[
0
])
.
astype
(
'int64'
),
as_tensor_variable
(
shape
[
1
])
.
astype
(
'int64'
)]
broadcastable
=
[
topgrad
.
type
.
broadcastable
[
0
],
kern
.
type
.
broadcastable
[
1
],
False
,
False
]
if
self
.
num_groups
>
1
:
broadcastable
=
[
topgrad
.
type
.
broadcastable
[
0
],
False
,
False
,
False
]
else
:
broadcastable
=
[
topgrad
.
type
.
broadcastable
[
0
],
kern
.
type
.
broadcastable
[
1
],
False
,
False
]
dtype
=
kern
.
type
.
dtype
return
Apply
(
self
,
[
kern
,
topgrad
]
+
height_width
,
[
TensorType
(
dtype
,
broadcastable
)()])
...
...
@@ -698,6 +720,7 @@ class CorrMM_gradInputs(BaseCorrMM):
kshp
=
input_shape
[
0
]
topshp
=
input_shape
[
1
]
ssize
,
kshp
=
kshp
[
1
],
list
(
kshp
[
2
:])
ssize
=
ssize
*
self
.
num_groups
bsize
,
topshp
=
topshp
[
0
],
list
(
topshp
[
2
:])
height_width
=
node
.
inputs
[
-
2
:]
if
padH
==
-
1
:
...
...
@@ -738,12 +761,14 @@ class CorrMM_gradInputs(BaseCorrMM):
bottom
,
=
grads
d_weights
=
CorrMM_gradWeights
(
self
.
border_mode
,
self
.
subsample
,
self
.
filter_dilation
)(
bottom
,
top
,
weights
.
shape
[
-
2
:])
self
.
filter_dilation
,
self
.
num_groups
)(
bottom
,
top
,
weights
.
shape
[
-
2
:])
d_top
=
CorrMM
(
self
.
border_mode
,
self
.
subsample
,
self
.
filter_dilation
)(
bottom
,
weights
)
self
.
filter_dilation
,
self
.
num_groups
)(
bottom
,
weights
)
d_height_width
=
((
theano
.
gradient
.
DisconnectedType
()(),)
*
2
if
len
(
inp
)
==
4
else
())
return
(
d_weights
,
d_top
)
+
d_height_width
...
...
theano/tensor/nnet/corr_gemm.c
浏览文件 @
c2e14ce1
...
...
@@ -106,7 +106,8 @@ PyArrayObject* corrMM(PyArrayObject* bottom,
const
int
dilH
=
1
,
const
int
dilW
=
1
,
const
int
padH
=
0
,
const
int
padW
=
0
)
const
int
padW
=
0
,
const
int
numgroups
=
1
)
{
if
(
PyArray_NDIM
(
bottom
)
!=
4
)
{
...
...
@@ -155,7 +156,7 @@ PyArrayObject* corrMM(PyArrayObject* bottom,
const
int
nFilters
=
PyArray_DIMS
(
weight
)[
0
];
const
int
kH
=
PyArray_DIMS
(
weight
)[
2
];
const
int
kW
=
PyArray_DIMS
(
weight
)[
3
];
if
(
nChannels
!=
PyArray_DIMS
(
weight
)[
1
]
)
{
if
(
nChannels
!=
(
PyArray_DIMS
(
weight
)[
1
]
*
numgroups
)
)
{
PyErr_SetString
(
PyExc_ValueError
,
"CorrMM images and kernel must have the same stack size
\n
"
);
return
NULL
;
...
...
@@ -214,12 +215,16 @@ PyArrayObject* corrMM(PyArrayObject* bottom,
}
// Define some useful variables
const
int
bottom_stride
=
PyArray_STRIDES
(
bottom
)[
0
]
/%
(
n_bytes
)
f
;
const
int
top_stride
=
PyArray_STRIDES
(
top
)[
0
]
/%
(
n_bytes
)
f
;
const
int
K_
=
col_dim
[
1
];
const
int
batch_bottom_stride
=
PyArray_STRIDES
(
bottom
)[
0
]
/%
(
n_bytes
)
f
;
const
int
group_bottom_stride
=
(
PyArray_STRIDES
(
bottom
)[
1
]
*
nChannels
/
numgroups
)
/%
(
n_bytes
)
f
;
const
int
batch_top_stride
=
PyArray_STRIDES
(
top
)[
0
]
/%
(
n_bytes
)
f
;
const
int
group_top_stride
=
(
PyArray_STRIDES
(
top
)[
1
]
*
nFilters
/
numgroups
)
/%
(
n_bytes
)
f
;
const
int
K_
=
col_dim
[
1
]
/
numgroups
;
const
int
N_
=
col_dim
[
2
];
const
int
col_stride
=
(
K_
*
N_
);
const
int
M_
=
nFilters
;
const
int
col_stride
=
(
K_
*
N_
*
numgroups
);
const
int
group_col_stride
=
(
K_
*
N_
);
const
int
group_weight_stride
=
(
PyArray_STRIDES
(
weight
)[
0
]
*
nFilters
/
numgroups
)
/%
(
n_bytes
)
f
;
const
int
M_
=
nFilters
/
numgroups
;
const
%
(
c_float_type
)
s
one
=
1
.
0
;
const
%
(
c_float_type
)
s
zero
=
0
.
0
;
char
NTrans
=
'N'
;
...
...
@@ -253,17 +258,19 @@ PyArrayObject* corrMM(PyArrayObject* bottom,
for
(
int
n
=
0
;
n
<
batchSize
;
++
n
)
{
int
tid
=
%
(
omp_get_thread_num
)
s
;
// First, im2col
im2col
((
%
(
float_type
)
s
*
)
PyArray_DATA
(
bottom
)
+
n
*
b
ottom_stride
,
nChannels
,
bottomHeight
,
bottomWidth
,
kH
,
kW
,
dilH
,
dilW
,
padH
,
padW
,
dH
,
dW
,
im2col
((
%
(
float_type
)
s
*
)
PyArray_DATA
(
bottom
)
+
n
*
b
atch_bottom_stride
,
nChannels
,
bottom
Height
,
bottom
Width
,
kH
,
kW
,
dilH
,
dilW
,
padH
,
padW
,
dH
,
dW
,
(
%
(
float_type
)
s
*
)
PyArray_DATA
(
col
)
+
tid
*
col_stride
);
// Second, gemm
%
(
gemm
)
s
(
&
NTrans
,
&
NTrans
,
&
N_
,
&
M_
,
&
K_
,
&
one
,
(
%
(
float_type
)
s
*
)
PyArray_DATA
(
col
)
+
tid
*
col_stride
,
&
N_
,
(
%
(
float_type
)
s
*
)
PyArray_DATA
(
weight
),
&
K_
,
&
zero
,
(
%
(
float_type
)
s
*
)
PyArray_DATA
(
top
)
+
n
*
top_stride
,
&
N_
);
for
(
int
g
=
0
;
g
<
numgroups
;
++
g
){
// Second, gemm
%
(
gemm
)
s
(
&
NTrans
,
&
NTrans
,
&
N_
,
&
M_
,
&
K_
,
&
one
,
(
%
(
float_type
)
s
*
)
PyArray_DATA
(
col
)
+
tid
*
col_stride
+
g
*
group_col_stride
,
&
N_
,
(
%
(
float_type
)
s
*
)
PyArray_DATA
(
weight
)
+
g
*
group_weight_stride
,
&
K_
,
&
zero
,
(
%
(
float_type
)
s
*
)
PyArray_DATA
(
top
)
+
n
*
batch_top_stride
+
g
*
group_top_stride
,
&
N_
);
}
}
// Restore to previous blas threads
%
(
blas_set_num_threads
)
s
(
blas_threads_saved
);
...
...
@@ -304,7 +311,7 @@ PyArrayObject* corrMM(PyArrayObject* bottom,
output
=
weight
;
npy_intp
weight_dim
[
2
];
weight_dim
[
0
]
=
(
npy_intp
)
max_threads
;
weight_dim
[
1
]
=
(
npy_intp
)(
M_
*
K_
);
weight_dim
[
1
]
=
(
npy_intp
)(
M_
*
K_
*
numgroups
);
PyArrayObject
*
local_weight
=
(
PyArrayObject
*
)
PyArray_ZEROS
(
2
,
weight_dim
,
PyArray_TYPE
(
weight
),
0
);
...
...
@@ -326,21 +333,23 @@ PyArrayObject* corrMM(PyArrayObject* bottom,
for
(
int
n
=
0
;
n
<
batchSize
;
++
n
)
{
int
tid
=
%
(
omp_get_thread_num
)
s
;
// First, im2col
im2col
((
%
(
float_type
)
s
*
)
PyArray_DATA
(
bottom
)
+
n
*
b
ottom_stride
,
nChannels
,
bottomHeight
,
bottomWidth
,
kH
,
kW
,
dilH
,
dilW
,
padH
,
padW
,
dH
,
dW
,
im2col
((
%
(
float_type
)
s
*
)
PyArray_DATA
(
bottom
)
+
n
*
b
atch_bottom_stride
,
nChannels
,
bottomHeight
,
bottomWidth
,
kH
,
kW
,
dilH
,
dilW
,
padH
,
padW
,
dH
,
dW
,
(
%
(
float_type
)
s
*
)
PyArray_DATA
(
col
)
+
tid
*
col_stride
);
// Second, gemm
// Note that we accumulate into weight. We do so by setting beta = 0
// for the first iteration and beta = 1 for subsequent ones. (This
// is faster than setting weight to all zeros before the loop.)
%
(
gemm
)
s
(
&
Trans
,
&
NTrans
,
&
K_
,
&
M_
,
&
N_
,
&
one
,
(
%
(
float_type
)
s
*
)
PyArray_DATA
(
col
)
+
tid
*
col_stride
,
&
N_
,
(
%
(
float_type
)
s
*
)
PyArray_DATA
(
top
)
+
n
*
top_stride
,
&
N_
,
(
n
==
0
)
?
&
zero
:
&
one
,
(
%
(
float_type
)
s
*
)
PyArray_DATA
(
local_weight
)
+
tid
*
weight_dim
[
1
],
&
K_
);
for
(
int
g
=
0
;
g
<
numgroups
;
++
g
){
// Second, gemm
// Note that we accumulate into weight. We do so by setting beta = 0
// for the first iteration and beta = 1 for subsequent ones. (This
// is faster than setting weight to all zeros before the loop.)
%
(
gemm
)
s
(
&
Trans
,
&
NTrans
,
&
K_
,
&
M_
,
&
N_
,
&
one
,
(
%
(
float_type
)
s
*
)
PyArray_DATA
(
col
)
+
tid
*
col_stride
+
g
*
group_col_stride
,
&
N_
,
(
%
(
float_type
)
s
*
)
PyArray_DATA
(
top
)
+
g
*
group_top_stride
+
n
*
batch_top_stride
,
&
N_
,
(
n
==
0
)
?
&
zero
:
&
one
,
(
%
(
float_type
)
s
*
)
PyArray_DATA
(
local_weight
)
+
g
*
group_weight_stride
+
tid
*
weight_dim
[
1
],
&
K_
);
}
}
// Restore to previous blas threads
%
(
blas_set_num_threads
)
s
(
blas_threads_saved
);
...
...
@@ -401,19 +410,21 @@ PyArrayObject* corrMM(PyArrayObject* bottom,
%
(
blas_set_num_threads
)
s
(
1
);
%
(
omp_flags
)
s
for
(
int
n
=
0
;
n
<
batchSize
;
++
n
)
{
// gemm into columns
int
tid
=
%
(
omp_get_thread_num
)
s
;
%
(
gemm
)
s
(
&
NTrans
,
&
Trans
,
&
N_
,
&
K_
,
&
M_
,
&
one
,
(
%
(
float_type
)
s
*
)
PyArray_DATA
(
top
)
+
n
*
top_stride
,
&
N_
,
(
%
(
float_type
)
s
*
)
PyArray_DATA
(
weight
),
&
K_
,
&
zero
,
(
%
(
float_type
)
s
*
)
PyArray_DATA
(
col
)
+
tid
*
col_stride
,
&
N_
);
for
(
int
g
=
0
;
g
<
numgroups
;
++
g
){
// gemm into columns
%
(
gemm
)
s
(
&
NTrans
,
&
Trans
,
&
N_
,
&
K_
,
&
M_
,
&
one
,
(
%
(
float_type
)
s
*
)
PyArray_DATA
(
top
)
+
g
*
group_top_stride
+
n
*
batch_top_stride
,
&
N_
,
(
%
(
float_type
)
s
*
)
PyArray_DATA
(
weight
)
+
g
*
group_weight_stride
,
&
K_
,
&
zero
,
(
%
(
float_type
)
s
*
)
PyArray_DATA
(
col
)
+
tid
*
col_stride
+
g
*
group_col_stride
,
&
N_
);
}
// col2im back to the data
col2im
((
%
(
float_type
)
s
*
)
PyArray_DATA
(
col
)
+
tid
*
col_stride
,
nChannels
,
bottomHeight
,
bottomWidth
,
kH
,
kW
,
dilH
,
dilW
,
padH
,
padW
,
dH
,
dW
,
(
%
(
float_type
)
s
*
)
PyArray_DATA
(
bottom
)
+
n
*
bottom_stride
);
dH
,
dW
,
(
%
(
float_type
)
s
*
)
PyArray_DATA
(
bottom
)
+
n
*
b
atch_b
ottom_stride
);
}
// Restore to previous blas threads
%
(
blas_set_num_threads
)
s
(
blas_threads_saved
);
...
...
theano/tensor/nnet/opt.py
浏览文件 @
c2e14ce1
...
...
@@ -88,7 +88,9 @@ def local_abstractconv_gemm(node):
kern
=
kern
[:,
:,
::
-
1
,
::
-
1
]
rval
=
CorrMM
(
border_mode
=
node
.
op
.
border_mode
,
subsample
=
node
.
op
.
subsample
,
filter_dilation
=
node
.
op
.
filter_dilation
)(
img
,
kern
)
filter_dilation
=
node
.
op
.
filter_dilation
,
num_groups
=
node
.
op
.
num_groups
)(
img
,
kern
)
copy_stack_trace
(
node
.
outputs
[
0
],
rval
)
return
[
rval
]
...
...
@@ -133,7 +135,8 @@ def local_abstractconv_gradweight_gemm(node):
rval
=
CorrMM_gradWeights
(
border_mode
=
node
.
op
.
border_mode
,
subsample
=
node
.
op
.
subsample
,
filter_dilation
=
node
.
op
.
filter_dilation
)(
img
,
topgrad
,
shape
)
filter_dilation
=
node
.
op
.
filter_dilation
,
num_groups
=
node
.
op
.
num_groups
)(
img
,
topgrad
,
shape
)
copy_stack_trace
(
node
.
outputs
[
0
],
rval
)
# need to flip the kernel if necessary
...
...
@@ -190,8 +193,9 @@ def local_abstractconv_gradinputs_gemm(node):
kern
=
kern
[:,
:,
::
-
1
,
::
-
1
]
rval
=
CorrMM_gradInputs
(
border_mode
=
node
.
op
.
border_mode
,
subsample
=
node
.
op
.
subsample
,
filter_dilation
=
node
.
op
.
filter_dilation
)(
kern
,
topgrad
,
shape
)
filter_dilation
=
node
.
op
.
filter_dilation
,
num_groups
=
node
.
op
.
num_groups
)(
kern
,
topgrad
,
shape
)
copy_stack_trace
(
node
.
outputs
[
0
],
rval
)
return
[
rval
]
...
...
@@ -238,6 +242,8 @@ def local_conv2d_cpu(node):
if
not
node
.
op
.
filter_flip
:
# Not tested yet
return
None
if
node
.
op
.
num_groups
>
1
:
return
None
rval
=
conv2d
(
img
,
kern
,
node
.
op
.
imshp
,
node
.
op
.
kshp
,
...
...
@@ -295,6 +301,8 @@ def local_conv2d_gradweight_cpu(node):
if
not
node
.
op
.
filter_flip
:
# Not tested yet
return
if
node
.
op
.
num_groups
>
1
:
return
None
if
node
.
op
.
border_mode
==
'valid'
and
\
(
node
.
op
.
subsample
!=
(
1
,
1
)):
...
...
@@ -447,6 +455,8 @@ def local_conv2d_gradinputs_cpu(node):
if
not
node
.
op
.
filter_flip
:
# Not tested yet
return
None
if
node
.
op
.
num_groups
>
1
:
return
None
# Conv 3d implementation, needed when subsample > 2
if
node
.
op
.
border_mode
==
'valid'
and
node
.
op
.
subsample
!=
(
1
,
1
):
...
...
theano/tensor/nnet/tests/test_abstract_conv.py
浏览文件 @
c2e14ce1
...
...
@@ -1699,3 +1699,158 @@ class TestConv2dGrads(unittest.TestCase):
)
f_new
=
theano
.
function
([
self
.
x
,
self
.
output_grad_wrt
],
conv_wrt_w_out
)
utt
.
assert_allclose
(
f_new
(
input_val
,
out_grad_val
),
f_old
(
input_val
,
filter_val
,
out_grad_val
))
class
Grouped_conv_noOptim
(
unittest
.
TestCase
):
conv2d
=
theano
.
tensor
.
nnet
.
abstract_conv
.
AbstractConv2d
conv2d_gradw
=
theano
.
tensor
.
nnet
.
abstract_conv
.
AbstractConv2d_gradWeights
conv2d_gradi
=
theano
.
tensor
.
nnet
.
abstract_conv
.
AbstractConv2d_gradInputs
conv2d_op
=
theano
.
tensor
.
nnet
.
abstract_conv
.
AbstractConv2d
conv2d_gradw_op
=
theano
.
tensor
.
nnet
.
abstract_conv
.
AbstractConv2d_gradWeights
conv2d_gradi_op
=
theano
.
tensor
.
nnet
.
abstract_conv
.
AbstractConv2d_gradInputs
mode
=
theano
.
Mode
(
optimizer
=
None
)
flip_filter
=
False
is_dnn
=
False
def
setUp
(
self
):
self
.
num_groups
=
[
3
,
2
,
4
,
4
]
self
.
border_mode
=
'valid'
self
.
subsample
=
(
1
,
1
)
self
.
img_shape
=
[(
5
,
6
,
5
,
5
),
(
4
,
4
,
7
,
5
),
(
3
,
8
,
5
,
3
),
(
2
,
4
,
7
,
7
)]
self
.
kern_shape
=
[(
6
,
2
,
3
,
3
),
(
6
,
2
,
5
,
3
),
(
4
,
2
,
3
,
3
),
(
4
,
1
,
3
,
5
)]
self
.
top_shape
=
[(
5
,
6
,
3
,
3
),
(
4
,
6
,
3
,
3
),
(
3
,
4
,
3
,
1
),
(
2
,
4
,
5
,
3
)]
self
.
filter_dilation
=
(
1
,
1
)
self
.
ref_mode
=
'FAST_RUN'
if
theano
.
config
.
cxx
==
""
:
raise
SkipTest
(
"CorrMM needs cxx"
)
def
test_fwd
(
self
):
img_sym
=
theano
.
tensor
.
tensor4
(
'img'
)
kern_sym
=
theano
.
tensor
.
tensor4
(
'kern'
)
for
imshp
,
kshp
,
groups
in
zip
(
self
.
img_shape
,
self
.
kern_shape
,
self
.
num_groups
):
img
=
np
.
random
.
random
(
imshp
)
.
astype
(
theano
.
config
.
floatX
)
kern
=
np
.
random
.
random
(
kshp
)
.
astype
(
theano
.
config
.
floatX
)
split_imgs
=
np
.
split
(
img
,
groups
,
axis
=
1
)
split_kern
=
np
.
split
(
kern
,
groups
,
axis
=
0
)
grouped_conv_op
=
self
.
conv2d
(
border_mode
=
self
.
border_mode
,
subsample
=
self
.
subsample
,
filter_dilation
=
self
.
filter_dilation
,
num_groups
=
groups
)
if
self
.
flip_filter
:
grouped_conv_output
=
grouped_conv_op
(
img_sym
,
kern_sym
[:,
:,
::
-
1
,
::
-
1
])
else
:
grouped_conv_output
=
grouped_conv_op
(
img_sym
,
kern_sym
)
grouped_func
=
theano
.
function
([
img_sym
,
kern_sym
],
grouped_conv_output
,
mode
=
self
.
mode
)
assert
any
([
isinstance
(
node
.
op
,
self
.
conv2d_op
)
for
node
in
grouped_func
.
maker
.
fgraph
.
toposort
()])
grouped_output
=
grouped_func
(
img
,
kern
)
ref_conv_op
=
conv2d_corr
(
img_sym
,
kern_sym
,
border_mode
=
self
.
border_mode
,
subsample
=
self
.
subsample
,
filter_dilation
=
self
.
filter_dilation
)
ref_func
=
theano
.
function
([
img_sym
,
kern_sym
],
ref_conv_op
,
mode
=
self
.
ref_mode
)
ref_concat_output
=
[
ref_func
(
img_arr
,
kern_arr
)
for
img_arr
,
kern_arr
in
zip
(
split_imgs
,
split_kern
)]
ref_concat_output
=
np
.
concatenate
(
ref_concat_output
,
axis
=
1
)
utt
.
assert_allclose
(
grouped_output
,
ref_concat_output
)
utt
.
verify_grad
(
grouped_conv_op
,
[
img
,
kern
],
mode
=
self
.
mode
,
eps
=
1
)
def
test_gradweights
(
self
):
img_sym
=
theano
.
tensor
.
tensor4
(
'img'
)
top_sym
=
theano
.
tensor
.
tensor4
(
'top'
)
for
imshp
,
kshp
,
tshp
,
groups
in
zip
(
self
.
img_shape
,
self
.
kern_shape
,
self
.
top_shape
,
self
.
num_groups
):
img
=
np
.
random
.
random
(
imshp
)
.
astype
(
theano
.
config
.
floatX
)
top
=
np
.
random
.
random
(
tshp
)
.
astype
(
theano
.
config
.
floatX
)
split_imgs
=
np
.
split
(
img
,
groups
,
axis
=
1
)
split_top
=
np
.
split
(
top
,
groups
,
axis
=
1
)
grouped_convgrad_op
=
self
.
conv2d_gradw
(
border_mode
=
self
.
border_mode
,
subsample
=
self
.
subsample
,
filter_dilation
=
self
.
filter_dilation
,
num_groups
=
groups
)
grouped_conv_output
=
grouped_convgrad_op
(
img_sym
,
top_sym
,
tensor
.
as_tensor_variable
(
kshp
if
self
.
is_dnn
else
kshp
[
-
2
:]))
if
self
.
flip_filter
:
grouped_conv_output
=
grouped_conv_output
[:,
:,
::
-
1
,
::
-
1
]
grouped_func
=
theano
.
function
([
img_sym
,
top_sym
],
grouped_conv_output
,
mode
=
self
.
mode
)
assert
any
([
isinstance
(
node
.
op
,
self
.
conv2d_gradw_op
)
for
node
in
grouped_func
.
maker
.
fgraph
.
toposort
()])
grouped_output
=
grouped_func
(
img
,
top
)
ref_conv_op
=
conv2d_corr_gw
(
img_sym
,
top_sym
,
kshp
,
border_mode
=
self
.
border_mode
,
subsample
=
self
.
subsample
,
filter_dilation
=
self
.
filter_dilation
)
ref_func
=
theano
.
function
([
img_sym
,
top_sym
],
ref_conv_op
,
mode
=
self
.
ref_mode
)
ref_concat_output
=
[
ref_func
(
img_arr
,
top_arr
)
for
img_arr
,
top_arr
in
zip
(
split_imgs
,
split_top
)]
ref_concat_output
=
np
.
concatenate
(
ref_concat_output
,
axis
=
0
)
utt
.
assert_allclose
(
grouped_output
,
ref_concat_output
)
def
conv_gradweight
(
inputs_val
,
output_val
):
return
grouped_convgrad_op
(
inputs_val
,
output_val
,
tensor
.
as_tensor_variable
(
kshp
if
self
.
is_dnn
else
kshp
[
-
2
:]))
utt
.
verify_grad
(
conv_gradweight
,
[
img
,
top
],
mode
=
self
.
mode
,
eps
=
1
)
def
test_gradinputs
(
self
):
kern_sym
=
theano
.
tensor
.
tensor4
(
'kern'
)
top_sym
=
theano
.
tensor
.
tensor4
(
'top'
)
for
imshp
,
kshp
,
tshp
,
groups
in
zip
(
self
.
img_shape
,
self
.
kern_shape
,
self
.
top_shape
,
self
.
num_groups
):
kern
=
np
.
random
.
random
(
kshp
)
.
astype
(
theano
.
config
.
floatX
)
top
=
np
.
random
.
random
(
tshp
)
.
astype
(
theano
.
config
.
floatX
)
split_kerns
=
np
.
split
(
kern
,
groups
,
axis
=
0
)
split_top
=
np
.
split
(
top
,
groups
,
axis
=
1
)
grouped_convgrad_op
=
self
.
conv2d_gradi
(
border_mode
=
self
.
border_mode
,
subsample
=
self
.
subsample
,
filter_dilation
=
self
.
filter_dilation
,
num_groups
=
groups
)
if
self
.
flip_filter
:
grouped_conv_output
=
grouped_convgrad_op
(
kern_sym
[:,
:,
::
-
1
,
::
-
1
],
top_sym
,
tensor
.
as_tensor_variable
(
imshp
[
-
2
:]))
else
:
grouped_conv_output
=
grouped_convgrad_op
(
kern_sym
,
top_sym
,
tensor
.
as_tensor_variable
(
imshp
if
self
.
is_dnn
else
imshp
[
-
2
:]))
grouped_func
=
theano
.
function
([
kern_sym
,
top_sym
],
grouped_conv_output
,
mode
=
self
.
mode
)
assert
any
([
isinstance
(
node
.
op
,
self
.
conv2d_gradi_op
)
for
node
in
grouped_func
.
maker
.
fgraph
.
toposort
()])
grouped_output
=
grouped_func
(
kern
,
top
)
ref_conv_op
=
conv2d_corr_gi
(
kern_sym
,
top_sym
,
imshp
,
border_mode
=
self
.
border_mode
,
subsample
=
self
.
subsample
,
filter_dilation
=
self
.
filter_dilation
)
ref_func
=
theano
.
function
([
kern_sym
,
top_sym
],
ref_conv_op
,
mode
=
self
.
ref_mode
)
ref_concat_output
=
[
ref_func
(
kern_arr
,
top_arr
)
for
kern_arr
,
top_arr
in
zip
(
split_kerns
,
split_top
)]
ref_concat_output
=
np
.
concatenate
(
ref_concat_output
,
axis
=
1
)
utt
.
assert_allclose
(
grouped_output
,
ref_concat_output
)
def
conv_gradinputs
(
filters_val
,
output_val
):
return
grouped_convgrad_op
(
filters_val
,
output_val
,
tensor
.
as_tensor_variable
(
imshp
if
self
.
is_dnn
else
imshp
[
-
2
:]))
utt
.
verify_grad
(
conv_gradinputs
,
[
kern
,
top
],
mode
=
self
.
mode
,
eps
=
1
)
theano/tensor/nnet/tests/test_corr.py
浏览文件 @
c2e14ce1
...
...
@@ -10,6 +10,7 @@ import theano
import
theano.tensor
as
T
from
theano.tests
import
unittest_tools
as
utt
from
theano.tensor.nnet
import
corr
,
conv
from
theano.tensor.nnet.tests.test_abstract_conv
import
Grouped_conv_noOptim
class
TestCorr2D
(
utt
.
InferShapeTester
):
...
...
@@ -416,6 +417,49 @@ class TestCorr2D(utt.InferShapeTester):
self
.
validate
((
3
,
2
,
7
,
5
),
(
5
,
2
,
2
,
3
),
2
,
non_contiguous
=
True
)
class
TestGroupCorr2d
(
Grouped_conv_noOptim
):
if
theano
.
config
.
mode
==
"FAST_COMPILE"
:
mode
=
theano
.
compile
.
get_mode
(
"FAST_RUN"
)
else
:
mode
=
None
conv2d
=
corr
.
CorrMM
conv2d_gradw
=
corr
.
CorrMM_gradWeights
conv2d_gradi
=
corr
.
CorrMM_gradInputs
conv2d_op
=
corr
.
CorrMM
conv2d_gradw_op
=
corr
.
CorrMM_gradWeights
conv2d_gradi_op
=
corr
.
CorrMM_gradInputs
flip_filter
=
True
is_dnn
=
False
def
test_graph
(
self
):
# define common values first
groups
=
3
bottom
=
np
.
random
.
rand
(
3
,
6
,
5
,
5
)
.
astype
(
theano
.
config
.
floatX
)
kern
=
np
.
random
.
rand
(
9
,
2
,
3
,
3
)
.
astype
(
theano
.
config
.
floatX
)
bottom_sym
=
T
.
tensor4
(
'bottom'
)
kern_sym
=
T
.
tensor4
(
'kern'
)
# grouped convolution graph
conv_group
=
self
.
conv2d
(
num_groups
=
groups
)(
bottom_sym
,
kern_sym
)
gconv_func
=
theano
.
function
([
bottom_sym
,
kern_sym
],
conv_group
,
mode
=
self
.
mode
)
# Graph for the normal hard way
kern_offset
=
kern_sym
.
shape
[
0
]
//
groups
bottom_offset
=
bottom_sym
.
shape
[
1
]
//
groups
split_conv_output
=
[
self
.
conv2d
()(
bottom_sym
[:,
i
*
bottom_offset
:(
i
+
1
)
*
bottom_offset
,
:,
:],
kern_sym
[
i
*
kern_offset
:(
i
+
1
)
*
kern_offset
,
:,
:,
:])
for
i
in
range
(
groups
)]
concatenated_output
=
T
.
concatenate
(
split_conv_output
,
axis
=
1
)
conv_func
=
theano
.
function
([
bottom_sym
,
kern_sym
],
concatenated_output
,
mode
=
self
.
mode
)
# calculate outputs for each graph
gconv_output
=
gconv_func
(
bottom
,
kern
)
conv_output
=
conv_func
(
bottom
,
kern
)
# compare values
utt
.
assert_allclose
(
gconv_output
,
conv_output
)
if
__name__
==
'__main__'
:
t
=
TestCorr2D
(
'setUp'
)
...
...
编写
预览
Markdown
格式
0%
重试
或
添加新文件
添加附件
取消
您添加了
0
人
到此讨论。请谨慎行事。
请先完成此评论的编辑!
取消
请
注册
或者
登录
后发表评论