Skip to content
项目
群组
代码片段
帮助
当前项目
正在载入...
登录 / 注册
切换导航面板
P
pytensor
项目
项目
详情
活动
周期分析
仓库
仓库
文件
提交
分支
标签
贡献者
图表
比较
统计图
议题
0
议题
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
CI / CD
CI / CD
流水线
作业
日程
统计图
Wiki
Wiki
代码片段
代码片段
成员
成员
折叠边栏
关闭边栏
活动
图像
聊天
创建新问题
作业
提交
问题看板
Open sidebar
testgroup
pytensor
Commits
c2e14ce1
提交
c2e14ce1
authored
7月 17, 2017
作者:
abergeron
提交者:
GitHub
7月 17, 2017
浏览文件
操作
浏览文件
下载
差异文件
Merge pull request #5991 from affanv14/group
Implement Grouped Convolutions
上级
110729fb
99758e6d
全部展开
隐藏空白字符变更
内嵌
并排
正在显示
17 个修改的文件
包含
487 行增加
和
144 行删除
+487
-144
blas.py
theano/gpuarray/blas.py
+0
-0
corr_gemm.c
theano/gpuarray/corr_gemm.c
+42
-31
dnn.py
theano/gpuarray/dnn.py
+0
-0
dnn_base.c
theano/gpuarray/dnn_base.c
+11
-4
dnn_fwd.c
theano/gpuarray/dnn_fwd.c
+12
-7
dnn_gi.c
theano/gpuarray/dnn_gi.c
+19
-13
dnn_gw.c
theano/gpuarray/dnn_gw.c
+21
-13
opt.py
theano/gpuarray/opt.py
+9
-5
test_dnn.py
theano/gpuarray/tests/test_dnn.py
+35
-0
test_gemmcorr.py
theano/gpuarray/tests/test_gemmcorr.py
+13
-0
__init__.py
theano/tensor/nnet/__init__.py
+13
-4
abstract_conv.py
theano/tensor/nnet/abstract_conv.py
+0
-0
corr.py
theano/tensor/nnet/corr.py
+47
-22
corr_gemm.c
theano/tensor/nnet/corr_gemm.c
+52
-41
opt.py
theano/tensor/nnet/opt.py
+14
-4
test_abstract_conv.py
theano/tensor/nnet/tests/test_abstract_conv.py
+155
-0
test_corr.py
theano/tensor/nnet/tests/test_corr.py
+44
-0
没有找到文件。
theano/gpuarray/blas.py
浏览文件 @
c2e14ce1
差异被折叠。
点击展开。
theano/gpuarray/corr_gemm.c
浏览文件 @
c2e14ce1
...
@@ -348,7 +348,8 @@ PyGpuArrayObject* corrMM(PyGpuArrayObject *const bottom,
...
@@ -348,7 +348,8 @@ PyGpuArrayObject* corrMM(PyGpuArrayObject *const bottom,
const
size_t
dilH
=
1
,
const
size_t
dilH
=
1
,
const
size_t
dilW
=
1
,
const
size_t
dilW
=
1
,
const
size_t
padH
=
0
,
const
size_t
padH
=
0
,
const
size_t
padW
=
0
)
const
size_t
padW
=
0
,
const
size_t
numgroups
=
1
)
{
{
if
(
PyGpuArray_NDIM
(
bottom
)
!=
4
)
if
(
PyGpuArray_NDIM
(
bottom
)
!=
4
)
{
{
...
@@ -411,7 +412,7 @@ PyGpuArrayObject* corrMM(PyGpuArrayObject *const bottom,
...
@@ -411,7 +412,7 @@ PyGpuArrayObject* corrMM(PyGpuArrayObject *const bottom,
const
size_t
nFilters
=
PyGpuArray_DIMS
(
weight
)[
0
];
const
size_t
nFilters
=
PyGpuArray_DIMS
(
weight
)[
0
];
const
size_t
kH
=
PyGpuArray_DIMS
(
weight
)[
2
];
const
size_t
kH
=
PyGpuArray_DIMS
(
weight
)[
2
];
const
size_t
kW
=
PyGpuArray_DIMS
(
weight
)[
3
];
const
size_t
kW
=
PyGpuArray_DIMS
(
weight
)[
3
];
if
(
nChannels
!=
PyGpuArray_DIMS
(
weight
)[
1
]
)
{
if
(
nChannels
!=
(
PyGpuArray_DIMS
(
weight
)[
1
]
*
numgroups
)
)
{
PyErr_SetString
(
PyExc_ValueError
,
PyErr_SetString
(
PyExc_ValueError
,
"GpuCorrMM images and kernel must have the same stack size
\n
"
);
"GpuCorrMM images and kernel must have the same stack size
\n
"
);
return
NULL
;
return
NULL
;
...
@@ -469,11 +470,15 @@ PyGpuArrayObject* corrMM(PyGpuArrayObject *const bottom,
...
@@ -469,11 +470,15 @@ PyGpuArrayObject* corrMM(PyGpuArrayObject *const bottom,
}
}
// Define some useful variables
// Define some useful variables
const
size_t
bottom_stride
=
PyGpuArray_STRIDES
(
bottom
)[
0
]
/
gpuarray_get_elsize
(
bottom
->
ga
.
typecode
);
const
size_t
batch_bottom_stride
=
PyGpuArray_STRIDES
(
bottom
)[
0
]
/
gpuarray_get_elsize
(
bottom
->
ga
.
typecode
);
const
size_t
top_stride
=
PyGpuArray_STRIDES
(
top
)[
0
]
/
gpuarray_get_elsize
(
top
->
ga
.
typecode
);
const
size_t
batch_top_stride
=
PyGpuArray_STRIDES
(
top
)[
0
]
/
gpuarray_get_elsize
(
top
->
ga
.
typecode
);
const
size_t
K_
=
col_dim
[
0
];
const
size_t
group_bottom_stride
=
(
PyGpuArray_STRIDES
(
bottom
)[
1
]
*
nChannels
/
numgroups
)
/
gpuarray_get_elsize
(
bottom
->
ga
.
typecode
);
const
size_t
group_top_stride
=
(
PyGpuArray_STRIDES
(
top
)[
1
]
*
nFilters
/
numgroups
)
/
gpuarray_get_elsize
(
top
->
ga
.
typecode
);
const
size_t
group_weight_stride
=
(
PyGpuArray_STRIDES
(
weight
)[
0
]
*
nFilters
/
numgroups
)
/
gpuarray_get_elsize
(
weight
->
ga
.
typecode
);
const
size_t
K_
=
col_dim
[
0
]
/
numgroups
;
const
size_t
N_
=
col_dim
[
1
];
const
size_t
N_
=
col_dim
[
1
];
const
size_t
M_
=
nFilters
;
const
size_t
group_col_stride
=
(
K_
*
N_
);
const
size_t
M_
=
nFilters
/
numgroups
;
PyGpuArrayObject
*
output
;
PyGpuArrayObject
*
output
;
if
(
direction
==
0
)
{
// forward pass
if
(
direction
==
0
)
{
// forward pass
...
@@ -493,21 +498,23 @@ PyGpuArrayObject* corrMM(PyGpuArrayObject *const bottom,
...
@@ -493,21 +498,23 @@ PyGpuArrayObject* corrMM(PyGpuArrayObject *const bottom,
// Iterate over batch
// Iterate over batch
for
(
size_t
n
=
0
;
n
<
batchSize
;
n
++
)
{
for
(
size_t
n
=
0
;
n
<
batchSize
;
n
++
)
{
// First, im2col
// First, im2col
err
=
im2col
(
&
bottom
->
ga
,
n
*
bottom_stride
,
err
=
im2col
(
&
bottom
->
ga
,
n
*
batch_
bottom_stride
,
nChannels
,
bottomHeight
,
nChannels
,
bottomHeight
,
bottomWidth
,
kH
,
kW
,
dilH
,
dilW
,
bottomWidth
,
kH
,
kW
,
dilH
,
dilW
,
padH
,
padW
,
dH
,
dW
,
&
col
->
ga
);
padH
,
padW
,
dH
,
dW
,
&
col
->
ga
);
if
(
err
!=
GA_NO_ERROR
)
{
if
(
err
!=
GA_NO_ERROR
)
{
Py_DECREF
(
col
);
Py_DECREF
(
col
);
return
NULL
;
return
NULL
;
}
}
// Second, gemm
// Second, gemm
err
=
rgemm
(
cb_fortran
,
cb_no_trans
,
cb_no_trans
,
for
(
size_t
g
=
0
;
g
<
numgroups
;
g
++
){
N_
,
M_
,
K_
,
1
,
err
=
rgemm
(
cb_fortran
,
cb_no_trans
,
cb_no_trans
,
&
col
->
ga
,
0
,
N_
,
N_
,
M_
,
K_
,
1
,
&
weight
->
ga
,
0
,
K_
,
&
col
->
ga
,
g
*
group_col_stride
,
N_
,
0
,
&
weight
->
ga
,
g
*
group_weight_stride
,
K_
,
&
top
->
ga
,
n
*
top_stride
,
N_
);
0
,
&
top
->
ga
,
n
*
batch_top_stride
+
g
*
group_top_stride
,
N_
);
}
if
(
err
!=
GA_NO_ERROR
)
{
if
(
err
!=
GA_NO_ERROR
)
{
PyErr_Format
(
PyExc_RuntimeError
,
PyErr_Format
(
PyExc_RuntimeError
,
"GpuCorrMM forward encountered an error running gemm: %d"
,
err
);
"GpuCorrMM forward encountered an error running gemm: %d"
,
err
);
...
@@ -533,7 +540,7 @@ PyGpuArrayObject* corrMM(PyGpuArrayObject *const bottom,
...
@@ -533,7 +540,7 @@ PyGpuArrayObject* corrMM(PyGpuArrayObject *const bottom,
// Iterate over batch
// Iterate over batch
for
(
size_t
n
=
0
;
n
<
batchSize
;
n
++
)
{
for
(
size_t
n
=
0
;
n
<
batchSize
;
n
++
)
{
// First, im2col
// First, im2col
err
=
im2col
(
&
bottom
->
ga
,
n
*
bottom_stride
,
err
=
im2col
(
&
bottom
->
ga
,
n
*
b
atch_b
ottom_stride
,
nChannels
,
bottomHeight
,
nChannels
,
bottomHeight
,
bottomWidth
,
kH
,
kW
,
dilH
,
dilW
,
bottomWidth
,
kH
,
kW
,
dilH
,
dilW
,
padH
,
padW
,
dH
,
dW
,
&
col
->
ga
);
padH
,
padW
,
dH
,
dW
,
&
col
->
ga
);
...
@@ -545,12 +552,14 @@ PyGpuArrayObject* corrMM(PyGpuArrayObject *const bottom,
...
@@ -545,12 +552,14 @@ PyGpuArrayObject* corrMM(PyGpuArrayObject *const bottom,
// Note that we accumulate into weight. We do so by setting beta = 0
// Note that we accumulate into weight. We do so by setting beta = 0
// for the first iteration and beta = 1 for subsequent ones. (This
// for the first iteration and beta = 1 for subsequent ones. (This
// is faster than setting weight to all zeros before the loop.)
// is faster than setting weight to all zeros before the loop.)
err
=
rgemm
(
cb_fortran
,
cb_trans
,
cb_no_trans
,
for
(
size_t
g
=
0
;
g
<
numgroups
;
g
++
){
K_
,
M_
,
N_
,
1
,
err
=
rgemm
(
cb_fortran
,
cb_trans
,
cb_no_trans
,
&
col
->
ga
,
0
,
N_
,
K_
,
M_
,
N_
,
1
,
&
top
->
ga
,
n
*
top_stride
,
N_
,
&
col
->
ga
,
g
*
group_col_stride
,
N_
,
(
n
==
0
)
?
0
:
1
,
&
top
->
ga
,
n
*
batch_top_stride
+
g
*
group_top_stride
,
N_
,
&
weight
->
ga
,
0
,
K_
);
(
n
==
0
)
?
0
:
1
,
&
weight
->
ga
,
g
*
group_weight_stride
,
K_
);
}
if
(
err
!=
GA_NO_ERROR
)
{
if
(
err
!=
GA_NO_ERROR
)
{
PyErr_Format
(
PyExc_RuntimeError
,
PyErr_Format
(
PyExc_RuntimeError
,
"GpuCorrMM grad weights encountered an error running gemm: %d"
,
err
);
"GpuCorrMM grad weights encountered an error running gemm: %d"
,
err
);
...
@@ -575,13 +584,15 @@ PyGpuArrayObject* corrMM(PyGpuArrayObject *const bottom,
...
@@ -575,13 +584,15 @@ PyGpuArrayObject* corrMM(PyGpuArrayObject *const bottom,
// full convolution: gemm, then col2im
// full convolution: gemm, then col2im
// Iterate over batch
// Iterate over batch
for
(
size_t
n
=
0
;
n
<
batchSize
;
n
++
)
{
for
(
size_t
n
=
0
;
n
<
batchSize
;
n
++
)
{
// gemm into columns
// gemm into columns
err
=
rgemm
(
cb_fortran
,
cb_no_trans
,
cb_trans
,
for
(
size_t
g
=
0
;
g
<
numgroups
;
g
++
){
N_
,
K_
,
M_
,
1
,
err
=
rgemm
(
cb_fortran
,
cb_no_trans
,
cb_trans
,
&
top
->
ga
,
n
*
top_stride
,
N_
,
N_
,
K_
,
M_
,
1
,
&
weight
->
ga
,
0
,
K_
,
&
top
->
ga
,
n
*
batch_top_stride
+
g
*
group_top_stride
,
N_
,
0
,
&
weight
->
ga
,
g
*
group_weight_stride
,
K_
,
&
col
->
ga
,
0
,
N_
);
0
,
&
col
->
ga
,
g
*
group_col_stride
,
N_
);
}
if
(
err
!=
GA_NO_ERROR
)
{
if
(
err
!=
GA_NO_ERROR
)
{
PyErr_Format
(
PyExc_RuntimeError
,
PyErr_Format
(
PyExc_RuntimeError
,
"GpuCorrMM grad inputs encountered an error running gemm: %d"
,
err
);
"GpuCorrMM grad inputs encountered an error running gemm: %d"
,
err
);
...
@@ -591,7 +602,7 @@ PyGpuArrayObject* corrMM(PyGpuArrayObject *const bottom,
...
@@ -591,7 +602,7 @@ PyGpuArrayObject* corrMM(PyGpuArrayObject *const bottom,
// col2im back to the data
// col2im back to the data
err
=
col2im
(
&
col
->
ga
,
nChannels
,
bottomHeight
,
bottomWidth
,
err
=
col2im
(
&
col
->
ga
,
nChannels
,
bottomHeight
,
bottomWidth
,
kH
,
kW
,
dilH
,
dilW
,
padH
,
padW
,
kH
,
kW
,
dilH
,
dilW
,
padH
,
padW
,
dH
,
dW
,
&
bottom
->
ga
,
n
*
bottom_stride
);
dH
,
dW
,
&
bottom
->
ga
,
n
*
b
atch_b
ottom_stride
);
if
(
err
!=
GA_NO_ERROR
)
{
if
(
err
!=
GA_NO_ERROR
)
{
Py_DECREF
(
col
);
Py_DECREF
(
col
);
return
NULL
;
return
NULL
;
...
...
theano/gpuarray/dnn.py
浏览文件 @
c2e14ce1
差异被折叠。
点击展开。
theano/gpuarray/dnn_base.c
浏览文件 @
c2e14ce1
#section support_code
#section support_code
static
int
static
int
c_set_tensor
Nd
(
PyGpuArrayObject
*
var
,
cudnnTensorDescriptor_t
desc
)
{
c_set_tensor
_for_conv
(
PyGpuArrayObject
*
var
,
cudnnTensorDescriptor_t
desc
,
size_t
groups
)
{
cudnnDataType_t
dt
;
cudnnDataType_t
dt
;
size_t
ds
;
size_t
ds
;
switch
(
var
->
ga
.
typecode
)
{
switch
(
var
->
ga
.
typecode
)
{
...
@@ -42,7 +42,8 @@ c_set_tensorNd(PyGpuArrayObject *var, cudnnTensorDescriptor_t desc) {
...
@@ -42,7 +42,8 @@ c_set_tensorNd(PyGpuArrayObject *var, cudnnTensorDescriptor_t desc) {
strs
[
i
]
=
1
;
strs
[
i
]
=
1
;
dims
[
i
]
=
1
;
dims
[
i
]
=
1
;
}
}
//only for grouped convolution i.e when groups > 1
dims
[
1
]
=
dims
[
1
]
/
groups
;
cudnnStatus_t
err
=
cudnnSetTensorNdDescriptor
(
desc
,
dt
,
nd
<
3
?
3
:
nd
,
cudnnStatus_t
err
=
cudnnSetTensorNdDescriptor
(
desc
,
dt
,
nd
<
3
?
3
:
nd
,
dims
,
strs
);
dims
,
strs
);
if
(
err
!=
CUDNN_STATUS_SUCCESS
)
{
if
(
err
!=
CUDNN_STATUS_SUCCESS
)
{
...
@@ -54,6 +55,11 @@ c_set_tensorNd(PyGpuArrayObject *var, cudnnTensorDescriptor_t desc) {
...
@@ -54,6 +55,11 @@ c_set_tensorNd(PyGpuArrayObject *var, cudnnTensorDescriptor_t desc) {
return
0
;
return
0
;
}
}
static
int
c_set_tensorNd
(
PyGpuArrayObject
*
var
,
cudnnTensorDescriptor_t
desc
)
{
return
c_set_tensor_for_conv
(
var
,
desc
,
1
);
}
static
int
c_make_tensorNd
(
PyGpuArrayObject
*
var
,
cudnnTensorDescriptor_t
*
desc
)
{
static
int
c_make_tensorNd
(
PyGpuArrayObject
*
var
,
cudnnTensorDescriptor_t
*
desc
)
{
cudnnStatus_t
err
;
cudnnStatus_t
err
;
err
=
cudnnCreateTensorDescriptor
(
desc
);
err
=
cudnnCreateTensorDescriptor
(
desc
);
...
@@ -71,7 +77,7 @@ static int c_make_tensorNd(PyGpuArrayObject *var, cudnnTensorDescriptor_t *desc)
...
@@ -71,7 +77,7 @@ static int c_make_tensorNd(PyGpuArrayObject *var, cudnnTensorDescriptor_t *desc)
}
}
static
int
static
int
c_set_filter
(
PyGpuArrayObject
*
var
,
cudnnFilterDescriptor_t
desc
)
{
c_set_filter
(
PyGpuArrayObject
*
var
,
cudnnFilterDescriptor_t
desc
,
size_t
groups
)
{
cudnnDataType_t
dt
;
cudnnDataType_t
dt
;
cudnnStatus_t
err
;
cudnnStatus_t
err
;
...
@@ -111,6 +117,7 @@ c_set_filter(PyGpuArrayObject *var, cudnnFilterDescriptor_t desc) {
...
@@ -111,6 +117,7 @@ c_set_filter(PyGpuArrayObject *var, cudnnFilterDescriptor_t desc) {
/* Filters can't be less than 3d so we pad */
/* Filters can't be less than 3d so we pad */
for
(
unsigned
int
i
=
nd
;
i
<
3
;
i
++
)
for
(
unsigned
int
i
=
nd
;
i
<
3
;
i
++
)
dims
[
i
]
=
1
;
dims
[
i
]
=
1
;
dims
[
0
]
=
dims
[
0
]
/
groups
;
if
(
nd
<
3
)
if
(
nd
<
3
)
nd
=
3
;
nd
=
3
;
...
@@ -135,7 +142,7 @@ static int c_make_filter(PyGpuArrayObject *var, cudnnFilterDescriptor_t *desc) {
...
@@ -135,7 +142,7 @@ static int c_make_filter(PyGpuArrayObject *var, cudnnFilterDescriptor_t *desc) {
cudnnGetErrorString
(
err
));
cudnnGetErrorString
(
err
));
return
-
1
;
return
-
1
;
}
}
if
(
c_set_filter
(
var
,
*
desc
)
!=
0
)
{
if
(
c_set_filter
(
var
,
*
desc
,
1
)
!=
0
)
{
cudnnDestroyFilterDescriptor
(
*
desc
);
cudnnDestroyFilterDescriptor
(
*
desc
);
return
-
1
;
return
-
1
;
}
}
...
...
theano/gpuarray/dnn_fwd.c
浏览文件 @
c2e14ce1
...
@@ -29,7 +29,7 @@ APPLY_SPECIFIC(conv_fwd)(PyGpuArrayObject *input, PyGpuArrayObject *kerns,
...
@@ -29,7 +29,7 @@ APPLY_SPECIFIC(conv_fwd)(PyGpuArrayObject *input, PyGpuArrayObject *kerns,
float
af
=
alpha
,
bf
=
beta
;
float
af
=
alpha
,
bf
=
beta
;
cudnnStatus_t
err
=
CUDNN_STATUS_SUCCESS
;
cudnnStatus_t
err
=
CUDNN_STATUS_SUCCESS
;
if
(
PyGpuArray_DIMS
(
input
)[
1
]
!=
PyGpuArray_DIMS
(
kerns
)[
1
])
{
if
(
PyGpuArray_DIMS
(
input
)[
1
]
!=
PyGpuArray_DIMS
(
kerns
)[
1
]
*
params
->
num_groups
)
{
PyErr_SetString
(
PyExc_ValueError
,
PyErr_SetString
(
PyExc_ValueError
,
"images and kernel must have the same stack size"
);
"images and kernel must have the same stack size"
);
return
1
;
return
1
;
...
@@ -72,12 +72,15 @@ APPLY_SPECIFIC(conv_fwd)(PyGpuArrayObject *input, PyGpuArrayObject *kerns,
...
@@ -72,12 +72,15 @@ APPLY_SPECIFIC(conv_fwd)(PyGpuArrayObject *input, PyGpuArrayObject *kerns,
return
0
;
return
0
;
}
}
if
(
c_set_tensor
Nd
(
input
,
APPLY_SPECIFIC
(
input
)
)
==
-
1
)
if
(
c_set_tensor
_for_conv
(
input
,
APPLY_SPECIFIC
(
input
),
params
->
num_groups
)
==
-
1
)
return
1
;
return
1
;
if
(
c_set_filter
(
kerns
,
APPLY_SPECIFIC
(
kerns
))
==
-
1
)
if
(
c_set_filter
(
kerns
,
APPLY_SPECIFIC
(
kerns
)
,
params
->
num_groups
)
==
-
1
)
return
1
;
return
1
;
if
(
c_set_tensor
Nd
(
*
output
,
APPLY_SPECIFIC
(
output
)
)
==
-
1
)
if
(
c_set_tensor
_for_conv
(
*
output
,
APPLY_SPECIFIC
(
output
),
params
->
num_groups
)
==
-
1
)
return
1
;
return
1
;
size_t
input_offset
=
PyGpuArray_STRIDE
(
input
,
0
)
/
params
->
num_groups
;
size_t
kern_offset
=
PyGpuArray_STRIDE
(
kerns
,
0
)
*
PyGpuArray_DIM
(
kerns
,
0
)
/
params
->
num_groups
;
size_t
output_offset
=
PyGpuArray_STRIDE
(
*
output
,
0
)
/
params
->
num_groups
;
cudnnConvolutionFwdAlgo_t
algo
=
params
->
conv_algo
;
cudnnConvolutionFwdAlgo_t
algo
=
params
->
conv_algo
;
...
@@ -281,15 +284,17 @@ APPLY_SPECIFIC(conv_fwd)(PyGpuArrayObject *input, PyGpuArrayObject *kerns,
...
@@ -281,15 +284,17 @@ APPLY_SPECIFIC(conv_fwd)(PyGpuArrayObject *input, PyGpuArrayObject *kerns,
cuda_wait
(
kerns
->
ga
.
data
,
GPUARRAY_CUDA_WAIT_READ
);
cuda_wait
(
kerns
->
ga
.
data
,
GPUARRAY_CUDA_WAIT_READ
);
cuda_wait
((
*
output
)
->
ga
.
data
,
GPUARRAY_CUDA_WAIT_WRITE
);
cuda_wait
((
*
output
)
->
ga
.
data
,
GPUARRAY_CUDA_WAIT_WRITE
);
for
(
int
g
=
0
;
g
<
params
->
num_groups
;
g
++
)
{
err
=
cudnnConvolutionForward
(
err
=
cudnnConvolutionForward
(
params
->
handle
,
params
->
handle
,
alpha_p
,
alpha_p
,
APPLY_SPECIFIC
(
input
),
PyGpuArray_DEV_DATA
(
input
),
APPLY_SPECIFIC
(
input
),
PyGpuArray_DEV_DATA
(
input
)
+
input_offset
*
g
,
APPLY_SPECIFIC
(
kerns
),
PyGpuArray_DEV_DATA
(
kerns
),
APPLY_SPECIFIC
(
kerns
),
PyGpuArray_DEV_DATA
(
kerns
)
+
kern_offset
*
g
,
desc
,
algo
,
desc
,
algo
,
worksize
==
0
?
NULL
:
*
(
void
**
)
workspace
,
worksize
,
worksize
==
0
?
NULL
:
*
(
void
**
)
workspace
,
worksize
,
beta_p
,
beta_p
,
APPLY_SPECIFIC
(
output
),
PyGpuArray_DEV_DATA
(
*
output
));
APPLY_SPECIFIC
(
output
),
PyGpuArray_DEV_DATA
(
*
output
)
+
output_offset
*
g
);
}
if
(
worksize
!=
0
)
if
(
worksize
!=
0
)
gpudata_release
(
workspace
);
gpudata_release
(
workspace
);
...
...
theano/gpuarray/dnn_gi.c
浏览文件 @
c2e14ce1
...
@@ -28,7 +28,7 @@ APPLY_SPECIFIC(conv_gi)(PyGpuArrayObject *kerns, PyGpuArrayObject *output,
...
@@ -28,7 +28,7 @@ APPLY_SPECIFIC(conv_gi)(PyGpuArrayObject *kerns, PyGpuArrayObject *output,
float
af
=
alpha
,
bf
=
beta
;
float
af
=
alpha
,
bf
=
beta
;
cudnnStatus_t
err
=
CUDNN_STATUS_SUCCESS
;
cudnnStatus_t
err
=
CUDNN_STATUS_SUCCESS
;
if
(
PyGpuArray_DIMS
(
im
)[
1
]
!=
PyGpuArray_DIMS
(
kerns
)[
1
])
{
if
(
PyGpuArray_DIMS
(
im
)[
1
]
!=
PyGpuArray_DIMS
(
kerns
)[
1
]
*
params
->
num_groups
)
{
PyErr_SetString
(
PyExc_ValueError
,
"images and kernel must have the same "
PyErr_SetString
(
PyExc_ValueError
,
"images and kernel must have the same "
"stack size"
);
"stack size"
);
return
1
;
return
1
;
...
@@ -71,12 +71,15 @@ APPLY_SPECIFIC(conv_gi)(PyGpuArrayObject *kerns, PyGpuArrayObject *output,
...
@@ -71,12 +71,15 @@ APPLY_SPECIFIC(conv_gi)(PyGpuArrayObject *kerns, PyGpuArrayObject *output,
return
0
;
return
0
;
}
}
if
(
c_set_tensor
Nd
(
output
,
APPLY_SPECIFIC
(
output
)
)
==
-
1
)
if
(
c_set_tensor
_for_conv
(
output
,
APPLY_SPECIFIC
(
output
),
params
->
num_groups
)
==
-
1
)
return
1
;
return
1
;
if
(
c_set_filter
(
kerns
,
APPLY_SPECIFIC
(
kerns
))
==
-
1
)
if
(
c_set_filter
(
kerns
,
APPLY_SPECIFIC
(
kerns
)
,
params
->
num_groups
)
==
-
1
)
return
1
;
return
1
;
if
(
c_set_tensor
Nd
(
*
input
,
APPLY_SPECIFIC
(
input
)
)
==
-
1
)
if
(
c_set_tensor
_for_conv
(
*
input
,
APPLY_SPECIFIC
(
input
),
params
->
num_groups
)
==
-
1
)
return
1
;
return
1
;
size_t
input_offset
=
PyGpuArray_STRIDE
(
*
input
,
0
)
/
params
->
num_groups
;
size_t
kern_offset
=
PyGpuArray_STRIDE
(
kerns
,
0
)
*
PyGpuArray_DIM
(
kerns
,
0
)
/
params
->
num_groups
;
size_t
output_offset
=
PyGpuArray_STRIDE
(
output
,
0
)
/
params
->
num_groups
;
cudnnConvolutionBwdDataAlgo_t
algo
=
params
->
conv_algo
;
cudnnConvolutionBwdDataAlgo_t
algo
=
params
->
conv_algo
;
...
@@ -93,7 +96,7 @@ APPLY_SPECIFIC(conv_gi)(PyGpuArrayObject *kerns, PyGpuArrayObject *output,
...
@@ -93,7 +96,7 @@ APPLY_SPECIFIC(conv_gi)(PyGpuArrayObject *kerns, PyGpuArrayObject *output,
}
}
if
(
PyGpuArray_NDIM
(
im
)
==
4
)
{
if
(
PyGpuArray_NDIM
(
im
)
==
4
)
{
if
((
PyGpuArray_DIMS
(
output
)[
0
]
!=
expected_output_dims
[
0
])
||
if
((
PyGpuArray_DIMS
(
output
)[
0
]
!=
expected_output_dims
[
0
])
||
(
PyGpuArray_DIMS
(
output
)[
1
]
!=
expected_output_dims
[
1
])
||
(
PyGpuArray_DIMS
(
output
)[
1
]
/
params
->
num_groups
!=
expected_output_dims
[
1
])
||
(
PyGpuArray_DIMS
(
output
)[
2
]
!=
expected_output_dims
[
2
])
||
(
PyGpuArray_DIMS
(
output
)[
2
]
!=
expected_output_dims
[
2
])
||
(
PyGpuArray_DIMS
(
output
)[
3
]
!=
expected_output_dims
[
3
]))
{
(
PyGpuArray_DIMS
(
output
)[
3
]
!=
expected_output_dims
[
3
]))
{
PyErr_Format
(
PyExc_ValueError
,
"impossible convolution output dim: expected %ldx%ldx%ldx%ld"
PyErr_Format
(
PyExc_ValueError
,
"impossible convolution output dim: expected %ldx%ldx%ldx%ld"
...
@@ -286,14 +289,17 @@ APPLY_SPECIFIC(conv_gi)(PyGpuArrayObject *kerns, PyGpuArrayObject *output,
...
@@ -286,14 +289,17 @@ APPLY_SPECIFIC(conv_gi)(PyGpuArrayObject *kerns, PyGpuArrayObject *output,
cuda_wait
(
output
->
ga
.
data
,
GPUARRAY_CUDA_WAIT_READ
);
cuda_wait
(
output
->
ga
.
data
,
GPUARRAY_CUDA_WAIT_READ
);
cuda_wait
((
*
input
)
->
ga
.
data
,
GPUARRAY_CUDA_WAIT_WRITE
);
cuda_wait
((
*
input
)
->
ga
.
data
,
GPUARRAY_CUDA_WAIT_WRITE
);
err
=
cudnnConvolutionBackwardData
(
for
(
int
g
=
0
;
g
<
params
->
num_groups
;
g
++
)
params
->
handle
,
{
alpha_p
,
err
=
cudnnConvolutionBackwardData
(
APPLY_SPECIFIC
(
kerns
),
PyGpuArray_DEV_DATA
(
kerns
),
params
->
handle
,
APPLY_SPECIFIC
(
output
),
PyGpuArray_DEV_DATA
(
output
),
alpha_p
,
desc
,
algo
,
worksize
==
0
?
NULL
:
*
(
void
**
)
workspace
,
worksize
,
APPLY_SPECIFIC
(
kerns
),
PyGpuArray_DEV_DATA
(
kerns
)
+
kern_offset
*
g
,
beta_p
,
APPLY_SPECIFIC
(
output
),
PyGpuArray_DEV_DATA
(
output
)
+
output_offset
*
g
,
APPLY_SPECIFIC
(
input
),
PyGpuArray_DEV_DATA
(
*
input
));
desc
,
algo
,
worksize
==
0
?
NULL
:
*
(
void
**
)
workspace
,
worksize
,
beta_p
,
APPLY_SPECIFIC
(
input
),
PyGpuArray_DEV_DATA
(
*
input
)
+
input_offset
*
g
);
}
if
(
worksize
!=
0
)
if
(
worksize
!=
0
)
gpudata_release
(
workspace
);
gpudata_release
(
workspace
);
...
...
theano/gpuarray/dnn_gw.c
浏览文件 @
c2e14ce1
...
@@ -28,7 +28,7 @@ APPLY_SPECIFIC(conv_gw)(PyGpuArrayObject *input, PyGpuArrayObject *output,
...
@@ -28,7 +28,7 @@ APPLY_SPECIFIC(conv_gw)(PyGpuArrayObject *input, PyGpuArrayObject *output,
float
af
=
alpha
,
bf
=
beta
;
float
af
=
alpha
,
bf
=
beta
;
cudnnStatus_t
err
=
CUDNN_STATUS_SUCCESS
;
cudnnStatus_t
err
=
CUDNN_STATUS_SUCCESS
;
if
(
PyGpuArray_DIMS
(
input
)[
1
]
!=
PyGpuArray_DIMS
(
km
)[
1
])
{
if
(
PyGpuArray_DIMS
(
input
)[
1
]
!=
PyGpuArray_DIMS
(
km
)[
1
]
*
params
->
num_groups
)
{
PyErr_SetString
(
PyExc_ValueError
,
PyErr_SetString
(
PyExc_ValueError
,
"GpuDnnConv images and kernel must have the same stack size"
);
"GpuDnnConv images and kernel must have the same stack size"
);
return
1
;
return
1
;
...
@@ -71,13 +71,17 @@ APPLY_SPECIFIC(conv_gw)(PyGpuArrayObject *input, PyGpuArrayObject *output,
...
@@ -71,13 +71,17 @@ APPLY_SPECIFIC(conv_gw)(PyGpuArrayObject *input, PyGpuArrayObject *output,
return
0
;
return
0
;
}
}
if
(
c_set_tensor
Nd
(
input
,
APPLY_SPECIFIC
(
input
)
)
==
-
1
)
if
(
c_set_tensor
_for_conv
(
input
,
APPLY_SPECIFIC
(
input
),
params
->
num_groups
)
==
-
1
)
return
1
;
return
1
;
if
(
c_set_tensor
Nd
(
output
,
APPLY_SPECIFIC
(
output
)
)
==
-
1
)
if
(
c_set_tensor
_for_conv
(
output
,
APPLY_SPECIFIC
(
output
),
params
->
num_groups
)
==
-
1
)
return
1
;
return
1
;
if
(
c_set_filter
(
*
kerns
,
APPLY_SPECIFIC
(
kerns
))
==
-
1
)
if
(
c_set_filter
(
*
kerns
,
APPLY_SPECIFIC
(
kerns
)
,
params
->
num_groups
)
==
-
1
)
return
1
;
return
1
;
size_t
input_offset
=
PyGpuArray_STRIDE
(
input
,
0
)
/
params
->
num_groups
;
size_t
kern_offset
=
PyGpuArray_STRIDE
(
*
kerns
,
0
)
*
PyGpuArray_DIM
(
*
kerns
,
0
)
/
params
->
num_groups
;
size_t
output_offset
=
PyGpuArray_STRIDE
(
output
,
0
)
/
params
->
num_groups
;
cudnnConvolutionBwdFilterAlgo_t
algo
=
params
->
conv_algo
;
cudnnConvolutionBwdFilterAlgo_t
algo
=
params
->
conv_algo
;
cuda_enter
(
c
->
ctx
);
cuda_enter
(
c
->
ctx
);
...
@@ -93,7 +97,7 @@ APPLY_SPECIFIC(conv_gw)(PyGpuArrayObject *input, PyGpuArrayObject *output,
...
@@ -93,7 +97,7 @@ APPLY_SPECIFIC(conv_gw)(PyGpuArrayObject *input, PyGpuArrayObject *output,
}
}
if
(
PyGpuArray_NDIM
(
input
)
==
4
)
{
if
(
PyGpuArray_NDIM
(
input
)
==
4
)
{
if
((
PyGpuArray_DIMS
(
output
)[
0
]
!=
expected_output_dims
[
0
])
||
if
((
PyGpuArray_DIMS
(
output
)[
0
]
!=
expected_output_dims
[
0
])
||
(
PyGpuArray_DIMS
(
output
)[
1
]
!=
expected_output_dims
[
1
])
||
(
PyGpuArray_DIMS
(
output
)[
1
]
/
params
->
num_groups
!=
expected_output_dims
[
1
])
||
(
PyGpuArray_DIMS
(
output
)[
2
]
!=
expected_output_dims
[
2
])
||
(
PyGpuArray_DIMS
(
output
)[
2
]
!=
expected_output_dims
[
2
])
||
(
PyGpuArray_DIMS
(
output
)[
3
]
!=
expected_output_dims
[
3
]))
{
(
PyGpuArray_DIMS
(
output
)[
3
]
!=
expected_output_dims
[
3
]))
{
PyErr_Format
(
PyExc_ValueError
,
"impossible convolution output dim: expected %ldx%ldx%dx%ld"
PyErr_Format
(
PyExc_ValueError
,
"impossible convolution output dim: expected %ldx%ldx%dx%ld"
...
@@ -273,14 +277,18 @@ APPLY_SPECIFIC(conv_gw)(PyGpuArrayObject *input, PyGpuArrayObject *output,
...
@@ -273,14 +277,18 @@ APPLY_SPECIFIC(conv_gw)(PyGpuArrayObject *input, PyGpuArrayObject *output,
cuda_wait
(
output
->
ga
.
data
,
GPUARRAY_CUDA_WAIT_READ
);
cuda_wait
(
output
->
ga
.
data
,
GPUARRAY_CUDA_WAIT_READ
);
cuda_wait
((
*
kerns
)
->
ga
.
data
,
GPUARRAY_CUDA_WAIT_WRITE
);
cuda_wait
((
*
kerns
)
->
ga
.
data
,
GPUARRAY_CUDA_WAIT_WRITE
);
err
=
cudnnConvolutionBackwardFilter
(
for
(
int
g
=
0
;
g
<
params
->
num_groups
;
g
++
)
params
->
handle
,
{
alpha_p
,
APPLY_SPECIFIC
(
input
),
PyGpuArray_DEV_DATA
(
input
),
err
=
cudnnConvolutionBackwardFilter
(
APPLY_SPECIFIC
(
output
),
PyGpuArray_DEV_DATA
(
output
),
params
->
handle
,
desc
,
algo
,
worksize
==
0
?
NULL
:
*
(
void
**
)
workspace
,
worksize
,
alpha_p
,
beta_p
,
APPLY_SPECIFIC
(
input
),
PyGpuArray_DEV_DATA
(
input
)
+
input_offset
*
g
,
APPLY_SPECIFIC
(
kerns
),
PyGpuArray_DEV_DATA
(
*
kerns
));
APPLY_SPECIFIC
(
output
),
PyGpuArray_DEV_DATA
(
output
)
+
output_offset
*
g
,
desc
,
algo
,
worksize
==
0
?
NULL
:
*
(
void
**
)
workspace
,
worksize
,
beta_p
,
APPLY_SPECIFIC
(
kerns
),
PyGpuArray_DEV_DATA
(
*
kerns
)
+
kern_offset
*
g
);
}
if
(
worksize
!=
0
)
if
(
worksize
!=
0
)
gpudata_release
(
workspace
);
gpudata_release
(
workspace
);
...
...
theano/gpuarray/opt.py
浏览文件 @
c2e14ce1
...
@@ -1533,7 +1533,8 @@ def local_abstractconv_gemm(node):
...
@@ -1533,7 +1533,8 @@ def local_abstractconv_gemm(node):
border_mode
=
node
.
op
.
border_mode
border_mode
=
node
.
op
.
border_mode
subsample
=
node
.
op
.
subsample
subsample
=
node
.
op
.
subsample
filter_dilation
=
node
.
op
.
filter_dilation
filter_dilation
=
node
.
op
.
filter_dilation
if
((
border_mode
==
'full'
)
and
(
subsample
==
(
1
,
1
))):
if
((
border_mode
==
'full'
)
and
(
subsample
==
(
1
,
1
))
and
node
.
op
.
num_groups
==
1
):
if
not
node
.
op
.
filter_flip
:
if
not
node
.
op
.
filter_flip
:
kern
=
kern
[:,
:,
::
-
1
,
::
-
1
]
kern
=
kern
[:,
:,
::
-
1
,
::
-
1
]
# need to dimshuffle the kernel for full convolution
# need to dimshuffle the kernel for full convolution
...
@@ -1550,8 +1551,9 @@ def local_abstractconv_gemm(node):
...
@@ -1550,8 +1551,9 @@ def local_abstractconv_gemm(node):
# By default use GpuCorrMM
# By default use GpuCorrMM
rval
=
GpuCorrMM
(
border_mode
,
rval
=
GpuCorrMM
(
border_mode
,
subsample
,
subsample
,
filter_dilation
)(
gpu_contiguous
(
img
),
filter_dilation
,
gpu_contiguous
(
kern
))
node
.
op
.
num_groups
)(
gpu_contiguous
(
img
),
gpu_contiguous
(
kern
))
# call GpuCorrMM_gradWeights if good
# call GpuCorrMM_gradWeights if good
# (the latter is faster if batchsize * kernelHeight * kernelWidth
# (the latter is faster if batchsize * kernelHeight * kernelWidth
...
@@ -1669,7 +1671,8 @@ def local_abstractconv_gradweights_gemm(node):
...
@@ -1669,7 +1671,8 @@ def local_abstractconv_gradweights_gemm(node):
rval
=
GpuCorrMM_gradWeights
(
border_mode
=
node
.
op
.
border_mode
,
rval
=
GpuCorrMM_gradWeights
(
border_mode
=
node
.
op
.
border_mode
,
subsample
=
node
.
op
.
subsample
,
subsample
=
node
.
op
.
subsample
,
filter_dilation
=
node
.
op
.
filter_dilation
)(
filter_dilation
=
node
.
op
.
filter_dilation
,
num_groups
=
node
.
op
.
num_groups
)(
gpu_contiguous
(
img
),
gpu_contiguous
(
topgrad
),
shape
)
gpu_contiguous
(
img
),
gpu_contiguous
(
topgrad
),
shape
)
if
node
.
op
.
filter_flip
:
if
node
.
op
.
filter_flip
:
rval
=
rval
[:,
:,
::
-
1
,
::
-
1
]
rval
=
rval
[:,
:,
::
-
1
,
::
-
1
]
...
@@ -1713,7 +1716,8 @@ def local_abstractconv_gradinputs_gemm(node):
...
@@ -1713,7 +1716,8 @@ def local_abstractconv_gradinputs_gemm(node):
rval
=
GpuCorrMM_gradInputs
(
border_mode
=
node
.
op
.
border_mode
,
rval
=
GpuCorrMM_gradInputs
(
border_mode
=
node
.
op
.
border_mode
,
subsample
=
node
.
op
.
subsample
,
subsample
=
node
.
op
.
subsample
,
filter_dilation
=
node
.
op
.
filter_dilation
)(
filter_dilation
=
node
.
op
.
filter_dilation
,
num_groups
=
node
.
op
.
num_groups
)(
gpu_contiguous
(
kern
),
gpu_contiguous
(
topgrad
),
shape
)
gpu_contiguous
(
kern
),
gpu_contiguous
(
topgrad
),
shape
)
return
[
rval
]
return
[
rval
]
...
...
theano/gpuarray/tests/test_dnn.py
浏览文件 @
c2e14ce1
...
@@ -25,6 +25,7 @@ from . import test_nnet
...
@@ -25,6 +25,7 @@ from . import test_nnet
from
.rnn_support
import
Model
,
GRU
,
LSTM
,
WrapperLayer
from
.rnn_support
import
Model
,
GRU
,
LSTM
,
WrapperLayer
from
theano.configdefaults
import
SUPPORTED_DNN_CONV_ALGO_FWD
from
theano.configdefaults
import
SUPPORTED_DNN_CONV_ALGO_FWD
from
theano.tensor.nnet.tests.test_abstract_conv
import
Grouped_conv_noOptim
try
:
try
:
import
pygpu
import
pygpu
...
@@ -2263,3 +2264,37 @@ def test_dnn_rnn_lstm_grad_c():
...
@@ -2263,3 +2264,37 @@ def test_dnn_rnn_lstm_grad_c():
(
i
+
1
)
*
len
(
cudnn_grads_layer
)]
(
i
+
1
)
*
len
(
cudnn_grads_layer
)]
for
j
,
g
in
enumerate
(
cudnn_grads_layer
):
for
j
,
g
in
enumerate
(
cudnn_grads_layer
):
utt
.
assert_allclose
(
ref_grads_layer
[
j
],
g
)
utt
.
assert_allclose
(
ref_grads_layer
[
j
],
g
)
def
dconv2d
(
border_mode
,
subsample
,
filter_dilation
,
num_groups
):
def
dconv
(
img
,
kern
):
return
dnn
.
dnn_conv
(
img
,
kern
,
border_mode
=
border_mode
,
subsample
=
subsample
,
dilation
=
filter_dilation
,
conv_mode
=
'conv'
,
direction_hint
=
'forward'
,
workmem
=
None
,
algo
=
None
,
precision
=
None
,
num_groups
=
num_groups
)
return
dconv
def
dconv2dw
(
border_mode
,
subsample
,
filter_dilation
,
num_groups
):
def
dconvw
(
img
,
topgrad
,
kshp
):
return
dnn
.
dnn_gradweight
(
img
,
topgrad
,
kshp
,
border_mode
=
border_mode
,
subsample
=
subsample
,
dilation
=
filter_dilation
,
conv_mode
=
'conv'
,
precision
=
None
,
algo
=
None
,
num_groups
=
num_groups
)
return
dconvw
def
dconv2di
(
border_mode
,
subsample
,
filter_dilation
,
num_groups
):
def
dconvi
(
kern
,
topgrad
,
imshp
):
return
dnn
.
dnn_gradinput
(
kern
,
topgrad
,
imshp
,
border_mode
=
border_mode
,
subsample
=
subsample
,
dilation
=
filter_dilation
,
conv_mode
=
'conv'
,
precision
=
None
,
algo
=
None
,
num_groups
=
num_groups
)
return
dconvi
class
Cudnn_grouped_conv
(
Grouped_conv_noOptim
):
mode
=
mode_with_gpu
conv2d
=
staticmethod
(
dconv2d
)
conv2d_gradw
=
staticmethod
(
dconv2dw
)
conv2d_gradi
=
staticmethod
(
dconv2di
)
conv2d_op
=
dnn
.
GpuDnnConv
conv2d_gradw_op
=
dnn
.
GpuDnnConvGradW
conv2d_gradi_op
=
dnn
.
GpuDnnConvGradI
flip_filter
=
False
is_dnn
=
True
theano/gpuarray/tests/test_gemmcorr.py
浏览文件 @
c2e14ce1
...
@@ -11,6 +11,7 @@ from theano.tensor.nnet.corr import CorrMM, CorrMM_gradWeights, CorrMM_gradInput
...
@@ -11,6 +11,7 @@ from theano.tensor.nnet.corr import CorrMM, CorrMM_gradWeights, CorrMM_gradInput
from
..type
import
gpuarray_shared_constructor
from
..type
import
gpuarray_shared_constructor
from
..blas
import
GpuCorrMM
,
GpuCorrMM_gradWeights
,
GpuCorrMM_gradInputs
from
..blas
import
GpuCorrMM
,
GpuCorrMM_gradWeights
,
GpuCorrMM_gradInputs
from
.config
import
mode_with_gpu
,
mode_without_gpu
,
ref_cast
from
.config
import
mode_with_gpu
,
mode_without_gpu
,
ref_cast
from
theano.tensor.nnet.tests.test_abstract_conv
import
Grouped_conv_noOptim
class
TestCorrMM
(
unittest
.
TestCase
):
class
TestCorrMM
(
unittest
.
TestCase
):
...
@@ -219,3 +220,15 @@ class TestCorrMM(unittest.TestCase):
...
@@ -219,3 +220,15 @@ class TestCorrMM(unittest.TestCase):
verify_grad
=
False
)
verify_grad
=
False
)
self
.
run_gradinput
(
inputs_shape
=
(
1
,
1024
,
3
,
1
),
self
.
run_gradinput
(
inputs_shape
=
(
1
,
1024
,
3
,
1
),
filters_shape
=
(
1
,
1
,
1
,
1024
))
filters_shape
=
(
1
,
1
,
1
,
1024
))
class
TestGroupGpuCorr2d
(
Grouped_conv_noOptim
):
mode
=
theano
.
compile
.
get_mode
(
"FAST_RUN"
)
conv2d
=
GpuCorrMM
conv2d_gradw
=
GpuCorrMM_gradWeights
conv2d_gradi
=
GpuCorrMM_gradInputs
conv2d_op
=
GpuCorrMM
conv2d_gradw_op
=
GpuCorrMM_gradWeights
conv2d_gradi_op
=
GpuCorrMM_gradInputs
flip_filter
=
True
is_dnn
=
False
theano/tensor/nnet/__init__.py
浏览文件 @
c2e14ce1
...
@@ -39,7 +39,7 @@ from .abstract_conv import conv3d
...
@@ -39,7 +39,7 @@ from .abstract_conv import conv3d
def
conv2d
(
input
,
filters
,
input_shape
=
None
,
filter_shape
=
None
,
def
conv2d
(
input
,
filters
,
input_shape
=
None
,
filter_shape
=
None
,
border_mode
=
'valid'
,
subsample
=
(
1
,
1
),
filter_flip
=
True
,
border_mode
=
'valid'
,
subsample
=
(
1
,
1
),
filter_flip
=
True
,
image_shape
=
None
,
filter_dilation
=
(
1
,
1
),
**
kwargs
):
image_shape
=
None
,
filter_dilation
=
(
1
,
1
),
num_groups
=
1
,
**
kwargs
):
"""
"""
This function will build the symbolic graph for convolving a mini-batch of a
This function will build the symbolic graph for convolving a mini-batch of a
stack of 2D inputs with a set of 2D filters. The implementation is modelled
stack of 2D inputs with a set of 2D filters. The implementation is modelled
...
@@ -103,6 +103,10 @@ def conv2d(input, filters, input_shape=None, filter_shape=None,
...
@@ -103,6 +103,10 @@ def conv2d(input, filters, input_shape=None, filter_shape=None,
Factor by which to subsample (stride) the input.
Factor by which to subsample (stride) the input.
Also called dilation elsewhere.
Also called dilation elsewhere.
num_groups : int
Divides the image, kernel and output tensors into num_groups
separate groups. Each which carry out convolutions separately
kwargs: Any other keyword arguments are accepted for backwards
kwargs: Any other keyword arguments are accepted for backwards
compatibility, but will be ignored.
compatibility, but will be ignored.
...
@@ -152,12 +156,12 @@ def conv2d(input, filters, input_shape=None, filter_shape=None,
...
@@ -152,12 +156,12 @@ def conv2d(input, filters, input_shape=None, filter_shape=None,
return
abstract_conv2d
(
input
,
filters
,
input_shape
,
filter_shape
,
return
abstract_conv2d
(
input
,
filters
,
input_shape
,
filter_shape
,
border_mode
,
subsample
,
filter_flip
,
border_mode
,
subsample
,
filter_flip
,
filter_dilation
)
filter_dilation
,
num_groups
)
def
conv2d_transpose
(
input
,
filters
,
output_shape
,
filter_shape
=
None
,
def
conv2d_transpose
(
input
,
filters
,
output_shape
,
filter_shape
=
None
,
border_mode
=
'valid'
,
input_dilation
=
(
1
,
1
),
border_mode
=
'valid'
,
input_dilation
=
(
1
,
1
),
filter_flip
=
True
,
filter_dilation
=
(
1
,
1
)):
filter_flip
=
True
,
filter_dilation
=
(
1
,
1
)
,
num_groups
=
1
):
"""
"""
This function will build the symbolic graph for applying a transposed
This function will build the symbolic graph for applying a transposed
convolution over a mini-batch of a stack of 2D inputs with a set of 2D
convolution over a mini-batch of a stack of 2D inputs with a set of 2D
...
@@ -209,6 +213,10 @@ def conv2d_transpose(input, filters, output_shape, filter_shape=None,
...
@@ -209,6 +213,10 @@ def conv2d_transpose(input, filters, output_shape, filter_shape=None,
Factor by which to subsample (stride) the input.
Factor by which to subsample (stride) the input.
Also called dilation elsewhere.
Also called dilation elsewhere.
num_groups : int
Divides the image, kernel and output tensors into num_groups
separate groups. Each which carry out convolutions separately
Returns
Returns
-------
-------
Symbolic 4D tensor
Symbolic 4D tensor
...
@@ -235,4 +243,5 @@ def conv2d_transpose(input, filters, output_shape, filter_shape=None,
...
@@ -235,4 +243,5 @@ def conv2d_transpose(input, filters, output_shape, filter_shape=None,
border_mode
=
border_mode
,
border_mode
=
border_mode
,
subsample
=
input_dilation
,
subsample
=
input_dilation
,
filter_flip
=
filter_flip
,
filter_flip
=
filter_flip
,
filter_dilation
=
filter_dilation
)
filter_dilation
=
filter_dilation
,
num_groups
=
num_groups
)
theano/tensor/nnet/abstract_conv.py
浏览文件 @
c2e14ce1
差异被折叠。
点击展开。
theano/tensor/nnet/corr.py
浏览文件 @
c2e14ce1
...
@@ -40,9 +40,11 @@ class BaseCorrMM(gof.OpenMPOp):
...
@@ -40,9 +40,11 @@ class BaseCorrMM(gof.OpenMPOp):
Perform subsampling of the output (default: (1, 1)).
Perform subsampling of the output (default: (1, 1)).
filter_dilation
filter_dilation
Perform dilated correlation (default: (1,1))
Perform dilated correlation (default: (1,1))
num_groups
Perform grouped convolutions (default: 1)
"""
"""
check_broadcast
=
False
check_broadcast
=
False
__props__
=
(
'border_mode'
,
'subsample'
,
'filter_dilation'
)
__props__
=
(
'border_mode'
,
'subsample'
,
'filter_dilation'
,
'num_groups'
)
_direction
=
None
_direction
=
None
...
@@ -51,10 +53,11 @@ class BaseCorrMM(gof.OpenMPOp):
...
@@ -51,10 +53,11 @@ class BaseCorrMM(gof.OpenMPOp):
(
'DIRECTION_BACKPROP_INPUTS'
,
'backprop inputs'
)),
# 2
(
'DIRECTION_BACKPROP_INPUTS'
,
'backprop inputs'
)),
# 2
dH
=
int64
,
dW
=
int64
,
dH
=
int64
,
dW
=
int64
,
dilH
=
int64
,
dilW
=
int64
,
dilH
=
int64
,
dilW
=
int64
,
padH
=
int64
,
padW
=
int64
)
padH
=
int64
,
padW
=
int64
,
num_groups
=
int64
)
def
__init__
(
self
,
border_mode
=
"valid"
,
subsample
=
(
1
,
1
),
def
__init__
(
self
,
border_mode
=
"valid"
,
subsample
=
(
1
,
1
),
filter_dilation
=
(
1
,
1
),
openmp
=
None
):
filter_dilation
=
(
1
,
1
),
num_groups
=
1
,
openmp
=
None
):
super
(
BaseCorrMM
,
self
)
.
__init__
(
openmp
=
openmp
)
super
(
BaseCorrMM
,
self
)
.
__init__
(
openmp
=
openmp
)
if
isinstance
(
border_mode
,
integer_types
):
if
isinstance
(
border_mode
,
integer_types
):
if
border_mode
<
0
:
if
border_mode
<
0
:
...
@@ -97,6 +100,9 @@ class BaseCorrMM(gof.OpenMPOp):
...
@@ -97,6 +100,9 @@ class BaseCorrMM(gof.OpenMPOp):
if
self
.
_direction
not
in
[
"forward"
,
"backprop weights"
,
"backprop inputs"
]:
if
self
.
_direction
not
in
[
"forward"
,
"backprop weights"
,
"backprop inputs"
]:
raise
ValueError
(
"_direction must be one of 'forward', "
raise
ValueError
(
"_direction must be one of 'forward', "
"'backprop weights', 'backprop inputs'"
)
"'backprop weights', 'backprop inputs'"
)
if
num_groups
<
1
:
raise
ValueError
(
"Number of groups should be greater than 0"
)
self
.
num_groups
=
num_groups
@property
@property
def
pad
(
self
):
def
pad
(
self
):
...
@@ -124,11 +130,12 @@ class BaseCorrMM(gof.OpenMPOp):
...
@@ -124,11 +130,12 @@ class BaseCorrMM(gof.OpenMPOp):
padW
=
property
(
lambda
self
:
self
.
pad
[
1
])
padW
=
property
(
lambda
self
:
self
.
pad
[
1
])
def
__str__
(
self
):
def
__str__
(
self
):
return
'
%
s{
%
s,
%
s,
%
s}'
%
(
return
'
%
s{
%
s,
%
s,
%
s
,
%
s
}'
%
(
self
.
__class__
.
__name__
,
self
.
__class__
.
__name__
,
self
.
border_mode
,
self
.
border_mode
,
str
(
self
.
subsample
),
str
(
self
.
subsample
),
str
(
self
.
filter_dilation
))
str
(
self
.
filter_dilation
),
str
(
self
.
num_groups
))
@staticmethod
@staticmethod
def
as_common_dtype
(
in1
,
in2
):
def
as_common_dtype
(
in1
,
in2
):
...
@@ -138,6 +145,11 @@ class BaseCorrMM(gof.OpenMPOp):
...
@@ -138,6 +145,11 @@ class BaseCorrMM(gof.OpenMPOp):
dtype
=
theano
.
scalar
.
upcast
(
in1
.
dtype
,
in2
.
dtype
)
dtype
=
theano
.
scalar
.
upcast
(
in1
.
dtype
,
in2
.
dtype
)
return
in1
.
astype
(
dtype
),
in2
.
astype
(
dtype
)
return
in1
.
astype
(
dtype
),
in2
.
astype
(
dtype
)
def
__setstate__
(
self
,
d
):
self
.
__dict__
.
update
(
d
)
if
not
hasattr
(
self
,
'num_groups'
):
self
.
num_groups
=
1
def
c_support_code
(
self
):
def
c_support_code
(
self
):
ccodes
=
blas_headers
.
blas_header_text
()
ccodes
=
blas_headers
.
blas_header_text
()
if
self
.
blas_type
==
'openblas'
:
if
self
.
blas_type
==
'openblas'
:
...
@@ -167,7 +179,7 @@ class BaseCorrMM(gof.OpenMPOp):
...
@@ -167,7 +179,7 @@ class BaseCorrMM(gof.OpenMPOp):
def
c_code_cache_version
(
self
):
def
c_code_cache_version
(
self
):
# raise this whenever modifying any of the support_code_files
# raise this whenever modifying any of the support_code_files
return
(
6
,
self
.
openmp
,
blas_header_version
())
return
(
7
,
self
.
openmp
,
blas_header_version
())
def
c_support_code_apply
(
self
,
node
,
nodename
):
def
c_support_code_apply
(
self
,
node
,
nodename
):
# REMEMBER TO RAISE c_code_cache_version when changing any of
# REMEMBER TO RAISE c_code_cache_version when changing any of
...
@@ -274,6 +286,7 @@ class BaseCorrMM(gof.OpenMPOp):
...
@@ -274,6 +286,7 @@ class BaseCorrMM(gof.OpenMPOp):
int dilW =
%(params)
s->dilW;
int dilW =
%(params)
s->dilW;
int padH =
%(params)
s->padH;
int padH =
%(params)
s->padH;
int padW =
%(params)
s->padW;
int padW =
%(params)
s->padW;
int numgroups =
%(params)
s->num_groups;
PyArrayObject * bottom =
%(bottom)
s;
PyArrayObject * bottom =
%(bottom)
s;
PyArrayObject * weights =
%(weights)
s;
PyArrayObject * weights =
%(weights)
s;
...
@@ -386,7 +399,7 @@ class BaseCorrMM(gof.OpenMPOp):
...
@@ -386,7 +399,7 @@ class BaseCorrMM(gof.OpenMPOp):
// output is weights: (num_filters, num_channels, height, width)
// output is weights: (num_filters, num_channels, height, width)
// height and width: weights = (bottom + 2*pad - (top - 1) * sample - 1) / dil + 1
// height and width: weights = (bottom + 2*pad - (top - 1) * sample - 1) / dil + 1
out_dim[0] = (npy_intp)PyArray_DIMS(top)[1];
out_dim[0] = (npy_intp)PyArray_DIMS(top)[1];
out_dim[1] = (npy_intp)PyArray_DIMS(bottom)[1];
out_dim[1] = (npy_intp)PyArray_DIMS(bottom)[1]
/ numgroups
;
out_dim[2] = (npy_intp)kH; // already inferred further above
out_dim[2] = (npy_intp)kH; // already inferred further above
out_dim[3] = (npy_intp)kW; // how convenient
out_dim[3] = (npy_intp)kW; // how convenient
if (out_dim[0] < 0 || out_dim[1] < 0 || out_dim[2] <= 0 || out_dim[3] <= 0)
if (out_dim[0] < 0 || out_dim[1] < 0 || out_dim[2] <= 0 || out_dim[3] <= 0)
...
@@ -409,7 +422,7 @@ class BaseCorrMM(gof.OpenMPOp):
...
@@ -409,7 +422,7 @@ class BaseCorrMM(gof.OpenMPOp):
// output is bottom: (batchsize, num_channels, height, width)
// output is bottom: (batchsize, num_channels, height, width)
// height and width: bottom = (top - 1) * sample + (weights-1)*dil + 1 - 2*pad
// height and width: bottom = (top - 1) * sample + (weights-1)*dil + 1 - 2*pad
out_dim[0] = (npy_intp)PyArray_DIMS(top)[0];
out_dim[0] = (npy_intp)PyArray_DIMS(top)[0];
out_dim[1] = (npy_intp)PyArray_DIMS(weights)[1];
out_dim[1] = (npy_intp)PyArray_DIMS(weights)[1]
* numgroups
;
out_dim[2] = (npy_intp)((
%(height)
s != -1) ?
%(height)
s : (PyArray_DIMS(top)[2] - 1) * dH + (PyArray_DIMS(weights)[2]-1)*dilH + 1 - 2*padH);
out_dim[2] = (npy_intp)((
%(height)
s != -1) ?
%(height)
s : (PyArray_DIMS(top)[2] - 1) * dH + (PyArray_DIMS(weights)[2]-1)*dilH + 1 - 2*padH);
out_dim[3] = (npy_intp)((
%(width)
s != -1) ?
%(width)
s : (PyArray_DIMS(top)[3] - 1) * dW + (PyArray_DIMS(weights)[3]-1)*dilW + 1 - 2*padW);
out_dim[3] = (npy_intp)((
%(width)
s != -1) ?
%(width)
s : (PyArray_DIMS(top)[3] - 1) * dW + (PyArray_DIMS(weights)[3]-1)*dilW + 1 - 2*padW);
if (out_dim[0] < 0 || out_dim[1] < 0 || out_dim[2] <= 0 || out_dim[3] <= 0)
if (out_dim[0] < 0 || out_dim[1] < 0 || out_dim[2] <= 0 || out_dim[3] <= 0)
...
@@ -465,7 +478,7 @@ class BaseCorrMM(gof.OpenMPOp):
...
@@ -465,7 +478,7 @@ class BaseCorrMM(gof.OpenMPOp):
}
}
// Call corrMM code
// Call corrMM code
out2 = corrMM(
%(bottom)
s,
%(weights)
s,
%(top)
s, direction, dH, dW, dilH, dilW, padH, padW);
out2 = corrMM(
%(bottom)
s,
%(weights)
s,
%(top)
s, direction, dH, dW, dilH, dilW, padH, padW
, numgroups
);
if (out2==NULL){
if (out2==NULL){
%(fail)
s
%(fail)
s
}
}
...
@@ -541,12 +554,14 @@ class CorrMM(BaseCorrMM):
...
@@ -541,12 +554,14 @@ class CorrMM(BaseCorrMM):
top
,
=
grads
top
,
=
grads
d_bottom
=
CorrMM_gradInputs
(
self
.
border_mode
,
d_bottom
=
CorrMM_gradInputs
(
self
.
border_mode
,
self
.
subsample
,
self
.
subsample
,
self
.
filter_dilation
)(
weights
,
top
,
self
.
filter_dilation
,
bottom
.
shape
[
-
2
:])
self
.
num_groups
)(
weights
,
top
,
bottom
.
shape
[
-
2
:])
d_weights
=
CorrMM_gradWeights
(
self
.
border_mode
,
d_weights
=
CorrMM_gradWeights
(
self
.
border_mode
,
self
.
subsample
,
self
.
subsample
,
self
.
filter_dilation
)(
bottom
,
top
,
self
.
filter_dilation
,
weights
.
shape
[
-
2
:])
self
.
num_groups
)(
bottom
,
top
,
weights
.
shape
[
-
2
:])
return
d_bottom
,
d_weights
return
d_bottom
,
d_weights
...
@@ -600,6 +615,7 @@ class CorrMM_gradWeights(BaseCorrMM):
...
@@ -600,6 +615,7 @@ class CorrMM_gradWeights(BaseCorrMM):
imshp
=
input_shape
[
0
]
imshp
=
input_shape
[
0
]
topshp
=
input_shape
[
1
]
topshp
=
input_shape
[
1
]
ssize
,
imshp
=
imshp
[
1
],
list
(
imshp
[
2
:])
ssize
,
imshp
=
imshp
[
1
],
list
(
imshp
[
2
:])
ssize
=
ssize
//
self
.
num_groups
nkern
,
topshp
=
topshp
[
1
],
list
(
topshp
[
2
:])
nkern
,
topshp
=
topshp
[
1
],
list
(
topshp
[
2
:])
height_width
=
node
.
inputs
[
-
2
:]
height_width
=
node
.
inputs
[
-
2
:]
if
((
dH
!=
1
)
or
(
padH
==
-
1
)):
if
((
dH
!=
1
)
or
(
padH
==
-
1
)):
...
@@ -632,11 +648,13 @@ class CorrMM_gradWeights(BaseCorrMM):
...
@@ -632,11 +648,13 @@ class CorrMM_gradWeights(BaseCorrMM):
weights
,
=
grads
weights
,
=
grads
d_bottom
=
CorrMM_gradInputs
(
self
.
border_mode
,
d_bottom
=
CorrMM_gradInputs
(
self
.
border_mode
,
self
.
subsample
,
self
.
subsample
,
self
.
filter_dilation
)(
weights
,
top
,
self
.
filter_dilation
,
bottom
.
shape
[
-
2
:])
self
.
num_groups
)(
weights
,
top
,
bottom
.
shape
[
-
2
:])
d_top
=
CorrMM
(
self
.
border_mode
,
d_top
=
CorrMM
(
self
.
border_mode
,
self
.
subsample
,
self
.
subsample
,
self
.
filter_dilation
)(
bottom
,
weights
)
self
.
filter_dilation
,
self
.
num_groups
)(
bottom
,
weights
)
d_height_width
=
((
theano
.
gradient
.
DisconnectedType
()(),)
*
2
d_height_width
=
((
theano
.
gradient
.
DisconnectedType
()(),)
*
2
if
len
(
inp
)
==
4
else
())
if
len
(
inp
)
==
4
else
())
return
(
d_bottom
,
d_top
)
+
d_height_width
return
(
d_bottom
,
d_top
)
+
d_height_width
...
@@ -678,8 +696,12 @@ class CorrMM_gradInputs(BaseCorrMM):
...
@@ -678,8 +696,12 @@ class CorrMM_gradInputs(BaseCorrMM):
height_width
=
[
as_tensor_variable
(
shape
[
0
])
.
astype
(
'int64'
),
height_width
=
[
as_tensor_variable
(
shape
[
0
])
.
astype
(
'int64'
),
as_tensor_variable
(
shape
[
1
])
.
astype
(
'int64'
)]
as_tensor_variable
(
shape
[
1
])
.
astype
(
'int64'
)]
broadcastable
=
[
topgrad
.
type
.
broadcastable
[
0
],
kern
.
type
.
broadcastable
[
1
],
if
self
.
num_groups
>
1
:
False
,
False
]
broadcastable
=
[
topgrad
.
type
.
broadcastable
[
0
],
False
,
False
,
False
]
else
:
broadcastable
=
[
topgrad
.
type
.
broadcastable
[
0
],
kern
.
type
.
broadcastable
[
1
],
False
,
False
]
dtype
=
kern
.
type
.
dtype
dtype
=
kern
.
type
.
dtype
return
Apply
(
self
,
[
kern
,
topgrad
]
+
height_width
,
return
Apply
(
self
,
[
kern
,
topgrad
]
+
height_width
,
[
TensorType
(
dtype
,
broadcastable
)()])
[
TensorType
(
dtype
,
broadcastable
)()])
...
@@ -698,6 +720,7 @@ class CorrMM_gradInputs(BaseCorrMM):
...
@@ -698,6 +720,7 @@ class CorrMM_gradInputs(BaseCorrMM):
kshp
=
input_shape
[
0
]
kshp
=
input_shape
[
0
]
topshp
=
input_shape
[
1
]
topshp
=
input_shape
[
1
]
ssize
,
kshp
=
kshp
[
1
],
list
(
kshp
[
2
:])
ssize
,
kshp
=
kshp
[
1
],
list
(
kshp
[
2
:])
ssize
=
ssize
*
self
.
num_groups
bsize
,
topshp
=
topshp
[
0
],
list
(
topshp
[
2
:])
bsize
,
topshp
=
topshp
[
0
],
list
(
topshp
[
2
:])
height_width
=
node
.
inputs
[
-
2
:]
height_width
=
node
.
inputs
[
-
2
:]
if
padH
==
-
1
:
if
padH
==
-
1
:
...
@@ -738,12 +761,14 @@ class CorrMM_gradInputs(BaseCorrMM):
...
@@ -738,12 +761,14 @@ class CorrMM_gradInputs(BaseCorrMM):
bottom
,
=
grads
bottom
,
=
grads
d_weights
=
CorrMM_gradWeights
(
self
.
border_mode
,
d_weights
=
CorrMM_gradWeights
(
self
.
border_mode
,
self
.
subsample
,
self
.
subsample
,
self
.
filter_dilation
)(
bottom
,
self
.
filter_dilation
,
top
,
self
.
num_groups
)(
bottom
,
weights
.
shape
[
-
2
:])
top
,
weights
.
shape
[
-
2
:])
d_top
=
CorrMM
(
self
.
border_mode
,
d_top
=
CorrMM
(
self
.
border_mode
,
self
.
subsample
,
self
.
subsample
,
self
.
filter_dilation
)(
bottom
,
weights
)
self
.
filter_dilation
,
self
.
num_groups
)(
bottom
,
weights
)
d_height_width
=
((
theano
.
gradient
.
DisconnectedType
()(),)
*
d_height_width
=
((
theano
.
gradient
.
DisconnectedType
()(),)
*
2
if
len
(
inp
)
==
4
else
())
2
if
len
(
inp
)
==
4
else
())
return
(
d_weights
,
d_top
)
+
d_height_width
return
(
d_weights
,
d_top
)
+
d_height_width
...
...
theano/tensor/nnet/corr_gemm.c
浏览文件 @
c2e14ce1
...
@@ -106,7 +106,8 @@ PyArrayObject* corrMM(PyArrayObject* bottom,
...
@@ -106,7 +106,8 @@ PyArrayObject* corrMM(PyArrayObject* bottom,
const
int
dilH
=
1
,
const
int
dilH
=
1
,
const
int
dilW
=
1
,
const
int
dilW
=
1
,
const
int
padH
=
0
,
const
int
padH
=
0
,
const
int
padW
=
0
)
const
int
padW
=
0
,
const
int
numgroups
=
1
)
{
{
if
(
PyArray_NDIM
(
bottom
)
!=
4
)
if
(
PyArray_NDIM
(
bottom
)
!=
4
)
{
{
...
@@ -155,7 +156,7 @@ PyArrayObject* corrMM(PyArrayObject* bottom,
...
@@ -155,7 +156,7 @@ PyArrayObject* corrMM(PyArrayObject* bottom,
const
int
nFilters
=
PyArray_DIMS
(
weight
)[
0
];
const
int
nFilters
=
PyArray_DIMS
(
weight
)[
0
];
const
int
kH
=
PyArray_DIMS
(
weight
)[
2
];
const
int
kH
=
PyArray_DIMS
(
weight
)[
2
];
const
int
kW
=
PyArray_DIMS
(
weight
)[
3
];
const
int
kW
=
PyArray_DIMS
(
weight
)[
3
];
if
(
nChannels
!=
PyArray_DIMS
(
weight
)[
1
]
)
{
if
(
nChannels
!=
(
PyArray_DIMS
(
weight
)[
1
]
*
numgroups
)
)
{
PyErr_SetString
(
PyExc_ValueError
,
PyErr_SetString
(
PyExc_ValueError
,
"CorrMM images and kernel must have the same stack size
\n
"
);
"CorrMM images and kernel must have the same stack size
\n
"
);
return
NULL
;
return
NULL
;
...
@@ -214,12 +215,16 @@ PyArrayObject* corrMM(PyArrayObject* bottom,
...
@@ -214,12 +215,16 @@ PyArrayObject* corrMM(PyArrayObject* bottom,
}
}
// Define some useful variables
// Define some useful variables
const
int
bottom_stride
=
PyArray_STRIDES
(
bottom
)[
0
]
/%
(
n_bytes
)
f
;
const
int
batch_bottom_stride
=
PyArray_STRIDES
(
bottom
)[
0
]
/%
(
n_bytes
)
f
;
const
int
top_stride
=
PyArray_STRIDES
(
top
)[
0
]
/%
(
n_bytes
)
f
;
const
int
group_bottom_stride
=
(
PyArray_STRIDES
(
bottom
)[
1
]
*
nChannels
/
numgroups
)
/%
(
n_bytes
)
f
;
const
int
K_
=
col_dim
[
1
];
const
int
batch_top_stride
=
PyArray_STRIDES
(
top
)[
0
]
/%
(
n_bytes
)
f
;
const
int
group_top_stride
=
(
PyArray_STRIDES
(
top
)[
1
]
*
nFilters
/
numgroups
)
/%
(
n_bytes
)
f
;
const
int
K_
=
col_dim
[
1
]
/
numgroups
;
const
int
N_
=
col_dim
[
2
];
const
int
N_
=
col_dim
[
2
];
const
int
col_stride
=
(
K_
*
N_
);
const
int
col_stride
=
(
K_
*
N_
*
numgroups
);
const
int
M_
=
nFilters
;
const
int
group_col_stride
=
(
K_
*
N_
);
const
int
group_weight_stride
=
(
PyArray_STRIDES
(
weight
)[
0
]
*
nFilters
/
numgroups
)
/%
(
n_bytes
)
f
;
const
int
M_
=
nFilters
/
numgroups
;
const
%
(
c_float_type
)
s
one
=
1
.
0
;
const
%
(
c_float_type
)
s
one
=
1
.
0
;
const
%
(
c_float_type
)
s
zero
=
0
.
0
;
const
%
(
c_float_type
)
s
zero
=
0
.
0
;
char
NTrans
=
'N'
;
char
NTrans
=
'N'
;
...
@@ -253,17 +258,19 @@ PyArrayObject* corrMM(PyArrayObject* bottom,
...
@@ -253,17 +258,19 @@ PyArrayObject* corrMM(PyArrayObject* bottom,
for
(
int
n
=
0
;
n
<
batchSize
;
++
n
)
{
for
(
int
n
=
0
;
n
<
batchSize
;
++
n
)
{
int
tid
=
%
(
omp_get_thread_num
)
s
;
int
tid
=
%
(
omp_get_thread_num
)
s
;
// First, im2col
// First, im2col
im2col
((
%
(
float_type
)
s
*
)
PyArray_DATA
(
bottom
)
+
n
*
b
ottom_stride
,
nChannels
,
bottomHeight
,
im2col
((
%
(
float_type
)
s
*
)
PyArray_DATA
(
bottom
)
+
n
*
b
atch_bottom_stride
,
nChannels
,
bottomWidth
,
kH
,
kW
,
dilH
,
dilW
,
padH
,
padW
,
dH
,
dW
,
bottom
Height
,
bottom
Width
,
kH
,
kW
,
dilH
,
dilW
,
padH
,
padW
,
dH
,
dW
,
(
%
(
float_type
)
s
*
)
PyArray_DATA
(
col
)
+
tid
*
col_stride
);
(
%
(
float_type
)
s
*
)
PyArray_DATA
(
col
)
+
tid
*
col_stride
);
// Second, gemm
for
(
int
g
=
0
;
g
<
numgroups
;
++
g
){
%
(
gemm
)
s
(
&
NTrans
,
&
NTrans
,
// Second, gemm
&
N_
,
&
M_
,
&
K_
,
%
(
gemm
)
s
(
&
NTrans
,
&
NTrans
,
&
one
,
&
N_
,
&
M_
,
&
K_
,
(
%
(
float_type
)
s
*
)
PyArray_DATA
(
col
)
+
tid
*
col_stride
,
&
N_
,
&
one
,
(
%
(
float_type
)
s
*
)
PyArray_DATA
(
weight
),
&
K_
,
(
%
(
float_type
)
s
*
)
PyArray_DATA
(
col
)
+
tid
*
col_stride
+
g
*
group_col_stride
,
&
N_
,
&
zero
,
(
%
(
float_type
)
s
*
)
PyArray_DATA
(
weight
)
+
g
*
group_weight_stride
,
&
K_
,
(
%
(
float_type
)
s
*
)
PyArray_DATA
(
top
)
+
n
*
top_stride
,
&
N_
);
&
zero
,
(
%
(
float_type
)
s
*
)
PyArray_DATA
(
top
)
+
n
*
batch_top_stride
+
g
*
group_top_stride
,
&
N_
);
}
}
}
// Restore to previous blas threads
// Restore to previous blas threads
%
(
blas_set_num_threads
)
s
(
blas_threads_saved
);
%
(
blas_set_num_threads
)
s
(
blas_threads_saved
);
...
@@ -304,7 +311,7 @@ PyArrayObject* corrMM(PyArrayObject* bottom,
...
@@ -304,7 +311,7 @@ PyArrayObject* corrMM(PyArrayObject* bottom,
output
=
weight
;
output
=
weight
;
npy_intp
weight_dim
[
2
];
npy_intp
weight_dim
[
2
];
weight_dim
[
0
]
=
(
npy_intp
)
max_threads
;
weight_dim
[
0
]
=
(
npy_intp
)
max_threads
;
weight_dim
[
1
]
=
(
npy_intp
)(
M_
*
K_
);
weight_dim
[
1
]
=
(
npy_intp
)(
M_
*
K_
*
numgroups
);
PyArrayObject
*
local_weight
=
(
PyArrayObject
*
)
PyArray_ZEROS
(
2
,
PyArrayObject
*
local_weight
=
(
PyArrayObject
*
)
PyArray_ZEROS
(
2
,
weight_dim
,
PyArray_TYPE
(
weight
),
0
);
weight_dim
,
PyArray_TYPE
(
weight
),
0
);
...
@@ -326,21 +333,23 @@ PyArrayObject* corrMM(PyArrayObject* bottom,
...
@@ -326,21 +333,23 @@ PyArrayObject* corrMM(PyArrayObject* bottom,
for
(
int
n
=
0
;
n
<
batchSize
;
++
n
)
{
for
(
int
n
=
0
;
n
<
batchSize
;
++
n
)
{
int
tid
=
%
(
omp_get_thread_num
)
s
;
int
tid
=
%
(
omp_get_thread_num
)
s
;
// First, im2col
// First, im2col
im2col
((
%
(
float_type
)
s
*
)
PyArray_DATA
(
bottom
)
+
n
*
b
ottom_stride
,
nChannels
,
bottomHeight
,
im2col
((
%
(
float_type
)
s
*
)
PyArray_DATA
(
bottom
)
+
n
*
b
atch_bottom_stride
,
bottomWidth
,
kH
,
kW
,
dilH
,
dilW
,
padH
,
padW
,
dH
,
dW
,
nChannels
,
bottomHeight
,
bottomWidth
,
kH
,
kW
,
dilH
,
dilW
,
padH
,
padW
,
dH
,
dW
,
(
%
(
float_type
)
s
*
)
PyArray_DATA
(
col
)
+
tid
*
col_stride
);
(
%
(
float_type
)
s
*
)
PyArray_DATA
(
col
)
+
tid
*
col_stride
);
// Second, gemm
for
(
int
g
=
0
;
g
<
numgroups
;
++
g
){
// Note that we accumulate into weight. We do so by setting beta = 0
// Second, gemm
// for the first iteration and beta = 1 for subsequent ones. (This
// Note that we accumulate into weight. We do so by setting beta = 0
// is faster than setting weight to all zeros before the loop.)
// for the first iteration and beta = 1 for subsequent ones. (This
%
(
gemm
)
s
(
&
Trans
,
&
NTrans
,
// is faster than setting weight to all zeros before the loop.)
&
K_
,
&
M_
,
&
N_
,
%
(
gemm
)
s
(
&
Trans
,
&
NTrans
,
&
one
,
&
K_
,
&
M_
,
&
N_
,
(
%
(
float_type
)
s
*
)
PyArray_DATA
(
col
)
+
tid
*
col_stride
,
&
N_
,
&
one
,
(
%
(
float_type
)
s
*
)
PyArray_DATA
(
top
)
+
n
*
top_stride
,
&
N_
,
(
%
(
float_type
)
s
*
)
PyArray_DATA
(
col
)
+
tid
*
col_stride
+
g
*
group_col_stride
,
&
N_
,
(
n
==
0
)
?
&
zero
:
&
one
,
(
%
(
float_type
)
s
*
)
PyArray_DATA
(
top
)
+
g
*
group_top_stride
+
n
*
batch_top_stride
,
&
N_
,
(
%
(
float_type
)
s
*
)
PyArray_DATA
(
local_weight
)
+
(
n
==
0
)
?
&
zero
:
&
one
,
tid
*
weight_dim
[
1
],
&
K_
);
(
%
(
float_type
)
s
*
)
PyArray_DATA
(
local_weight
)
+
g
*
group_weight_stride
+
tid
*
weight_dim
[
1
],
&
K_
);
}
}
}
// Restore to previous blas threads
// Restore to previous blas threads
%
(
blas_set_num_threads
)
s
(
blas_threads_saved
);
%
(
blas_set_num_threads
)
s
(
blas_threads_saved
);
...
@@ -401,19 +410,21 @@ PyArrayObject* corrMM(PyArrayObject* bottom,
...
@@ -401,19 +410,21 @@ PyArrayObject* corrMM(PyArrayObject* bottom,
%
(
blas_set_num_threads
)
s
(
1
);
%
(
blas_set_num_threads
)
s
(
1
);
%
(
omp_flags
)
s
%
(
omp_flags
)
s
for
(
int
n
=
0
;
n
<
batchSize
;
++
n
)
{
for
(
int
n
=
0
;
n
<
batchSize
;
++
n
)
{
// gemm into columns
int
tid
=
%
(
omp_get_thread_num
)
s
;
int
tid
=
%
(
omp_get_thread_num
)
s
;
%
(
gemm
)
s
(
&
NTrans
,
&
Trans
,
for
(
int
g
=
0
;
g
<
numgroups
;
++
g
){
&
N_
,
&
K_
,
&
M_
,
// gemm into columns
&
one
,
%
(
gemm
)
s
(
&
NTrans
,
&
Trans
,
(
%
(
float_type
)
s
*
)
PyArray_DATA
(
top
)
+
n
*
top_stride
,
&
N_
,
&
N_
,
&
K_
,
&
M_
,
(
%
(
float_type
)
s
*
)
PyArray_DATA
(
weight
),
&
K_
,
&
one
,
&
zero
,
(
%
(
float_type
)
s
*
)
PyArray_DATA
(
top
)
+
g
*
group_top_stride
+
n
*
batch_top_stride
,
&
N_
,
(
%
(
float_type
)
s
*
)
PyArray_DATA
(
col
)
+
tid
*
col_stride
,
&
N_
);
(
%
(
float_type
)
s
*
)
PyArray_DATA
(
weight
)
+
g
*
group_weight_stride
,
&
K_
,
&
zero
,
(
%
(
float_type
)
s
*
)
PyArray_DATA
(
col
)
+
tid
*
col_stride
+
g
*
group_col_stride
,
&
N_
);
}
// col2im back to the data
// col2im back to the data
col2im
((
%
(
float_type
)
s
*
)
PyArray_DATA
(
col
)
+
tid
*
col_stride
,
nChannels
,
bottomHeight
,
bottomWidth
,
col2im
((
%
(
float_type
)
s
*
)
PyArray_DATA
(
col
)
+
tid
*
col_stride
,
nChannels
,
bottomHeight
,
bottomWidth
,
kH
,
kW
,
dilH
,
dilW
,
padH
,
padW
,
kH
,
kW
,
dilH
,
dilW
,
padH
,
padW
,
dH
,
dW
,
(
%
(
float_type
)
s
*
)
PyArray_DATA
(
bottom
)
+
n
*
bottom_stride
);
dH
,
dW
,
(
%
(
float_type
)
s
*
)
PyArray_DATA
(
bottom
)
+
n
*
b
atch_b
ottom_stride
);
}
}
// Restore to previous blas threads
// Restore to previous blas threads
%
(
blas_set_num_threads
)
s
(
blas_threads_saved
);
%
(
blas_set_num_threads
)
s
(
blas_threads_saved
);
...
...
theano/tensor/nnet/opt.py
浏览文件 @
c2e14ce1
...
@@ -88,7 +88,9 @@ def local_abstractconv_gemm(node):
...
@@ -88,7 +88,9 @@ def local_abstractconv_gemm(node):
kern
=
kern
[:,
:,
::
-
1
,
::
-
1
]
kern
=
kern
[:,
:,
::
-
1
,
::
-
1
]
rval
=
CorrMM
(
border_mode
=
node
.
op
.
border_mode
,
rval
=
CorrMM
(
border_mode
=
node
.
op
.
border_mode
,
subsample
=
node
.
op
.
subsample
,
subsample
=
node
.
op
.
subsample
,
filter_dilation
=
node
.
op
.
filter_dilation
)(
img
,
kern
)
filter_dilation
=
node
.
op
.
filter_dilation
,
num_groups
=
node
.
op
.
num_groups
)(
img
,
kern
)
copy_stack_trace
(
node
.
outputs
[
0
],
rval
)
copy_stack_trace
(
node
.
outputs
[
0
],
rval
)
return
[
rval
]
return
[
rval
]
...
@@ -133,7 +135,8 @@ def local_abstractconv_gradweight_gemm(node):
...
@@ -133,7 +135,8 @@ def local_abstractconv_gradweight_gemm(node):
rval
=
CorrMM_gradWeights
(
border_mode
=
node
.
op
.
border_mode
,
rval
=
CorrMM_gradWeights
(
border_mode
=
node
.
op
.
border_mode
,
subsample
=
node
.
op
.
subsample
,
subsample
=
node
.
op
.
subsample
,
filter_dilation
=
node
.
op
.
filter_dilation
)(
img
,
topgrad
,
shape
)
filter_dilation
=
node
.
op
.
filter_dilation
,
num_groups
=
node
.
op
.
num_groups
)(
img
,
topgrad
,
shape
)
copy_stack_trace
(
node
.
outputs
[
0
],
rval
)
copy_stack_trace
(
node
.
outputs
[
0
],
rval
)
# need to flip the kernel if necessary
# need to flip the kernel if necessary
...
@@ -190,8 +193,9 @@ def local_abstractconv_gradinputs_gemm(node):
...
@@ -190,8 +193,9 @@ def local_abstractconv_gradinputs_gemm(node):
kern
=
kern
[:,
:,
::
-
1
,
::
-
1
]
kern
=
kern
[:,
:,
::
-
1
,
::
-
1
]
rval
=
CorrMM_gradInputs
(
border_mode
=
node
.
op
.
border_mode
,
rval
=
CorrMM_gradInputs
(
border_mode
=
node
.
op
.
border_mode
,
subsample
=
node
.
op
.
subsample
,
subsample
=
node
.
op
.
subsample
,
filter_dilation
=
node
.
op
.
filter_dilation
)(
kern
,
topgrad
,
filter_dilation
=
node
.
op
.
filter_dilation
,
shape
)
num_groups
=
node
.
op
.
num_groups
)(
kern
,
topgrad
,
shape
)
copy_stack_trace
(
node
.
outputs
[
0
],
rval
)
copy_stack_trace
(
node
.
outputs
[
0
],
rval
)
return
[
rval
]
return
[
rval
]
...
@@ -238,6 +242,8 @@ def local_conv2d_cpu(node):
...
@@ -238,6 +242,8 @@ def local_conv2d_cpu(node):
if
not
node
.
op
.
filter_flip
:
if
not
node
.
op
.
filter_flip
:
# Not tested yet
# Not tested yet
return
None
return
None
if
node
.
op
.
num_groups
>
1
:
return
None
rval
=
conv2d
(
img
,
kern
,
rval
=
conv2d
(
img
,
kern
,
node
.
op
.
imshp
,
node
.
op
.
kshp
,
node
.
op
.
imshp
,
node
.
op
.
kshp
,
...
@@ -295,6 +301,8 @@ def local_conv2d_gradweight_cpu(node):
...
@@ -295,6 +301,8 @@ def local_conv2d_gradweight_cpu(node):
if
not
node
.
op
.
filter_flip
:
if
not
node
.
op
.
filter_flip
:
# Not tested yet
# Not tested yet
return
return
if
node
.
op
.
num_groups
>
1
:
return
None
if
node
.
op
.
border_mode
==
'valid'
and
\
if
node
.
op
.
border_mode
==
'valid'
and
\
(
node
.
op
.
subsample
!=
(
1
,
1
)):
(
node
.
op
.
subsample
!=
(
1
,
1
)):
...
@@ -447,6 +455,8 @@ def local_conv2d_gradinputs_cpu(node):
...
@@ -447,6 +455,8 @@ def local_conv2d_gradinputs_cpu(node):
if
not
node
.
op
.
filter_flip
:
if
not
node
.
op
.
filter_flip
:
# Not tested yet
# Not tested yet
return
None
return
None
if
node
.
op
.
num_groups
>
1
:
return
None
# Conv 3d implementation, needed when subsample > 2
# Conv 3d implementation, needed when subsample > 2
if
node
.
op
.
border_mode
==
'valid'
and
node
.
op
.
subsample
!=
(
1
,
1
):
if
node
.
op
.
border_mode
==
'valid'
and
node
.
op
.
subsample
!=
(
1
,
1
):
...
...
theano/tensor/nnet/tests/test_abstract_conv.py
浏览文件 @
c2e14ce1
...
@@ -1699,3 +1699,158 @@ class TestConv2dGrads(unittest.TestCase):
...
@@ -1699,3 +1699,158 @@ class TestConv2dGrads(unittest.TestCase):
)
)
f_new
=
theano
.
function
([
self
.
x
,
self
.
output_grad_wrt
],
conv_wrt_w_out
)
f_new
=
theano
.
function
([
self
.
x
,
self
.
output_grad_wrt
],
conv_wrt_w_out
)
utt
.
assert_allclose
(
f_new
(
input_val
,
out_grad_val
),
f_old
(
input_val
,
filter_val
,
out_grad_val
))
utt
.
assert_allclose
(
f_new
(
input_val
,
out_grad_val
),
f_old
(
input_val
,
filter_val
,
out_grad_val
))
class
Grouped_conv_noOptim
(
unittest
.
TestCase
):
conv2d
=
theano
.
tensor
.
nnet
.
abstract_conv
.
AbstractConv2d
conv2d_gradw
=
theano
.
tensor
.
nnet
.
abstract_conv
.
AbstractConv2d_gradWeights
conv2d_gradi
=
theano
.
tensor
.
nnet
.
abstract_conv
.
AbstractConv2d_gradInputs
conv2d_op
=
theano
.
tensor
.
nnet
.
abstract_conv
.
AbstractConv2d
conv2d_gradw_op
=
theano
.
tensor
.
nnet
.
abstract_conv
.
AbstractConv2d_gradWeights
conv2d_gradi_op
=
theano
.
tensor
.
nnet
.
abstract_conv
.
AbstractConv2d_gradInputs
mode
=
theano
.
Mode
(
optimizer
=
None
)
flip_filter
=
False
is_dnn
=
False
def
setUp
(
self
):
self
.
num_groups
=
[
3
,
2
,
4
,
4
]
self
.
border_mode
=
'valid'
self
.
subsample
=
(
1
,
1
)
self
.
img_shape
=
[(
5
,
6
,
5
,
5
),
(
4
,
4
,
7
,
5
),
(
3
,
8
,
5
,
3
),
(
2
,
4
,
7
,
7
)]
self
.
kern_shape
=
[(
6
,
2
,
3
,
3
),
(
6
,
2
,
5
,
3
),
(
4
,
2
,
3
,
3
),
(
4
,
1
,
3
,
5
)]
self
.
top_shape
=
[(
5
,
6
,
3
,
3
),
(
4
,
6
,
3
,
3
),
(
3
,
4
,
3
,
1
),
(
2
,
4
,
5
,
3
)]
self
.
filter_dilation
=
(
1
,
1
)
self
.
ref_mode
=
'FAST_RUN'
if
theano
.
config
.
cxx
==
""
:
raise
SkipTest
(
"CorrMM needs cxx"
)
def
test_fwd
(
self
):
img_sym
=
theano
.
tensor
.
tensor4
(
'img'
)
kern_sym
=
theano
.
tensor
.
tensor4
(
'kern'
)
for
imshp
,
kshp
,
groups
in
zip
(
self
.
img_shape
,
self
.
kern_shape
,
self
.
num_groups
):
img
=
np
.
random
.
random
(
imshp
)
.
astype
(
theano
.
config
.
floatX
)
kern
=
np
.
random
.
random
(
kshp
)
.
astype
(
theano
.
config
.
floatX
)
split_imgs
=
np
.
split
(
img
,
groups
,
axis
=
1
)
split_kern
=
np
.
split
(
kern
,
groups
,
axis
=
0
)
grouped_conv_op
=
self
.
conv2d
(
border_mode
=
self
.
border_mode
,
subsample
=
self
.
subsample
,
filter_dilation
=
self
.
filter_dilation
,
num_groups
=
groups
)
if
self
.
flip_filter
:
grouped_conv_output
=
grouped_conv_op
(
img_sym
,
kern_sym
[:,
:,
::
-
1
,
::
-
1
])
else
:
grouped_conv_output
=
grouped_conv_op
(
img_sym
,
kern_sym
)
grouped_func
=
theano
.
function
([
img_sym
,
kern_sym
],
grouped_conv_output
,
mode
=
self
.
mode
)
assert
any
([
isinstance
(
node
.
op
,
self
.
conv2d_op
)
for
node
in
grouped_func
.
maker
.
fgraph
.
toposort
()])
grouped_output
=
grouped_func
(
img
,
kern
)
ref_conv_op
=
conv2d_corr
(
img_sym
,
kern_sym
,
border_mode
=
self
.
border_mode
,
subsample
=
self
.
subsample
,
filter_dilation
=
self
.
filter_dilation
)
ref_func
=
theano
.
function
([
img_sym
,
kern_sym
],
ref_conv_op
,
mode
=
self
.
ref_mode
)
ref_concat_output
=
[
ref_func
(
img_arr
,
kern_arr
)
for
img_arr
,
kern_arr
in
zip
(
split_imgs
,
split_kern
)]
ref_concat_output
=
np
.
concatenate
(
ref_concat_output
,
axis
=
1
)
utt
.
assert_allclose
(
grouped_output
,
ref_concat_output
)
utt
.
verify_grad
(
grouped_conv_op
,
[
img
,
kern
],
mode
=
self
.
mode
,
eps
=
1
)
def
test_gradweights
(
self
):
img_sym
=
theano
.
tensor
.
tensor4
(
'img'
)
top_sym
=
theano
.
tensor
.
tensor4
(
'top'
)
for
imshp
,
kshp
,
tshp
,
groups
in
zip
(
self
.
img_shape
,
self
.
kern_shape
,
self
.
top_shape
,
self
.
num_groups
):
img
=
np
.
random
.
random
(
imshp
)
.
astype
(
theano
.
config
.
floatX
)
top
=
np
.
random
.
random
(
tshp
)
.
astype
(
theano
.
config
.
floatX
)
split_imgs
=
np
.
split
(
img
,
groups
,
axis
=
1
)
split_top
=
np
.
split
(
top
,
groups
,
axis
=
1
)
grouped_convgrad_op
=
self
.
conv2d_gradw
(
border_mode
=
self
.
border_mode
,
subsample
=
self
.
subsample
,
filter_dilation
=
self
.
filter_dilation
,
num_groups
=
groups
)
grouped_conv_output
=
grouped_convgrad_op
(
img_sym
,
top_sym
,
tensor
.
as_tensor_variable
(
kshp
if
self
.
is_dnn
else
kshp
[
-
2
:]))
if
self
.
flip_filter
:
grouped_conv_output
=
grouped_conv_output
[:,
:,
::
-
1
,
::
-
1
]
grouped_func
=
theano
.
function
([
img_sym
,
top_sym
],
grouped_conv_output
,
mode
=
self
.
mode
)
assert
any
([
isinstance
(
node
.
op
,
self
.
conv2d_gradw_op
)
for
node
in
grouped_func
.
maker
.
fgraph
.
toposort
()])
grouped_output
=
grouped_func
(
img
,
top
)
ref_conv_op
=
conv2d_corr_gw
(
img_sym
,
top_sym
,
kshp
,
border_mode
=
self
.
border_mode
,
subsample
=
self
.
subsample
,
filter_dilation
=
self
.
filter_dilation
)
ref_func
=
theano
.
function
([
img_sym
,
top_sym
],
ref_conv_op
,
mode
=
self
.
ref_mode
)
ref_concat_output
=
[
ref_func
(
img_arr
,
top_arr
)
for
img_arr
,
top_arr
in
zip
(
split_imgs
,
split_top
)]
ref_concat_output
=
np
.
concatenate
(
ref_concat_output
,
axis
=
0
)
utt
.
assert_allclose
(
grouped_output
,
ref_concat_output
)
def
conv_gradweight
(
inputs_val
,
output_val
):
return
grouped_convgrad_op
(
inputs_val
,
output_val
,
tensor
.
as_tensor_variable
(
kshp
if
self
.
is_dnn
else
kshp
[
-
2
:]))
utt
.
verify_grad
(
conv_gradweight
,
[
img
,
top
],
mode
=
self
.
mode
,
eps
=
1
)
def
test_gradinputs
(
self
):
kern_sym
=
theano
.
tensor
.
tensor4
(
'kern'
)
top_sym
=
theano
.
tensor
.
tensor4
(
'top'
)
for
imshp
,
kshp
,
tshp
,
groups
in
zip
(
self
.
img_shape
,
self
.
kern_shape
,
self
.
top_shape
,
self
.
num_groups
):
kern
=
np
.
random
.
random
(
kshp
)
.
astype
(
theano
.
config
.
floatX
)
top
=
np
.
random
.
random
(
tshp
)
.
astype
(
theano
.
config
.
floatX
)
split_kerns
=
np
.
split
(
kern
,
groups
,
axis
=
0
)
split_top
=
np
.
split
(
top
,
groups
,
axis
=
1
)
grouped_convgrad_op
=
self
.
conv2d_gradi
(
border_mode
=
self
.
border_mode
,
subsample
=
self
.
subsample
,
filter_dilation
=
self
.
filter_dilation
,
num_groups
=
groups
)
if
self
.
flip_filter
:
grouped_conv_output
=
grouped_convgrad_op
(
kern_sym
[:,
:,
::
-
1
,
::
-
1
],
top_sym
,
tensor
.
as_tensor_variable
(
imshp
[
-
2
:]))
else
:
grouped_conv_output
=
grouped_convgrad_op
(
kern_sym
,
top_sym
,
tensor
.
as_tensor_variable
(
imshp
if
self
.
is_dnn
else
imshp
[
-
2
:]))
grouped_func
=
theano
.
function
([
kern_sym
,
top_sym
],
grouped_conv_output
,
mode
=
self
.
mode
)
assert
any
([
isinstance
(
node
.
op
,
self
.
conv2d_gradi_op
)
for
node
in
grouped_func
.
maker
.
fgraph
.
toposort
()])
grouped_output
=
grouped_func
(
kern
,
top
)
ref_conv_op
=
conv2d_corr_gi
(
kern_sym
,
top_sym
,
imshp
,
border_mode
=
self
.
border_mode
,
subsample
=
self
.
subsample
,
filter_dilation
=
self
.
filter_dilation
)
ref_func
=
theano
.
function
([
kern_sym
,
top_sym
],
ref_conv_op
,
mode
=
self
.
ref_mode
)
ref_concat_output
=
[
ref_func
(
kern_arr
,
top_arr
)
for
kern_arr
,
top_arr
in
zip
(
split_kerns
,
split_top
)]
ref_concat_output
=
np
.
concatenate
(
ref_concat_output
,
axis
=
1
)
utt
.
assert_allclose
(
grouped_output
,
ref_concat_output
)
def
conv_gradinputs
(
filters_val
,
output_val
):
return
grouped_convgrad_op
(
filters_val
,
output_val
,
tensor
.
as_tensor_variable
(
imshp
if
self
.
is_dnn
else
imshp
[
-
2
:]))
utt
.
verify_grad
(
conv_gradinputs
,
[
kern
,
top
],
mode
=
self
.
mode
,
eps
=
1
)
theano/tensor/nnet/tests/test_corr.py
浏览文件 @
c2e14ce1
...
@@ -10,6 +10,7 @@ import theano
...
@@ -10,6 +10,7 @@ import theano
import
theano.tensor
as
T
import
theano.tensor
as
T
from
theano.tests
import
unittest_tools
as
utt
from
theano.tests
import
unittest_tools
as
utt
from
theano.tensor.nnet
import
corr
,
conv
from
theano.tensor.nnet
import
corr
,
conv
from
theano.tensor.nnet.tests.test_abstract_conv
import
Grouped_conv_noOptim
class
TestCorr2D
(
utt
.
InferShapeTester
):
class
TestCorr2D
(
utt
.
InferShapeTester
):
...
@@ -416,6 +417,49 @@ class TestCorr2D(utt.InferShapeTester):
...
@@ -416,6 +417,49 @@ class TestCorr2D(utt.InferShapeTester):
self
.
validate
((
3
,
2
,
7
,
5
),
(
5
,
2
,
2
,
3
),
2
,
non_contiguous
=
True
)
self
.
validate
((
3
,
2
,
7
,
5
),
(
5
,
2
,
2
,
3
),
2
,
non_contiguous
=
True
)
class TestGroupCorr2d(Grouped_conv_noOptim):
    """Grouped-convolution tests for the CPU CorrMM ops.

    Inherits the generic grouped-convolution test battery from
    ``Grouped_conv_noOptim`` and adds ``test_graph``, which checks the
    grouped op directly against a hand-built per-group reference graph.
    """

    # FAST_COMPILE lacks the optimizations these tests rely on, so force
    # FAST_RUN in that case; otherwise keep the default mode.
    mode = (theano.compile.get_mode("FAST_RUN")
            if theano.config.mode == "FAST_COMPILE" else None)

    # Ops under test: forward pass, gradient w.r.t. weights, and gradient
    # w.r.t. inputs — both as constructors and as op classes for toposort
    # membership checks in the inherited tests.
    conv2d = corr.CorrMM
    conv2d_gradw = corr.CorrMM_gradWeights
    conv2d_gradi = corr.CorrMM_gradInputs
    conv2d_op = corr.CorrMM
    conv2d_gradw_op = corr.CorrMM_gradWeights
    conv2d_gradi_op = corr.CorrMM_gradInputs
    flip_filter = True
    is_dnn = False

    def test_graph(self):
        """Grouped CorrMM must equal per-group convolutions concatenated."""
        # define common values first
        n_groups = 3
        bottom_val = np.random.rand(3, 6, 5, 5).astype(theano.config.floatX)
        kern_val = np.random.rand(9, 2, 3, 3).astype(theano.config.floatX)
        bottom_sym = T.tensor4('bottom')
        kern_sym = T.tensor4('kern')

        # grouped convolution graph
        conv_group = self.conv2d(num_groups=n_groups)(bottom_sym, kern_sym)
        gconv_func = theano.function([bottom_sym, kern_sym],
                                     conv_group, mode=self.mode)

        # Reference graph, the "normal hard way": slice the input channels
        # and the filters per group, convolve each pair separately, then
        # concatenate the results along the channel axis.
        kern_offset = kern_sym.shape[0] // n_groups
        bottom_offset = bottom_sym.shape[1] // n_groups
        split_conv_output = []
        for i in range(n_groups):
            bottom_slice = bottom_sym[
                :, i * bottom_offset:(i + 1) * bottom_offset, :, :]
            kern_slice = kern_sym[
                i * kern_offset:(i + 1) * kern_offset, :, :, :]
            split_conv_output.append(self.conv2d()(bottom_slice, kern_slice))
        concatenated_output = T.concatenate(split_conv_output, axis=1)
        conv_func = theano.function([bottom_sym, kern_sym],
                                    concatenated_output, mode=self.mode)

        # calculate outputs for each graph and compare values
        gconv_output = gconv_func(bottom_val, kern_val)
        conv_output = conv_func(bottom_val, kern_val)
        utt.assert_allclose(gconv_output, conv_output)
if
__name__
==
'__main__'
:
if
__name__
==
'__main__'
:
t
=
TestCorr2D
(
'setUp'
)
t
=
TestCorr2D
(
'setUp'
)
...
...
编写
预览
Markdown
格式
0%
重试
或
添加新文件
添加附件
取消
您添加了
0
人
到此讨论。请谨慎行事。
请先完成此评论的编辑!
取消
请
注册
或者
登录
后发表评论