Skip to content
项目
群组
代码片段
帮助
当前项目
正在载入...
登录 / 注册
切换导航面板
P
pytensor
项目
项目
详情
活动
周期分析
仓库
仓库
文件
提交
分支
标签
贡献者
图表
比较
统计图
议题
0
议题
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
CI / CD
CI / CD
流水线
作业
日程
统计图
Wiki
Wiki
代码片段
代码片段
成员
成员
折叠边栏
关闭边栏
活动
图像
聊天
创建新问题
作业
提交
问题看板
Open sidebar
testgroup
pytensor
Commits
4747cf44
提交
4747cf44
authored
8月 15, 2017
作者:
Frédéric Bastien
提交者:
GitHub
8月 15, 2017
浏览文件
操作
浏览文件
下载
差异文件
Merge pull request #6267 from affanv14/g3
3D Grouped Convolutions
上级
5c92b461
211f0281
全部展开
隐藏空白字符变更
内嵌
并排
正在显示
14 个修改的文件
包含
185 行增加
和
101 行删除
+185
-101
blas.py
theano/gpuarray/blas.py
+0
-0
corr3d_gemm.c
theano/gpuarray/c_code/corr3d_gemm.c
+47
-29
dnn.py
theano/gpuarray/dnn.py
+4
-0
opt.py
theano/gpuarray/opt.py
+11
-6
test_dnn.py
theano/gpuarray/tests/test_dnn.py
+6
-6
test_gemmcorr.py
theano/gpuarray/tests/test_gemmcorr.py
+6
-6
test_gemmcorr3d.py
theano/gpuarray/tests/test_gemmcorr3d.py
+13
-0
abstract_conv.py
theano/tensor/nnet/abstract_conv.py
+0
-0
corr3d_gemm.c
theano/tensor/nnet/c_code/corr3d_gemm.c
+61
-42
corr3d.py
theano/tensor/nnet/corr3d.py
+0
-0
opt.py
theano/tensor/nnet/opt.py
+13
-4
test_abstract_conv.py
theano/tensor/nnet/tests/test_abstract_conv.py
+0
-0
test_corr.py
theano/tensor/nnet/tests/test_corr.py
+8
-8
test_corr3d.py
theano/tensor/nnet/tests/test_corr3d.py
+16
-0
没有找到文件。
theano/gpuarray/blas.py
浏览文件 @
4747cf44
差异被折叠。
点击展开。
theano/gpuarray/c_code/corr3d_gemm.c
浏览文件 @
4747cf44
...
@@ -411,7 +411,8 @@ PyGpuArrayObject* corr3dMM(PyGpuArrayObject *const bottom,
...
@@ -411,7 +411,8 @@ PyGpuArrayObject* corr3dMM(PyGpuArrayObject *const bottom,
const
size_t
dilD
=
1
,
const
size_t
dilD
=
1
,
const
size_t
padH
=
0
,
const
size_t
padH
=
0
,
const
size_t
padW
=
0
,
const
size_t
padW
=
0
,
const
size_t
padD
=
0
)
const
size_t
padD
=
0
,
const
size_t
numgroups
=
1
)
{
{
if
(
PyGpuArray_NDIM
(
bottom
)
!=
5
)
if
(
PyGpuArray_NDIM
(
bottom
)
!=
5
)
{
{
...
@@ -479,11 +480,16 @@ PyGpuArrayObject* corr3dMM(PyGpuArrayObject *const bottom,
...
@@ -479,11 +480,16 @@ PyGpuArrayObject* corr3dMM(PyGpuArrayObject *const bottom,
const
size_t
kH
=
PyGpuArray_DIMS
(
weight
)[
2
];
const
size_t
kH
=
PyGpuArray_DIMS
(
weight
)[
2
];
const
size_t
kW
=
PyGpuArray_DIMS
(
weight
)[
3
];
const
size_t
kW
=
PyGpuArray_DIMS
(
weight
)[
3
];
const
size_t
kD
=
PyGpuArray_DIMS
(
weight
)[
4
];
const
size_t
kD
=
PyGpuArray_DIMS
(
weight
)[
4
];
if
(
nChannels
!=
PyGpuArray_DIMS
(
weight
)[
1
])
{
if
(
nChannels
!=
PyGpuArray_DIMS
(
weight
)[
1
]
*
numgroups
)
{
PyErr_SetString
(
PyExc_ValueError
,
PyErr_SetString
(
PyExc_ValueError
,
"GpuCorr3dMM images and kernel must have the same stack size
\n
"
);
"GpuCorr3dMM images and kernel must have the same stack size
\n
"
);
return
NULL
;
return
NULL
;
}
}
if
((
nFilters
%
numgroups
)
!=
0
)
{
PyErr_SetString
(
PyExc_ValueError
,
"CorrMM the number of filters must be divisible by the number of groups
\n
"
);
return
NULL
;
}
// implicit dilated filter
// implicit dilated filter
const
size_t
dil_kH
=
(
kH
-
1
)
*
dilH
+
1
;
const
size_t
dil_kH
=
(
kH
-
1
)
*
dilH
+
1
;
const
size_t
dil_kW
=
(
kW
-
1
)
*
dilW
+
1
;
const
size_t
dil_kW
=
(
kW
-
1
)
*
dilW
+
1
;
...
@@ -511,7 +517,7 @@ PyGpuArrayObject* corr3dMM(PyGpuArrayObject *const bottom,
...
@@ -511,7 +517,7 @@ PyGpuArrayObject* corr3dMM(PyGpuArrayObject *const bottom,
" weight shape: %ld %ld %ld %ld %ld
\n
"
" weight shape: %ld %ld %ld %ld %ld
\n
"
" top shape: %ld %ld %ld %ld %ld (expected %ld %ld %ld %ld %ld)
\n
"
,
" top shape: %ld %ld %ld %ld %ld (expected %ld %ld %ld %ld %ld)
\n
"
,
batchSize
,
nChannels
,
bottomHeight
,
bottomWidth
,
bottomDepth
,
batchSize
,
nChannels
,
bottomHeight
,
bottomWidth
,
bottomDepth
,
nFilters
,
nChannels
,
kH
,
kW
,
kD
,
nFilters
,
nChannels
/
numgroups
,
kH
,
kW
,
kD
,
PyGpuArray_DIMS
(
top
)[
0
],
PyGpuArray_DIMS
(
top
)[
1
],
PyGpuArray_DIMS
(
top
)[
0
],
PyGpuArray_DIMS
(
top
)[
1
],
PyGpuArray_DIMS
(
top
)[
2
],
PyGpuArray_DIMS
(
top
)[
3
],
PyGpuArray_DIMS
(
top
)[
4
],
PyGpuArray_DIMS
(
top
)[
2
],
PyGpuArray_DIMS
(
top
)[
3
],
PyGpuArray_DIMS
(
top
)[
4
],
batchSize
,
nFilters
,
topHeight
,
topWidth
,
topDepth
);
batchSize
,
nFilters
,
topHeight
,
topWidth
,
topDepth
);
...
@@ -542,11 +548,17 @@ PyGpuArrayObject* corr3dMM(PyGpuArrayObject *const bottom,
...
@@ -542,11 +548,17 @@ PyGpuArrayObject* corr3dMM(PyGpuArrayObject *const bottom,
}
}
// Define some useful variables
// Define some useful variables
const
size_t
bottom_stride
=
PyGpuArray_STRIDES
(
bottom
)[
0
]
/
gpuarray_get_elsize
(
bottom
->
ga
.
typecode
);
const
size_t
batch_bottom_stride
=
PyGpuArray_STRIDES
(
bottom
)[
0
]
/
gpuarray_get_elsize
(
bottom
->
ga
.
typecode
);
const
size_t
top_stride
=
PyGpuArray_STRIDES
(
top
)[
0
]
/
gpuarray_get_elsize
(
top
->
ga
.
typecode
);
const
size_t
batch_top_stride
=
PyGpuArray_STRIDES
(
top
)[
0
]
/
gpuarray_get_elsize
(
top
->
ga
.
typecode
);
const
size_t
K_
=
col_dim
[
0
];
const
size_t
group_bottom_stride
=
(
PyGpuArray_STRIDES
(
bottom
)[
1
]
*
nChannels
/
numgroups
)
/
gpuarray_get_elsize
(
bottom
->
ga
.
typecode
);
const
size_t
group_top_stride
=
(
PyGpuArray_STRIDES
(
top
)[
1
]
*
nFilters
/
numgroups
)
/
gpuarray_get_elsize
(
top
->
ga
.
typecode
);
const
size_t
group_weight_stride
=
(
PyGpuArray_STRIDES
(
weight
)[
0
]
*
nFilters
/
numgroups
)
/
gpuarray_get_elsize
(
weight
->
ga
.
typecode
);
const
size_t
K_
=
col_dim
[
0
]
/
numgroups
;
const
size_t
N_
=
col_dim
[
1
];
const
size_t
N_
=
col_dim
[
1
];
const
size_t
M_
=
nFilters
;
const
size_t
group_col_stride
=
(
K_
*
N_
);
const
size_t
M_
=
nFilters
/
numgroups
;
PyGpuArrayObject
*
output
;
PyGpuArrayObject
*
output
;
if
(
direction
==
0
)
{
// forward pass
if
(
direction
==
0
)
{
// forward pass
...
@@ -567,20 +579,22 @@ PyGpuArrayObject* corr3dMM(PyGpuArrayObject *const bottom,
...
@@ -567,20 +579,22 @@ PyGpuArrayObject* corr3dMM(PyGpuArrayObject *const bottom,
for
(
size_t
n
=
0
;
n
<
batchSize
;
n
++
)
{
for
(
size_t
n
=
0
;
n
<
batchSize
;
n
++
)
{
// First, im3d2col
// First, im3d2col
err
=
im3d2col
(
err
=
im3d2col
(
&
bottom
->
ga
,
n
*
bottom_stride
,
nChannels
,
bottomHeight
,
&
bottom
->
ga
,
n
*
b
atch_b
ottom_stride
,
nChannels
,
bottomHeight
,
bottomWidth
,
bottomDepth
,
kH
,
kW
,
kD
,
dilH
,
dilW
,
dilD
,
bottomWidth
,
bottomDepth
,
kH
,
kW
,
kD
,
dilH
,
dilW
,
dilD
,
padH
,
padW
,
padD
,
dH
,
dW
,
dD
,
&
col
->
ga
);
padH
,
padW
,
padD
,
dH
,
dW
,
dD
,
&
col
->
ga
);
if
(
err
!=
GA_NO_ERROR
)
{
if
(
err
!=
GA_NO_ERROR
)
{
Py_DECREF
(
col
);
Py_DECREF
(
col
);
return
NULL
;
return
NULL
;
}
}
// Second, gemm
for
(
size_t
g
=
0
;
g
<
numgroups
;
++
g
){
err
=
rgemm
(
cb_fortran
,
cb_no_trans
,
cb_no_trans
,
// Second, gemm
N_
,
M_
,
K_
,
1
,
err
=
rgemm
(
cb_fortran
,
cb_no_trans
,
cb_no_trans
,
&
col
->
ga
,
0
,
N_
,
N_
,
M_
,
K_
,
1
,
&
weight
->
ga
,
0
,
K_
,
&
col
->
ga
,
g
*
group_col_stride
,
N_
,
0
,
&
weight
->
ga
,
g
*
group_weight_stride
,
K_
,
&
top
->
ga
,
n
*
top_stride
,
N_
);
0
,
&
top
->
ga
,
n
*
batch_top_stride
+
g
*
group_top_stride
,
N_
);
}
if
(
err
!=
GA_NO_ERROR
)
{
if
(
err
!=
GA_NO_ERROR
)
{
PyErr_Format
(
PyExc_RuntimeError
,
PyErr_Format
(
PyExc_RuntimeError
,
"GpuCorr3dMM forward encountered an error running gemm."
);
"GpuCorr3dMM forward encountered an error running gemm."
);
...
@@ -607,7 +621,7 @@ PyGpuArrayObject* corr3dMM(PyGpuArrayObject *const bottom,
...
@@ -607,7 +621,7 @@ PyGpuArrayObject* corr3dMM(PyGpuArrayObject *const bottom,
for
(
size_t
n
=
0
;
n
<
batchSize
;
n
++
)
{
for
(
size_t
n
=
0
;
n
<
batchSize
;
n
++
)
{
// First, im3d2col
// First, im3d2col
err
=
im3d2col
(
err
=
im3d2col
(
&
bottom
->
ga
,
n
*
bottom_stride
,
nChannels
,
bottomHeight
,
&
bottom
->
ga
,
n
*
b
atch_b
ottom_stride
,
nChannels
,
bottomHeight
,
bottomWidth
,
bottomDepth
,
kH
,
kW
,
kD
,
dilH
,
dilW
,
dilD
,
bottomWidth
,
bottomDepth
,
kH
,
kW
,
kD
,
dilH
,
dilW
,
dilD
,
padH
,
padW
,
padD
,
dH
,
dW
,
dD
,
&
col
->
ga
);
padH
,
padW
,
padD
,
dH
,
dW
,
dD
,
&
col
->
ga
);
if
(
err
!=
GA_NO_ERROR
)
{
if
(
err
!=
GA_NO_ERROR
)
{
...
@@ -618,12 +632,14 @@ PyGpuArrayObject* corr3dMM(PyGpuArrayObject *const bottom,
...
@@ -618,12 +632,14 @@ PyGpuArrayObject* corr3dMM(PyGpuArrayObject *const bottom,
// Note that we accumulate into weight. We do so by setting beta = 0
// Note that we accumulate into weight. We do so by setting beta = 0
// for the first iteration and beta = 1 for subsequent ones. (This
// for the first iteration and beta = 1 for subsequent ones. (This
// is faster than setting weight to all zeros before the loop.)
// is faster than setting weight to all zeros before the loop.)
err
=
rgemm
(
cb_fortran
,
cb_trans
,
cb_no_trans
,
for
(
size_t
g
=
0
;
g
<
numgroups
;
++
g
){
K_
,
M_
,
N_
,
1
,
err
=
rgemm
(
cb_fortran
,
cb_trans
,
cb_no_trans
,
&
col
->
ga
,
0
,
N_
,
K_
,
M_
,
N_
,
1
,
&
top
->
ga
,
n
*
top_stride
,
N_
,
&
col
->
ga
,
g
*
group_col_stride
,
N_
,
(
n
==
0
)
?
0
:
1
,
&
top
->
ga
,
n
*
batch_top_stride
+
g
*
group_top_stride
,
N_
,
&
weight
->
ga
,
0
,
K_
);
(
n
==
0
)
?
0
:
1
,
&
weight
->
ga
,
g
*
group_weight_stride
,
K_
);
}
if
(
err
!=
GA_NO_ERROR
)
{
if
(
err
!=
GA_NO_ERROR
)
{
PyErr_Format
(
PyExc_RuntimeError
,
PyErr_Format
(
PyExc_RuntimeError
,
"GpuCorr3dMM grad weights encountered an error running gemm."
);
"GpuCorr3dMM grad weights encountered an error running gemm."
);
...
@@ -658,12 +674,14 @@ PyGpuArrayObject* corr3dMM(PyGpuArrayObject *const bottom,
...
@@ -658,12 +674,14 @@ PyGpuArrayObject* corr3dMM(PyGpuArrayObject *const bottom,
// Iterate over batch
// Iterate over batch
for
(
size_t
n
=
0
;
n
<
batchSize
;
n
++
)
{
for
(
size_t
n
=
0
;
n
<
batchSize
;
n
++
)
{
// gemm into columns
// gemm into columns
err
=
rgemm
(
cb_fortran
,
cb_no_trans
,
cb_trans
,
for
(
size_t
g
=
0
;
g
<
numgroups
;
++
g
){
N_
,
K_
,
M_
,
1
,
err
=
rgemm
(
cb_fortran
,
cb_no_trans
,
cb_trans
,
&
top
->
ga
,
n
*
top_stride
,
N_
,
N_
,
K_
,
M_
,
1
,
&
weight
->
ga
,
0
,
K_
,
&
top
->
ga
,
n
*
batch_top_stride
+
g
*
group_top_stride
,
N_
,
0
,
&
weight
->
ga
,
g
*
group_weight_stride
,
K_
,
&
col
->
ga
,
0
,
N_
);
0
,
&
col
->
ga
,
g
*
group_col_stride
,
N_
);
}
if
(
err
!=
GA_NO_ERROR
)
{
if
(
err
!=
GA_NO_ERROR
)
{
PyErr_Format
(
PyExc_RuntimeError
,
PyErr_Format
(
PyExc_RuntimeError
,
"GpuCorr3dMM grad inputs encountered an error running gemm."
);
"GpuCorr3dMM grad inputs encountered an error running gemm."
);
...
@@ -674,7 +692,7 @@ PyGpuArrayObject* corr3dMM(PyGpuArrayObject *const bottom,
...
@@ -674,7 +692,7 @@ PyGpuArrayObject* corr3dMM(PyGpuArrayObject *const bottom,
err
=
col2im3d
(
&
col
->
ga
,
nChannels
,
err
=
col2im3d
(
&
col
->
ga
,
nChannels
,
bottomHeight
,
bottomWidth
,
bottomDepth
,
bottomHeight
,
bottomWidth
,
bottomDepth
,
kH
,
kW
,
kD
,
dilH
,
dilW
,
dilD
,
padH
,
padW
,
padD
,
kH
,
kW
,
kD
,
dilH
,
dilW
,
dilD
,
padH
,
padW
,
padD
,
dH
,
dW
,
dD
,
&
bottom
->
ga
,
n
*
bottom_stride
);
dH
,
dW
,
dD
,
&
bottom
->
ga
,
n
*
b
atch_b
ottom_stride
);
if
(
err
!=
GA_NO_ERROR
)
{
if
(
err
!=
GA_NO_ERROR
)
{
Py_DECREF
(
col
);
Py_DECREF
(
col
);
return
NULL
;
return
NULL
;
...
...
theano/gpuarray/dnn.py
浏览文件 @
4747cf44
...
@@ -2790,6 +2790,8 @@ def local_abstractconv_cudnn_graph(op, context_name, inputs, outputs):
...
@@ -2790,6 +2790,8 @@ def local_abstractconv_cudnn_graph(op, context_name, inputs, outputs):
if
version
(
raises
=
False
)
<
6000
and
op
.
filter_dilation
!=
(
1
,
1
):
if
version
(
raises
=
False
)
<
6000
and
op
.
filter_dilation
!=
(
1
,
1
):
return
None
return
None
if
op
.
num_groups
>
1
:
return
None
inp1
=
inputs
[
0
]
inp1
=
inputs
[
0
]
inp2
=
inputs
[
1
]
inp2
=
inputs
[
1
]
...
@@ -2839,6 +2841,8 @@ def local_abstractconv3d_cudnn_graph(op, context_name, inputs, outputs):
...
@@ -2839,6 +2841,8 @@ def local_abstractconv3d_cudnn_graph(op, context_name, inputs, outputs):
if
version
(
raises
=
False
)
<
6000
and
op
.
filter_dilation
!=
(
1
,
1
,
1
):
if
version
(
raises
=
False
)
<
6000
and
op
.
filter_dilation
!=
(
1
,
1
,
1
):
return
None
return
None
if
op
.
num_groups
>
1
:
return
None
inp1
=
inputs
[
0
]
inp1
=
inputs
[
0
]
inp2
=
inputs
[
1
]
inp2
=
inputs
[
1
]
...
...
theano/gpuarray/opt.py
浏览文件 @
4747cf44
...
@@ -1707,7 +1707,8 @@ def local_abstractconv3d_gemm(node):
...
@@ -1707,7 +1707,8 @@ def local_abstractconv3d_gemm(node):
border_mode
=
node
.
op
.
border_mode
border_mode
=
node
.
op
.
border_mode
subsample
=
node
.
op
.
subsample
subsample
=
node
.
op
.
subsample
filter_dilation
=
node
.
op
.
filter_dilation
filter_dilation
=
node
.
op
.
filter_dilation
if
((
border_mode
==
'full'
)
and
(
subsample
==
(
1
,
1
,
1
))):
num_groups
=
node
.
op
.
num_groups
if
((
border_mode
==
'full'
)
and
(
subsample
==
(
1
,
1
,
1
))
and
num_groups
==
1
):
if
not
node
.
op
.
filter_flip
:
if
not
node
.
op
.
filter_flip
:
kern
=
kern
[:,
:,
::
-
1
,
::
-
1
,
::
-
1
]
kern
=
kern
[:,
:,
::
-
1
,
::
-
1
,
::
-
1
]
# need to dimshuffle the kernel for full convolution
# need to dimshuffle the kernel for full convolution
...
@@ -1724,8 +1725,9 @@ def local_abstractconv3d_gemm(node):
...
@@ -1724,8 +1725,9 @@ def local_abstractconv3d_gemm(node):
# By default use GpuCorr3dMM
# By default use GpuCorr3dMM
rval
=
GpuCorr3dMM
(
border_mode
,
rval
=
GpuCorr3dMM
(
border_mode
,
subsample
,
subsample
,
filter_dilation
)(
gpu_contiguous
(
img
),
filter_dilation
,
gpu_contiguous
(
kern
))
num_groups
)(
gpu_contiguous
(
img
),
gpu_contiguous
(
kern
))
# call GpuCorr3dMM_gradWeights if good
# call GpuCorr3dMM_gradWeights if good
# (the latter is faster if batchsize * kernelHeight * kernelWidth * kernelDepth
# (the latter is faster if batchsize * kernelHeight * kernelWidth * kernelDepth
...
@@ -1737,7 +1739,8 @@ def local_abstractconv3d_gemm(node):
...
@@ -1737,7 +1739,8 @@ def local_abstractconv3d_gemm(node):
(
None
not
in
node
.
op
.
imshp
[
-
3
:])
and
(
None
not
in
node
.
op
.
imshp
[
-
3
:])
and
(
node
.
op
.
kshp
is
not
None
)
and
(
node
.
op
.
kshp
is
not
None
)
and
(
None
not
in
node
.
op
.
kshp
)
and
(
None
not
in
node
.
op
.
kshp
)
and
border_mode
!=
"half"
):
border_mode
!=
"half"
and
num_groups
==
1
):
# we know the kernel and output size
# we know the kernel and output size
prod1
=
node
.
op
.
kshp
[
0
]
*
node
.
op
.
kshp
[
1
]
*
node
.
op
.
kshp
[
2
]
prod1
=
node
.
op
.
kshp
[
0
]
*
node
.
op
.
kshp
[
1
]
*
node
.
op
.
kshp
[
2
]
prod2
=
((
node
.
op
.
imshp
[
-
3
]
-
node
.
op
.
kshp
[
0
]
+
1
)
*
prod2
=
((
node
.
op
.
imshp
[
-
3
]
-
node
.
op
.
kshp
[
0
]
+
1
)
*
...
@@ -1929,7 +1932,8 @@ def local_abstractconv3d_gradweights_gemm(node):
...
@@ -1929,7 +1932,8 @@ def local_abstractconv3d_gradweights_gemm(node):
rval
=
GpuCorr3dMM_gradWeights
(
border_mode
=
node
.
op
.
border_mode
,
rval
=
GpuCorr3dMM_gradWeights
(
border_mode
=
node
.
op
.
border_mode
,
subsample
=
node
.
op
.
subsample
,
subsample
=
node
.
op
.
subsample
,
filter_dilation
=
node
.
op
.
filter_dilation
)(
filter_dilation
=
node
.
op
.
filter_dilation
,
num_groups
=
node
.
op
.
num_groups
)(
gpu_contiguous
(
img
),
gpu_contiguous
(
topgrad
),
shape
)
gpu_contiguous
(
img
),
gpu_contiguous
(
topgrad
),
shape
)
if
node
.
op
.
filter_flip
:
if
node
.
op
.
filter_flip
:
rval
=
rval
[:,
:,
::
-
1
,
::
-
1
,
::
-
1
]
rval
=
rval
[:,
:,
::
-
1
,
::
-
1
,
::
-
1
]
...
@@ -1999,7 +2003,8 @@ def local_abstractconv3d_gradinputs_gemm(node):
...
@@ -1999,7 +2003,8 @@ def local_abstractconv3d_gradinputs_gemm(node):
rval
=
GpuCorr3dMM_gradInputs
(
border_mode
=
node
.
op
.
border_mode
,
rval
=
GpuCorr3dMM_gradInputs
(
border_mode
=
node
.
op
.
border_mode
,
subsample
=
node
.
op
.
subsample
,
subsample
=
node
.
op
.
subsample
,
filter_dilation
=
node
.
op
.
filter_dilation
)(
filter_dilation
=
node
.
op
.
filter_dilation
,
num_groups
=
node
.
op
.
num_groups
)(
gpu_contiguous
(
kern
),
gpu_contiguous
(
topgrad
),
shape
)
gpu_contiguous
(
kern
),
gpu_contiguous
(
topgrad
),
shape
)
return
[
rval
]
return
[
rval
]
...
...
theano/gpuarray/tests/test_dnn.py
浏览文件 @
4747cf44
...
@@ -2292,11 +2292,11 @@ def dconv2di(border_mode, subsample, filter_dilation, num_groups):
...
@@ -2292,11 +2292,11 @@ def dconv2di(border_mode, subsample, filter_dilation, num_groups):
class
Cudnn_grouped_conv
(
Grouped_conv_noOptim
):
class
Cudnn_grouped_conv
(
Grouped_conv_noOptim
):
mode
=
mode_with_gpu
mode
=
mode_with_gpu
conv
2d
=
staticmethod
(
dconv2d
)
conv
=
staticmethod
(
dconv2d
)
conv
2d
_gradw
=
staticmethod
(
dconv2dw
)
conv_gradw
=
staticmethod
(
dconv2dw
)
conv
2d
_gradi
=
staticmethod
(
dconv2di
)
conv_gradi
=
staticmethod
(
dconv2di
)
conv
2d
_op
=
dnn
.
GpuDnnConv
conv_op
=
dnn
.
GpuDnnConv
conv
2d
_gradw_op
=
dnn
.
GpuDnnConvGradW
conv_gradw_op
=
dnn
.
GpuDnnConvGradW
conv
2d
_gradi_op
=
dnn
.
GpuDnnConvGradI
conv_gradi_op
=
dnn
.
GpuDnnConvGradI
flip_filter
=
False
flip_filter
=
False
is_dnn
=
True
is_dnn
=
True
theano/gpuarray/tests/test_gemmcorr.py
浏览文件 @
4747cf44
...
@@ -224,11 +224,11 @@ class TestCorrMM(unittest.TestCase):
...
@@ -224,11 +224,11 @@ class TestCorrMM(unittest.TestCase):
class
TestGroupGpuCorr2d
(
Grouped_conv_noOptim
):
class
TestGroupGpuCorr2d
(
Grouped_conv_noOptim
):
mode
=
theano
.
compile
.
get_mode
(
"FAST_RUN"
)
mode
=
theano
.
compile
.
get_mode
(
"FAST_RUN"
)
conv
2d
=
GpuCorrMM
conv
=
GpuCorrMM
conv
2d
_gradw
=
GpuCorrMM_gradWeights
conv_gradw
=
GpuCorrMM_gradWeights
conv
2d
_gradi
=
GpuCorrMM_gradInputs
conv_gradi
=
GpuCorrMM_gradInputs
conv
2d
_op
=
GpuCorrMM
conv_op
=
GpuCorrMM
conv
2d
_gradw_op
=
GpuCorrMM_gradWeights
conv_gradw_op
=
GpuCorrMM_gradWeights
conv
2d
_gradi_op
=
GpuCorrMM_gradInputs
conv_gradi_op
=
GpuCorrMM_gradInputs
flip_filter
=
True
flip_filter
=
True
is_dnn
=
False
is_dnn
=
False
theano/gpuarray/tests/test_gemmcorr3d.py
浏览文件 @
4747cf44
...
@@ -11,6 +11,7 @@ from theano.tensor.nnet.corr3d import Corr3dMM, Corr3dMM_gradWeights, Corr3dMM_g
...
@@ -11,6 +11,7 @@ from theano.tensor.nnet.corr3d import Corr3dMM, Corr3dMM_gradWeights, Corr3dMM_g
from
..type
import
gpuarray_shared_constructor
from
..type
import
gpuarray_shared_constructor
from
..blas
import
GpuCorr3dMM
,
GpuCorr3dMM_gradWeights
,
GpuCorr3dMM_gradInputs
from
..blas
import
GpuCorr3dMM
,
GpuCorr3dMM_gradWeights
,
GpuCorr3dMM_gradInputs
from
.config
import
mode_with_gpu
,
mode_without_gpu
,
ref_cast
from
.config
import
mode_with_gpu
,
mode_without_gpu
,
ref_cast
from
theano.tensor.nnet.tests.test_abstract_conv
import
Grouped_conv3d_noOptim
class
TestCorr3dMM
(
unittest
.
TestCase
):
class
TestCorr3dMM
(
unittest
.
TestCase
):
...
@@ -218,3 +219,15 @@ class TestCorr3dMM(unittest.TestCase):
...
@@ -218,3 +219,15 @@ class TestCorr3dMM(unittest.TestCase):
verify_grad
=
False
)
verify_grad
=
False
)
self
.
run_gradinput
(
inputs_shape
=
(
1
,
1024
,
3
,
3
,
1
),
self
.
run_gradinput
(
inputs_shape
=
(
1
,
1024
,
3
,
3
,
1
),
filters_shape
=
(
1
,
1
,
1
,
1
,
1024
))
filters_shape
=
(
1
,
1
,
1
,
1
,
1024
))
class
TestGroupGpuCorr3d
(
Grouped_conv3d_noOptim
):
mode
=
theano
.
compile
.
get_mode
(
"FAST_RUN"
)
conv
=
GpuCorr3dMM
conv_gradw
=
GpuCorr3dMM_gradWeights
conv_gradi
=
GpuCorr3dMM_gradInputs
conv_op
=
GpuCorr3dMM
conv_gradw_op
=
GpuCorr3dMM_gradWeights
conv_gradi_op
=
GpuCorr3dMM_gradInputs
flip_filter
=
True
is_dnn
=
False
theano/tensor/nnet/abstract_conv.py
浏览文件 @
4747cf44
差异被折叠。
点击展开。
theano/tensor/nnet/c_code/corr3d_gemm.c
浏览文件 @
4747cf44
...
@@ -127,7 +127,8 @@ PyArrayObject* corr3dMM(PyArrayObject* bottom,
...
@@ -127,7 +127,8 @@ PyArrayObject* corr3dMM(PyArrayObject* bottom,
const
int
dilD
=
1
,
const
int
dilD
=
1
,
const
int
padH
=
0
,
const
int
padH
=
0
,
const
int
padW
=
0
,
const
int
padW
=
0
,
const
int
padD
=
0
)
const
int
padD
=
0
,
const
int
numgroups
=
1
)
{
{
if
(
PyArray_NDIM
(
bottom
)
!=
5
)
if
(
PyArray_NDIM
(
bottom
)
!=
5
)
{
{
...
@@ -178,11 +179,16 @@ PyArrayObject* corr3dMM(PyArrayObject* bottom,
...
@@ -178,11 +179,16 @@ PyArrayObject* corr3dMM(PyArrayObject* bottom,
const
int
kH
=
PyArray_DIMS
(
weight
)[
2
];
const
int
kH
=
PyArray_DIMS
(
weight
)[
2
];
const
int
kW
=
PyArray_DIMS
(
weight
)[
3
];
const
int
kW
=
PyArray_DIMS
(
weight
)[
3
];
const
int
kD
=
PyArray_DIMS
(
weight
)[
4
];
const
int
kD
=
PyArray_DIMS
(
weight
)[
4
];
if
(
nChannels
!=
PyArray_DIMS
(
weight
)[
1
])
{
if
(
nChannels
!=
PyArray_DIMS
(
weight
)[
1
]
*
numgroups
)
{
PyErr_SetString
(
PyExc_ValueError
,
PyErr_SetString
(
PyExc_ValueError
,
"Corr3dMM images and kernel must have the same stack size
\n
"
);
"Corr3dMM images and kernel must have the same stack size
\n
"
);
return
NULL
;
return
NULL
;
}
}
if
((
nFilters
%%
numgroups
)
!=
0
)
{
PyErr_SetString
(
PyExc_ValueError
,
"CorrMM the number of filters must be divisible by the number of groups
\n
"
);
return
NULL
;
}
// implicit dilated filter
// implicit dilated filter
const
int
dil_kH
=
(
kH
-
1
)
*
dilH
+
1
;
const
int
dil_kH
=
(
kH
-
1
)
*
dilH
+
1
;
const
int
dil_kW
=
(
kW
-
1
)
*
dilW
+
1
;
const
int
dil_kW
=
(
kW
-
1
)
*
dilW
+
1
;
...
@@ -210,7 +216,7 @@ PyArrayObject* corr3dMM(PyArrayObject* bottom,
...
@@ -210,7 +216,7 @@ PyArrayObject* corr3dMM(PyArrayObject* bottom,
" weight shape: %%d %%d %%d %%d %%d
\n
"
" weight shape: %%d %%d %%d %%d %%d
\n
"
" top shape: %%ld %%ld %%ld %%ld %%ld (expected %%d %%d %%d %%d %%d)
\n
"
,
" top shape: %%ld %%ld %%ld %%ld %%ld (expected %%d %%d %%d %%d %%d)
\n
"
,
batchSize
,
nChannels
,
bottomHeight
,
bottomWidth
,
bottomDepth
,
batchSize
,
nChannels
,
bottomHeight
,
bottomWidth
,
bottomDepth
,
nFilters
,
nChannels
,
kH
,
kW
,
kD
,
nFilters
,
nChannels
/
numgroups
,
kH
,
kW
,
kD
,
PyArray_DIMS
(
top
)[
0
],
PyArray_DIMS
(
top
)[
1
],
PyArray_DIMS
(
top
)[
0
],
PyArray_DIMS
(
top
)[
1
],
PyArray_DIMS
(
top
)[
2
],
PyArray_DIMS
(
top
)[
3
],
PyArray_DIMS
(
top
)[
4
],
PyArray_DIMS
(
top
)[
2
],
PyArray_DIMS
(
top
)[
3
],
PyArray_DIMS
(
top
)[
4
],
batchSize
,
nFilters
,
topHeight
,
topWidth
,
topDepth
);
batchSize
,
nFilters
,
topHeight
,
topWidth
,
topDepth
);
...
@@ -241,12 +247,16 @@ PyArrayObject* corr3dMM(PyArrayObject* bottom,
...
@@ -241,12 +247,16 @@ PyArrayObject* corr3dMM(PyArrayObject* bottom,
}
}
// Define some useful variables
// Define some useful variables
const
int
bottom_stride
=
PyArray_STRIDES
(
bottom
)[
0
]
/%
(
n_bytes
)
f
;
const
int
batch_bottom_stride
=
PyArray_STRIDES
(
bottom
)[
0
]
/%
(
n_bytes
)
f
;
const
int
top_stride
=
PyArray_STRIDES
(
top
)[
0
]
/%
(
n_bytes
)
f
;
const
int
group_bottom_stride
=
(
PyArray_STRIDES
(
bottom
)[
1
]
*
nChannels
/
numgroups
)
/%
(
n_bytes
)
f
;
const
int
K_
=
col_dim
[
1
];
const
int
batch_top_stride
=
PyArray_STRIDES
(
top
)[
0
]
/%
(
n_bytes
)
f
;
const
int
group_top_stride
=
(
PyArray_STRIDES
(
top
)[
1
]
*
nFilters
/
numgroups
)
/%
(
n_bytes
)
f
;
const
int
K_
=
col_dim
[
1
]
/
numgroups
;
const
int
N_
=
col_dim
[
2
];
const
int
N_
=
col_dim
[
2
];
const
int
col_stride
=
(
K_
*
N_
);
const
int
col_stride
=
(
K_
*
N_
*
numgroups
);
const
int
M_
=
nFilters
;
const
int
group_col_stride
=
(
K_
*
N_
);
const
int
group_weight_stride
=
(
PyArray_STRIDES
(
weight
)[
0
]
*
nFilters
/
numgroups
)
/%
(
n_bytes
)
f
;
const
int
M_
=
nFilters
/
numgroups
;
const
%
(
c_float_type
)
s
one
=
1
.
0
;
const
%
(
c_float_type
)
s
one
=
1
.
0
;
const
%
(
c_float_type
)
s
zero
=
0
.
0
;
const
%
(
c_float_type
)
s
zero
=
0
.
0
;
char
NTrans
=
'N'
;
char
NTrans
=
'N'
;
...
@@ -280,18 +290,21 @@ PyArrayObject* corr3dMM(PyArrayObject* bottom,
...
@@ -280,18 +290,21 @@ PyArrayObject* corr3dMM(PyArrayObject* bottom,
for
(
int
n
=
0
;
n
<
batchSize
;
++
n
)
{
for
(
int
n
=
0
;
n
<
batchSize
;
++
n
)
{
int
tid
=
%
(
omp_get_thread_num
)
s
;
int
tid
=
%
(
omp_get_thread_num
)
s
;
// First, im3d2col
// First, im3d2col
im3d2col
((
%
(
float_type
)
s
*
)
PyArray_DATA
(
bottom
)
+
n
*
b
ottom_stride
,
nChannels
,
im3d2col
((
%
(
float_type
)
s
*
)
PyArray_DATA
(
bottom
)
+
n
*
b
atch_bottom_stride
,
bottomHeight
,
bottomWidth
,
bottomDepth
,
nChannels
,
bottomHeight
,
bottomWidth
,
bottomDepth
,
kH
,
kW
,
kD
,
dilH
,
dilW
,
dilD
,
padH
,
padW
,
padD
,
dH
,
dW
,
dD
,
kH
,
kW
,
kD
,
dilH
,
dilW
,
dilD
,
padH
,
padW
,
padD
,
dH
,
dW
,
dD
,
(
%
(
float_type
)
s
*
)
PyArray_DATA
(
col
)
+
tid
*
col_stride
);
(
%
(
float_type
)
s
*
)
PyArray_DATA
(
col
)
+
tid
*
col_stride
);
// Second, gemm
%
(
gemm
)
s
(
&
NTrans
,
&
NTrans
,
for
(
int
g
=
0
;
g
<
numgroups
;
++
g
){
&
N_
,
&
M_
,
&
K_
,
// Second, gemm
&
one
,
%
(
gemm
)
s
(
&
NTrans
,
&
NTrans
,
(
%
(
float_type
)
s
*
)
PyArray_DATA
(
col
)
+
tid
*
col_stride
,
&
N_
,
&
N_
,
&
M_
,
&
K_
,
(
%
(
float_type
)
s
*
)
PyArray_DATA
(
weight
),
&
K_
,
&
one
,
&
zero
,
(
%
(
float_type
)
s
*
)
PyArray_DATA
(
col
)
+
tid
*
col_stride
+
g
*
group_col_stride
,
&
N_
,
(
%
(
float_type
)
s
*
)
PyArray_DATA
(
top
)
+
n
*
top_stride
,
&
N_
);
(
%
(
float_type
)
s
*
)
PyArray_DATA
(
weight
)
+
g
*
group_weight_stride
,
&
K_
,
&
zero
,
(
%
(
float_type
)
s
*
)
PyArray_DATA
(
top
)
+
n
*
batch_top_stride
+
g
*
group_top_stride
,
&
N_
);
}
}
}
// Restore to previous blas threads
// Restore to previous blas threads
%
(
blas_set_num_threads
)
s
(
blas_threads_saved
);
%
(
blas_set_num_threads
)
s
(
blas_threads_saved
);
...
@@ -300,7 +313,7 @@ PyArrayObject* corr3dMM(PyArrayObject* bottom,
...
@@ -300,7 +313,7 @@ PyArrayObject* corr3dMM(PyArrayObject* bottom,
output
=
weight
;
output
=
weight
;
npy_intp
weight_dim
[
2
];
npy_intp
weight_dim
[
2
];
weight_dim
[
0
]
=
(
npy_intp
)
max_threads
;
weight_dim
[
0
]
=
(
npy_intp
)
max_threads
;
weight_dim
[
1
]
=
(
npy_intp
)(
M_
*
K_
);
weight_dim
[
1
]
=
(
npy_intp
)(
M_
*
K_
*
numgroups
);
PyArrayObject
*
local_weight
=
(
PyArrayObject
*
)
PyArray_ZEROS
(
2
,
PyArrayObject
*
local_weight
=
(
PyArrayObject
*
)
PyArray_ZEROS
(
2
,
weight_dim
,
PyArray_TYPE
(
weight
),
0
);
weight_dim
,
PyArray_TYPE
(
weight
),
0
);
...
@@ -322,22 +335,25 @@ PyArrayObject* corr3dMM(PyArrayObject* bottom,
...
@@ -322,22 +335,25 @@ PyArrayObject* corr3dMM(PyArrayObject* bottom,
for
(
int
n
=
0
;
n
<
batchSize
;
++
n
)
{
for
(
int
n
=
0
;
n
<
batchSize
;
++
n
)
{
int
tid
=
%
(
omp_get_thread_num
)
s
;
int
tid
=
%
(
omp_get_thread_num
)
s
;
// First, im2col
// First, im2col
im3d2col
((
%
(
float_type
)
s
*
)
PyArray_DATA
(
bottom
)
+
n
*
b
ottom_stride
,
nChannels
,
im3d2col
((
%
(
float_type
)
s
*
)
PyArray_DATA
(
bottom
)
+
n
*
b
atch_bottom_stride
,
bottomHeight
,
bottomWidth
,
bottomDepth
,
nChannels
,
bottomHeight
,
bottomWidth
,
bottomDepth
,
kH
,
kW
,
kD
,
dilH
,
dilW
,
dilD
,
padH
,
padW
,
padD
,
dH
,
dW
,
dD
,
kH
,
kW
,
kD
,
dilH
,
dilW
,
dilD
,
padH
,
padW
,
padD
,
dH
,
dW
,
dD
,
(
%
(
float_type
)
s
*
)
PyArray_DATA
(
col
)
+
tid
*
col_stride
);
(
%
(
float_type
)
s
*
)
PyArray_DATA
(
col
)
+
tid
*
col_stride
);
// Second, gemm
// Note that we accumulate into weight. We do so by setting beta = 0
for
(
int
g
=
0
;
g
<
numgroups
;
++
g
){
// for the first iteration and beta = 1 for subsequent ones. (This
// Second, gemm
// is faster than setting weight to all zeros before the loop.)
// Note that we accumulate into weight. We do so by setting beta = 0
%
(
gemm
)
s
(
&
Trans
,
&
NTrans
,
// for the first iteration and beta = 1 for subsequent ones. (This
&
K_
,
&
M_
,
&
N_
,
// is faster than setting weight to all zeros before the loop.)
&
one
,
%
(
gemm
)
s
(
&
Trans
,
&
NTrans
,
(
%
(
float_type
)
s
*
)
PyArray_DATA
(
col
)
+
tid
*
col_stride
,
&
N_
,
&
K_
,
&
M_
,
&
N_
,
(
%
(
float_type
)
s
*
)
PyArray_DATA
(
top
)
+
n
*
top_stride
,
&
N_
,
&
one
,
(
n
==
0
)
?
&
zero
:
&
one
,
(
%
(
float_type
)
s
*
)
PyArray_DATA
(
col
)
+
tid
*
col_stride
+
g
*
group_col_stride
,
&
N_
,
(
%
(
float_type
)
s
*
)
PyArray_DATA
(
local_weight
)
+
(
%
(
float_type
)
s
*
)
PyArray_DATA
(
top
)
+
n
*
batch_top_stride
+
g
*
group_top_stride
,
&
N_
,
tid
*
weight_dim
[
1
],
&
K_
);
(
n
==
0
)
?
&
zero
:
&
one
,
(
%
(
float_type
)
s
*
)
PyArray_DATA
(
local_weight
)
+
g
*
group_weight_stride
+
tid
*
weight_dim
[
1
],
&
K_
);
}
}
}
// Restore to previous blas threads
// Restore to previous blas threads
%
(
blas_set_num_threads
)
s
(
blas_threads_saved
);
%
(
blas_set_num_threads
)
s
(
blas_threads_saved
);
...
@@ -370,20 +386,23 @@ PyArrayObject* corr3dMM(PyArrayObject* bottom,
...
@@ -370,20 +386,23 @@ PyArrayObject* corr3dMM(PyArrayObject* bottom,
%
(
blas_set_num_threads
)
s
(
1
);
%
(
blas_set_num_threads
)
s
(
1
);
%
(
omp_flags
)
s
%
(
omp_flags
)
s
for
(
int
n
=
0
;
n
<
batchSize
;
++
n
)
{
for
(
int
n
=
0
;
n
<
batchSize
;
++
n
)
{
// gemm into columns
int
tid
=
%
(
omp_get_thread_num
)
s
;
int
tid
=
%
(
omp_get_thread_num
)
s
;
%
(
gemm
)
s
(
&
NTrans
,
&
Trans
,
for
(
int
g
=
0
;
g
<
numgroups
;
++
g
){
&
N_
,
&
K_
,
&
M_
,
// gemm into columns
&
one
,
%
(
gemm
)
s
(
&
NTrans
,
&
Trans
,
(
%
(
float_type
)
s
*
)
PyArray_DATA
(
top
)
+
n
*
top_stride
,
&
N_
,
&
N_
,
&
K_
,
&
M_
,
(
%
(
float_type
)
s
*
)
PyArray_DATA
(
weight
),
&
K_
,
&
one
,
&
zero
,
(
%
(
float_type
)
s
*
)
PyArray_DATA
(
top
)
+
n
*
batch_top_stride
+
g
*
group_top_stride
,
&
N_
,
(
%
(
float_type
)
s
*
)
PyArray_DATA
(
col
)
+
tid
*
col_stride
,
&
N_
);
(
%
(
float_type
)
s
*
)
PyArray_DATA
(
weight
)
+
g
*
group_weight_stride
,
&
K_
,
&
zero
,
(
%
(
float_type
)
s
*
)
PyArray_DATA
(
col
)
+
tid
*
col_stride
+
g
*
group_col_stride
,
&
N_
);
}
// col2im back to the data
// col2im back to the data
col2im3d
((
%
(
float_type
)
s
*
)
PyArray_DATA
(
col
)
+
tid
*
col_stride
,
nChannels
,
col2im3d
((
%
(
float_type
)
s
*
)
PyArray_DATA
(
col
)
+
tid
*
col_stride
,
nChannels
,
bottomHeight
,
bottomWidth
,
bottomDepth
,
bottomHeight
,
bottomWidth
,
bottomDepth
,
kH
,
kW
,
kD
,
dilH
,
dilW
,
dilD
,
padH
,
padW
,
padD
,
dH
,
dW
,
dD
,
kH
,
kW
,
kD
,
dilH
,
dilW
,
dilD
,
padH
,
padW
,
padD
,
dH
,
dW
,
dD
,
(
%
(
float_type
)
s
*
)
PyArray_DATA
(
bottom
)
+
n
*
bottom_stride
);
(
%
(
float_type
)
s
*
)
PyArray_DATA
(
bottom
)
+
n
*
b
atch_b
ottom_stride
);
}
}
// Restore to previous blas threads
// Restore to previous blas threads
%
(
blas_set_num_threads
)
s
(
blas_threads_saved
);
%
(
blas_set_num_threads
)
s
(
blas_threads_saved
);
...
...
theano/tensor/nnet/corr3d.py
浏览文件 @
4747cf44
差异被折叠。
点击展开。
theano/tensor/nnet/opt.py
浏览文件 @
4747cf44
...
@@ -114,7 +114,8 @@ def local_abstractconv3d_gemm(node):
...
@@ -114,7 +114,8 @@ def local_abstractconv3d_gemm(node):
kern
=
kern
[:,
:,
::
-
1
,
::
-
1
,
::
-
1
]
kern
=
kern
[:,
:,
::
-
1
,
::
-
1
,
::
-
1
]
rval
=
Corr3dMM
(
border_mode
=
node
.
op
.
border_mode
,
rval
=
Corr3dMM
(
border_mode
=
node
.
op
.
border_mode
,
subsample
=
node
.
op
.
subsample
,
subsample
=
node
.
op
.
subsample
,
filter_dilation
=
node
.
op
.
filter_dilation
)(
img
,
kern
)
filter_dilation
=
node
.
op
.
filter_dilation
,
num_groups
=
node
.
op
.
num_groups
)(
img
,
kern
)
copy_stack_trace
(
node
.
outputs
[
0
],
rval
)
copy_stack_trace
(
node
.
outputs
[
0
],
rval
)
return
[
rval
]
return
[
rval
]
...
@@ -163,7 +164,8 @@ def local_abstractconv3d_gradweight_gemm(node):
...
@@ -163,7 +164,8 @@ def local_abstractconv3d_gradweight_gemm(node):
rval
=
Corr3dMM_gradWeights
(
border_mode
=
node
.
op
.
border_mode
,
rval
=
Corr3dMM_gradWeights
(
border_mode
=
node
.
op
.
border_mode
,
subsample
=
node
.
op
.
subsample
,
subsample
=
node
.
op
.
subsample
,
filter_dilation
=
node
.
op
.
filter_dilation
)(
img
,
topgrad
,
shape
)
filter_dilation
=
node
.
op
.
filter_dilation
,
num_groups
=
node
.
op
.
num_groups
)(
img
,
topgrad
,
shape
)
copy_stack_trace
(
node
.
outputs
[
0
],
rval
)
copy_stack_trace
(
node
.
outputs
[
0
],
rval
)
# need to flip the kernel if necessary
# need to flip the kernel if necessary
...
@@ -219,8 +221,9 @@ def local_abstractconv3d_gradinputs_gemm(node):
...
@@ -219,8 +221,9 @@ def local_abstractconv3d_gradinputs_gemm(node):
kern
=
kern
[:,
:,
::
-
1
,
::
-
1
,
::
-
1
]
kern
=
kern
[:,
:,
::
-
1
,
::
-
1
,
::
-
1
]
rval
=
Corr3dMM_gradInputs
(
border_mode
=
node
.
op
.
border_mode
,
rval
=
Corr3dMM_gradInputs
(
border_mode
=
node
.
op
.
border_mode
,
subsample
=
node
.
op
.
subsample
,
subsample
=
node
.
op
.
subsample
,
filter_dilation
=
node
.
op
.
filter_dilation
)(
kern
,
topgrad
,
filter_dilation
=
node
.
op
.
filter_dilation
,
shape
)
num_groups
=
node
.
op
.
num_groups
)(
kern
,
topgrad
,
shape
)
copy_stack_trace
(
node
.
outputs
[
0
],
rval
)
copy_stack_trace
(
node
.
outputs
[
0
],
rval
)
return
[
rval
]
return
[
rval
]
...
@@ -267,6 +270,8 @@ def local_conv3d_cpu(node):
...
@@ -267,6 +270,8 @@ def local_conv3d_cpu(node):
return
None
return
None
if
node
.
op
.
filter_dilation
!=
(
1
,
1
,
1
):
if
node
.
op
.
filter_dilation
!=
(
1
,
1
,
1
):
return
None
return
None
if
node
.
op
.
num_groups
>
1
:
return
None
bias
=
theano
.
tensor
.
zeros_like
(
kern
[:,
0
,
0
,
0
,
0
])
bias
=
theano
.
tensor
.
zeros_like
(
kern
[:,
0
,
0
,
0
,
0
])
...
@@ -419,6 +424,8 @@ def local_conv3d_gradweight_cpu(node):
...
@@ -419,6 +424,8 @@ def local_conv3d_gradweight_cpu(node):
return
None
return
None
if
node
.
op
.
filter_dilation
!=
(
1
,
1
,
1
):
if
node
.
op
.
filter_dilation
!=
(
1
,
1
,
1
):
return
None
return
None
if
node
.
op
.
num_groups
>
1
:
return
None
# conv3D expects shape (batch, row, column, time, channel)
# conv3D expects shape (batch, row, column, time, channel)
img
=
img
.
dimshuffle
(
0
,
2
,
3
,
4
,
1
)
img
=
img
.
dimshuffle
(
0
,
2
,
3
,
4
,
1
)
...
@@ -544,6 +551,8 @@ def local_conv3d_gradinputs_cpu(node):
...
@@ -544,6 +551,8 @@ def local_conv3d_gradinputs_cpu(node):
return
None
return
None
if
node
.
op
.
filter_dilation
!=
(
1
,
1
,
1
):
if
node
.
op
.
filter_dilation
!=
(
1
,
1
,
1
):
return
None
return
None
if
node
.
op
.
num_groups
>
1
:
return
None
# need to flip the kernel if necessary (conv3D does not flip)
# need to flip the kernel if necessary (conv3D does not flip)
if
node
.
op
.
filter_flip
:
if
node
.
op
.
filter_flip
:
...
...
theano/tensor/nnet/tests/test_abstract_conv.py
浏览文件 @
4747cf44
差异被折叠。
点击展开。
theano/tensor/nnet/tests/test_corr.py
浏览文件 @
4747cf44
...
@@ -422,12 +422,12 @@ class TestGroupCorr2d(Grouped_conv_noOptim):
...
@@ -422,12 +422,12 @@ class TestGroupCorr2d(Grouped_conv_noOptim):
mode
=
theano
.
compile
.
get_mode
(
"FAST_RUN"
)
mode
=
theano
.
compile
.
get_mode
(
"FAST_RUN"
)
else
:
else
:
mode
=
None
mode
=
None
conv
2d
=
corr
.
CorrMM
conv
=
corr
.
CorrMM
conv
2d
_gradw
=
corr
.
CorrMM_gradWeights
conv_gradw
=
corr
.
CorrMM_gradWeights
conv
2d
_gradi
=
corr
.
CorrMM_gradInputs
conv_gradi
=
corr
.
CorrMM_gradInputs
conv
2d
_op
=
corr
.
CorrMM
conv_op
=
corr
.
CorrMM
conv
2d
_gradw_op
=
corr
.
CorrMM_gradWeights
conv_gradw_op
=
corr
.
CorrMM_gradWeights
conv
2d
_gradi_op
=
corr
.
CorrMM_gradInputs
conv_gradi_op
=
corr
.
CorrMM_gradInputs
flip_filter
=
True
flip_filter
=
True
is_dnn
=
False
is_dnn
=
False
...
@@ -440,13 +440,13 @@ class TestGroupCorr2d(Grouped_conv_noOptim):
...
@@ -440,13 +440,13 @@ class TestGroupCorr2d(Grouped_conv_noOptim):
kern_sym
=
T
.
tensor4
(
'kern'
)
kern_sym
=
T
.
tensor4
(
'kern'
)
# grouped convolution graph
# grouped convolution graph
conv_group
=
self
.
conv
2d
(
num_groups
=
groups
)(
bottom_sym
,
kern_sym
)
conv_group
=
self
.
conv
(
num_groups
=
groups
)(
bottom_sym
,
kern_sym
)
gconv_func
=
theano
.
function
([
bottom_sym
,
kern_sym
],
conv_group
,
mode
=
self
.
mode
)
gconv_func
=
theano
.
function
([
bottom_sym
,
kern_sym
],
conv_group
,
mode
=
self
.
mode
)
# Graph for the normal hard way
# Graph for the normal hard way
kern_offset
=
kern_sym
.
shape
[
0
]
//
groups
kern_offset
=
kern_sym
.
shape
[
0
]
//
groups
bottom_offset
=
bottom_sym
.
shape
[
1
]
//
groups
bottom_offset
=
bottom_sym
.
shape
[
1
]
//
groups
split_conv_output
=
[
self
.
conv
2d
()(
bottom_sym
[:,
i
*
bottom_offset
:(
i
+
1
)
*
bottom_offset
,
:,
:],
split_conv_output
=
[
self
.
conv
()(
bottom_sym
[:,
i
*
bottom_offset
:(
i
+
1
)
*
bottom_offset
,
:,
:],
kern_sym
[
i
*
kern_offset
:(
i
+
1
)
*
kern_offset
,
:,
:,
:])
kern_sym
[
i
*
kern_offset
:(
i
+
1
)
*
kern_offset
,
:,
:,
:])
for
i
in
range
(
groups
)]
for
i
in
range
(
groups
)]
concatenated_output
=
T
.
concatenate
(
split_conv_output
,
axis
=
1
)
concatenated_output
=
T
.
concatenate
(
split_conv_output
,
axis
=
1
)
...
...
theano/tensor/nnet/tests/test_corr3d.py
浏览文件 @
4747cf44
...
@@ -12,6 +12,7 @@ import theano
...
@@ -12,6 +12,7 @@ import theano
import
theano.tensor
as
T
import
theano.tensor
as
T
from
theano.tests
import
unittest_tools
as
utt
from
theano.tests
import
unittest_tools
as
utt
from
theano.tensor.nnet
import
corr3d
,
conv
from
theano.tensor.nnet
import
corr3d
,
conv
from
theano.tensor.nnet.tests.test_abstract_conv
import
Grouped_conv3d_noOptim
class
TestCorr3D
(
utt
.
InferShapeTester
):
class
TestCorr3D
(
utt
.
InferShapeTester
):
...
@@ -418,6 +419,21 @@ class TestCorr3D(utt.InferShapeTester):
...
@@ -418,6 +419,21 @@ class TestCorr3D(utt.InferShapeTester):
self
.
validate
((
3
,
1
,
7
,
5
,
5
),
(
2
,
1
,
2
,
3
,
3
),
(
2
,
1
,
1
),
non_contiguous
=
True
)
self
.
validate
((
3
,
1
,
7
,
5
,
5
),
(
2
,
1
,
2
,
3
,
3
),
(
2
,
1
,
1
),
non_contiguous
=
True
)
class
TestGroupCorr3d
(
Grouped_conv3d_noOptim
):
if
theano
.
config
.
mode
==
"FAST_COMPILE"
:
mode
=
theano
.
compile
.
get_mode
(
"FAST_RUN"
)
else
:
mode
=
None
conv
=
corr3d
.
Corr3dMM
conv_gradw
=
corr3d
.
Corr3dMM_gradWeights
conv_gradi
=
corr3d
.
Corr3dMM_gradInputs
conv_op
=
corr3d
.
Corr3dMM
conv_gradw_op
=
corr3d
.
Corr3dMM_gradWeights
conv_gradi_op
=
corr3d
.
Corr3dMM_gradInputs
flip_filter
=
True
is_dnn
=
False
if
__name__
==
'__main__'
:
if
__name__
==
'__main__'
:
t
=
TestCorr3D
(
'setUp'
)
t
=
TestCorr3D
(
'setUp'
)
...
...
编写
预览
Markdown
格式
0%
重试
或
添加新文件
添加附件
取消
您添加了
0
人
到此讨论。请谨慎行事。
请先完成此评论的编辑!
取消
请
注册
或者
登录
后发表评论