testgroup / pytensor / Commits / 3e86efec

Commit 3e86efec, authored Aug 11, 2017 by Frédéric Bastien, committed by GitHub on Aug 11, 2017.
Merge pull request #6143 from affanv14/metaopt
Implement Meta-optimizer on new backend
Parents: 43556e98, 35df202d

Showing 6 changed files with 841 additions and 29 deletions (+841 / -29):
  doc/library/config.txt              +23   -0
  theano/configdefaults.py            +15   -2
  theano/gof/opt.py                   +27  -14
  theano/gpuarray/dnn.py             +218   -9
  theano/gpuarray/opt.py             +358   -4
  theano/gpuarray/tests/test_opt.py  +200   -0
doc/library/config.txt

@@ -1116,3 +1116,26 @@ import theano and print the config variable, as in:

     The number of user stack level to keep for variables during Theano
     compilation. If higher then 0, will make us keep Theano internal
     stack trace.
+
+.. attribute:: config.metaopt.verbose
+
+    Int value, default: 0
+
+    The verbosity level of the meta-optimizer: 0 for silent,
+    1 to only warn if we cannot meta-optimize some op,
+    2 for full output of separate timings and the selected implementation.
+
+.. attribute:: config.metaopt.optimizer_excluding
+
+    Default: ``""``
+
+    A list of optimizer tags that we don't want included in the meta-optimizer.
+    If multiple tags, separate them by ':'.
+
+.. attribute:: config.metaopt.optimizer_including
+
+    Default: ``""``
+
+    A list of optimizer tags that we want included in the meta-optimizer.
+    If multiple tags, separate them by ':'.
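These flags can be set like any other Theano configuration option. A minimal usage sketch (the tag names are ones this commit registers; the script name is hypothetical):

    # From the environment:
    # THEANO_FLAGS='metaopt.verbose=2,metaopt.optimizer_excluding=conv_dnn' python train.py

    # Or programmatically, before compiling any function:
    import theano
    theano.config.metaopt.verbose = 2                      # print timings and the winner
    theano.config.metaopt.optimizer_including = 'alternative'
    theano.config.metaopt.optimizer_excluding = 'conv3d2d'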
theano/configdefaults.py

@@ -1456,10 +1456,23 @@ AddConfigVar('blas.ldflags',

 AddConfigVar('metaopt.verbose',
-             "Enable verbose output for meta optimizers",
-             theano.configparser.BoolParam(False),
+             "0 for silent, 1 for only warnings, 2 for full output with "
+             "timings and selected implementation",
+             theano.configparser.IntParam(0),
+             in_c_key=False)
+
+AddConfigVar('metaopt.optimizer_excluding',
+             ("exclude optimizers with these tags. "
+              "Separate tags with ':'."),
+             StrParam(""),
+             in_c_key=False)
+
+AddConfigVar('metaopt.optimizer_including',
+             ("include optimizers with these tags. "
+              "Separate tags with ':'."),
+             StrParam(""),
              in_c_key=False)

 AddConfigVar('profile',
              "If VM should collect profile information",
              BoolParam(False),
theano/gof/opt.py

@@ -1131,13 +1131,20 @@ class LocalMetaOptimizer(LocalOptimizer):

     """

-    def __init__(self, tracks=None, optimizers=()):
-        self._tracks = tracks
-        self.optimizers = list(optimizers)
+    def __init__(self):
+        self.verbose = config.metaopt.verbose
+        self.track_dict = defaultdict(lambda: [])
+        self.tag_dict = defaultdict(lambda: [])
+        self._tracks = []
+        self.optimizers = []

-    def register(self, optimizer):
+    def register(self, optimizer, tag_list):
         self.optimizers.append(optimizer)
+        for c in optimizer.tracks():
+            self.track_dict[c].append(optimizer)
+            self._tracks.append(c)
+        for tag in tag_list:
+            self.tag_dict[tag].append(optimizer)

     def tracks(self):
         return self._tracks
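A registration sketch under the new interface (MyMetaOpt, opt_a and opt_b are hypothetical; only register() and the tag lists come from this commit):

    from theano.gof.opt import LocalMetaOptimizer, local_optimizer
    from theano.tensor.nnet.abstract_conv import AbstractConv2d

    class MyMetaOpt(LocalMetaOptimizer):
        # provide_inputs must be supplied by subclasses; returning {} means
        # "no substitute data", so nodes with unknown shapes are skipped.
        def provide_inputs(self, node, inputs):
            return {}

    @local_optimizer([AbstractConv2d])
    def opt_a(node):          # hypothetical candidate implementation
        return None

    @local_optimizer([AbstractConv2d])
    def opt_b(node):          # hypothetical alternative implementation
        return None

    meta = MyMetaOpt()
    # Each candidate is filed under every op type it tracks() and under each
    # tag in tag_list, so get_opts() can later select candidates by tag.
    meta.register(opt_a, ['default', 'conv_gemm'])
    meta.register(opt_b, ['default', 'alternative', 'cudnn'])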
@@ -1167,39 +1174,40 @@ class LocalMetaOptimizer(LocalOptimizer):

         missing.difference_update(givens.keys())
         # ensure we have data for all input variables that need it
         if missing:
-            if self.verbose:
+            if self.verbose > 0:
                 print(("%s cannot meta-optimize %s, "
                        "%d of %d input shapes unknown" %
                        (self.__class__.__name__, node, len(missing), node.nin)))
             return
         # now we can apply the different optimizations in turn,
         # compile the resulting subgraphs and time their execution
-        if self.verbose:
+        if self.verbose > 1:
             print(("%s meta-optimizing %s (%d choices):" %
-                   (self.__class__.__name__, node, len(self.optimizers))))
+                   (self.__class__.__name__, node,
+                    len(self.get_opts(node)))))
         timings = []
-        for opt in self.optimizers:
+        for opt in self.get_opts(node):
             outputs = opt.transform(node)
             if outputs:
                 try:
                     fn = theano.function([], outputs, givens=givens,
                                          on_unused_input='ignore')
-                    timing = min(self.time_call(fn) for _ in range(3))
+                    fn.trust_input = True
+                    timing = min(self.time_call(fn) for _ in range(2))
                 except Exception as e:
-                    if self.verbose:
+                    if self.verbose > 0:
                         print("* %s: exception" % opt, e)
                     continue
                 else:
-                    if self.verbose:
+                    if self.verbose > 1:
                         print("* %s: %.5g sec" % (opt, timing))
                     timings.append((timing, outputs, opt))
             else:
-                if self.verbose:
+                if self.verbose > 0:
                     print("* %s: not applicable" % opt)
         # finally, we choose the fastest one
         if timings:
             timings.sort()
-            if self.verbose:
+            if self.verbose > 1:
                 print("= %s" % timings[0][2])
             return timings[0][1]
         return
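The measurement itself is deliberately simple: take the minimum over a couple of calls so one-off overhead (allocator warm-up, lazy initialization) does not penalize a candidate; setting fn.trust_input = True skips per-call input validation so the timing reflects the compiled kernels. A standalone sketch of the same scheme, independent of Theano:

    import time

    def time_call(fn, repeats=2):
        # Minimum over `repeats` runs, as in LocalMetaOptimizer above; the min
        # is a better steady-state estimate than the mean for noisy timings.
        best = float('inf')
        for _ in range(repeats):
            start = time.time()
            fn()
            best = min(best, time.time() - start)
        return best

    print(time_call(lambda: sum(range(100000))))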
@@ -1213,6 +1221,12 @@ class LocalMetaOptimizer(LocalOptimizer):

         """
         raise NotImplementedError()

+    def get_opts(self, node):
+        """
+        Can be overridden to change the way opts are selected.
+        """
+        return self.track_dict[type(node.op)]
+
     def time_call(self, fn):
         start = time.time()
         fn()
@@ -2313,7 +2327,6 @@ class EquilibriumOptimizer(NavigatorOptimizer):

         self.final_optimizers = []
         self.cleanup_optimizers = []
         self.tracks_on_change_inputs = tracks_on_change_inputs
-
         for opt in optimizers:
             if isinstance(opt, LocalOptimizer):
                 if opt.tracks() is None:
theano/gpuarray/dnn.py

@@ -1014,7 +1014,7 @@ def dnn_conv(img, kerns, border_mode='valid', subsample=(1, 1), dilation=(1, 1),

         conv = GpuDnnConvGradW()(img, kerns, out, desc)
         return as_gpuarray_variable(conv.dimshuffle(1, 0, 2, 3), ctx_name)

-    elif (border_mode == 'full' and subsample == (1, 1) and dilation == (1, 1) and
+    elif (border_mode == 'full' and subsample == (1, 1) and
           direction_hint != 'forward!' and num_groups == 1):
         # Special case: We can be faster by using GpuDnnConvGradI to compute
         # the full convolution as the backward pass of a valid convolution.
@@ -1024,11 +1024,11 @@ def dnn_conv(img, kerns, border_mode='valid', subsample=(1, 1), dilation=(1, 1),

         conv_mode = 'cross' if conv_mode == 'conv' else 'conv'
         out_shp = (shape_i(img, 0, fgraph),
                    shape_i(kerns, 1, fgraph),
-                   shape_i(img, 2, fgraph) + shape_i(kerns, 2, fgraph) - 1,
-                   shape_i(img, 3, fgraph) + shape_i(kerns, 3, fgraph) - 1)
+                   shape_i(img, 2, fgraph) +
+                   (shape_i(kerns, 2, fgraph) - 1) * dilation[0],
+                   shape_i(img, 3, fgraph) +
+                   (shape_i(kerns, 3, fgraph) - 1) * dilation[1])
         out_shp = assert_conv_shape(out_shp)
         out = GpuAllocEmpty(dtype=img.dtype, context_name=ctx_name)(*out_shp)
-        desc = GpuDnnConvDesc(border_mode='valid', subsample=(1, 1), dilation=(1, 1),
+        desc = GpuDnnConvDesc(border_mode='valid', subsample=(1, 1), dilation=dilation,
                               conv_mode=conv_mode, precision=precision)(kerns.shape)
         return GpuDnnConvGradI()(kerns, img, out, desc)
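The reworked out_shp generalizes the classic full-convolution size i + k - 1: a kernel of size k dilated by d covers (k - 1) * d + 1 input pixels, so each spatial dimension becomes i + (k - 1) * d. A quick numeric check of the arithmetic (example values only):

    def full_conv_len(i, k, d=1):
        # 'full' convolution output length with a dilated kernel, matching
        # the out_shp expressions above.
        return i + (k - 1) * d

    assert full_conv_len(5, 3) == 7        # undilated: i + k - 1
    assert full_conv_len(5, 3, d=2) == 9   # dilated kernel spans 5 pixels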
@@ -1133,7 +1133,7 @@ def dnn_conv3d(img, kerns, border_mode='valid', subsample=(1, 1, 1), dilation=(1, 1, 1),

         conv = GpuDnnConvGradW()(img, kerns, out, desc)
         return as_gpuarray_variable(conv.dimshuffle(1, 0, 2, 3, 4), ctx_name)

-    elif (border_mode == 'full' and subsample == (1, 1, 1) and dilation == (1, 1, 1) and
+    elif (border_mode == 'full' and subsample == (1, 1, 1) and
           direction_hint != 'forward!'):
         # Special case: We can be faster by using GpuDnnConvGradI to compute
         # the full convolution as the backward pass of a valid convolution.
@@ -1143,12 +1143,12 @@ def dnn_conv3d(img, kerns, border_mode='valid', subsample=(1, 1, 1), dilation=(1, 1, 1),

         conv_mode = 'cross' if conv_mode == 'conv' else 'conv'
         out_shp = (shape_i(img, 0, fgraph),
                    shape_i(kerns, 1, fgraph),
-                   shape_i(img, 2, fgraph) + shape_i(kerns, 2, fgraph) - 1,
-                   shape_i(img, 3, fgraph) + shape_i(kerns, 3, fgraph) - 1,
-                   shape_i(img, 4, fgraph) + shape_i(kerns, 4, fgraph) - 1)
+                   shape_i(img, 2, fgraph) +
+                   (shape_i(kerns, 2, fgraph) - 1) * dilation[0],
+                   shape_i(img, 3, fgraph) +
+                   (shape_i(kerns, 3, fgraph) - 1) * dilation[1],
+                   shape_i(img, 4, fgraph) +
+                   (shape_i(kerns, 4, fgraph) - 1) * dilation[2])
         out_shp = assert_conv_shape(out_shp)
         out = GpuAllocEmpty(dtype=img.dtype, context_name=ctx_name)(*out_shp)
-        desc = GpuDnnConvDesc(border_mode='valid', subsample=(1, 1, 1), dilation=(1, 1, 1),
+        desc = GpuDnnConvDesc(border_mode='valid', subsample=(1, 1, 1), dilation=dilation,
                               conv_mode=conv_mode, precision=precision)(kerns.shape)
         return GpuDnnConvGradI()(kerns, img, out, desc)
@@ -2888,6 +2888,215 @@ def local_abstractconv_cudnn(node):

         return local_abstractconv3d_cudnn_graph(node.op, ctx, node.inputs,
                                                 node.outputs)


+@local_optimizer([AbstractConv2d,
+                  AbstractConv2d_gradWeights,
+                  AbstractConv2d_gradInputs])
+def local_abstractconv_cudnn_alt(node):
+    if not isinstance(node.op, (AbstractConv2d,
+                                AbstractConv2d_gradWeights,
+                                AbstractConv2d_gradInputs)):
+        return
+
+    if version(raises=False) < 6000 and node.op.filter_dilation != (1, 1):
+        return None
+    inp1 = node.inputs[0]
+    inp2 = node.inputs[1]
+
+    if not dnn_available(inp1.type.context_name):
+        return
+
+    op = node.op
+    border_mode = node.op.border_mode
+    subsample = node.op.subsample
+    filter_dilation = node.op.filter_dilation
+    num_groups = node.op.num_groups
+    precision = get_precision(None, [inp1, inp2])
+
+    if node.op.filter_flip:
+        conv_mode = 'conv'
+    else:
+        conv_mode = 'cross'
+
+    if isinstance(op, AbstractConv2d):
+        if border_mode == 'half' or subsample != (1, 1) or num_groups != 1:
+            return None
+        if border_mode == 'full':
+            direction_hint = 'bprop inputs'
+        elif border_mode == 'valid' and filter_dilation == (1, 1):
+            direction_hint = 'bprop weights'
+        else:
+            return None
+
+        rval = dnn_conv(inp1, inp2,
+                        border_mode=border_mode,
+                        subsample=subsample,
+                        dilation=filter_dilation,
+                        direction_hint=direction_hint,
+                        conv_mode=conv_mode,
+                        num_groups=num_groups)
+    elif isinstance(op, AbstractConv2d_gradWeights):
+        if (border_mode == 'valid' and subsample == (1, 1) and
+                filter_dilation == (1, 1) and num_groups == 1):
+            img = gpu_contiguous(inp1)
+            topgrad = gpu_contiguous(inp2)
+            ctx_name = infer_context_name(img, topgrad)
+            img = gpu_contiguous(img.dimshuffle(1, 0, 2, 3))
+            topgrad = gpu_contiguous(topgrad.dimshuffle(1, 0, 2, 3))
+            ishape = [shape_i_op(i)(img) for i in range(img.ndim)]
+            tshape = [shape_i_op(i)(topgrad) for i in range(topgrad.ndim)]
+            out_shp = get_conv_output_shape(ishape, tshape,
+                                            border_mode=border_mode,
+                                            subsample=subsample,
+                                            filter_dilation=filter_dilation)
+
+            out_shp = assert_conv_shape(out_shp)
+            out = GpuAllocEmpty(dtype=img.dtype,
+                                context_name=ctx_name)(*out_shp)
+            desc = GpuDnnConvDesc(border_mode=border_mode,
+                                  subsample=subsample,
+                                  dilation=filter_dilation,
+                                  conv_mode='cross',
+                                  precision=precision)(out.shape)
+
+            conv = GpuDnnConv(algo=None,
+                              num_groups=num_groups)(img, topgrad, out, desc)
+            if conv_mode == 'conv':
+                conv = conv[:, :, ::-1, ::-1]
+
+            rval = as_gpuarray_variable(conv.dimshuffle(1, 0, 2, 3), ctx_name)
+        else:
+            return None
+    elif isinstance(op, AbstractConv2d_gradInputs):
+        if border_mode == 'valid' and subsample == (1, 1) and num_groups == 1:
+            kerns = gpu_contiguous(inp1.dimshuffle(1, 0, 2, 3))
+            topgrad = gpu_contiguous(inp2)
+            ctx_name = infer_context_name(kerns, topgrad)
+            conv_mode = 'cross' if conv_mode == 'conv' else 'conv'
+            desc = GpuDnnConvDesc(border_mode='full',
+                                  subsample=subsample,
+                                  dilation=filter_dilation,
+                                  conv_mode=conv_mode,
+                                  precision=precision)(kerns.shape)
+
+            tshape = [shape_i_op(i)(topgrad) for i in range(topgrad.ndim)]
+            kshape = [shape_i_op(i)(kerns) for i in range(kerns.ndim)]
+            shape = get_conv_output_shape(tshape, kshape,
+                                          border_mode='full',
+                                          subsample=subsample,
+                                          filter_dilation=filter_dilation)
+
+            shape = assert_conv_shape(shape)
+            out = GpuAllocEmpty(dtype=topgrad.dtype,
+                                context_name=ctx_name)(*shape)
+            rval = GpuDnnConv(algo=None,
+                              num_groups=num_groups)(topgrad, kerns, out, desc)
+        else:
+            return None
+
+    return [rval]
+
+
+@local_optimizer([AbstractConv3d,
+                  AbstractConv3d_gradWeights,
+                  AbstractConv3d_gradInputs])
+def local_abstractconv3d_cudnn_alt(node):
+    if not isinstance(node.op, (AbstractConv3d,
+                                AbstractConv3d_gradWeights,
+                                AbstractConv3d_gradInputs)):
+        return
+
+    if version(raises=False) < 6000 and node.op.filter_dilation != (1, 1, 1):
+        return None
+    inp1 = node.inputs[0]
+    inp2 = node.inputs[1]
+
+    if not dnn_available(inp1.type.context_name):
+        return
+
+    op = node.op
+    border_mode = node.op.border_mode
+    subsample = node.op.subsample
+    filter_dilation = node.op.filter_dilation
+    precision = get_precision(None, [inp1, inp2])
+
+    if node.op.filter_flip:
+        conv_mode = 'conv'
+    else:
+        conv_mode = 'cross'
+
+    if isinstance(op, AbstractConv3d):
+        if border_mode == 'half' or subsample != (1, 1, 1):
+            return None
+        if border_mode == 'full':
+            direction_hint = 'bprop inputs'
+        elif border_mode == 'valid' and filter_dilation == (1, 1, 1):
+            direction_hint = 'bprop weights'
+        else:
+            return None
+
+        rval = dnn_conv3d(inp1, inp2,
+                          border_mode=border_mode,
+                          subsample=subsample,
+                          dilation=filter_dilation,
+                          direction_hint=direction_hint,
+                          conv_mode=conv_mode)
+    elif isinstance(op, AbstractConv3d_gradWeights):
+        if (border_mode == 'valid' and subsample == (1, 1, 1) and
+                filter_dilation == (1, 1, 1)):
+            img = gpu_contiguous(inp1)
+            topgrad = gpu_contiguous(inp2)
+            ctx_name = infer_context_name(img, topgrad)
+            img = gpu_contiguous(img.dimshuffle(1, 0, 2, 3, 4))
+            topgrad = gpu_contiguous(topgrad.dimshuffle(1, 0, 2, 3, 4))
+            ishape = [shape_i_op(i)(img) for i in range(img.ndim)]
+            tshape = [shape_i_op(i)(topgrad) for i in range(topgrad.ndim)]
+            out_shp = get_conv_output_shape(ishape, tshape,
+                                            border_mode=border_mode,
+                                            subsample=subsample,
+                                            filter_dilation=filter_dilation)
+
+            out_shp = assert_conv_shape(out_shp)
+            out = GpuAllocEmpty(dtype=img.dtype,
+                                context_name=ctx_name)(*out_shp)
+            desc = GpuDnnConvDesc(border_mode=border_mode,
+                                  subsample=subsample,
+                                  dilation=filter_dilation,
+                                  conv_mode='cross',
+                                  precision=precision)(out.shape)
+
+            conv = GpuDnnConv(algo=None)(img, topgrad, out, desc)
+            if conv_mode == 'conv':
+                conv = conv[:, :, ::-1, ::-1, ::-1]
+
+            rval = as_gpuarray_variable(conv.dimshuffle(1, 0, 2, 3, 4),
+                                        ctx_name)
+        else:
+            return None
+    elif isinstance(op, AbstractConv3d_gradInputs):
+        if border_mode == 'valid' and subsample == (1, 1, 1):
+            kerns = gpu_contiguous(inp1.dimshuffle(1, 0, 2, 3, 4))
+            topgrad = gpu_contiguous(inp2)
+            ctx_name = infer_context_name(kerns, topgrad)
+            conv_mode = 'cross' if conv_mode == 'conv' else 'conv'
+            desc = GpuDnnConvDesc(border_mode='full',
+                                  subsample=subsample,
+                                  dilation=filter_dilation,
+                                  conv_mode=conv_mode,
+                                  precision=precision)(kerns.shape)
+
+            tshape = [shape_i_op(i)(topgrad) for i in range(topgrad.ndim)]
+            kshape = [shape_i_op(i)(kerns) for i in range(kerns.ndim)]
+            shape = get_conv_output_shape(tshape, kshape,
+                                          border_mode='full',
+                                          subsample=subsample,
+                                          filter_dilation=filter_dilation)
+
+            shape = assert_conv_shape(shape)
+            out = GpuAllocEmpty(dtype=topgrad.dtype,
+                                context_name=ctx_name)(*shape)
+            rval = GpuDnnConv(algo=None)(topgrad, kerns, out, desc)
+        else:
+            return None
+
+    return [rval]
+
+
 @local_optimizer([AbstractConv2d_gradWeights,
                   AbstractConv3d_gradWeights])
 def local_abstractconv_gw_cudnn(node):
     ctx = infer_context_name(*node.inputs)
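The gradWeights branches above rely on a channels-as-batch trick: dimshuffle(1, 0, 2, 3) swaps the batch and channel axes, which turns the weight gradient into an ordinary valid cross-correlation of the inputs with the output gradients. A shape-only sketch of why the result has the kernel's shape (example sizes; the actual values are checked by the tests below):

    b, c, f = 2, 3, 4                  # batch, input channels, output filters
    h, w, kh, kw = 5, 5, 3, 3
    ho, wo = h - kh + 1, w - kw + 1    # valid-mode forward output size

    # img viewed as (c, b, h, w), topgrad viewed as (f, b, ho, wo): a valid
    # cross-correlation gives (c, f, h - ho + 1, w - wo + 1) == (c, f, kh, kw);
    # a final dimshuffle(1, 0, 2, 3) restores the (f, c, kh, kw) weight layout.
    assert (h - ho + 1, w - wo + 1) == (kh, kw)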
theano/gpuarray/opt.py

@@ -15,6 +15,7 @@ from theano.compile.ops import shape_i

 from theano.gof import (local_optimizer, EquilibriumDB, TopoOptimizer,
                         LocalGroupDB,
                         SequenceDB, Optimizer, DB, toolbox, graph)
+from theano.gof.opt import LocalMetaOptimizer
 from theano.ifelse import IfElse
 from theano.misc.ordered_set import OrderedSet
@@ -23,7 +24,7 @@ from theano.scalar.basic import log, neg, true_div

 from theano.scalar.basic_scipy import Erfinv, Erfcinv
 from theano.scan_module import scan_utils, scan_op, scan_opt
-from theano.tensor.nnet import bn
+from theano.tensor.nnet import bn, conv3d2d
 from theano.tensor.nnet.conv import ConvOp
 from theano.tensor.nnet.blocksparse import SparseBlockGemv, SparseBlockOuter
 from theano.tensor.nnet.abstract_conv import (BaseAbstractConv,
@@ -32,12 +33,14 @@ from theano.tensor.nnet.abstract_conv import (BaseAbstractConv,
                                               AbstractConv2d_gradInputs,
                                               AbstractConv3d,
                                               AbstractConv3d_gradWeights,
-                                              AbstractConv3d_gradInputs)
+                                              AbstractConv3d_gradInputs,
+                                              get_conv_output_shape)
 from theano.tensor.nnet.neighbours import Images2Neibs
 from theano.tensor.nnet.ctc import ConnectionistTemporalClassification
 import theano.tensor.nlinalg as nlinalg
 import theano.tensor.signal.pool as pool
 import theano.tensor.slinalg as slinalg
+from collections import Counter

 from theano.tests.breakpoint import PdbBreakpoint
@@ -1625,6 +1628,49 @@ def local_abstractconv_gemm(node):

     return [rval]


+@local_optimizer([AbstractConv2d])
+def local_abstractconv_gemm_alt(node):
+    if not isinstance(node.op, AbstractConv2d):
+        return None
+    img, kern = node.inputs
+    if (not isinstance(img.type, GpuArrayType) or
+            not isinstance(kern.type, GpuArrayType)):
+        return None
+    ctx = infer_context_name(img, kern)
+
+    border_mode = node.op.border_mode
+    subsample = node.op.subsample
+    filter_dilation = node.op.filter_dilation
+    num_groups = node.op.num_groups
+
+    if border_mode == 'full' and subsample == (1, 1) and num_groups == 1:
+        if not node.op.filter_flip:
+            kern = kern[:, :, ::-1, ::-1]
+        kern = kern.dimshuffle(1, 0, 2, 3)
+        rval = GpuCorrMM_gradInputs('valid',
+                                    subsample,
+                                    filter_dilation)(gpu_contiguous(kern),
+                                                     gpu_contiguous(img))
+    elif (border_mode == 'valid' and subsample == (1, 1) and
+          filter_dilation == (1, 1) and num_groups == 1):
+        if node.op.filter_flip:
+            kern = kern[:, :, ::-1, ::-1]
+        rval = GpuCorrMM_gradWeights(border_mode,
+                                     subsample,
+                                     filter_dilation)(
+            gpu_contiguous(img.dimshuffle(1, 0, 2, 3)),
+            gpu_contiguous(kern.dimshuffle(1, 0, 2, 3)))
+        rval = as_gpuarray_variable(rval.dimshuffle(1, 0, 2, 3),
+                                    context_name=ctx)
+    else:
+        return None
+
+    return [rval]
+
+
 @local_optimizer([AbstractConv3d])
 def local_abstractconv3d_gemm(node):
     if not isinstance(node.op, AbstractConv3d):
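local_abstractconv_gemm_alt uses the complementary trick for the forward pass: a 'full' convolution of length i + k - 1 is exactly the input-gradient of a 'valid' convolution whose output has length i. A one-line sanity check of that size relation (example values only):

    i, k = 5, 3
    full_out = i + k - 1           # 'full' forward output length
    assert full_out - k + 1 == i   # GpuCorrMM_gradInputs reconstructs an
                                   # "input" whose valid output length is i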
@@ -1694,6 +1740,74 @@ def local_abstractconv3d_gemm(node):

     return [rval]


+@local_optimizer([AbstractConv3d])
+def local_abstractconv3d_alt(node):
+    if not isinstance(node.op, AbstractConv3d):
+        return None
+    img, kern = node.inputs
+    if (not isinstance(img.type, GpuArrayType) or
+            not isinstance(kern.type, GpuArrayType)):
+        return None
+    ctx = infer_context_name(img, kern)
+
+    border_mode = node.op.border_mode
+    subsample = node.op.subsample
+    filter_dilation = node.op.filter_dilation
+
+    if border_mode == 'full' and subsample == (1, 1, 1):
+        if not node.op.filter_flip:
+            kern = kern[:, :, ::-1, ::-1, ::-1]
+        kern = kern.dimshuffle(1, 0, 2, 3, 4)
+        rval = GpuCorr3dMM_gradInputs('valid',
+                                      subsample,
+                                      filter_dilation)(gpu_contiguous(kern),
+                                                       gpu_contiguous(img))
+    elif (subsample == (1, 1, 1) and filter_dilation == (1, 1, 1) and
+          border_mode == 'valid'):
+        if node.op.filter_flip:
+            kern = kern[:, :, ::-1, ::-1, ::-1]
+        rval = GpuCorr3dMM_gradWeights(border_mode,
+                                       subsample,
+                                       filter_dilation)(
+            gpu_contiguous(img.dimshuffle(1, 0, 2, 3, 4)),
+            gpu_contiguous(kern.dimshuffle(1, 0, 2, 3, 4)))
+        rval = as_gpuarray_variable(rval.dimshuffle(1, 0, 2, 3, 4),
+                                    context_name=ctx)
+    else:
+        return None
+
+    return [rval]
+
+
+@local_optimizer([AbstractConv3d])
+def local_abstractconv3d2d(node):
+    if not isinstance(node.op, AbstractConv3d):
+        return None
+    img, kern = node.inputs
+    if (not isinstance(img.type, GpuArrayType) or
+            not isinstance(kern.type, GpuArrayType)):
+        return None
+    ctx = infer_context_name(img, kern)
+
+    border_mode = node.op.border_mode
+    subsample = node.op.subsample
+    filter_dilation = node.op.filter_dilation
+
+    if subsample == (1, 1, 1) and filter_dilation == (1, 1, 1):
+        reorder_array = [0, 2, 1, 3, 4]
+        rval = conv3d2d.conv3d(gpu_contiguous(img.dimshuffle(*reorder_array)),
+                               gpu_contiguous(kern.dimshuffle(*reorder_array)),
+                               [node.op.imshp[i] for i in reorder_array],
+                               [node.op.kshp[i] for i in reorder_array],
+                               border_mode=border_mode)
+        rval = as_gpuarray_variable(rval.dimshuffle(*reorder_array),
+                                    context_name=ctx)
+        return [rval]
+    else:
+        return None
+
+
 @local_optimizer([AbstractConv2d_gradWeights])
 def local_abstractconv_gradweights_gemm(node):
     if not isinstance(node.op, AbstractConv2d_gradWeights):
@@ -1716,6 +1830,70 @@ def local_abstractconv_gradweights_gemm(node):

     return [rval]


+@local_optimizer([AbstractConv2d_gradWeights])
+def local_abstractconv_gemm_gradweights_alt(node):
+    if not isinstance(node.op, AbstractConv2d_gradWeights):
+        return None
+    img, topgrad, shape = node.inputs
+    if not isinstance(img.type, GpuArrayType) or \
+            not isinstance(topgrad.type, GpuArrayType):
+        return None
+    ctx = infer_context_name(img, topgrad)
+    border_mode = node.op.border_mode
+    subsample = node.op.subsample
+    filter_dilation = node.op.filter_dilation
+    num_groups = node.op.num_groups
+
+    if (border_mode == 'valid' and subsample == (1, 1) and
+            filter_dilation == (1, 1) and num_groups == 1):
+        rval = GpuCorrMM(border_mode,
+                         subsample,
+                         filter_dilation)(
+            gpu_contiguous(img.dimshuffle(1, 0, 2, 3)),
+            gpu_contiguous(topgrad.dimshuffle(1, 0, 2, 3)))
+
+        if node.op.filter_flip:
+            rval = rval[:, :, ::-1, ::-1]
+
+        rval = rval.dimshuffle(1, 0, 2, 3)
+        rval = tensor.patternbroadcast(rval,
+                                       node.outputs[0].broadcastable)
+        rval = as_gpuarray_variable(rval, context_name=ctx)
+        return [rval]
+    else:
+        return None
+
+
+@local_optimizer([AbstractConv3d_gradWeights])
+def local_abstractconv3d_gemm_gradweights_alt(node):
+    if not isinstance(node.op, AbstractConv3d_gradWeights):
+        return None
+    img, topgrad, shape = node.inputs
+    if not isinstance(img.type, GpuArrayType) or \
+            not isinstance(topgrad.type, GpuArrayType):
+        return None
+    ctx = infer_context_name(img, topgrad)
+    border_mode = node.op.border_mode
+    subsample = node.op.subsample
+    filter_dilation = node.op.filter_dilation
+
+    if (border_mode == 'valid' and subsample == (1, 1, 1) and
+            filter_dilation == (1, 1, 1)):
+        rval = GpuCorr3dMM(border_mode,
+                           subsample,
+                           filter_dilation)(
+            gpu_contiguous(img.dimshuffle(1, 0, 2, 3, 4)),
+            gpu_contiguous(topgrad.dimshuffle(1, 0, 2, 3, 4)))
+
+        if node.op.filter_flip:
+            rval = rval[:, :, ::-1, ::-1, ::-1]
+
+        rval = rval.dimshuffle(1, 0, 2, 3, 4)
+        rval = tensor.patternbroadcast(rval,
+                                       node.outputs[0].broadcastable)
+        rval = as_gpuarray_variable(rval, context_name=ctx)
+        return [rval]
+    else:
+        return None
+
+
 @local_optimizer([AbstractConv3d_gradWeights])
 def local_abstractconv3d_gradweights_gemm(node):
     if not isinstance(node.op, AbstractConv3d_gradWeights):
@@ -1757,6 +1935,33 @@ def local_abstractconv_gradinputs_gemm(node):

     return [rval]


+@local_optimizer([AbstractConv2d_gradInputs])
+def local_abstractconv_gradinputs_gemm_alt(node):
+    if not isinstance(node.op, AbstractConv2d_gradInputs):
+        return None
+    kern, topgrad, shape = node.inputs
+    if not isinstance(kern.type, GpuArrayType) or \
+            not isinstance(topgrad.type, GpuArrayType):
+        return None
+    border_mode = node.op.border_mode
+    subsample = node.op.subsample
+    filter_dilation = node.op.filter_dilation
+    num_groups = node.op.num_groups
+
+    if border_mode == 'valid' and subsample == (1, 1) and num_groups == 1:
+        if not node.op.filter_flip:
+            kern = kern[:, :, ::-1, ::-1]
+        rval = GpuCorrMM(border_mode='full',
+                         subsample=subsample,
+                         filter_dilation=filter_dilation)(
+            gpu_contiguous(topgrad),
+            gpu_contiguous(kern.dimshuffle(1, 0, 2, 3)))
+        return [rval]
+    else:
+        return None
+
+
 @local_optimizer([AbstractConv3d_gradInputs])
 def local_abstractconv3d_gradinputs_gemm(node):
     if not isinstance(node.op, AbstractConv3d_gradInputs):
@@ -1776,6 +1981,111 @@ def local_abstractconv3d_gradinputs_gemm(node):

     return [rval]


+@local_optimizer([AbstractConv3d_gradInputs])
+def local_abstractconv3d_gradinputs_gemm_alt(node):
+    if not isinstance(node.op, AbstractConv3d_gradInputs):
+        return None
+    kern, topgrad, shape = node.inputs
+    if not isinstance(kern.type, GpuArrayType) or \
+            not isinstance(topgrad.type, GpuArrayType):
+        return None
+    border_mode = node.op.border_mode
+    subsample = node.op.subsample
+    filter_dilation = node.op.filter_dilation
+
+    if border_mode == 'valid' and subsample == (1, 1, 1):
+        if not node.op.filter_flip:
+            kern = kern[:, :, ::-1, ::-1, ::-1]
+        rval = GpuCorr3dMM(border_mode='full',
+                           subsample=subsample,
+                           filter_dilation=filter_dilation)(
+            gpu_contiguous(topgrad),
+            gpu_contiguous(kern.dimshuffle(1, 0, 2, 3, 4)))
+        return [rval]
+    else:
+        return None
+
+
+class ConvMetaOptimizer(LocalMetaOptimizer):
+
+    def __init__(self):
+        super(ConvMetaOptimizer, self).__init__()
+
+    def time_call(self, fn):
+        start = time.time()
+        fn()[0].sync()
+        return time.time() - start
+
+    def provide_inputs(self, node, inputs):
+        result = {}
+        shapes = (node.op.imshp, node.op.kshp)
+        if (node.op.imshp is None or node.op.kshp is None or
+                any([s is None for shape in shapes for s in shape])):
+            return result
+
+        if type(node.op) in [AbstractConv2d, AbstractConv3d]:
+            img, kern = node.inputs
+            for (var, shape) in zip((img, kern), shapes):
+                result[var] = theano.shared(
+                    np.random.random(shape).astype(var.dtype),
+                    var.name,
+                    broadcastable=var.broadcastable,
+                    borrow=True)
+
+        if type(node.op) in [AbstractConv2d_gradWeights,
+                             AbstractConv3d_gradWeights]:
+            img, top, kshape = node.inputs
+            tshp = get_conv_output_shape(node.op.imshp,
+                                         node.op.kshp,
+                                         node.op.border_mode,
+                                         node.op.subsample,
+                                         node.op.filter_dilation)
+            result[kshape] = theano.tensor.as_tensor_variable(
+                node.op.kshp[2:])
+            for (var, shape) in zip((img, top), (node.op.imshp, tshp)):
+                result[var] = theano.shared(
+                    np.random.random(shape).astype(var.dtype),
+                    var.name,
+                    broadcastable=var.broadcastable,
+                    borrow=True)
+
+        if type(node.op) in [AbstractConv2d_gradInputs,
+                             AbstractConv3d_gradInputs]:
+            kern, top, ishape = node.inputs
+            tshp = get_conv_output_shape(node.op.imshp,
+                                         node.op.kshp,
+                                         node.op.border_mode,
+                                         node.op.subsample,
+                                         node.op.filter_dilation)
+            result[ishape] = theano.tensor.as_tensor_variable(
+                node.op.imshp[2:])
+            for (var, shape) in zip((kern, top), (node.op.kshp, tshp)):
+                result[var] = theano.shared(
+                    np.random.random(shape).astype(var.dtype),
+                    var.name,
+                    broadcastable=var.broadcastable,
+                    borrow=True)
+
+        return result
+
+    def get_opts(self, node):
+        opts = Counter([opt for opt in self.track_dict[type(node.op)]
+                        if opt in self.tag_dict['default']])
+        include_tags = config.metaopt.optimizer_including.split(':')
+        exclude_tags = config.metaopt.optimizer_excluding.split(':')
+
+        for in_opt in include_tags:
+            opts.update([opt for opt in self.track_dict[type(node.op)]
+                         if opt in self.tag_dict[in_opt]])
+
+        for ex_opt in exclude_tags:
+            opts.subtract([opt for opt in self.track_dict[type(node.op)]
+                           if opt in self.tag_dict[ex_opt]])
+
+        opts = list(opts + Counter())
+        return opts
+
+
 # This deals with any abstract convs that have a transfer somewhere
 @register_opt('fast_compile', 'conv_dnn', 'cudnn')
 @op_lifter([AbstractConv2d,
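get_opts() resolves the candidate set with Counter arithmetic: start from the 'default'-tagged opts, update() once per included tag, subtract() once per excluded tag, and keep whatever stays positive (adding an empty Counter drops counts of zero or less). A self-contained sketch with hypothetical opt names:

    from collections import Counter

    opts = Counter(['local_abstractconv_cudnn', 'local_abstractconv_gemm'])
    opts.update(['local_abstractconv_gemm_alt'])    # carried an included tag
    opts.subtract(['local_abstractconv_cudnn'])     # carried an excluded tag
    print(list(opts + Counter()))
    # -> ['local_abstractconv_gemm', 'local_abstractconv_gemm_alt']
    # (order may vary; the excluded opt's count fell to 0, so it was dropped)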
@@ -2354,8 +2664,12 @@ register_opt('fast_compile')(abstractconv_groupopt)

 # We import these opts here instead of at the top of this file
 # to avoid a circular dependency problem with dnn
-from .dnn import (local_abstractconv_cudnn, local_abstractconv_gw_cudnn,
-                  local_abstractconv_gi_cudnn)      # noqa: 402
+from .dnn import (local_abstractconv_cudnn, local_abstractconv_gw_cudnn,
+                  local_abstractconv_gi_cudnn,      # noqa: 402
+                  local_abstractconv_cudnn_alt,
+                  local_abstractconv3d_cudnn_alt)

 abstractconv_groupopt.register('local_abstractconv_dnn',
                                local_abstractconv_cudnn, 20,
                                'conv_dnn',
@@ -2393,6 +2707,46 @@ abstractconv_groupopt.register('local_abstractconv3d_gradinputs',

                                'conv_gemm', 'gpuarray',
                                'fast_compile', 'fast_run')

+conv_metaopt = ConvMetaOptimizer()
+
+conv_metaopt.register(local_abstractconv_cudnn,
+                      ['default', 'cudnn', 'conv_dnn'])
+conv_metaopt.register(local_abstractconv_gw_cudnn,
+                      ['default', 'cudnn', 'conv_dnn'])
+conv_metaopt.register(local_abstractconv_gi_cudnn,
+                      ['default', 'cudnn', 'conv_dnn'])
+conv_metaopt.register(local_abstractconv_gemm,
+                      ['default', 'conv_gemm'])
+conv_metaopt.register(local_abstractconv3d_gemm,
+                      ['default', 'conv_gemm'])
+conv_metaopt.register(local_abstractconv_gradweights_gemm,
+                      ['default', 'conv_gemm'])
+conv_metaopt.register(local_abstractconv3d_gradweights_gemm,
+                      ['default', 'conv_gemm'])
+conv_metaopt.register(local_abstractconv_gradinputs_gemm,
+                      ['default', 'conv_gemm'])
+conv_metaopt.register(local_abstractconv3d_gradinputs_gemm,
+                      ['default', 'conv_gemm'])
+conv_metaopt.register(local_abstractconv_gemm_alt,
+                      ['default', 'alternative', 'conv_gemm'])
+conv_metaopt.register(local_abstractconv_gemm_gradweights_alt,
+                      ['default', 'alternative', 'conv_gemm'])
+conv_metaopt.register(local_abstractconv_gradinputs_gemm_alt,
+                      ['default', 'alternative', 'conv_gemm'])
+conv_metaopt.register(local_abstractconv_cudnn_alt,
+                      ['default', 'alternative', 'cudnn', 'conv_dnn'])
+conv_metaopt.register(local_abstractconv3d_cudnn_alt,
+                      ['default', 'alternative', 'cudnn', 'conv_dnn'])
+conv_metaopt.register(local_abstractconv3d_alt,
+                      ['default', 'alternative', 'conv_gemm'])
+conv_metaopt.register(local_abstractconv3d_gemm_gradweights_alt,
+                      ['default', 'alternative', 'conv_gemm'])
+conv_metaopt.register(local_abstractconv3d_gradinputs_gemm_alt,
+                      ['default', 'alternative', 'conv_gemm'])
+conv_metaopt.register(local_abstractconv3d2d,
+                      ['alternative', 'conv3d2d'])
+
+abstractconv_groupopt.register('conv_metaopt',
+                               conv_metaopt, 'conv_meta',
+                               position=0)
+
 # Register cuDNN batch normalization implementation
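With the registrations in place, the meta-optimizer runs whenever a compilation mode includes the 'conv_meta' tag and the convolution shapes are fully known. A usage sketch (requires a configured GPU context; shapes are example values):

    import theano
    import theano.tensor as T

    x = T.tensor4('x')
    w = T.tensor4('w')
    # Static shapes are required so provide_inputs() can build test data.
    y = T.nnet.conv2d(x, w, input_shape=(8, 3, 32, 32),
                      filter_shape=(16, 3, 5, 5))

    mode = theano.compile.get_default_mode().including('conv_meta')
    f = theano.function([x, w], y, mode=mode)  # candidates timed at compile time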
theano/gpuarray/tests/test_opt.py

@@ -22,6 +22,9 @@ from ..subtensor import GpuSubtensor

 from ..linalg import GpuCusolverSolve, cusolver_available, GpuCholesky
 from .config import mode_with_gpu, mode_without_gpu, test_ctx_name, SkipTest
+import unittest
+from theano.tensor.nnet import abstract_conv
+from theano.gpuarray import dnn, blas


 def test_local_assert():
@@ -699,3 +702,200 @@ def test_crossentropycategorical1hot_lifter():

            for n in f.maker.fgraph.apply_nodes)
     f(rng.uniform(0.1, 0.9, (13, 5)).astype(theano.config.floatX),
       rng.randint(5, size=(13,)))

+
+class Conv_opt_test(unittest.TestCase):
+
+    def optimizer_2d(self, input_shapes, direction, include_tags,
+                     exclude_tags, op, border_mode='valid',
+                     subsample=(1, 1), filter_dilation=(1, 1)):
+        inp1 = theano.shared(np.random.random(input_shapes[0]).astype(
+            theano.config.floatX))
+        inp2 = theano.shared(np.random.random(input_shapes[1]).astype(
+            theano.config.floatX))
+        if direction == 0:
+            conv_op = abstract_conv.conv2d(inp1, inp2,
+                                           input_shapes[0], input_shapes[1],
+                                           border_mode=border_mode,
+                                           subsample=subsample,
+                                           filter_dilation=filter_dilation)
+        if direction == 1:
+            conv_op = abstract_conv.conv2d_grad_wrt_weights(
+                inp1, inp2, input_shapes[2], input_shapes[0],
+                border_mode=border_mode, subsample=subsample,
+                filter_dilation=filter_dilation)
+        if direction == 2:
+            conv_op = abstract_conv.conv2d_grad_wrt_inputs(
+                inp1, inp2, input_shapes[2], input_shapes[1],
+                border_mode=border_mode, subsample=subsample,
+                filter_dilation=filter_dilation)
+
+        theano.config.metaopt.optimizer_including = include_tags
+        theano.config.metaopt.optimizer_excluding = exclude_tags
+        mode = mode_with_gpu.including('conv_meta')
+
+        ref_func = theano.function([], conv_op, mode=mode_with_gpu)
+        conv_func = theano.function([], conv_op, mode=mode)
+        assert any([isinstance(node.op, op)
+                    for node in conv_func.maker.fgraph.toposort()])
+        utt.assert_allclose(conv_func(), ref_func())
+
+    def optimizer_3d(self, input_shapes, direction, include_tags,
+                     exclude_tags, op, border_mode='valid',
+                     subsample=(1, 1, 1), filter_dilation=(1, 1, 1)):
+        inp1 = theano.shared(np.random.random(input_shapes[0]).astype(
+            theano.config.floatX))
+        inp2 = theano.shared(np.random.random(input_shapes[1]).astype(
+            theano.config.floatX))
+        if direction == 0:
+            conv_op = abstract_conv.conv3d(inp1, inp2,
+                                           input_shapes[0], input_shapes[1],
+                                           border_mode=border_mode,
+                                           subsample=subsample,
+                                           filter_dilation=filter_dilation)
+        if direction == 1:
+            conv_op = abstract_conv.conv3d_grad_wrt_weights(
+                inp1, inp2, input_shapes[2], input_shapes[0],
+                border_mode=border_mode, subsample=subsample,
+                filter_dilation=filter_dilation)
+        if direction == 2:
+            conv_op = abstract_conv.conv3d_grad_wrt_inputs(
+                inp1, inp2, input_shapes[2], input_shapes[1],
+                border_mode=border_mode, subsample=subsample,
+                filter_dilation=filter_dilation)
+
+        theano.config.metaopt.optimizer_including = include_tags
+        theano.config.metaopt.optimizer_excluding = exclude_tags
+        mode = mode_with_gpu.including('conv_meta')
+
+        ref_func = theano.function([], conv_op, mode=mode_with_gpu)
+        conv_func = theano.function([], conv_op, mode=mode)
+        if op is not None:
+            assert any([isinstance(node.op, op)
+                        for node in conv_func.maker.fgraph.toposort()])
+        utt.assert_allclose(conv_func(), ref_func())
+
+    def test_optimizers(self):
+        imshp2d = [(2, 3, 5, 5), (2, 2, 5, 7), (2, 1, 3, 3)]
+        kshp2d = [(4, 3, 3, 3), (3, 2, 3, 5), (4, 1, 1, 1)]
+        tshp2d = [(2, 4, 3, 3), (2, 3, 3, 3), (2, 4, 3, 3)]
+
+        for imshp, kshp, tshp in zip(imshp2d, kshp2d, tshp2d):
+            # forward passes
+            self.optimizer_2d([imshp, kshp, tshp], 0, 'alternative',
+                              'conv_dnn:default', blas.GpuCorrMM_gradWeights)
+            self.optimizer_2d([imshp, kshp, tshp], 0, 'alternative',
+                              'conv_gemm:default', dnn.GpuDnnConvGradW)
+            # backwards wrt weights
+            self.optimizer_2d([imshp, tshp, kshp], 1, 'alternative',
+                              'conv_dnn:default', blas.GpuCorrMM)
+            self.optimizer_2d([imshp, tshp, kshp], 1, 'alternative',
+                              'conv_gemm:default', dnn.GpuDnnConv)
+            # backwards wrt inputs
+            self.optimizer_2d([tshp, kshp, imshp], 2, 'alternative',
+                              'conv_dnn:default', blas.GpuCorrMM)
+            self.optimizer_2d([tshp, kshp, imshp], 2, 'alternative',
+                              'conv_gemm:default', dnn.GpuDnnConv)
+
+        imshp3d = [(2, 3, 5, 5, 5), (2, 2, 5, 7, 5), (2, 1, 3, 3, 3)]
+        kshp3d = [(4, 3, 3, 3, 3), (3, 2, 3, 5, 3), (4, 1, 1, 1, 1)]
+        tshp3d = [(2, 4, 3, 3, 3), (2, 3, 3, 3, 3), (2, 4, 3, 3, 3)]
+
+        for imshp, kshp, tshp in zip(imshp3d, kshp3d, tshp3d):
+            # forward passes
+            self.optimizer_3d([imshp, kshp, tshp], 0, 'alternative',
+                              'conv_dnn:default:conv3d2d',
+                              blas.GpuCorr3dMM_gradWeights)
+            self.optimizer_3d([imshp, kshp, tshp], 0, 'conv3d2d',
+                              'default', None)
+            self.optimizer_3d([imshp, kshp, tshp], 0, 'alternative',
+                              'conv_gemm:default:conv3d2d',
+                              dnn.GpuDnnConvGradW)
+            # backward pass wrt weights
+            self.optimizer_3d([imshp, tshp, kshp], 1, 'alternative',
+                              'conv_dnn:default', blas.GpuCorr3dMM)
+            self.optimizer_3d([imshp, tshp, kshp], 1, 'alternative',
+                              'conv_gemm:default', dnn.GpuDnnConv)
+            # backward pass wrt inputs
+            self.optimizer_3d([tshp, kshp, imshp], 2, 'alternative',
+                              'conv_dnn:default', blas.GpuCorr3dMM)
+            self.optimizer_3d([tshp, kshp, imshp], 2, 'alternative',
+                              'conv_gemm:default', dnn.GpuDnnConv)
+
+        # conv2d forward pass with non-default border_mode and filter_dilation
+        imshp2d = [(2, 3, 5, 5), (4, 2, 5, 5)]
+        kshp2d = [(4, 3, 3, 3), (3, 2, 3, 3)]
+        filter_dilation = [(1, 1), (2, 2)]
+
+        for imshp, kshp, fdil in zip(imshp2d, kshp2d, filter_dilation):
+            self.optimizer_2d([imshp, kshp], 0, 'alternative',
+                              'conv_dnn:default', blas.GpuCorrMM_gradInputs,
+                              border_mode='full', filter_dilation=fdil)
+            # works only for cudnn > 6.0
+            self.optimizer_2d([imshp, kshp], 0, 'alternative',
+                              'conv_gemm:default', dnn.GpuDnnConvGradI,
+                              border_mode='full', filter_dilation=fdil)
+
+        # conv3d forward pass with non-default border_mode and filter_dilation
+        imshp3d = [(2, 3, 5, 5, 5), (4, 2, 5, 5, 5)]
+        kshp3d = [(4, 3, 3, 3, 3), (3, 2, 3, 3, 3)]
+        filter_dilation = [(1, 1, 1), (2, 2, 2)]
+
+        for imshp, kshp, fdil in zip(imshp3d, kshp3d, filter_dilation):
+            self.optimizer_3d([imshp, kshp], 0, 'alternative',
+                              'conv_dnn:default:conv3d2d',
+                              blas.GpuCorr3dMM_gradInputs,
+                              border_mode='full', filter_dilation=fdil)
+            # works only for cudnn > 6.0
+            self.optimizer_3d([imshp, kshp], 0, 'alternative',
+                              'conv_gemm:default:conv3d2d',
+                              dnn.GpuDnnConvGradI,
+                              border_mode='full', filter_dilation=fdil)