Skip to content
项目
群组
代码片段
帮助
当前项目
正在载入...
登录 / 注册
切换导航面板
P
pytensor
项目
项目
详情
活动
周期分析
仓库
仓库
文件
提交
分支
标签
贡献者
图表
比较
统计图
议题
0
议题
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
CI / CD
CI / CD
流水线
作业
日程
统计图
Wiki
Wiki
代码片段
代码片段
成员
成员
折叠边栏
关闭边栏
活动
图像
聊天
创建新问题
作业
提交
问题看板
Open sidebar
testgroup
pytensor
Commits
3c70348f
提交
3c70348f
authored
6月 02, 2016
作者:
Frédéric Bastien
浏览文件
操作
浏览文件
下载
差异文件
Merge pull request #4524 from nouiz/opt
Opt related changes.
上级
0d844076
96430899
隐藏空白字符变更
内嵌
并排
正在显示
10 个修改的文件
包含
136 行增加
和
54 行删除
+136
-54
profiling.py
theano/compile/profiling.py
+9
-4
opt.py
theano/gof/opt.py
+67
-23
optdb.py
theano/gof/optdb.py
+12
-1
dnn.py
theano/gpuarray/dnn.py
+7
-7
opt.py
theano/gpuarray/opt.py
+8
-8
check_blas.py
theano/misc/check_blas.py
+1
-0
dnn.py
theano/sandbox/cuda/dnn.py
+2
-1
opt.py
theano/sandbox/cuda/opt.py
+9
-9
blas.py
theano/tensor/blas.py
+2
-1
opt.py
theano/tensor/opt.py
+19
-0
没有找到文件。
theano/compile/profiling.py
浏览文件 @
3c70348f
...
...
@@ -84,10 +84,15 @@ def _atexit_print_fn():
cum_attr
[
key
]
=
val
if
cum
.
optimizer_profile
and
ps
.
optimizer_profile
:
merge
=
cum
.
optimizer_profile
[
0
]
.
merge_profile
(
cum
.
optimizer_profile
[
1
],
ps
.
optimizer_profile
[
1
])
cum
.
optimizer_profile
=
(
cum
.
optimizer_profile
[
0
],
merge
)
try
:
merge
=
cum
.
optimizer_profile
[
0
]
.
merge_profile
(
cum
.
optimizer_profile
[
1
],
ps
.
optimizer_profile
[
1
])
cum
.
optimizer_profile
=
(
cum
.
optimizer_profile
[
0
],
merge
)
except
Exception
as
e
:
print
(
"Got an exception while merging profile"
)
print
(
e
)
cum
.
optimizer_profile
=
None
else
:
cum
.
optimizer_profile
=
None
...
...
theano/gof/opt.py
浏览文件 @
3c70348f
...
...
@@ -220,8 +220,10 @@ class SeqOptimizer(Optimizer, list):
if
fgraph
.
profile
:
validate_before
=
fgraph
.
profile
.
validate_time
sub_validate_time
=
[
validate_before
]
callbacks_before
=
fgraph
.
execute_callbacks_times
.
copy
()
else
:
sub_validate_time
=
[]
callbacks_before
=
[]
callback_before
=
fgraph
.
execute_callbacks_time
nb_node_before
=
len
(
fgraph
.
apply_nodes
)
sub_profs
=
[]
...
...
@@ -249,12 +251,22 @@ class SeqOptimizer(Optimizer, list):
if
fgraph
.
profile
:
validate_time
=
fgraph
.
profile
.
validate_time
-
validate_before
callbacks_time
=
{}
for
k
,
v
in
iteritems
(
fgraph
.
execute_callbacks_times
):
if
k
in
callbacks_before
:
t
=
v
-
callbacks_before
[
k
]
if
t
>
0
:
callbacks_time
[
k
]
=
t
else
:
callbacks_time
[
k
]
=
v
else
:
validate_time
=
None
callbacks_time
=
{}
callback_time
=
fgraph
.
execute_callbacks_time
-
callback_before
return
(
self
,
l
,
validate_time
,
callback_time
,
nb_node_before
,
len
(
fgraph
.
apply_nodes
),
sub_profs
,
sub_validate_time
,
nb_nodes
)
nb_nodes
,
callbacks_time
)
def
__str__
(
self
):
return
"SeqOpt(
%
s)"
%
list
.
__str__
(
self
)
...
...
@@ -274,8 +286,9 @@ class SeqOptimizer(Optimizer, list):
@staticmethod
def
print_profile
(
stream
,
prof
,
level
=
0
):
(
opts
,
prof
,
validate_time
,
callback_time
,
nb_node_before
,
nb_node_after
,
sub_profs
,
sub_validate_time
,
nb_nodes
)
=
prof
(
opts
,
prof
,
validate_time
,
callback_time
,
nb_node_before
,
nb_node_after
,
sub_profs
,
sub_validate_time
,
nb_nodes
,
callbacks_time
)
=
prof
blanc
=
(
' '
*
level
)
print
(
blanc
,
"SeqOptimizer"
,
end
=
' '
,
file
=
stream
)
...
...
@@ -287,9 +300,20 @@ class SeqOptimizer(Optimizer, list):
" before/after optimization"
%
(
sum
(
prof
),
nb_node_before
,
nb_node_after
)),
file
=
stream
)
print
(
blanc
,
"
%.3
fs for callback"
%
(
callback_time
),
file
=
stream
)
print
(
blanc
,
"
%.3
fs for fgraph.validate()"
%
(
validate_time
),
file
=
stream
)
print
(
blanc
,
"
%.3
fs for fgraph.validate()"
%
(
validate_time
),
file
=
stream
)
if
callback_time
>
1
:
print
(
blanc
,
" callbacks_time"
,
file
=
stream
)
for
i
in
sorted
(
iteritems
(
callbacks_time
),
key
=
lambda
a
:
-
a
[
1
]):
if
i
[
1
]
>
0
:
# We want to have the __str__ called, so we can't
# just print i.
print
(
blanc
,
" "
,
i
[
0
],
','
,
i
[
1
],
file
=
stream
)
if
level
==
0
:
print
(
blanc
,
" time - (name, class, index, nodes before, nodes after) - validate time"
,
file
=
stream
)
print
(
blanc
,
" time - (name, class, index, nodes before, nodes after) - validate time"
,
file
=
stream
)
ll
=
[]
for
opt
in
opts
:
if
hasattr
(
opt
,
"__name__"
):
...
...
@@ -298,7 +322,7 @@ class SeqOptimizer(Optimizer, list):
name
=
opt
.
name
idx
=
opts
.
index
(
opt
)
ll
.
append
((
name
,
opt
.
__class__
.
__name__
,
idx
)
+
nb_nodes
[
idx
]
)
idx
))
lll
=
sorted
(
zip
(
prof
,
ll
,
nb_nodes
),
key
=
lambda
a
:
a
[
0
])
for
(
t
,
opt
,
nb_n
)
in
lll
[::
-
1
]:
...
...
@@ -375,6 +399,7 @@ class SeqOptimizer(Optimizer, list):
new_sub_profile
.
append
(
p
[
6
][
idx
])
new_opt
=
SeqOptimizer
(
*
new_l
)
new_callbacks_times
=
merge_dict
(
prof1
[
9
],
prof2
[
9
])
# We need to assert based on the name as we merge also based on
# the name.
assert
set
([
l
.
name
for
l
in
prof1
[
0
]])
.
issubset
(
...
...
@@ -384,7 +409,8 @@ class SeqOptimizer(Optimizer, list):
assert
len
(
new_t
)
==
len
(
new_opt
)
==
len
(
new_sub_profile
)
return
(
new_opt
,
new_t
,
prof1
[
2
]
+
prof2
[
2
],
prof1
[
3
]
+
prof2
[
3
],
-
1
,
-
1
,
new_sub_profile
,
[])
-
1
,
-
1
,
new_sub_profile
,
[],
new_callbacks_times
)
class
_metadict
:
...
...
@@ -838,7 +864,9 @@ class MergeOptimizer(Optimizer):
callbacks_time
=
{}
for
k
,
v
in
iteritems
(
fgraph
.
execute_callbacks_times
):
if
k
in
callbacks_before
:
callbacks_time
[
k
]
=
v
-
callbacks_before
[
k
]
t
=
v
-
callbacks_before
[
k
]
if
t
>
0
:
callbacks_time
[
k
]
=
t
else
:
callbacks_time
[
k
]
=
v
else
:
...
...
@@ -868,7 +896,9 @@ class MergeOptimizer(Optimizer):
print
(
blanc
,
" callbacks_time"
,
file
=
stream
)
for
i
in
sorted
(
iteritems
(
callbacks_time
),
key
=
lambda
a
:
a
[
1
]):
if
i
[
1
]
>
0
:
print
(
i
)
# We want to have the __str__ called, so we can't
# just print i.
print
(
blanc
,
" "
,
i
[
0
],
','
,
i
[
1
],
file
=
stream
)
@staticmethod
def
merge_profile
(
prof1
,
prof2
):
...
...
@@ -1591,10 +1621,14 @@ class PatternSub(LocalOptimizer):
# Use the following classes to apply LocalOptimizers
class
Updater
:
def
__init__
(
self
,
importer
,
pruner
,
chin
):
def
__init__
(
self
,
importer
,
pruner
,
chin
,
name
=
None
):
self
.
importer
=
importer
self
.
pruner
=
pruner
self
.
chin
=
chin
self
.
name
=
name
def
__str__
(
self
):
return
"Updater{
%
s}"
%
str
(
self
.
name
)
def
on_import
(
self
,
fgraph
,
node
,
reason
):
if
self
.
importer
:
...
...
@@ -1694,7 +1728,7 @@ class NavigatorOptimizer(Optimizer):
self
.
ignore_newtrees
=
ignore_newtrees
self
.
failure_callback
=
failure_callback
def
attach_updater
(
self
,
fgraph
,
importer
,
pruner
,
chin
=
None
):
def
attach_updater
(
self
,
fgraph
,
importer
,
pruner
,
chin
=
None
,
name
=
None
):
"""
Install some FunctionGraph listeners to help the navigator deal with
the ignore_trees-related functionality.
...
...
@@ -1709,6 +1743,8 @@ class NavigatorOptimizer(Optimizer):
from the graph.
chin
"on change input" called whenever a node's inputs change.
name
name of the Updater to attach.
Returns
-------
...
...
@@ -1723,7 +1759,7 @@ class NavigatorOptimizer(Optimizer):
if
importer
is
None
and
pruner
is
None
:
return
None
u
=
Updater
(
importer
,
pruner
,
chin
)
u
=
Updater
(
importer
,
pruner
,
chin
,
name
=
name
)
fgraph
.
attach_feature
(
u
)
return
u
...
...
@@ -1875,8 +1911,8 @@ class TopoOptimizer(NavigatorOptimizer):
q
.
remove
(
node
)
except
ValueError
:
pass
u
=
self
.
attach_updater
(
fgraph
,
importer
,
pruner
)
u
=
self
.
attach_updater
(
fgraph
,
importer
,
pruner
,
name
=
getattr
(
self
,
'name'
,
None
)
)
nb
=
0
try
:
t0
=
time
.
time
()
...
...
@@ -1888,10 +1924,8 @@ class TopoOptimizer(NavigatorOptimizer):
current_node
=
node
nb
+=
self
.
process_node
(
fgraph
,
node
)
loop_t
=
time
.
time
()
-
t0
except
Exception
:
finally
:
self
.
detach_updater
(
fgraph
,
u
)
raise
self
.
detach_updater
(
fgraph
,
u
)
callback_time
=
fgraph
.
execute_callbacks_time
-
callback_before
nb_nodes_end
=
len
(
fgraph
.
apply_nodes
)
...
...
@@ -1950,16 +1984,15 @@ class OpKeyOptimizer(NavigatorOptimizer):
q
.
remove
(
node
)
except
ValueError
:
pass
u
=
self
.
attach_updater
(
fgraph
,
importer
,
pruner
)
u
=
self
.
attach_updater
(
fgraph
,
importer
,
pruner
,
name
=
getattr
(
self
,
'name'
,
None
))
try
:
while
q
:
node
=
q
.
pop
()
current_node
=
node
self
.
process_node
(
fgraph
,
node
)
except
Exception
:
finally
:
self
.
detach_updater
(
fgraph
,
u
)
raise
self
.
detach_updater
(
fgraph
,
u
)
def
add_requirements
(
self
,
fgraph
):
"""
...
...
@@ -1990,6 +2023,9 @@ class ChangeTracker:
def
on_attach
(
self
,
fgraph
):
fgraph
.
change_tracker
=
self
def
on_detach
(
self
,
fgraph
):
del
fgraph
.
change_tracker
def
merge_dict
(
d1
,
d2
):
"""
...
...
@@ -2033,6 +2069,7 @@ class EquilibriumOptimizer(NavigatorOptimizer):
optimizers
,
failure_callback
=
None
,
ignore_newtrees
=
True
,
tracks_on_change_inputs
=
False
,
max_use_ratio
=
None
,
final_optimizers
=
None
,
cleanup_optimizers
=
None
):
...
...
@@ -2045,6 +2082,7 @@ class EquilibriumOptimizer(NavigatorOptimizer):
self
.
global_optimizers
=
[]
self
.
final_optimizers
=
[]
self
.
cleanup_optimizers
=
[]
self
.
tracks_on_change_inputs
=
tracks_on_change_inputs
for
opt
in
optimizers
:
if
isinstance
(
opt
,
LocalOptimizer
):
...
...
@@ -2191,8 +2229,14 @@ class EquilibriumOptimizer(NavigatorOptimizer):
q
.
remove
(
node
)
except
ValueError
:
pass
u
=
self
.
attach_updater
(
fgraph
,
importer
,
pruner
)
chin
=
None
if
self
.
tracks_on_change_inputs
:
def
chin
(
node
,
i
,
r
,
new_r
,
reason
):
if
node
is
not
current_node
and
not
isinstance
(
node
,
str
):
q
.
append
(
node
)
u
=
self
.
attach_updater
(
fgraph
,
importer
,
pruner
,
chin
=
chin
,
name
=
getattr
(
self
,
'name'
,
None
))
try
:
while
q
:
node
=
q
.
pop
()
...
...
theano/gof/optdb.py
浏览文件 @
3c70348f
...
...
@@ -244,16 +244,26 @@ class EquilibriumDB(DB):
optimization application. This could result in less fgraph iterations,
but this doesn't mean it will be faster globally.
tracks_on_change_inputs
If True, we will re-apply local opt on nodes whose inputs
changed during local optimization application. This could
result in less fgraph iterations, but this doesn't mean it
will be faster globally.
Notes
-----
We can put LocalOptimizer and Optimizer as EquilibriumOptimizer
supports both.
It is probably not a good idea to have ignore_newtrees=False and
tracks_on_change_inputs=True
"""
def
__init__
(
self
,
ignore_newtrees
=
True
):
def
__init__
(
self
,
ignore_newtrees
=
True
,
tracks_on_change_inputs
=
False
):
super
(
EquilibriumDB
,
self
)
.
__init__
()
self
.
ignore_newtrees
=
ignore_newtrees
self
.
tracks_on_change_inputs
=
tracks_on_change_inputs
self
.
__final__
=
{}
self
.
__cleanup__
=
{}
...
...
@@ -281,6 +291,7 @@ class EquilibriumDB(DB):
opts
,
max_use_ratio
=
config
.
optdb
.
max_use_ratio
,
ignore_newtrees
=
self
.
ignore_newtrees
,
tracks_on_change_inputs
=
self
.
tracks_on_change_inputs
,
failure_callback
=
opt
.
NavigatorOptimizer
.
warn_inplace
,
final_optimizers
=
final_opts
,
cleanup_optimizers
=
cleanup_opts
)
...
...
theano/gpuarray/dnn.py
浏览文件 @
3c70348f
...
...
@@ -1493,7 +1493,7 @@ def local_dnn_convi_output_merge(node, *inputs):
return
[
GpuDnnConvGradI
(
algo
=
node
.
op
.
algo
)(
*
inputs
)]
@register_opt
(
'cudnn'
)
@register_opt
(
'cudnn'
,
'fast_compile'
)
@op_lifter
([
Pool
])
def
local_pool_dnn_alternative
(
node
,
ctx_name
):
if
not
dnn_available
(
ctx_name
):
...
...
@@ -1509,7 +1509,7 @@ def local_pool_dnn_alternative(node, ctx_name):
return
dnn_pool
(
gpu_contiguous
(
img
),
ds
,
stride
=
stride
,
pad
=
pad
,
mode
=
mode
)
@register_opt
(
'cudnn'
)
@register_opt
(
'cudnn'
,
'fast_compile'
)
@op_lifter
([
MaxPoolGrad
])
def
local_pool_dnn_grad_stride
(
node
,
ctx_name
):
if
not
dnn_available
(
ctx_name
):
...
...
@@ -1533,7 +1533,7 @@ def local_pool_dnn_grad_stride(node, ctx_name):
pad
)
@register_opt
(
'cudnn'
)
@register_opt
(
'cudnn'
,
'fast_compile'
)
@op_lifter
([
AveragePoolGrad
])
def
local_avg_pool_dnn_grad_stride
(
node
,
ctx_name
):
if
not
dnn_available
(
ctx_name
):
...
...
@@ -1556,7 +1556,7 @@ def local_avg_pool_dnn_grad_stride(node, ctx_name):
return
GpuDnnPoolGrad
(
mode
=
mode
)(
gpu_contiguous
(
inp
),
cg
,
cg
,
ds
,
st
,
pad
)
@register_opt
(
'cudnn'
)
@register_opt
(
'cudnn'
,
'fast_compile'
)
@local_optimizer
([
GpuSoftmax
])
def
local_softmax_dnn
(
node
):
if
isinstance
(
node
.
op
,
GpuSoftmax
):
...
...
@@ -1569,7 +1569,7 @@ def local_softmax_dnn(node):
return
[
out
]
@register_opt
(
'cudnn'
)
@register_opt
(
'cudnn'
,
'stabilize'
)
@local_optimizer
([
GpuElemwise
])
def
local_log_softmax_dnn
(
node
):
# This looks for GpuDnnSoftmax so we know that we have cudnn.
...
...
@@ -1586,7 +1586,7 @@ def local_log_softmax_dnn(node):
return
[
new_softmax
(
softmax_node
.
inputs
[
0
])]
@register_opt
(
'cudnn'
)
@register_opt
(
'cudnn'
,
'fast_compile'
)
@op_lifter
([
LogSoftmax
])
def
local_logsoftmax_to_dnn
(
node
,
ctx_name
):
# Transform the input in the format expected by GpuDnnSoftmax
...
...
@@ -1624,7 +1624,7 @@ class NoCuDNNRaise(Optimizer):
gpu_seqopt
.
register
(
"NoCuDNNRaise"
,
NoCuDNNRaise
(),
0
,
'cudnn'
)
@register_opt
(
'cudnn'
)
@register_opt
(
'cudnn'
,
'fast_compile'
)
@op_lifter
([
SoftmaxGrad
])
def
local_softmax_dnn_grad
(
node
,
ctx_name
):
if
not
dnn_available
(
ctx_name
):
...
...
theano/gpuarray/opt.py
浏览文件 @
3c70348f
...
...
@@ -711,18 +711,14 @@ def local_gpua_careduce(node, context_name):
assert
reduce_mask
[
a
]
==
0
reduce_mask
[
a
]
=
1
shape_of
=
node
.
fgraph
.
shape_feature
.
shape_of
x_shape
=
shape_of
[
x
]
new_in_shp
=
[
x_shape
[
0
]]
new_in_shp
=
[
shape_i
(
x
,
0
)]
new_mask
=
[
reduce_mask
[
0
]]
for
i
in
xrange
(
1
,
x
.
type
.
ndim
):
if
reduce_mask
[
i
]
==
reduce_mask
[
i
-
1
]:
new_in_shp
[
-
1
]
*=
x_shape
[
i
]
new_in_shp
[
-
1
]
*=
shape_i
(
x
,
i
)
else
:
new_mask
.
append
(
reduce_mask
[
i
])
new_in_shp
.
append
(
x_shape
[
i
]
)
new_in_shp
.
append
(
shape_i
(
x
,
i
)
)
new_axis
=
[]
for
idx
,
m
in
enumerate
(
new_mask
):
if
m
==
1
:
...
...
@@ -744,8 +740,12 @@ def local_gpua_careduce(node, context_name):
greduce
(
gpu_reshaped_x
))
if
reduce_reshaped_x
.
ndim
!=
node
.
outputs
[
0
]
.
ndim
:
out_shp
=
[]
for
i
in
range
(
x
.
ndim
):
if
i
not
in
node
.
op
.
axis
:
out_shp
.
append
(
shape_i
(
x
,
i
))
unreshaped_reduce
=
reduce_reshaped_x
.
reshape
(
tensor
.
stack
(
shape_of
[
node
.
outputs
[
0
]]
))
tensor
.
stack
(
out_shp
))
else
:
unreshaped_reduce
=
reduce_reshaped_x
return
[
unreshaped_reduce
]
...
...
theano/misc/check_blas.py
浏览文件 @
3c70348f
...
...
@@ -249,6 +249,7 @@ if __name__ == "__main__":
cuda version 7.5 7.0 6.5
gpu
M40 0.47s
k80 0.96s
K6000/NOECC 0.69s
K40 0.88s
...
...
theano/sandbox/cuda/dnn.py
浏览文件 @
3c70348f
...
...
@@ -2526,7 +2526,8 @@ if True:
out
=
as_cuda_ndarray_variable
(
out
.
dimshuffle
(
0
,
1
))
return
[
out
]
@register_opt
(
'cudnn'
)
@register_opt
(
'cudnn'
,
'stabilize'
,
'fast_compile'
)
# We put fast_compile as otherwise it won't be on the GPU.
@local_optimizer
([
GpuElemwise
,
LogSoftmax
])
def
local_log_softmax_dnn
(
node
):
# The log-softmax implementation is only available starting at cuDNN V3
...
...
theano/sandbox/cuda/opt.py
浏览文件 @
3c70348f
...
...
@@ -14,6 +14,7 @@ from . import dnn
import
theano
from
theano
import
scalar
as
scal
from
theano
import
config
,
tensor
,
gof
from
theano.compile.ops
import
shape_i
import
theano.ifelse
import
theano.tensor.signal.pool
import
theano.tensor.nnet
...
...
@@ -900,18 +901,14 @@ def local_gpu_careduce(node):
# to make them a single dimension, do the reduction, and
# then reshape to get them back.
shape_of
=
node
.
fgraph
.
shape_feature
.
shape_of
x_shape
=
shape_of
[
x
]
new_in_shp
=
[
x_shape
[
0
]]
new_in_shp
=
[
shape_i
(
x
,
0
)]
new_mask
=
[
reduce_mask
[
0
]]
for
i
in
xrange
(
1
,
x
.
type
.
ndim
):
if
reduce_mask
[
i
]
==
reduce_mask
[
i
-
1
]:
new_in_shp
[
-
1
]
*=
x_shape
[
i
]
new_in_shp
[
-
1
]
*=
shape_i
(
x
,
i
)
else
:
new_mask
.
append
(
reduce_mask
[
i
])
new_in_shp
.
append
(
x_shape
[
i
]
)
new_in_shp
.
append
(
shape_i
(
x
,
i
)
)
new_greduce
=
GpuCAReduce
(
new_mask
,
scalar_op
)
new_x
=
x
.
reshape
(
tensor
.
stack
(
new_in_shp
))
...
...
@@ -936,8 +933,11 @@ def local_gpu_careduce(node):
# Restore the expected shape of the output
if
rval
.
ndim
!=
out
.
ndim
:
rval
=
rval
.
reshape
(
tensor
.
stack
(
shape_of
[
out
]))
out_shp
=
[]
for
i
in
range
(
x
.
ndim
):
if
i
not
in
node
.
op
.
axis
:
out_shp
.
append
(
shape_i
(
x
,
i
))
rval
=
rval
.
reshape
(
tensor
.
stack
(
out_shp
))
if
rval
.
type
==
out
.
type
:
return
[
rval
]
...
...
theano/tensor/blas.py
浏览文件 @
3c70348f
...
...
@@ -1436,7 +1436,8 @@ class GemmOptimizer(Optimizer):
if
new_node
is
not
node
:
nodelist
.
append
(
new_node
)
u
=
theano
.
gof
.
opt
.
Updater
(
on_import
,
None
,
None
)
u
=
theano
.
gof
.
opt
.
Updater
(
on_import
,
None
,
None
,
name
=
"GemmOptimizer"
)
fgraph
.
attach_feature
(
u
)
while
did_something
:
nb_iter
+=
1
...
...
theano/tensor/opt.py
浏览文件 @
3c70348f
...
...
@@ -1260,6 +1260,12 @@ class ShapeFeature(object):
for
node
in
fgraph
.
toposort
():
self
.
on_import
(
fgraph
,
node
,
reason
=
'on_attach'
)
def
on_detach
(
self
,
fgraph
):
self
.
shape_of
=
{}
self
.
scheduled
=
{}
self
.
shape_of_reverse_index
=
{}
del
fgraph
.
shape_feature
def
on_import
(
self
,
fgraph
,
node
,
reason
):
if
node
.
outputs
[
0
]
in
self
.
shape_of
:
# this is a revert, not really an import
...
...
@@ -1436,10 +1442,23 @@ class ShapeOptimizer(Optimizer):
def
apply
(
self
,
fgraph
):
pass
class
UnShapeOptimizer
(
Optimizer
):
"""Optimizer that removes ShapeFeature as an fgraph feature."""
def
apply
(
self
,
fgraph
):
for
feature
in
fgraph
.
_features
:
if
isinstance
(
feature
,
ShapeFeature
):
fgraph
.
remove_feature
(
feature
)
# Register it after merge1 optimization at 0. We don't want to track
# the shape of merged node.
theano
.
compile
.
mode
.
optdb
.
register
(
'ShapeOpt'
,
ShapeOptimizer
(),
0.1
,
'fast_run'
,
'fast_compile'
)
# Not enabled by default for now. Some crossentropy opt use the
# shape_feature. They are at step 2.01. uncanonicalize is at step
# 3. After it goes to 48.5 that move to the gpu. So 10 seems reasonable.
theano
.
compile
.
mode
.
optdb
.
register
(
'UnShapeOpt'
,
UnShapeOptimizer
(),
10
)
def
local_elemwise_alloc_op
(
ElemwiseOP
,
AllocOP
,
DimShuffleOP
):
...
...
编写
预览
Markdown
格式
0%
重试
或
添加新文件
添加附件
取消
您添加了
0
人
到此讨论。请谨慎行事。
请先完成此评论的编辑!
取消
请
注册
或者
登录
后发表评论