Skip to content
项目
群组
代码片段
帮助
当前项目
正在载入...
登录 / 注册
切换导航面板
P
pytensor
项目
项目
详情
活动
周期分析
仓库
仓库
文件
提交
分支
标签
贡献者
图表
比较
统计图
议题
0
议题
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
CI / CD
CI / CD
流水线
作业
日程
统计图
Wiki
Wiki
代码片段
代码片段
成员
成员
折叠边栏
关闭边栏
活动
图像
聊天
创建新问题
作业
提交
问题看板
Open sidebar
testgroup
pytensor
Commits
d704a600
提交
d704a600
authored
3月 20, 2017
作者:
abergeron
提交者:
GitHub
3月 20, 2017
浏览文件
操作
浏览文件
下载
差异文件
Merge pull request #5562 from nouiz/opt_prof
Speed merge optimizer and allow partial optimizer profile
上级
4e482807
80d3b56a
隐藏空白字符变更
内嵌
并排
正在显示
7 个修改的文件
包含
185 行增加
和
139 行删除
+185
-139
function_module.py
theano/compile/function_module.py
+26
-13
opt.py
theano/gof/opt.py
+50
-36
test_graph.py
theano/gof/tests/test_graph.py
+1
-3
scan_utils.py
theano/scan_module/scan_utils.py
+68
-41
test_scan_opt.py
theano/scan_module/tests/test_scan_opt.py
+1
-4
basic.py
theano/tensor/basic.py
+31
-37
opt.py
theano/tensor/opt.py
+8
-5
没有找到文件。
theano/compile/function_module.py
浏览文件 @
d704a600
...
...
@@ -1466,6 +1466,10 @@ class FunctionMaker(object):
theano
.
config
.
traceback
.
limit
=
theano
.
config
.
traceback
.
compile_limit
start_optimizer
=
time
.
time
()
# In case there is an error during optimization.
optimizer_profile
=
None
opt_time
=
None
# now optimize the graph
if
theano
.
config
.
cache_optimizations
:
optimizer_profile
=
self
.
optimize_graph_with_cache
(
...
...
@@ -1475,8 +1479,23 @@ class FunctionMaker(object):
end_optimizer
=
time
.
time
()
opt_time
=
end_optimizer
-
start_optimizer
_logger
.
debug
(
'Optimizing took
%
f seconds'
,
opt_time
)
# Add deep copy to respect the memory interface
insert_deepcopy
(
fgraph
,
inputs
,
outputs
+
additional_outputs
)
finally
:
theano
.
config
.
compute_test_value
=
compute_test_value_orig
theano
.
config
.
traceback
.
limit
=
limit_orig
# If the optimizer got interrupted
if
opt_time
is
None
:
end_optimizer
=
time
.
time
()
opt_time
=
end_optimizer
-
start_optimizer
theano
.
compile
.
profiling
.
total_graph_opt_time
+=
opt_time
if
profile
:
if
(
optimizer_profile
is
None
and
hasattr
(
optimizer
,
'pre_profile'
)):
optimizer_profile
=
optimizer
.
pre_profile
profile
.
optimizer_time
+=
opt_time
if
theano
.
config
.
profile_optimizer
:
profile
.
optimizer_profile
=
(
optimizer
,
...
...
@@ -1486,13 +1505,6 @@ class FunctionMaker(object):
warnings
.
warn
((
"config.profile_optimizer requires config.profile to "
" be set to True as well"
),
stacklevel
=
3
)
_logger
.
debug
(
'Optimizing took
%
f seconds'
,
opt_time
)
# Add deep copy to respect the memory interface
insert_deepcopy
(
fgraph
,
inputs
,
outputs
+
additional_outputs
)
finally
:
theano
.
config
.
compute_test_value
=
compute_test_value_orig
theano
.
config
.
traceback
.
limit
=
limit_orig
# initialize the linker
if
not
hasattr
(
linker
,
'accept'
):
...
...
@@ -1784,7 +1796,7 @@ def orig_function(inputs, outputs, mode=None, accept_inplace=False,
if
isinstance
(
mode
,
(
list
,
tuple
)):
# "mode comparison" semantics
raise
Exception
(
"We do not support the passing of multiple modes"
)
else
:
try
:
Maker
=
getattr
(
mode
,
'function_maker'
,
FunctionMaker
)
fn
=
Maker
(
inputs
,
outputs
,
...
...
@@ -1794,11 +1806,12 @@ def orig_function(inputs, outputs, mode=None, accept_inplace=False,
on_unused_input
=
on_unused_input
,
output_keys
=
output_keys
)
.
create
(
defaults
)
t2
=
time
.
time
()
if
profile
:
profile
.
compile_time
+=
t2
-
t1
profile
.
nb_nodes
=
len
(
fn
.
maker
.
fgraph
.
apply_nodes
)
finally
:
t2
=
time
.
time
()
if
profile
:
profile
.
compile_time
+=
t2
-
t1
# TODO: append
profile
.
nb_nodes
=
len
(
fn
.
maker
.
fgraph
.
apply_nodes
)
fn
.
name
=
name
fn
.
maker
.
fgraph
.
name
=
name
...
...
theano/gof/opt.py
浏览文件 @
d704a600
...
...
@@ -228,45 +228,53 @@ class SeqOptimizer(Optimizer, list):
nb_node_before
=
len
(
fgraph
.
apply_nodes
)
sub_profs
=
[]
nb_nodes
=
[]
for
optimizer
in
self
:
try
:
nb_nodes_before
=
len
(
fgraph
.
apply_nodes
)
t0
=
time
.
time
()
sub_prof
=
optimizer
.
optimize
(
fgraph
)
l
.
append
(
float
(
time
.
time
()
-
t0
))
sub_profs
.
append
(
sub_prof
)
nb_nodes
.
append
((
nb_nodes_before
,
len
(
fgraph
.
apply_nodes
)))
if
fgraph
.
profile
:
sub_validate_time
.
append
(
fgraph
.
profile
.
validate_time
)
except
AssertionError
:
# do not catch Assertion failures
raise
except
Exception
as
e
:
if
self
.
failure_callback
:
self
.
failure_callback
(
e
,
self
,
optimizer
)
continue
else
:
raise
if
fgraph
.
profile
:
validate_time
=
fgraph
.
profile
.
validate_time
-
validate_before
callbacks_time
=
{}
for
k
,
v
in
iteritems
(
fgraph
.
execute_callbacks_times
):
if
k
in
callbacks_before
:
t
=
v
-
callbacks_before
[
k
]
if
t
>
0
:
callbacks_time
[
k
]
=
t
else
:
callbacks_time
[
k
]
=
v
else
:
validate_time
=
None
callbacks_time
=
{}
self
.
pre_profile
=
(
self
,
l
,
-
1
,
-
1
,
nb_node_before
,
-
1
,
sub_profs
,
sub_validate_time
,
nb_nodes
,
{})
try
:
for
optimizer
in
self
:
try
:
nb_nodes_before
=
len
(
fgraph
.
apply_nodes
)
t0
=
time
.
time
()
sub_prof
=
optimizer
.
optimize
(
fgraph
)
l
.
append
(
float
(
time
.
time
()
-
t0
))
sub_profs
.
append
(
sub_prof
)
nb_nodes
.
append
((
nb_nodes_before
,
len
(
fgraph
.
apply_nodes
)))
if
fgraph
.
profile
:
sub_validate_time
.
append
(
fgraph
.
profile
.
validate_time
)
except
AssertionError
:
# do not catch Assertion failures
raise
except
Exception
as
e
:
if
self
.
failure_callback
:
self
.
failure_callback
(
e
,
self
,
optimizer
)
continue
else
:
raise
finally
:
callback_time
=
fgraph
.
execute_callbacks_time
-
callback_before
return
(
self
,
l
,
validate_time
,
callback_time
,
nb_node_before
,
if
fgraph
.
profile
:
validate_time
=
fgraph
.
profile
.
validate_time
-
validate_before
callbacks_time
=
{}
for
k
,
v
in
iteritems
(
fgraph
.
execute_callbacks_times
):
if
k
in
callbacks_before
:
t
=
v
-
callbacks_before
[
k
]
if
t
>
0
:
callbacks_time
[
k
]
=
t
else
:
callbacks_time
[
k
]
=
v
else
:
validate_time
=
None
callbacks_time
=
{}
callback_time
=
fgraph
.
execute_callbacks_time
-
callback_before
self
.
pre_profile
=
(
self
,
l
,
validate_time
,
callback_time
,
nb_node_before
,
len
(
fgraph
.
apply_nodes
),
sub_profs
,
sub_validate_time
,
nb_nodes
,
callbacks_time
)
return
self
.
pre_profile
def
__str__
(
self
):
return
"SeqOpt(
%
s)"
%
list
.
__str__
(
self
)
...
...
@@ -877,7 +885,13 @@ class MergeOptimizer(Optimizer):
pairs
=
[(
pairs
[
0
][
1
],
pairs
[
0
][
0
])]
try
:
fgraph
.
replace_all_validate
(
pairs
,
'MergeOptimizer'
)
# If all Constants, no need to call validate.
# Only need to check one of the var of each pairs.
# If it is a Constant, the other must also be a Constant as we merge them.
if
all
([
isinstance
(
old
,
graph
.
Constant
)
for
old
,
new
in
pairs
]):
fgraph
.
replace_all
(
pairs
,
'MergeOptimizer'
)
else
:
fgraph
.
replace_all_validate
(
pairs
,
'MergeOptimizer'
)
except
InconsistencyError
:
success
=
False
nb_fail
+=
1
...
...
theano/gof/tests/test_graph.py
浏览文件 @
d704a600
...
...
@@ -359,9 +359,7 @@ class TestAutoName:
assert
r1
is
r2
r3
=
tensor
.
constant
(
1.6
)
# The cache still create a new object that we don't return.
# This is why we must increase by 2 and not 1.
assert
r3
.
auto_name
==
"auto_"
+
str
(
autoname_id
+
2
)
assert
r3
.
auto_name
==
"auto_"
+
str
(
autoname_id
+
1
)
def
test_tensorvariable
(
self
):
# Get counter value
...
...
theano/scan_module/scan_utils.py
浏览文件 @
d704a600
...
...
@@ -855,50 +855,77 @@ class Validator(object):
If out is not valid and has no equivalent, None is returned.
"""
if
out
in
self
.
valid
:
return
out
,
True
elif
out
in
self
.
valid_equivalent
:
return
self
.
valid_equivalent
[
out
],
False
elif
out
in
self
.
invalid
:
return
None
if
out
.
owner
is
None
:
if
isinstance
(
out
,
tensor
.
TensorConstant
):
# This might be a constant from the outer graph or a constant
# from the inner graph. In all cases, we can clone it to be
# certain we have a valid constant
cloned_out
=
out
.
clone
()
self
.
valid
.
add
(
cloned_out
)
def
get_value
(
out
):
if
out
in
self
.
valid
:
return
out
,
True
elif
out
in
self
.
valid_equivalent
:
return
self
.
valid_equivalent
[
out
],
False
elif
out
in
self
.
invalid
:
return
None
else
:
raise
RuntimeError
(
"This should not happen"
)
q
=
[
out
]
while
q
:
out
=
q
.
pop
()
if
out
in
self
.
valid
:
continue
elif
out
in
self
.
invalid
:
continue
if
out
.
owner
is
None
:
if
isinstance
(
out
,
tensor
.
TensorConstant
):
if
hasattr
(
out
,
'fgraph'
):
# If out have an fgraph, we aren't sure if it
# is from the inner graph or outer graph, so
# clone it.
cloned_out
=
out
.
clone
()
self
.
valid
.
add
(
cloned_out
)
self
.
invalid
.
add
(
out
)
self
.
valid_equivalent
[
out
]
=
cloned_out
else
:
self
.
valid
.
add
(
out
)
continue
else
:
# This is an input node and it has not been
# explicitly marked as invalid so we can use it
self
.
valid
.
add
(
out
)
continue
# Process the input if needed
continue_while
=
False
for
inp
in
out
.
owner
.
inputs
:
if
inp
not
in
self
.
valid
and
inp
not
in
self
.
invalid
:
q
.
append
(
out
)
q
.
extend
(
out
.
owner
.
inputs
)
continue_while
=
True
break
if
continue_while
:
continue
inputs
=
[
get_value
(
i
)
for
i
in
out
.
owner
.
inputs
]
# If some inputs are invalid without equivalent, so is out
if
None
in
inputs
:
self
.
invalid
.
add
(
out
)
continue
# If some inputs are invalid with equivalent,
# an equivalent out should be built and returned
all_inputs
=
[
inp
for
(
inp
,
is_valid
)
in
inputs
]
equiv_inputs
=
[
inp
for
(
inp
,
is_valid
)
in
inputs
if
not
is_valid
]
if
equiv_inputs
:
cloned_node
=
out
.
owner
.
clone_with_new_inputs
(
all_inputs
)
cloned_out
=
cloned_node
.
outputs
[
out
.
index
]
self
.
invalid
.
add
(
out
)
self
.
valid
.
add
(
cloned_out
)
self
.
valid_equivalent
[
out
]
=
cloned_out
return
cloned_out
,
False
else
:
# This is an input node and it has not been explicitly marked
# as invalid so we can use it
return
out
,
True
continue
# All inputs are valid, so is out
self
.
valid
.
add
(
out
)
# Recurse over inputs
inputs
=
[
self
.
check
(
i
)
for
i
in
out
.
owner
.
inputs
]
# If some inputs are invalid without equivalent, so is out
if
None
in
inputs
:
self
.
invalid
.
add
(
out
)
return
None
# If some inputs are invalid with equivalent,
# an equivalent out should be built and returned
all_inputs
=
[
inp
for
(
inp
,
is_valid
)
in
inputs
]
equiv_inputs
=
[
inp
for
(
inp
,
is_valid
)
in
inputs
if
not
is_valid
]
if
equiv_inputs
:
cloned_node
=
out
.
owner
.
clone_with_new_inputs
(
all_inputs
)
cloned_out
=
cloned_node
.
outputs
[
out
.
index
]
self
.
invalid
.
add
(
out
)
self
.
valid
.
add
(
cloned_out
)
self
.
valid_equivalent
[
out
]
=
cloned_out
return
cloned_out
,
False
# All inputs are valid, so is out
return
out
,
True
return
get_value
(
out
)
def
scan_can_remove_outs
(
op
,
out_idxs
):
...
...
theano/scan_module/tests/test_scan_opt.py
浏览文件 @
d704a600
...
...
@@ -113,10 +113,7 @@ class TestGaussNewton(unittest.TestCase):
def
test_nobatch
(
self
):
# This used to give an error due to optimization "scan_merge_inouts".
# The batch size is set to 1 and the data is represented by a matrix.
# As of 2013-10-24, it still triggers an optimization error due to
# "remove_constants_and_unused_inputs_scan".
mode_exc
=
mode
.
excluding
(
"remove_constants_and_unused_inputs_scan"
)
self
.
_run
(
100
,
10
,
batch_size
=
1
,
mode
=
mode_exc
)
self
.
_run
(
100
,
10
,
batch_size
=
1
,
mode
=
mode
)
class
GaussNewtonMatrix
(
object
):
...
...
theano/tensor/basic.py
浏览文件 @
d704a600
...
...
@@ -19,7 +19,7 @@ from theano.gof.type import Generic
from
theano.tensor
import
elemwise
from
theano.tensor.var
import
(
AsTensorError
,
TensorVariable
,
TensorConstant
,
TensorConstant
,
TensorConstantSignature
,
_tensor_py_operators
)
from
theano.tensor.type
import
TensorType
,
values_eq_approx_always_true
from
theano.tensor.type_other
import
NoneConst
...
...
@@ -220,7 +220,7 @@ _as_tensor_variable = as_tensor_variable
as_tensor
=
as_tensor_variable
def
constant
_or_value
(
x
,
rtype
,
name
=
None
,
ndim
=
None
,
dtype
=
None
):
def
constant
(
x
,
name
=
None
,
ndim
=
None
,
dtype
=
None
):
"""Return a symbolic `Constant` with value `x`.
Raises
...
...
@@ -230,6 +230,16 @@ def constant_or_value(x, rtype, name=None, ndim=None, dtype=None):
ValueError
`x` could not be expanded to have ndim dimensions.
Note
----
We create a small cache of frequently used constant.
This speed up the Merge optimization for big graph.
We want to cache all scalar to don't merge as frequently constants.
But we don't want to cache too much stuff.
So we cache integer with dtype [u]int and float where the value is
between -10 and 10.
We cache all broadcast pattern for scalar.
"""
x_
=
scal
.
convert
(
x
,
dtype
=
dtype
)
...
...
@@ -245,45 +255,29 @@ def constant_or_value(x, rtype, name=None, ndim=None, dtype=None):
assert
len
(
bcastable
)
==
ndim
try
:
if
rtype
is
TensorConstant
:
rval
=
rtype
(
TensorType
(
dtype
=
x_
.
dtype
,
broadcastable
=
bcastable
),
x_
.
copy
(),
name
=
name
)
return
rval
else
:
# leave the shape out of the type
return
rtype
(
TensorType
(
dtype
=
x_
.
dtype
,
broadcastable
=
bcastable
),
x_
,
name
=
name
)
ttype
=
TensorType
(
dtype
=
x_
.
dtype
,
broadcastable
=
bcastable
)
if
not
constant
.
enable
:
return
TensorConstant
(
ttype
,
x_
,
name
=
name
)
sig
=
TensorConstantSignature
((
ttype
,
x_
))
if
sig
in
constant_cache
:
return
constant_cache
[
sig
]
ret
=
TensorConstant
(
ttype
,
x_
,
name
=
name
)
if
(
x_
.
size
==
1
and
(
-
10
)
<=
x_
<=
10
and
(
x_
.
dtype
in
int_dtypes
or
x_
.
dtype
in
uint_dtypes
or
(
x_
.
dtype
in
float_dtypes
and
# Limit the size of the cache.
len
(
constant_cache
)
<
10000
))):
constant_cache
[
sig
]
=
ret
# This is needed to raise a good error to the user.
ret
.
cached
=
True
return
ret
except
Exception
:
raise
TypeError
(
"Could not convert
%
s to TensorType"
%
x
,
type
(
x
))
def
constant
(
x
,
name
=
None
,
ndim
=
None
,
dtype
=
None
):
ret
=
constant_or_value
(
x
,
rtype
=
TensorConstant
,
name
=
name
,
ndim
=
ndim
,
dtype
=
dtype
)
# We create a small cache of frequently used constant.
# This speed up the Merge optimization for big graph.
# We want to cache all scalar to don't merge as frequently constants.
# But we don't want to cache too much stuff
# So we cache integer with dtype [u]int and float where the value is
# between -10 and 10
# We want to cache all broadcast pattern for scalar.
if
not
constant
.
enable
:
return
ret
sig
=
ret
.
signature
()
if
(
sig
not
in
constant_cache
and
ret
.
data
.
size
==
1
and
(
-
10
)
<=
ret
.
data
<=
10
and
(
ret
.
dtype
in
int_dtypes
or
ret
.
dtype
in
uint_dtypes
or
(
ret
.
dtype
in
float_dtypes
and
# Limit the size of the cache.
len
(
constant_cache
)
<
10000
))):
constant_cache
[
sig
]
=
ret
# This is needed to raise a good error to the user.
ret
.
cached
=
True
return
constant_cache
.
get
(
sig
,
ret
)
constant
.
enable
=
True
constant_cache
=
{}
...
...
theano/tensor/opt.py
浏览文件 @
d704a600
...
...
@@ -3251,12 +3251,15 @@ def local_IncSubtensor_serialize(node):
if
movable_inputs
:
new_inputs
=
([
i
for
i
in
node
.
inputs
if
not
movable
(
i
)]
+
[
mi
.
owner
.
inputs
[
0
]
for
mi
in
movable_inputs
])
new_add
=
T
.
add
(
*
new_inputs
)
if
len
(
new_inputs
)
==
0
:
new_add
=
new_inputs
[
0
]
else
:
new_add
=
T
.
add
(
*
new_inputs
)
# Copy over stacktrace from original output, as an error
# (e.g. an index error) in this add operation should
# correspond to an error in the original add operation.
copy_stack_trace
(
node
.
outputs
[
0
],
new_add
)
# Copy over stacktrace from original output, as an error
# (e.g. an index error) in this add operation should
# correspond to an error in the original add operation.
copy_stack_trace
(
node
.
outputs
[
0
],
new_add
)
# stack up the new incsubtensors
tip
=
new_add
...
...
编写
预览
Markdown
格式
0%
重试
或
添加新文件
添加附件
取消
您添加了
0
人
到此讨论。请谨慎行事。
请先完成此评论的编辑!
取消
请
注册
或者
登录
后发表评论