Skip to content
项目
群组
代码片段
帮助
当前项目
正在载入...
登录 / 注册
切换导航面板
P
pytensor
项目
项目
详情
活动
周期分析
仓库
仓库
文件
提交
分支
标签
贡献者
图表
比较
统计图
议题
0
议题
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
CI / CD
CI / CD
流水线
作业
日程
统计图
Wiki
Wiki
代码片段
代码片段
成员
成员
折叠边栏
关闭边栏
活动
图像
聊天
创建新问题
作业
提交
问题看板
Open sidebar
testgroup
pytensor
Commits
bb096349
提交
bb096349
authored
6月 15, 2012
作者:
lamblin
浏览文件
操作
浏览文件
下载
差异文件
Merge pull request #666 from nouiz/time_opt
Time opt
上级
b8165faa
0c0c9f91
隐藏空白字符变更
内嵌
并排
正在显示
11 个修改的文件
包含
315 行增加
和
21 行删除
+315
-21
function_module.py
theano/compile/function_module.py
+13
-4
mode.py
theano/compile/mode.py
+4
-0
profiling.py
theano/compile/profiling.py
+33
-3
env.py
theano/gof/env.py
+1
-0
graph.py
theano/gof/graph.py
+1
-1
opt.py
theano/gof/opt.py
+235
-5
optdb.py
theano/gof/optdb.py
+4
-1
toolbox.py
theano/gof/toolbox.py
+15
-4
vm.py
theano/gof/vm.py
+3
-0
test_driver.py
theano/sandbox/cuda/tests/test_driver.py
+4
-2
scan.py
theano/scan_module/scan.py
+2
-1
没有找到文件。
theano/compile/function_module.py
浏览文件 @
bb096349
...
@@ -994,11 +994,16 @@ class FunctionMaker(object):
...
@@ -994,11 +994,16 @@ class FunctionMaker(object):
# we allow ProfileMode to provide a ProfileStats object
# we allow ProfileMode to provide a ProfileStats object
# using this somewhat awkward mechanism.
# using this somewhat awkward mechanism.
mode_profile
=
getattr
(
mode
,
'profile'
,
None
)
mode_profile
=
getattr
(
mode
,
'profile'
,
None
)
if
(
profile
is
not
None
)
and
(
mode_profile
is
not
None
):
if
(
profile
is
not
None
and
profile
is
not
False
and
mode_profile
is
not
None
):
raise
TypeError
(
raise
TypeError
(
'profile passed via both "mode" and "profile" arguments'
)
'profile passed via both "mode" and "profile" arguments'
)
self
.
profile
=
profile
=
profile
or
mode_profile
self
.
profile
=
profile
=
profile
or
mode_profile
if
profile
:
# We preload the cache here to don't have its timming
# included in optimization that compile function.
theano
.
gof
.
cc
.
get_module_cache
()
# Handle the case where inputs and/or outputs is a single Variable (not in a list)
# Handle the case where inputs and/or outputs is a single Variable (not in a list)
self
.
orig_outputs
=
outputs
self
.
orig_outputs
=
outputs
unpack_single
=
False
unpack_single
=
False
...
@@ -1030,6 +1035,8 @@ class FunctionMaker(object):
...
@@ -1030,6 +1035,8 @@ class FunctionMaker(object):
# make the env (copies the graph, creates NEW INPUT AND OUTPUT VARIABLES)
# make the env (copies the graph, creates NEW INPUT AND OUTPUT VARIABLES)
env
,
additional_outputs
=
std_env
(
expanded_inputs
,
outputs
,
accept_inplace
)
env
,
additional_outputs
=
std_env
(
expanded_inputs
,
outputs
,
accept_inplace
)
env
.
profile
=
profile
self
.
env
=
env
self
.
env
=
env
# Fetch the optimizer and linker
# Fetch the optimizer and linker
...
@@ -1042,13 +1049,15 @@ class FunctionMaker(object):
...
@@ -1042,13 +1049,15 @@ class FunctionMaker(object):
theano
.
config
.
compute_test_value
=
"off"
theano
.
config
.
compute_test_value
=
"off"
gof
.
Op
.
add_stack_trace_on_call
=
False
gof
.
Op
.
add_stack_trace_on_call
=
False
start_optimizer
=
time
.
time
()
start_optimizer
=
time
.
time
()
optimizer
(
env
)
optimizer
_profile
=
optimizer
(
env
)
end_optimizer
=
time
.
time
()
end_optimizer
=
time
.
time
()
opt_time
=
end_optimizer
-
start_optimizer
opt_time
=
end_optimizer
-
start_optimizer
mode
.
optimizer_time
+=
opt_time
mode
.
optimizer_time
+=
opt_time
if
profile
:
if
profile
:
profile
.
optimizer_time
+=
opt_time
profile
.
optimizer_time
+=
opt_time
if
theano
.
config
.
profile_optimizer
:
profile
.
optimizer_profile
=
(
optimizer
,
optimizer_profile
)
_logger
.
debug
(
'Optimizing took
%
f seconds'
,
opt_time
)
_logger
.
debug
(
'Optimizing took
%
f seconds'
,
opt_time
)
#Add deep copy to respect the memory interface
#Add deep copy to respect the memory interface
...
...
theano/compile/mode.py
浏览文件 @
bb096349
...
@@ -88,6 +88,10 @@ OPT_FAST_RUN_STABLE = OPT_FAST_RUN.requiring('stable')
...
@@ -88,6 +88,10 @@ OPT_FAST_RUN_STABLE = OPT_FAST_RUN.requiring('stable')
OPT_FAST_COMPILE
=
gof
.
Query
(
include
=
[
'fast_compile'
])
OPT_FAST_COMPILE
=
gof
.
Query
(
include
=
[
'fast_compile'
])
OPT_STABILIZE
=
gof
.
Query
(
include
=
[
'fast_run'
])
OPT_STABILIZE
=
gof
.
Query
(
include
=
[
'fast_run'
])
OPT_STABILIZE
.
position_cutoff
=
1.5000001
OPT_STABILIZE
.
position_cutoff
=
1.5000001
OPT_FAST_RUN
.
name
=
'OPT_FAST_RUN'
OPT_FAST_RUN_STABLE
.
name
=
'OPT_FAST_RUN_STABLE'
OPT_FAST_COMPILE
.
name
=
'OPT_FAST_COMPILE'
OPT_STABILIZE
.
name
=
'OPT_STABILIZE'
predefined_optimizers
=
{
predefined_optimizers
=
{
None
:
(
lambda
env
:
None
),
None
:
(
lambda
env
:
None
),
...
...
theano/compile/profiling.py
浏览文件 @
bb096349
...
@@ -38,12 +38,14 @@ AddConfigVar('profiling.time_thunks',
...
@@ -38,12 +38,14 @@ AddConfigVar('profiling.time_thunks',
def
_atexit_print_fn
():
def
_atexit_print_fn
():
"""Print ProfileStat objects in _atexit_print_list to _atexit_print_file
"""Print ProfileStat objects in _atexit_print_list to _atexit_print_file
"""
"""
printed
=
0
for
ps
in
_atexit_print_list
:
for
ps
in
_atexit_print_list
:
if
ps
.
fct_callcount
or
ps
.
compile_time
>
0
:
if
ps
.
fct_callcount
or
ps
.
compile_time
>
0
:
ps
.
summary
(
file
=
_atexit_print_file
)
ps
.
summary
(
file
=
_atexit_print_file
)
printed
+=
1
else
:
else
:
print
'Skipping empty Profile'
print
'Skipping empty Profile'
if
len
(
_atexit_print_list
)
>
1
:
if
printed
>
1
:
# Make a global profile
# Make a global profile
cum
=
copy
.
copy
(
_atexit_print_list
[
0
])
cum
=
copy
.
copy
(
_atexit_print_list
[
0
])
cum
.
message
=
"Sum of all printed profiles at exit"
cum
.
message
=
"Sum of all printed profiles at exit"
...
@@ -51,14 +53,26 @@ def _atexit_print_fn():
...
@@ -51,14 +53,26 @@ def _atexit_print_fn():
# for ps in [ps for ps in _atexit_print_list[1:]
# for ps in [ps for ps in _atexit_print_list[1:]
# if not isinstance(ps, ScanProfileStats)]:
# if not isinstance(ps, ScanProfileStats)]:
for
attr
in
[
"compile_time"
,
"fct_call_time"
,
"fct_callcount"
,
for
attr
in
[
"compile_time"
,
"fct_call_time"
,
"fct_callcount"
,
"vm_call_time"
,
"optimizer_time"
,
"linker_time"
]:
"vm_call_time"
,
"optimizer_time"
,
"linker_time"
,
"validate_time"
]:
setattr
(
cum
,
attr
,
getattr
(
cum
,
attr
)
+
getattr
(
ps
,
attr
))
setattr
(
cum
,
attr
,
getattr
(
cum
,
attr
)
+
getattr
(
ps
,
attr
))
#merge dictonary
for
attr
in
[
"apply_time"
,
"apply_callcount"
,
for
attr
in
[
"apply_time"
,
"apply_callcount"
,
"apply_cimpl"
,
"outputs_size"
]:
"apply_cimpl"
,
"outputs_size"
]:
cum_attr
=
getattr
(
cum
,
attr
)
cum_attr
=
getattr
(
cum
,
attr
)
for
key
,
val
in
getattr
(
ps
,
attr
)
.
iteritems
():
for
key
,
val
in
getattr
(
ps
,
attr
)
.
iteritems
():
assert
key
not
in
cum_attr
assert
key
not
in
cum_attr
cum_attr
[
key
]
=
val
cum_attr
[
key
]
=
val
if
cum
.
optimizer_profile
and
ps
.
optimizer_profile
:
merge
=
cum
.
optimizer_profile
[
0
]
.
merge_profile
(
cum
.
optimizer_profile
[
1
],
ps
.
optimizer_profile
[
1
])
cum
.
optimizer_profile
=
(
cum
.
optimizer_profile
[
0
],
merge
)
else
:
cum
.
optimizer_profile
=
None
cum
.
summary
(
file
=
_atexit_print_file
)
cum
.
summary
(
file
=
_atexit_print_file
)
...
@@ -118,11 +132,19 @@ class ProfileStats(object):
...
@@ -118,11 +132,19 @@ class ProfileStats(object):
optimizer_time
=
0.0
optimizer_time
=
0.0
# time spent optimizing graph (FunctionMaker.__init__)
# time spent optimizing graph (FunctionMaker.__init__)
validate_time
=
0.0
# time spent in env.validate
# This is a subset of optimizer_time that is dominated by toposort()
# when the destorymap feature is included.
linker_time
=
0.0
linker_time
=
0.0
# time spent linking graph (FunctionMaker.create)
# time spent linking graph (FunctionMaker.create)
line_width
=
140
line_width
=
140
optimizer_profile
=
None
# None or tuple (the optimizer, the profile it returned)
# param is called flag_time_thunks because most other attributes with time
# param is called flag_time_thunks because most other attributes with time
# in the name are times *of* something, rather than configuration flags.
# in the name are times *of* something, rather than configuration flags.
def
__init__
(
self
,
atexit_print
=
True
,
flag_time_thunks
=
None
,
**
kwargs
):
def
__init__
(
self
,
atexit_print
=
True
,
flag_time_thunks
=
None
,
**
kwargs
):
...
@@ -390,11 +412,15 @@ class ProfileStats(object):
...
@@ -390,11 +412,15 @@ class ProfileStats(object):
local_time
,
100
*
local_time
/
self
.
fct_call_time
)
local_time
,
100
*
local_time
/
self
.
fct_call_time
)
print
>>
file
,
' Total compile time:
%
es'
%
self
.
compile_time
print
>>
file
,
' Total compile time:
%
es'
%
self
.
compile_time
print
>>
file
,
' Theano Optimizer time:
%
es'
%
self
.
optimizer_time
print
>>
file
,
' Theano Optimizer time:
%
es'
%
self
.
optimizer_time
print
>>
file
,
' Theano validate time:
%
es'
%
self
.
validate_time
print
>>
file
,
(
' Theano Linker time (includes C,'
print
>>
file
,
(
' Theano Linker time (includes C,'
' CUDA code generation/compiling):
%
es'
%
' CUDA code generation/compiling):
%
es'
%
self
.
linker_time
)
self
.
linker_time
)
print
>>
file
,
''
print
>>
file
,
''
# The validation time is a subset of optimizer_time
assert
self
.
validate_time
<
self
.
optimizer_time
def
summary
(
self
,
file
=
sys
.
stderr
,
n_ops_to_print
=
20
,
def
summary
(
self
,
file
=
sys
.
stderr
,
n_ops_to_print
=
20
,
n_applies_to_print
=
20
):
n_applies_to_print
=
20
):
self
.
summary_function
(
file
)
self
.
summary_function
(
file
)
...
@@ -402,9 +428,13 @@ class ProfileStats(object):
...
@@ -402,9 +428,13 @@ class ProfileStats(object):
if
local_time
>
0
:
if
local_time
>
0
:
self
.
summary_ops
(
file
,
n_ops_to_print
)
self
.
summary_ops
(
file
,
n_ops_to_print
)
self
.
summary_nodes
(
file
,
n_applies_to_print
)
self
.
summary_nodes
(
file
,
n_applies_to_print
)
el
se
:
el
if
self
.
fct_callcount
>
0
:
print
>>
file
,
(
" No node time accumulated "
print
>>
file
,
(
" No node time accumulated "
"(hint: try config profiling.time_thunks=1)"
)
"(hint: try config profiling.time_thunks=1)"
)
if
self
.
optimizer_profile
:
print
"Optimizer Profile"
print
"-----------------"
self
.
optimizer_profile
[
0
]
.
print_profile
(
file
,
self
.
optimizer_profile
[
1
])
if
0
:
# old code still to be ported from ProfileMode
if
0
:
# old code still to be ported from ProfileMode
...
...
theano/gof/env.py
浏览文件 @
bb096349
...
@@ -129,6 +129,7 @@ class Env(utils.object2):
...
@@ -129,6 +129,7 @@ class Env(utils.object2):
self
.
node_locks
=
{}
self
.
node_locks
=
{}
self
.
variable_locks
=
{}
self
.
variable_locks
=
{}
self
.
profile
=
None
### Setup a Variable ###
### Setup a Variable ###
...
...
theano/gof/graph.py
浏览文件 @
bb096349
...
@@ -567,7 +567,7 @@ def clone(i, o, copy_inputs = True):
...
@@ -567,7 +567,7 @@ def clone(i, o, copy_inputs = True):
:type o: list
:type o: list
:param o: output L{Variable}s
:param o: output L{Variable}s
:type copy_inputs: bool
:type copy_inputs: bool
:param copy_inputs: if True, the inputs will be copied (defaults to
Fals
e)
:param copy_inputs: if True, the inputs will be copied (defaults to
Tru
e)
Returns the inputs and outputs of that copy.
Returns the inputs and outputs of that copy.
"""
"""
...
...
theano/gof/opt.py
浏览文件 @
bb096349
...
@@ -75,7 +75,7 @@ class Optimizer(object):
...
@@ -75,7 +75,7 @@ class Optimizer(object):
opt.apply(env)
opt.apply(env)
"""
"""
self
.
add_requirements
(
env
)
self
.
add_requirements
(
env
)
self
.
apply
(
env
,
*
args
,
**
kwargs
)
return
self
.
apply
(
env
,
*
args
,
**
kwargs
)
def
__call__
(
self
,
env
):
def
__call__
(
self
,
env
):
"""WRITEME
"""WRITEME
...
@@ -98,6 +98,12 @@ class Optimizer(object):
...
@@ -98,6 +98,12 @@ class Optimizer(object):
print
>>
stream
,
"
%
s
%
s
%
s id=
%
i"
%
(
print
>>
stream
,
"
%
s
%
s
%
s id=
%
i"
%
(
(
' '
*
level
),
self
.
__class__
.
__name__
,
name
,
id
(
self
))
(
' '
*
level
),
self
.
__class__
.
__name__
,
name
,
id
(
self
))
def
print_profile
(
self
,
prof
):
if
prof
is
not
None
:
raise
NotImplementedError
(
"The function print_profile must be overrided if the"
" optimizer return profiling information."
)
class
FromFunctionOptimizer
(
Optimizer
):
class
FromFunctionOptimizer
(
Optimizer
):
"""WRITEME"""
"""WRITEME"""
...
@@ -154,12 +160,16 @@ class SeqOptimizer(Optimizer, list):
...
@@ -154,12 +160,16 @@ class SeqOptimizer(Optimizer, list):
Applies each L{Optimizer} in self in turn.
Applies each L{Optimizer} in self in turn.
"""
"""
l
=
[]
l
=
[]
if
env
.
profile
:
validate_before
=
env
.
profile
.
validate_time
nb_node_before
=
len
(
env
.
nodes
)
nb_node_before
=
len
(
env
.
nodes
)
sub_profs
=
[]
for
optimizer
in
self
:
for
optimizer
in
self
:
try
:
try
:
t0
=
time
.
time
()
t0
=
time
.
time
()
optimizer
.
optimize
(
env
)
sub_prof
=
optimizer
.
optimize
(
env
)
l
.
append
(
float
(
time
.
time
()
-
t0
))
l
.
append
(
float
(
time
.
time
()
-
t0
))
sub_profs
.
append
(
sub_prof
)
except
AssertionError
:
except
AssertionError
:
# do not catch Assertion failures
# do not catch Assertion failures
raise
raise
...
@@ -169,12 +179,14 @@ class SeqOptimizer(Optimizer, list):
...
@@ -169,12 +179,14 @@ class SeqOptimizer(Optimizer, list):
continue
continue
else
:
else
:
raise
raise
if
config
.
time_seq_optimizer
:
if
config
.
time_seq_optimizer
:
print
"SeqOptimizer"
,
print
"SeqOptimizer"
,
if
hasattr
(
self
,
"name"
):
print
self
.
name
,
if
hasattr
(
self
,
"name"
):
print
self
.
name
,
elif
hasattr
(
self
,
"__name__"
):
print
self
.
__name__
,
elif
hasattr
(
self
,
"__name__"
):
print
self
.
__name__
,
print
" time
%.3
fs for
%
d/
%
d nodes before/after optimization"
%
(
sum
(
l
),
nb_node_before
,
len
(
env
.
nodes
))
print
" time
%.3
fs for
%
d/
%
d nodes before/after optimization"
%
(
sum
(
l
),
nb_node_before
,
len
(
env
.
nodes
))
print
" time
%.3
fs for validate "
%
(
env
.
profile
.
validate_time
-
validate_before
)
ll
=
[]
ll
=
[]
for
opt
in
self
:
for
opt
in
self
:
if
hasattr
(
opt
,
"__name__"
):
if
hasattr
(
opt
,
"__name__"
):
...
@@ -191,6 +203,12 @@ class SeqOptimizer(Optimizer, list):
...
@@ -191,6 +203,12 @@ class SeqOptimizer(Optimizer, list):
for
(
t
,
opt
)
in
lll
[::
-
1
]:
for
(
t
,
opt
)
in
lll
[::
-
1
]:
print
'
%.6
fs -
%
s'
%
(
t
,
opt
)
print
'
%.6
fs -
%
s'
%
(
t
,
opt
)
print
print
if
env
.
profile
:
validate_time
=
env
.
profile
.
validate_time
-
validate_before
else
:
validate_time
=
None
return
(
self
,
l
,
validate_time
,
nb_node_before
,
len
(
env
.
nodes
),
sub_profs
)
def
__eq__
(
self
,
other
):
def
__eq__
(
self
,
other
):
#added to override the list's __eq__ implementation
#added to override the list's __eq__ implementation
...
@@ -216,6 +234,115 @@ class SeqOptimizer(Optimizer, list):
...
@@ -216,6 +234,115 @@ class SeqOptimizer(Optimizer, list):
for
opt
in
self
:
for
opt
in
self
:
opt
.
print_summary
(
stream
,
level
=
(
level
+
2
),
depth
=
depth
)
opt
.
print_summary
(
stream
,
level
=
(
level
+
2
),
depth
=
depth
)
@staticmethod
def
print_profile
(
stream
,
prof
,
level
=
0
):
(
opts
,
prof
,
validate_time
,
nb_node_before
,
nb_node_after
,
sub_profs
)
=
prof
blanc
=
(
' '
*
level
)
print
>>
stream
,
blanc
,
"SeqOptimizer"
,
if
hasattr
(
opts
,
"name"
):
print
>>
stream
,
blanc
,
opts
.
name
,
elif
hasattr
(
opts
,
"__name__"
):
print
>>
stream
,
blanc
,
opts
.
__name__
,
print
>>
stream
,
(
" time
%.3
fs for
%
d/
%
d nodes"
" before/after optimization"
%
(
sum
(
prof
),
nb_node_before
,
nb_node_after
))
print
>>
stream
,
blanc
,
"
%.3
fs for env.validate()"
%
(
validate_time
)
if
level
==
0
:
print
>>
stream
,
blanc
,
" time - (name, class, index)"
ll
=
[]
for
opt
in
opts
:
if
hasattr
(
opt
,
"__name__"
):
ll
.
append
((
opt
.
__name__
,
opt
.
__class__
.
__name__
,
opts
.
index
(
opt
)))
else
:
ll
.
append
((
opt
.
name
,
opt
.
__class__
.
__name__
,
opts
.
index
(
opt
)))
lll
=
zip
(
prof
,
ll
)
def
cmp
(
a
,
b
):
if
a
[
0
]
==
b
[
0
]:
return
0
elif
a
[
0
]
<
b
[
0
]:
return
-
1
return
1
lll
.
sort
(
cmp
)
for
(
t
,
opt
)
in
lll
[::
-
1
]:
#if t < 1:
# continue
print
>>
stream
,
blanc
,
'
%.6
fs -
%
s'
%
(
t
,
opt
)
if
sub_profs
[
opt
[
-
1
]]:
opts
[
opt
[
-
1
]]
.
print_profile
(
stream
,
sub_profs
[
opt
[
-
1
]],
level
=
level
+
1
)
print
>>
stream
@staticmethod
def
merge_profile
(
prof1
,
prof2
):
"""
Merge 2 profiles returned by this cass apply() fct.
"""
new_t
=
[]
new_l
=
[]
new_sub_profile
=
[]
#merge common(same object) opt
for
l
in
set
(
prof1
[
0
])
.
intersection
(
set
(
prof2
[
0
])):
idx1
=
prof1
[
0
]
.
index
(
l
)
idx2
=
prof2
[
0
]
.
index
(
l
)
new_t
.
append
(
prof1
[
1
][
idx1
]
+
prof2
[
1
][
idx2
])
new_l
.
append
(
l
)
if
hasattr
(
l
,
'merge_profile'
):
assert
len
(
prof1
[
5
][
idx1
])
==
len
(
prof2
[
5
][
idx1
])
new_sub_profile
.
append
(
l
.
merge_profile
(
prof1
[
5
][
idx1
],
prof2
[
5
][
idx2
]))
else
:
new_sub_profile
.
append
(
None
)
# merge not common opt
import
StringIO
for
l
in
set
(
prof1
[
0
])
.
symmetric_difference
(
set
(
prof2
[
0
])):
#The set trick above only work for the same object optimization
#It don't work for equivalent optimization.
#So we try to merge equivalent optimization here.
new_l_names
=
[
o
.
name
for
o
in
new_l
]
if
l
.
name
in
new_l_names
:
idx
=
new_l_names
.
index
(
l
.
name
)
io1
=
StringIO
.
StringIO
()
io2
=
StringIO
.
StringIO
()
l
.
print_summary
(
io1
)
new_l
[
idx
]
.
print_summary
(
io2
)
if
io1
.
read
()
==
io2
.
read
():
if
l
in
prof1
[
0
]:
p
=
prof1
else
:
p
=
prof2
new_t
[
idx
]
+=
p
[
1
][
p
[
0
]
.
index
(
l
)]
if
hasattr
(
l
,
'merge_profile'
):
assert
len
(
p
[
5
][
p
[
0
]
.
index
(
l
)])
==
len
(
new_sub_profile
[
idx
])
new_sub_profile
[
idx
]
=
l
.
merge_profile
(
new_sub_profile
[
idx
],
p
[
5
][
p
[
0
]
.
index
(
l
)])
else
:
new_sub_profile
[
idx
]
=
None
continue
if
l
in
prof1
[
0
]:
p
=
prof1
else
:
p
=
prof2
new_t
.
append
(
p
[
1
][
p
[
0
]
.
index
(
l
)])
idx
=
p
[
0
]
.
index
(
l
)
new_l
.
append
(
l
)
new_sub_profile
.
append
(
p
[
5
][
idx
])
new_opt
=
SeqOptimizer
(
*
new_l
)
assert
set
(
prof1
[
0
])
.
issubset
(
set
(
new_l
))
# assert set(prof2[0]).issubset(set(new_l))
assert
len
(
new_t
)
==
len
(
new_opt
)
==
len
(
new_sub_profile
)
return
(
new_opt
,
new_t
,
prof1
[
2
]
+
prof2
[
2
],
-
1
,
-
1
,
new_sub_profile
)
class
_metadict
:
class
_metadict
:
"""WRITEME"""
"""WRITEME"""
...
@@ -500,7 +627,9 @@ def MergeOptMerge(opt):
...
@@ -500,7 +627,9 @@ def MergeOptMerge(opt):
opt introduced additional similarities.
opt introduced additional similarities.
"""
"""
merger
=
merge_optimizer
merger
=
merge_optimizer
return
SeqOptimizer
([
merger
,
opt
,
merger
])
opt
=
SeqOptimizer
([
merger
,
opt
,
merger
])
opt
.
name
=
"MergeOptMerge"
return
opt
def
pre_constant_merge
(
vars
):
def
pre_constant_merge
(
vars
):
...
@@ -1314,7 +1443,12 @@ class EquilibriumOptimizer(NavigatorOptimizer):
...
@@ -1314,7 +1443,12 @@ class EquilibriumOptimizer(NavigatorOptimizer):
loop_timing
=
[]
loop_timing
=
[]
global_opt_timing
=
[]
global_opt_timing
=
[]
time_lopts
=
{}
io_toposort_timing
=
[]
nb_nodes
=
[]
nb_nodes
=
[]
for
lopt
in
self
.
local_optimizers
:
process_count
.
setdefault
(
lopt
,
0
)
time_lopts
.
setdefault
(
lopt
,
0
)
while
changed
and
not
max_use_abort
:
while
changed
and
not
max_use_abort
:
t0
=
time
.
time
()
t0
=
time
.
time
()
...
@@ -1333,7 +1467,9 @@ class EquilibriumOptimizer(NavigatorOptimizer):
...
@@ -1333,7 +1467,9 @@ class EquilibriumOptimizer(NavigatorOptimizer):
for
node
in
start_from
:
for
node
in
start_from
:
assert
node
in
env
.
outputs
assert
node
in
env
.
outputs
topo_t0
=
time
.
time
()
q
=
deque
(
graph
.
io_toposort
(
env
.
inputs
,
start_from
))
q
=
deque
(
graph
.
io_toposort
(
env
.
inputs
,
start_from
))
io_toposort_timing
.
append
(
time
.
time
()
-
topo_t0
)
nb_nodes
.
append
(
len
(
q
))
nb_nodes
.
append
(
len
(
q
))
max_nb_nodes
=
max
(
max_nb_nodes
,
len
(
q
))
max_nb_nodes
=
max
(
max_nb_nodes
,
len
(
q
))
...
@@ -1355,9 +1491,11 @@ class EquilibriumOptimizer(NavigatorOptimizer):
...
@@ -1355,9 +1491,11 @@ class EquilibriumOptimizer(NavigatorOptimizer):
while
q
:
while
q
:
node
=
q
.
pop
()
node
=
q
.
pop
()
current_node
=
node
current_node
=
node
for
lopt
in
self
.
local_optimizers
:
for
lopt
in
self
.
local_optimizers
:
process_count
.
setdefault
(
lopt
,
0
)
t_lopt
=
time
.
time
(
)
lopt_change
=
self
.
process_node
(
env
,
node
,
lopt
)
lopt_change
=
self
.
process_node
(
env
,
node
,
lopt
)
time_lopts
[
lopt
]
+=
time
.
time
()
-
t_lopt
if
lopt_change
:
if
lopt_change
:
process_count
[
lopt
]
+=
1
process_count
[
lopt
]
+=
1
changed
=
True
changed
=
True
...
@@ -1402,6 +1540,9 @@ class EquilibriumOptimizer(NavigatorOptimizer):
...
@@ -1402,6 +1540,9 @@ class EquilibriumOptimizer(NavigatorOptimizer):
print
'
%
d -
%
s'
%
(
count
,
opt
)
print
'
%
d -
%
s'
%
(
count
,
opt
)
print
print
return
(
self
,
loop_timing
,
process_count
,
max_nb_nodes
,
global_opt_timing
,
nb_nodes
,
time_lopts
,
io_toposort_timing
)
def
print_summary
(
self
,
stream
=
sys
.
stdout
,
level
=
0
,
depth
=-
1
):
def
print_summary
(
self
,
stream
=
sys
.
stdout
,
level
=
0
,
depth
=-
1
):
name
=
getattr
(
self
,
'name'
,
None
)
name
=
getattr
(
self
,
'name'
,
None
)
print
>>
stream
,
"
%
s
%
s
%
s id=
%
i"
%
(
print
>>
stream
,
"
%
s
%
s
%
s id=
%
i"
%
(
...
@@ -1411,6 +1552,95 @@ class EquilibriumOptimizer(NavigatorOptimizer):
...
@@ -1411,6 +1552,95 @@ class EquilibriumOptimizer(NavigatorOptimizer):
lopt
.
print_summary
(
stream
,
level
=
(
level
+
2
),
lopt
.
print_summary
(
stream
,
level
=
(
level
+
2
),
depth
=
(
depth
-
1
))
depth
=
(
depth
-
1
))
@staticmethod
def
print_profile
(
stream
,
prof
,
level
=
0
):
(
opt
,
loop_timing
,
process_count
,
max_nb_nodes
,
global_opt_timing
,
nb_nodes
,
time_lopts
,
io_toposort_timing
)
=
prof
blanc
=
(
' '
*
level
)
print
>>
stream
,
blanc
,
"EquilibriumOptimizer"
,
print
>>
stream
,
blanc
,
getattr
(
opt
,
"name"
,
getattr
(
opt
,
"__name__"
,
""
))
print
>>
stream
,
blanc
,
" time
%.3
fs for
%
d passes,
%
d nodes max"
%
(
sum
(
loop_timing
),
len
(
loop_timing
),
max_nb_nodes
)
print
>>
stream
,
blanc
,
" time io_toposort
%.3
fs"
%
sum
(
io_toposort_timing
)
for
i
in
range
(
len
(
loop_timing
)):
print
>>
stream
,
blanc
,
(
'
%
d -
%.3
fs (
%.3
fs in global opts, '
'
%.3
fs io_toposort) -
%
d nodes'
%
(
i
,
loop_timing
[
i
],
global_opt_timing
[
i
],
io_toposort_timing
[
i
],
nb_nodes
[
i
]))
count_opt
=
[]
for
opt
,
count
in
process_count
.
iteritems
():
if
count
>
0
:
count_opt
.
append
((
time_lopts
[
opt
],
count
,
opt
))
if
count_opt
:
print
>>
stream
,
blanc
,
'times applied - optimizer (only those applied):'
count_opt
.
sort
()
for
(
t
,
count
,
opt
)
in
count_opt
[::
-
1
]:
print
>>
stream
,
blanc
,
'
%.3
fs -
%
d -
%
s'
%
(
t
,
count
,
opt
)
print
>>
stream
@staticmethod
def
merge_profile
(
prof1
,
prof2
):
#(opt, loop_timing, process_count, max_nb_nodes,
# global_opt_timing, nb_nodes, time_lopts, io_toposort_timing) = prof1
local_optimizers
=
set
(
prof1
[
0
]
.
local_optimizers
)
.
union
(
prof2
[
0
]
.
local_optimizers
)
global_optimizers
=
set
(
prof1
[
0
]
.
global_optimizers
)
.
union
(
prof2
[
0
]
.
global_optimizers
)
new_opt
=
EquilibriumOptimizer
(
local_optimizers
.
union
(
global_optimizers
),
max_use_ratio
=
1
)
def
merge_list
(
l1
,
l2
):
l
=
copy
.
copy
(
l1
)
for
idx
,
nb
in
enumerate
(
l2
):
if
idx
<
len
(
l
):
l
[
idx
]
+=
nb
else
:
l
.
append
(
nb
)
return
l
loop_timing
=
merge_list
(
prof1
[
1
],
prof2
[
1
])
process_count
=
prof1
[
2
]
.
copy
()
for
process
,
count
in
prof2
[
2
]
.
iteritems
():
if
process
in
process_count
:
process_count
[
process
]
+=
count
else
:
process_count
[
process
]
=
count
max_nb_nodes
=
max
(
prof1
[
3
],
prof2
[
3
])
global_opt_timing
=
merge_list
(
prof1
[
4
],
prof2
[
4
])
nb_nodes
=
merge_list
(
prof1
[
5
],
prof2
[
5
])
time_lopts
=
prof1
[
6
]
.
copy
()
for
opt
,
t
in
prof2
[
6
]
.
iteritems
():
if
opt
in
time_lopts
:
time_lopts
[
opt
]
+=
t
else
:
time_lopts
[
opt
]
=
t
io_toposort_timing
=
merge_list
(
prof1
[
7
],
prof2
[
7
])
assert
(
len
(
loop_timing
)
==
len
(
global_opt_timing
)
==
len
(
io_toposort_timing
)
==
len
(
nb_nodes
))
assert
len
(
loop_timing
)
==
max
(
len
(
prof1
[
1
]),
len
(
prof2
[
1
]))
return
(
new_opt
,
loop_timing
,
process_count
,
max_nb_nodes
,
global_opt_timing
,
nb_nodes
,
time_lopts
,
io_toposort_timing
)
#################
#################
### Utilities ###
### Utilities ###
...
...
theano/gof/optdb.py
浏览文件 @
bb096349
...
@@ -229,7 +229,10 @@ class SequenceDB(DB):
...
@@ -229,7 +229,10 @@ class SequenceDB(DB):
opts
=
[
o
for
o
in
opts
if
self
.
__position__
[
o
.
name
]
<
position_cutoff
]
opts
=
[
o
for
o
in
opts
if
self
.
__position__
[
o
.
name
]
<
position_cutoff
]
opts
.
sort
(
key
=
lambda
obj
:
self
.
__position__
[
obj
.
name
])
opts
.
sort
(
key
=
lambda
obj
:
self
.
__position__
[
obj
.
name
])
return
opt
.
SeqOptimizer
(
opts
,
failure_callback
=
self
.
failure_callback
)
ret
=
opt
.
SeqOptimizer
(
opts
,
failure_callback
=
self
.
failure_callback
)
if
hasattr
(
tags
[
0
],
'name'
):
ret
.
name
=
tags
[
0
]
.
name
return
ret
def
print_summary
(
self
,
stream
=
sys
.
stdout
):
def
print_summary
(
self
,
stream
=
sys
.
stdout
):
print
>>
stream
,
"SequenceDB (id
%
i)"
%
id
(
self
)
print
>>
stream
,
"SequenceDB (id
%
i)"
%
id
(
self
)
...
...
theano/gof/toolbox.py
浏览文件 @
bb096349
import
sys
import
sys
import
time
from
theano.gof.python25
import
partial
from
theano.gof.python25
import
partial
...
@@ -71,10 +72,20 @@ class History:
...
@@ -71,10 +72,20 @@ class History:
class
Validator
:
class
Validator
:
def
on_attach
(
self
,
env
):
def
on_attach
(
self
,
env
):
if
hasattr
(
env
,
'validate'
):
for
attr
in
(
'validate'
,
'validate_time'
):
raise
AlreadyThere
(
"Validator feature is already present or in"
if
hasattr
(
env
,
attr
):
" conflict with another plugin."
)
raise
AlreadyThere
(
"Validator feature is already present or in"
env
.
validate
=
lambda
:
env
.
execute_callbacks
(
'validate'
)
" conflict with another plugin."
)
def
validate
():
t0
=
time
.
time
()
ret
=
env
.
execute_callbacks
(
'validate'
)
t1
=
time
.
time
()
if
env
.
profile
:
env
.
profile
.
validate_time
+=
t1
-
t0
return
ret
env
.
validate
=
validate
def
consistent
():
def
consistent
():
try
:
try
:
...
...
theano/gof/vm.py
浏览文件 @
bb096349
...
@@ -17,6 +17,9 @@ logger = logging.getLogger(__name__)
...
@@ -17,6 +17,9 @@ logger = logging.getLogger(__name__)
AddConfigVar
(
'profile'
,
AddConfigVar
(
'profile'
,
"If VM should collect profile information"
,
"If VM should collect profile information"
,
BoolParam
(
False
))
BoolParam
(
False
))
AddConfigVar
(
'profile_optimizer'
,
"If VM should collect optimizer profile information"
,
BoolParam
(
False
))
raise_with_op
=
link
.
raise_with_op
raise_with_op
=
link
.
raise_with_op
...
...
theano/sandbox/cuda/tests/test_driver.py
浏览文件 @
bb096349
...
@@ -24,7 +24,8 @@ def test_nvidia_driver1():
...
@@ -24,7 +24,8 @@ def test_nvidia_driver1():
"""
"""
a
=
numpy
.
random
.
rand
(
10000
)
.
astype
(
"float32"
)
a
=
numpy
.
random
.
rand
(
10000
)
.
astype
(
"float32"
)
A
=
cuda
.
shared_constructor
(
a
)
A
=
cuda
.
shared_constructor
(
a
)
f
=
theano
.
function
(
inputs
=
[],
outputs
=
A
.
sum
(),
mode
=
mode_with_gpu
)
f
=
theano
.
function
(
inputs
=
[],
outputs
=
A
.
sum
(),
mode
=
mode_with_gpu
,
profile
=
False
)
topo
=
f
.
maker
.
env
.
toposort
()
topo
=
f
.
maker
.
env
.
toposort
()
assert
len
(
topo
)
==
2
assert
len
(
topo
)
==
2
assert
sum
(
isinstance
(
node
.
op
,
B
.
GpuSum
)
for
node
in
topo
)
==
1
assert
sum
(
isinstance
(
node
.
op
,
B
.
GpuSum
)
for
node
in
topo
)
==
1
...
@@ -56,7 +57,8 @@ def test_nvidia_driver3():
...
@@ -56,7 +57,8 @@ def test_nvidia_driver3():
of the gpu device
of the gpu device
"""
"""
var
=
cuda
.
fvector
()
var
=
cuda
.
fvector
()
f
=
theano
.
function
([
var
],
var
+
1
,
mode
=
mode_with_gpu
)
f
=
theano
.
function
([
var
],
var
+
1
,
mode
=
mode_with_gpu
,
profile
=
False
)
topo
=
f
.
maker
.
env
.
toposort
()
topo
=
f
.
maker
.
env
.
toposort
()
assert
any
([
isinstance
(
node
.
op
,
cuda
.
GpuElemwise
)
for
node
in
topo
])
assert
any
([
isinstance
(
node
.
op
,
cuda
.
GpuElemwise
)
for
node
in
topo
])
assert
theano
.
sandbox
.
cuda
.
use
.
device_number
is
not
None
assert
theano
.
sandbox
.
cuda
.
use
.
device_number
is
not
None
...
...
theano/scan_module/scan.py
浏览文件 @
bb096349
...
@@ -794,7 +794,8 @@ def scan(fn,
...
@@ -794,7 +794,8 @@ def scan(fn,
updates
=
updates
,
updates
=
updates
,
mode
=
compile
.
mode
.
Mode
(
linker
=
'py'
,
mode
=
compile
.
mode
.
Mode
(
linker
=
'py'
,
optimizer
=
None
),
optimizer
=
None
),
on_unused_input
=
'ignore'
)
on_unused_input
=
'ignore'
,
profile
=
False
)
##
##
### Step 5. Re-arange inputs of scan into a more strict order
### Step 5. Re-arange inputs of scan into a more strict order
...
...
编写
预览
Markdown
格式
0%
重试
或
添加新文件
添加附件
取消
您添加了
0
人
到此讨论。请谨慎行事。
请先完成此评论的编辑!
取消
请
注册
或者
登录
后发表评论