Skip to content
项目
群组
代码片段
帮助
当前项目
正在载入...
登录 / 注册
切换导航面板
P
pytensor
项目
项目
详情
活动
周期分析
仓库
仓库
文件
提交
分支
标签
贡献者
图表
比较
统计图
议题
0
议题
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
CI / CD
CI / CD
流水线
作业
日程
统计图
Wiki
Wiki
代码片段
代码片段
成员
成员
折叠边栏
关闭边栏
活动
图像
聊天
创建新问题
作业
提交
问题看板
Open sidebar
testgroup
pytensor
Commits
33a899b2
提交
33a899b2
authored
8月 14, 2015
作者:
Iban Harlouchet
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
numpydoc for theano/compile/profiling.py
上级
8d57251e
隐藏空白字符变更
内嵌
并排
正在显示
1 个修改的文件
包含
93 行增加
和
40 行删除
+93
-40
profiling.py
theano/compile/profiling.py
+93
-40
没有找到文件。
theano/compile/profiling.py
浏览文件 @
33a899b2
"""ProfileStats object for runtime and memory profiling.
"""
ProfileStats object for runtime and memory profiling.
"""
"""
from
__future__
import
print_function
from
__future__
import
print_function
#
#
...
@@ -76,7 +78,9 @@ AddConfigVar('profiling.destination',
...
@@ -76,7 +78,9 @@ AddConfigVar('profiling.destination',
def
_atexit_print_fn
():
def
_atexit_print_fn
():
"""Print ProfileStat objects in _atexit_print_list to _atexit_print_file
"""
Print ProfileStat objects in _atexit_print_list to _atexit_print_file.
"""
"""
to_sum
=
[]
to_sum
=
[]
...
@@ -135,6 +139,16 @@ class ProfileStats(object):
...
@@ -135,6 +139,16 @@ class ProfileStats(object):
"""
"""
Object to store runtime and memory profiling information for all of
Object to store runtime and memory profiling information for all of
Theano's operations: compilation, optimization, execution.
Theano's operations: compilation, optimization, execution.
Parameters
----------
atexit_print : bool
True means that this object will be printed to stderr (using .summary())
at the end of the program.
**kwargs : misc initializers
These should (but need not) match the names of the class vars declared
in this class.
"""
"""
#
#
...
@@ -212,12 +226,6 @@ class ProfileStats(object):
...
@@ -212,12 +226,6 @@ class ProfileStats(object):
# param is called flag_time_thunks because most other attributes with time
# param is called flag_time_thunks because most other attributes with time
# in the name are times *of* something, rather than configuration flags.
# in the name are times *of* something, rather than configuration flags.
def
__init__
(
self
,
atexit_print
=
True
,
flag_time_thunks
=
None
,
**
kwargs
):
def
__init__
(
self
,
atexit_print
=
True
,
flag_time_thunks
=
None
,
**
kwargs
):
"""
atexit_print - bool. True means that this object will be printed to
stderr (using .summary()) at the end of the program.
**kwargs - misc initializers. These should (but need not) match the
names of the class vars declared in this class.
"""
if
(
hasattr
(
theano
,
'sandbox'
)
and
if
(
hasattr
(
theano
,
'sandbox'
)
and
hasattr
(
theano
.
sandbox
,
'cuda'
)
and
hasattr
(
theano
.
sandbox
,
'cuda'
)
and
theano
.
sandbox
.
cuda
.
cuda_enabled
):
theano
.
sandbox
.
cuda
.
cuda_enabled
):
...
@@ -250,7 +258,10 @@ class ProfileStats(object):
...
@@ -250,7 +258,10 @@ class ProfileStats(object):
_atexit_registered
=
True
_atexit_registered
=
True
def
class_time
(
self
):
def
class_time
(
self
):
"""dict op -> total time on thunks"""
"""
dict op -> total time on thunks
"""
# timing is stored by node, we compute timing by class on demand
# timing is stored by node, we compute timing by class on demand
rval
=
{}
rval
=
{}
for
node
,
t
in
iteritems
(
self
.
apply_time
):
for
node
,
t
in
iteritems
(
self
.
apply_time
):
...
@@ -260,7 +271,10 @@ class ProfileStats(object):
...
@@ -260,7 +271,10 @@ class ProfileStats(object):
return
rval
return
rval
def
class_callcount
(
self
):
def
class_callcount
(
self
):
"""dict op -> total number of thunk calls"""
"""
dict op -> total number of thunk calls
"""
# timing is stored by node, we compute timing by class on demand
# timing is stored by node, we compute timing by class on demand
rval
=
{}
rval
=
{}
for
node
,
count
in
iteritems
(
self
.
apply_callcount
):
for
node
,
count
in
iteritems
(
self
.
apply_callcount
):
...
@@ -270,7 +284,10 @@ class ProfileStats(object):
...
@@ -270,7 +284,10 @@ class ProfileStats(object):
return
rval
return
rval
def
class_nodes
(
self
):
def
class_nodes
(
self
):
"""dict op -> total number of nodes"""
"""
dict op -> total number of nodes
"""
# timing is stored by node, we compute timing by class on demand
# timing is stored by node, we compute timing by class on demand
rval
=
{}
rval
=
{}
for
node
,
count
in
iteritems
(
self
.
apply_callcount
):
for
node
,
count
in
iteritems
(
self
.
apply_callcount
):
...
@@ -280,7 +297,10 @@ class ProfileStats(object):
...
@@ -280,7 +297,10 @@ class ProfileStats(object):
return
rval
return
rval
def
class_impl
(
self
):
def
class_impl
(
self
):
"""dict op -> total number of nodes"""
"""
dict op -> total number of nodes
"""
# timing is stored by node, we compute timing by class on demand
# timing is stored by node, we compute timing by class on demand
rval
=
{}
rval
=
{}
for
node
in
self
.
apply_callcount
:
for
node
in
self
.
apply_callcount
:
...
@@ -295,7 +315,10 @@ class ProfileStats(object):
...
@@ -295,7 +315,10 @@ class ProfileStats(object):
return
rval
return
rval
def
op_time
(
self
):
def
op_time
(
self
):
"""dict op -> total time on thunks"""
"""
dict op -> total time on thunks
"""
# timing is stored by node, we compute timing by Op on demand
# timing is stored by node, we compute timing by Op on demand
rval
=
{}
rval
=
{}
for
node
,
t
in
iteritems
(
self
.
apply_time
):
for
node
,
t
in
iteritems
(
self
.
apply_time
):
...
@@ -304,7 +327,10 @@ class ProfileStats(object):
...
@@ -304,7 +327,10 @@ class ProfileStats(object):
return
rval
return
rval
def
fill_node_total_time
(
self
,
node
,
total_times
):
def
fill_node_total_time
(
self
,
node
,
total_times
):
"""node -> fill total time icluding its parents (returns nothing)"""
"""
node -> fill total time including its parents (returns nothing)
"""
# timing is stored by node, we compute total time on demand
# timing is stored by node, we compute total time on demand
total
=
self
.
apply_time
[
node
]
total
=
self
.
apply_time
[
node
]
for
parent
in
node
.
get_parents
():
for
parent
in
node
.
get_parents
():
...
@@ -315,7 +341,10 @@ class ProfileStats(object):
...
@@ -315,7 +341,10 @@ class ProfileStats(object):
total_times
[
node
]
=
total
total_times
[
node
]
=
total
def
compute_total_times
(
self
):
def
compute_total_times
(
self
):
"""dict op -> total time icluding the time for parents"""
"""
dict op -> total time including the time for parents
"""
rval
=
{}
rval
=
{}
for
node
in
self
.
apply_time
:
for
node
in
self
.
apply_time
:
if
node
not
in
rval
:
if
node
not
in
rval
:
...
@@ -323,7 +352,10 @@ class ProfileStats(object):
...
@@ -323,7 +352,10 @@ class ProfileStats(object):
return
rval
return
rval
def
op_callcount
(
self
):
def
op_callcount
(
self
):
"""dict op -> total number of thunk calls"""
"""
dict op -> total number of thunk calls
"""
# timing is stored by node, we compute timing by Op on demand
# timing is stored by node, we compute timing by Op on demand
rval
=
{}
rval
=
{}
for
node
,
count
in
iteritems
(
self
.
apply_callcount
):
for
node
,
count
in
iteritems
(
self
.
apply_callcount
):
...
@@ -332,7 +364,10 @@ class ProfileStats(object):
...
@@ -332,7 +364,10 @@ class ProfileStats(object):
return
rval
return
rval
def
op_nodes
(
self
):
def
op_nodes
(
self
):
"""dict op -> total number of nodes"""
"""
dict op -> total number of nodes
"""
# timing is stored by node, we compute timing by Op on demand
# timing is stored by node, we compute timing by Op on demand
rval
=
{}
rval
=
{}
for
node
,
count
in
iteritems
(
self
.
apply_callcount
):
for
node
,
count
in
iteritems
(
self
.
apply_callcount
):
...
@@ -341,7 +376,10 @@ class ProfileStats(object):
...
@@ -341,7 +376,10 @@ class ProfileStats(object):
return
rval
return
rval
def
op_impl
(
self
):
def
op_impl
(
self
):
"""dict op -> 'C' or 'Py' depending how the op is implemented"""
"""
dict op -> 'C' or 'Py' depending how the op is implemented
"""
# timing is stored by node, we compute timing by Op on demand
# timing is stored by node, we compute timing by Op on demand
rval
=
{}
rval
=
{}
for
node
in
self
.
apply_callcount
:
for
node
in
self
.
apply_callcount
:
...
@@ -711,21 +749,23 @@ class ProfileStats(object):
...
@@ -711,21 +749,23 @@ class ProfileStats(object):
def
count_running_memory
(
order
,
fgraph
,
nodes_mem
):
def
count_running_memory
(
order
,
fgraph
,
nodes_mem
):
"""
"""
Calculate memory with specific node order
Calculate memory with specific node order.
Return a list including the following values
Return a list including the following values
1. node_memory_size
1. node_memory_size
Sum of the size of all variables that actually allocate
Sum of the size of all variables that actually allocate
memory (excluding views, and inplace)
;
memory (excluding views, and inplace)
.
2. running_memory_size
2.
running_memory_size
The memory allocated after the current apply node
The memory allocated after the current apply node
.
3. running_max_memory_size
3.
running_max_memory_size
The maximum of running_memory_size during the function
The maximum of running_memory_size during the function
.
4. node_memory_saved_by_view
4. node_memory_saved_by_view
The sum of memory saved by returning view instead of new
The sum of memory saved by returning view instead of new
allocation
allocation
.
5. node_memory_saved_by_inplace
5. node_memory_saved_by_inplace
The sum of memory saved by reusing the input instead of
The sum of memory saved by reusing the input instead of
new allocation
new allocation.
"""
"""
from
theano.sandbox.cuda
import
CudaNdarrayType
from
theano.sandbox.cuda
import
CudaNdarrayType
# Initial Mem info values [CPU, GPU]
# Initial Mem info values [CPU, GPU]
...
@@ -874,10 +914,14 @@ class ProfileStats(object):
...
@@ -874,10 +914,14 @@ class ProfileStats(object):
def
min_memory_generator
(
executable_nodes
,
viewed_by
,
view_of
):
def
min_memory_generator
(
executable_nodes
,
viewed_by
,
view_of
):
"""
"""
Generate all valid node order from node_list
Generate all valid node order from node_list and compute its
and compute its memory peak.
memory peak.
Parameters
----------
executable_nodes
Set of executable nodes.
:param executable_nodes: Set of executable nodes
"""
"""
global
mem_count
,
mem_bound
,
max_mem_count
global
mem_count
,
mem_bound
,
max_mem_count
...
@@ -1255,9 +1299,13 @@ if False: # old code still to be ported from ProfileMode
...
@@ -1255,9 +1299,13 @@ if False: # old code still to be ported from ProfileMode
"""
"""
Print a readable summary of the stats.
Print a readable summary of the stats.
param: n_apply_to_print the number of apply to print. Default 15.
Parameters
----------
n_apply_to_print
The number of apply to print. Default 15.
n_ops_to_print
The number of ops to print. Default 20.
param: n_ops_to_print the number of ops to print. Default 20.
"""
"""
local_time
=
sum
(
self
.
apply_time
.
values
())
local_time
=
sum
(
self
.
apply_time
.
values
())
...
@@ -1483,11 +1531,13 @@ if False: # old code still to be ported from ProfileMode
...
@@ -1483,11 +1531,13 @@ if False: # old code still to be ported from ProfileMode
There is a hack with the Op-wise summary. Go see it if you want to know
There is a hack with the Op-wise summary. Go see it if you want to know
more.
more.
:param n_apply_to_print: the number of apply to print. Default 15, or
Parameters
n_ops_to_print flag.
----------
n_apply_to_print
The number of apply to print. Default 15, or n_ops_to_print flag.
n_ops_to_print
The number of ops to print. Default 20, or n_apply_to_print flag.
:param n_ops_to_print: the number of ops to print. Default 20, or
n_apply_to_print flag.
"""
"""
fct_call_time
=
self
.
mode
.
fct_call_time
fct_call_time
=
self
.
mode
.
fct_call_time
fct_call
=
self
.
mode
.
fct_call
fct_call
=
self
.
mode
.
fct_call
...
@@ -1517,12 +1567,15 @@ if False: # old code still to be ported from ProfileMode
...
@@ -1517,12 +1567,15 @@ if False: # old code still to be ported from ProfileMode
now.
now.
TODO: make comparison with gpu code.
TODO: make comparison with gpu code.
:param other: the other instance of ProfileMode that we want to be
Parameters
compared to.
----------
other
:param n_apply_to_print: the number of apply to print. Default 15.
The other instance of ProfileMode that we want to be compared to.
n_apply_to_print
The number of apply to print. Default 15.
n_ops_to_print
The number of ops to print. Default 20.
:param n_ops_to_print: the number of ops to print. Default 20.
"""
"""
def
diff_dict
(
a_time
,
b_time_
):
def
diff_dict
(
a_time
,
b_time_
):
...
...
编写
预览
Markdown
格式
0%
重试
或
添加新文件
添加附件
取消
您添加了
0
人
到此讨论。请谨慎行事。
请先完成此评论的编辑!
取消
请
注册
或者
登录
后发表评论