Skip to content
项目
群组
代码片段
帮助
当前项目
正在载入...
登录 / 注册
切换导航面板
P
pytensor
项目
项目
详情
活动
周期分析
仓库
仓库
文件
提交
分支
标签
贡献者
图表
比较
统计图
议题
0
议题
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
CI / CD
CI / CD
流水线
作业
日程
统计图
Wiki
Wiki
代码片段
代码片段
成员
成员
折叠边栏
关闭边栏
活动
图像
聊天
创建新问题
作业
提交
问题看板
Open sidebar
testgroup
pytensor
Commits
33a899b2
提交
33a899b2
authored
8月 14, 2015
作者:
Iban Harlouchet
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
numpydoc for theano/compile/profiling.py
上级
8d57251e
隐藏空白字符变更
内嵌
并排
正在显示
1 个修改的文件
包含
93 行增加
和
40 行删除
+93
-40
profiling.py
theano/compile/profiling.py
+93
-40
没有找到文件。
theano/compile/profiling.py
浏览文件 @
33a899b2
"""ProfileStats object for runtime and memory profiling.
"""
ProfileStats object for runtime and memory profiling.
"""
from
__future__
import
print_function
#
...
...
@@ -76,7 +78,9 @@ AddConfigVar('profiling.destination',
def
_atexit_print_fn
():
"""Print ProfileStat objects in _atexit_print_list to _atexit_print_file
"""
Print ProfileStat objects in _atexit_print_list to _atexit_print_file.
"""
to_sum
=
[]
...
...
@@ -135,6 +139,16 @@ class ProfileStats(object):
"""
Object to store runtime and memory profiling information for all of
Theano's operations: compilation, optimization, execution.
Parameters
----------
atexit_print : bool
True means that this object will be printed to stderr (using .summary())
at the end of the program.
**kwargs : misc initializers
These should (but need not) match the names of the class vars declared
in this class.
"""
#
...
...
@@ -212,12 +226,6 @@ class ProfileStats(object):
# param is called flag_time_thunks because most other attributes with time
# in the name are times *of* something, rather than configuration flags.
def
__init__
(
self
,
atexit_print
=
True
,
flag_time_thunks
=
None
,
**
kwargs
):
"""
atexit_print - bool. True means that this object will be printed to
stderr (using .summary()) at the end of the program.
**kwargs - misc initializers. These should (but need not) match the
names of the class vars declared in this class.
"""
if
(
hasattr
(
theano
,
'sandbox'
)
and
hasattr
(
theano
.
sandbox
,
'cuda'
)
and
theano
.
sandbox
.
cuda
.
cuda_enabled
):
...
...
@@ -250,7 +258,10 @@ class ProfileStats(object):
_atexit_registered
=
True
def
class_time
(
self
):
"""dict op -> total time on thunks"""
"""
dict op -> total time on thunks
"""
# timing is stored by node, we compute timing by class on demand
rval
=
{}
for
node
,
t
in
iteritems
(
self
.
apply_time
):
...
...
@@ -260,7 +271,10 @@ class ProfileStats(object):
return
rval
def
class_callcount
(
self
):
"""dict op -> total number of thunk calls"""
"""
dict op -> total number of thunk calls
"""
# timing is stored by node, we compute timing by class on demand
rval
=
{}
for
node
,
count
in
iteritems
(
self
.
apply_callcount
):
...
...
@@ -270,7 +284,10 @@ class ProfileStats(object):
return
rval
def
class_nodes
(
self
):
"""dict op -> total number of nodes"""
"""
dict op -> total number of nodes
"""
# timing is stored by node, we compute timing by class on demand
rval
=
{}
for
node
,
count
in
iteritems
(
self
.
apply_callcount
):
...
...
@@ -280,7 +297,10 @@ class ProfileStats(object):
return
rval
def
class_impl
(
self
):
"""dict op -> total number of nodes"""
"""
dict op -> total number of nodes
"""
# timing is stored by node, we compute timing by class on demand
rval
=
{}
for
node
in
self
.
apply_callcount
:
...
...
@@ -295,7 +315,10 @@ class ProfileStats(object):
return
rval
def
op_time
(
self
):
"""dict op -> total time on thunks"""
"""
dict op -> total time on thunks
"""
# timing is stored by node, we compute timing by Op on demand
rval
=
{}
for
node
,
t
in
iteritems
(
self
.
apply_time
):
...
...
@@ -304,7 +327,10 @@ class ProfileStats(object):
return
rval
def
fill_node_total_time
(
self
,
node
,
total_times
):
"""node -> fill total time icluding its parents (returns nothing)"""
"""
node -> fill total time including its parents (returns nothing)
"""
# timing is stored by node, we compute total time on demand
total
=
self
.
apply_time
[
node
]
for
parent
in
node
.
get_parents
():
...
...
@@ -315,7 +341,10 @@ class ProfileStats(object):
total_times
[
node
]
=
total
def
compute_total_times
(
self
):
"""dict op -> total time icluding the time for parents"""
"""
dict op -> total time including the time for parents
"""
rval
=
{}
for
node
in
self
.
apply_time
:
if
node
not
in
rval
:
...
...
@@ -323,7 +352,10 @@ class ProfileStats(object):
return
rval
def
op_callcount
(
self
):
"""dict op -> total number of thunk calls"""
"""
dict op -> total number of thunk calls
"""
# timing is stored by node, we compute timing by Op on demand
rval
=
{}
for
node
,
count
in
iteritems
(
self
.
apply_callcount
):
...
...
@@ -332,7 +364,10 @@ class ProfileStats(object):
return
rval
def
op_nodes
(
self
):
"""dict op -> total number of nodes"""
"""
dict op -> total number of nodes
"""
# timing is stored by node, we compute timing by Op on demand
rval
=
{}
for
node
,
count
in
iteritems
(
self
.
apply_callcount
):
...
...
@@ -341,7 +376,10 @@ class ProfileStats(object):
return
rval
def
op_impl
(
self
):
"""dict op -> 'C' or 'Py' depending how the op is implemented"""
"""
dict op -> 'C' or 'Py' depending how the op is implemented
"""
# timing is stored by node, we compute timing by Op on demand
rval
=
{}
for
node
in
self
.
apply_callcount
:
...
...
@@ -711,21 +749,23 @@ class ProfileStats(object):
def
count_running_memory
(
order
,
fgraph
,
nodes_mem
):
"""
Calculate memory with specific node order
Calculate memory with specific node order.
Return a list including the following values
1. node_memory_size
Sum of the size of all variables that actually allocate
memory (excluding views, and inplace)
;
2. running_memory_size
The memory allocated after the current apply node
3. running_max_memory_size
The maximum of running_memory_size during the function
memory (excluding views, and inplace)
.
2.
running_memory_size
The memory allocated after the current apply node
.
3.
running_max_memory_size
The maximum of running_memory_size during the function
.
4. node_memory_saved_by_view
The sum of memory saved by returning view instead of new
allocation
allocation
.
5. node_memory_saved_by_inplace
The sum of memory saved by reusing the input instead of
new allocation
new allocation.
"""
from
theano.sandbox.cuda
import
CudaNdarrayType
# Initial Mem info values [CPU, GPU]
...
...
@@ -874,10 +914,14 @@ class ProfileStats(object):
def
min_memory_generator
(
executable_nodes
,
viewed_by
,
view_of
):
"""
Generate all valid node order from node_list
and compute its memory peak.
Generate all valid node order from node_list and compute its
memory peak.
Parameters
----------
executable_nodes
Set of executable nodes.
:param executable_nodes: Set of executable nodes
"""
global
mem_count
,
mem_bound
,
max_mem_count
...
...
@@ -1255,9 +1299,13 @@ if False: # old code still to be ported from ProfileMode
"""
Print a readable summary of the stats.
param: n_apply_to_print the number of apply to print. Default 15.
Parameters
----------
n_apply_to_print
The number of apply to print. Default 15.
n_ops_to_print
The number of ops to print. Default 20.
param: n_ops_to_print the number of ops to print. Default 20.
"""
local_time
=
sum
(
self
.
apply_time
.
values
())
...
...
@@ -1483,11 +1531,13 @@ if False: # old code still to be ported from ProfileMode
There is a hack with the Op-wise summary. Go see it if you want to know
more.
:param n_apply_to_print: the number of apply to print. Default 15, or
n_ops_to_print flag.
Parameters
----------
n_apply_to_print
The number of apply to print. Default 15, or n_ops_to_print flag.
n_ops_to_print
The number of ops to print. Default 20, or n_apply_to_print flag.
:param n_ops_to_print: the number of ops to print. Default 20, or
n_apply_to_print flag.
"""
fct_call_time
=
self
.
mode
.
fct_call_time
fct_call
=
self
.
mode
.
fct_call
...
...
@@ -1517,12 +1567,15 @@ if False: # old code still to be ported from ProfileMode
now.
TODO: make comparison with gpu code.
:param other: the other instance of ProfileMode that we want to be
compared to.
:param n_apply_to_print: the number of apply to print. Default 15.
Parameters
----------
other
The other instance of ProfileMode that we want to be compared to.
n_apply_to_print
The number of apply to print. Default 15.
n_ops_to_print
The number of ops to print. Default 20.
:param n_ops_to_print: the number of ops to print. Default 20.
"""
def
diff_dict
(
a_time
,
b_time_
):
...
...
编写
预览
Markdown
格式
0%
重试
或
添加新文件
添加附件
取消
您添加了
0
人
到此讨论。请谨慎行事。
请先完成此评论的编辑!
取消
请
注册
或者
登录
后发表评论