提交 075dd793 authored 作者: Frederic Bastien's avatar Frederic Bastien

Add printing of the 5 slowest nodes' make_thunk times to the profile

上级 83d5709a
...@@ -19,6 +19,7 @@ __docformat__ = "restructuredtext en" ...@@ -19,6 +19,7 @@ __docformat__ = "restructuredtext en"
import atexit import atexit
import copy import copy
import operator
import os import os
import sys import sys
import time import time
...@@ -78,7 +79,8 @@ def _atexit_print_fn(): ...@@ -78,7 +79,8 @@ def _atexit_print_fn():
# merge dictionary # merge dictionary
for attr in ["apply_time", "apply_callcount", for attr in ["apply_time", "apply_callcount",
"apply_cimpl", "variable_shape", "variable_strides"]: "apply_cimpl", "variable_shape", "variable_strides",
"linker_make_thunk_time"]:
cum_attr = getattr(cum, attr) cum_attr = getattr(cum, attr)
for key, val in iteritems(getattr(ps, attr)): for key, val in iteritems(getattr(ps, attr)):
assert key not in cum_attr assert key not in cum_attr
...@@ -193,6 +195,8 @@ class ProfileStats(object): ...@@ -193,6 +195,8 @@ class ProfileStats(object):
linker_node_make_thunks = 0.0 linker_node_make_thunks = 0.0
linker_make_thunk_time = {}
line_width = config.profiling.output_line_width line_width = config.profiling.output_line_width
nb_nodes = -1 nb_nodes = -1
...@@ -670,6 +674,11 @@ class ProfileStats(object): ...@@ -670,6 +674,11 @@ class ProfileStats(object):
print(' Import time %es' % self.import_time, file=file) print(' Import time %es' % self.import_time, file=file)
print(' Node make_thunk time %es' % self.linker_node_make_thunks, print(' Node make_thunk time %es' % self.linker_node_make_thunks,
file=file) file=file)
# Report the five nodes whose make_thunk call took the longest.
# The (node, elapsed) pairs are sorted by elapsed time in descending
# order, so the [:5] slice keeps the slowest nodes; an ascending sort
# would list the five fastest ones instead.
for node, t in sorted(self.linker_make_thunk_time.items(),
                      key=operator.itemgetter(1),
                      reverse=True)[:5]:
    print(' Node %s time %es' % (node, t),
          file=file)
print('', file=file) print('', file=file)
# The validation time is a subset of optimizer_time # The validation time is a subset of optimizer_time
......
...@@ -1041,16 +1041,19 @@ class VM_Linker(link.LocalLinker): ...@@ -1041,16 +1041,19 @@ class VM_Linker(link.LocalLinker):
reallocated_info = calculate_reallocate_info( reallocated_info = calculate_reallocate_info(
order, fgraph, storage_map, compute_map_re, dependencies) order, fgraph, storage_map, compute_map_re, dependencies)
t0 = time.time() t0 = time.time()
linker_make_thunk_time = {}
impl = None
if self.c_thunks is False:
impl = 'py'
for node in order: for node in order:
try: try:
impl = None thunk_start = time.time()
if self.c_thunks is False:
impl = 'py'
thunks.append(node.op.make_thunk(node, thunks.append(node.op.make_thunk(node,
storage_map, storage_map,
compute_map, compute_map,
no_recycling, no_recycling,
impl=impl)) impl=impl))
linker_make_thunk_time[node] = time.time() - thunk_start
if not hasattr(thunks[-1], 'lazy'): if not hasattr(thunks[-1], 'lazy'):
# We don't want all ops maker to think about lazy Ops. # We don't want all ops maker to think about lazy Ops.
# So if they didn't specify whether it's lazy or not, it isn't. # So if they didn't specify whether it's lazy or not, it isn't.
...@@ -1064,6 +1067,7 @@ class VM_Linker(link.LocalLinker): ...@@ -1064,6 +1067,7 @@ class VM_Linker(link.LocalLinker):
if self.profile: if self.profile:
self.profile.linker_node_make_thunks += t1 - t0 self.profile.linker_node_make_thunks += t1 - t0
self.profile.linker_make_thunk_time = linker_make_thunk_time
for node, thunk in zip(order, thunks): for node, thunk in zip(order, thunks):
thunk.inputs = [storage_map[v] for v in node.inputs] thunk.inputs = [storage_map[v] for v in node.inputs]
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论