提交 8c5cad8f authored 作者: ballasn's avatar ballasn 提交者: GitHub

Merge pull request #5298 from Faruk-Ahmed/print_profile_stats_at_exit

Print profile stats
......@@ -19,6 +19,7 @@ from theano import config, gof
from theano.compat import izip
from theano.gof import graph
import theano.compile.mode
import theano.compile.profiling
from theano.compile.io import (
In, SymbolicInput, SymbolicOutput)
from theano.compile.ops import deep_copy_op, view_op
......@@ -663,7 +664,7 @@ class Function(object):
input_storage = [i.value for i in ins]
# reinitialize new maker and create new function
if profile is None:
profile = config.profile
profile = config.profile or config.print_global_stats
# profile -> True or False
if profile is True:
if name:
......@@ -944,6 +945,7 @@ class Function(object):
#
dt_call = time.time() - t0
theano.compile.profiling.total_fct_exec_time += dt_call
self.maker.mode.call_time += dt_call
if profile:
profile.fct_callcount += 1
......@@ -1473,6 +1475,7 @@ class FunctionMaker(object):
end_optimizer = time.time()
opt_time = end_optimizer - start_optimizer
theano.compile.profiling.total_graph_opt_time += opt_time
if profile:
profile.optimizer_time += opt_time
if theano.config.profile_optimizer:
......@@ -1662,6 +1665,7 @@ class FunctionMaker(object):
end_linker = time.time()
linker_time = end_linker - start_linker
theano.compile.profiling.total_time_linker += linker_time
_logger.debug('Linker took %f seconds', linker_time)
if self.profile:
self.profile.linker_time += linker_time
......
......@@ -364,7 +364,7 @@ def pfunc(params, outputs=None, mode=None, updates=None, givens=None,
if givens is None:
givens = []
if profile is None:
profile = config.profile
profile = config.profile or config.print_global_stats
# profile -> True or False
if profile is False:
profile = None
......
......@@ -36,6 +36,9 @@ from theano.gof import graph
logger = logging.getLogger('theano.compile.profiling')
theano_imported_time = time.time()
total_fct_exec_time = 0.
total_graph_opt_time = 0.
total_time_linker = 0.
config = theano.config
_atexit_print_list = []
......@@ -47,7 +50,80 @@ def _atexit_print_fn():
Print ProfileStat objects in _atexit_print_list to _atexit_print_file.
"""
to_sum = []
if config.profile:
to_sum = []
if config.profiling.destination == 'stderr':
destination_file = sys.stderr
elif config.profiling.destination == 'stdout':
destination_file = sys.stdout
else:
destination_file = open(config.profiling.destination, 'w')
# Reverse sort in the order of compile+exec time
for ps in sorted(_atexit_print_list,
key=lambda a:a.compile_time + a.fct_call_time)[::-1]:
if ps.fct_callcount >= 1 or ps.compile_time > 1:
ps.summary(file=destination_file,
n_ops_to_print=config.profiling.n_ops,
n_apply_to_print=config.profiling.n_apply)
if not isinstance(ps, ScanProfileStats):
to_sum.append(ps)
else:
# TODO print the name if there is one!
print('Skipping empty Profile')
if len(to_sum) > 1:
# Make a global profile
cum = copy.copy(to_sum[0])
msg = ("Sum of all(%d) printed profiles at exit excluding Scan op"
" profile." % len(to_sum))
cum.message = msg
for ps in to_sum[1:]:
for attr in ["compile_time", "fct_call_time", "fct_callcount",
"vm_call_time", "optimizer_time", "linker_time",
"validate_time", "import_time",
"linker_node_make_thunks"]:
setattr(cum, attr, getattr(cum, attr) + getattr(ps, attr))
# merge dictonary
for attr in ["apply_time", "apply_callcount",
"apply_cimpl", "variable_shape", "variable_strides",
"linker_make_thunk_time"]:
cum_attr = getattr(cum, attr)
for key, val in iteritems(getattr(ps, attr)):
assert key not in cum_attr
cum_attr[key] = val
if cum.optimizer_profile and ps.optimizer_profile:
try:
merge = cum.optimizer_profile[0].merge_profile(
cum.optimizer_profile[1],
ps.optimizer_profile[1])
assert len(merge) == len(cum.optimizer_profile[1])
cum.optimizer_profile = (cum.optimizer_profile[0], merge)
except Exception as e:
print("Got an exception while merging profile")
print(e)
cum.optimizer_profile = None
else:
cum.optimizer_profile = None
cum.summary(file=destination_file,
n_ops_to_print=config.profiling.n_ops,
n_apply_to_print=config.profiling.n_apply)
if config.print_global_stats:
print_global_stats()
def print_global_stats():
"""
Print the following stats:
-- Time elapsed since Theano was imported
-- Time spent inside Theano functions
-- Time spent in compiling Theano functions
-- on graph optimization
-- on linker
"""
if config.profiling.destination == 'stderr':
destination_file = sys.stderr
......@@ -56,57 +132,18 @@ def _atexit_print_fn():
else:
destination_file = open(config.profiling.destination, 'w')
# Reverse sort in the order of compile+exec time
for ps in sorted(_atexit_print_list,
key=lambda a:a.compile_time + a.fct_call_time)[::-1]:
if ps.fct_callcount >= 1 or ps.compile_time > 1:
ps.summary(file=destination_file,
n_ops_to_print=config.profiling.n_ops,
n_apply_to_print=config.profiling.n_apply)
if not isinstance(ps, ScanProfileStats):
to_sum.append(ps)
else:
# TODO print the name if there is one!
print('Skipping empty Profile')
if len(to_sum) > 1:
# Make a global profile
cum = copy.copy(to_sum[0])
msg = ("Sum of all(%d) printed profiles at exit excluding Scan op"
" profile." % len(to_sum))
cum.message = msg
for ps in to_sum[1:]:
for attr in ["compile_time", "fct_call_time", "fct_callcount",
"vm_call_time", "optimizer_time", "linker_time",
"validate_time", "import_time",
"linker_node_make_thunks"]:
setattr(cum, attr, getattr(cum, attr) + getattr(ps, attr))
# merge dictonary
for attr in ["apply_time", "apply_callcount",
"apply_cimpl", "variable_shape", "variable_strides",
"linker_make_thunk_time"]:
cum_attr = getattr(cum, attr)
for key, val in iteritems(getattr(ps, attr)):
assert key not in cum_attr
cum_attr[key] = val
if cum.optimizer_profile and ps.optimizer_profile:
try:
merge = cum.optimizer_profile[0].merge_profile(
cum.optimizer_profile[1],
ps.optimizer_profile[1])
assert len(merge) == len(cum.optimizer_profile[1])
cum.optimizer_profile = (cum.optimizer_profile[0], merge)
except Exception as e:
print("Got an exception while merging profile")
print(e)
cum.optimizer_profile = None
else:
cum.optimizer_profile = None
cum.summary(file=destination_file,
n_ops_to_print=config.profiling.n_ops,
n_apply_to_print=config.profiling.n_apply)
print('='*50, file=destination_file)
print('Global stats: ',
'Time elasped since Theano import = %6.3fs, '
'Time spent in Theano functions = %6.3fs, '
'Time spent compiling Theano functions: '
' optimzation = %6.3fs, linker = %6.3fs ' %
(time.time() - theano_imported_time,
total_fct_exec_time,
total_graph_opt_time,
total_time_linker),
file=destination_file)
print('='*50, file=destination_file)
class ProfileStats(object):
......
......@@ -126,6 +126,12 @@ AddConfigVar(
BoolParam(False, allow_override=False),
in_c_key=False)
AddConfigVar(
'print_global_stats',
"Print some global statistics (time spent) at the end",
BoolParam(False),
in_c_key=False)
class ContextsParam(ConfigParam):
def __init__(self):
......
......@@ -482,7 +482,7 @@ class Stack(VM):
try:
_, dt = self.run_thunk_of_node(current_apply)
del _
if config.profile:
if config.profile or config.print_global_stats:
current_idx = self.node_idx[current_apply]
self.call_counts[current_idx] += 1
self.call_times[current_idx] += dt
......@@ -596,7 +596,7 @@ class Stack(VM):
if current_apply.inputs[r].owner:
apply_stack.append(current_apply.inputs[r].owner)
else:
if config.profile:
if config.profile or config.print_global_stats:
for (idx, o) in enumerate(thunks[
self.node_idx[current_apply]].outputs):
var = self.nodes[
......@@ -757,7 +757,7 @@ class VM_Linker(link.LocalLinker):
associated to self, else, a new VM_Linker associated to fgraph.
"""
if (config.profile and
if ((config.profile or config.print_global_stats) and
((hasattr(theano, 'sandbox') and
hasattr(theano.sandbox, 'cuda') and
theano.sandbox.cuda.cuda_enabled) or
......@@ -856,7 +856,7 @@ class VM_Linker(link.LocalLinker):
pre_call_clear = [storage_map[v] for v in self.no_recycling]
if (self.callback is not None or self.callback_input is not None or
(config.profile and config.profile_memory) or
((config.profile or config.print_global_stats) and config.profile_memory) or
(self.allow_partial_eval and not self.use_cloop)):
if self.use_cloop and (self.callback is not None or
......@@ -1086,7 +1086,7 @@ class VM_Linker(link.LocalLinker):
lazy = config.vm.lazy
if lazy is None:
lazy = not all([(not th.lazy) for th in thunks])
if not (lazy or (config.profile and config.profile_memory) or
if not (lazy or ((config.profile or config.print_global_stats) and config.profile_memory) or
self.use_cloop or self.callback or self.callback_input):
for pair in itervalues(reallocated_info):
storage_map[pair[1]] = storage_map[pair[0]]
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论