提交 8c5cad8f authored 作者: ballasn's avatar ballasn 提交者: GitHub

Merge pull request #5298 from Faruk-Ahmed/print_profile_stats_at_exit

Print profile stats
...@@ -19,6 +19,7 @@ from theano import config, gof ...@@ -19,6 +19,7 @@ from theano import config, gof
from theano.compat import izip from theano.compat import izip
from theano.gof import graph from theano.gof import graph
import theano.compile.mode import theano.compile.mode
import theano.compile.profiling
from theano.compile.io import ( from theano.compile.io import (
In, SymbolicInput, SymbolicOutput) In, SymbolicInput, SymbolicOutput)
from theano.compile.ops import deep_copy_op, view_op from theano.compile.ops import deep_copy_op, view_op
...@@ -663,7 +664,7 @@ class Function(object): ...@@ -663,7 +664,7 @@ class Function(object):
input_storage = [i.value for i in ins] input_storage = [i.value for i in ins]
# reinitialize new maker and create new function # reinitialize new maker and create new function
if profile is None: if profile is None:
profile = config.profile profile = config.profile or config.print_global_stats
# profile -> True or False # profile -> True or False
if profile is True: if profile is True:
if name: if name:
...@@ -944,6 +945,7 @@ class Function(object): ...@@ -944,6 +945,7 @@ class Function(object):
# #
dt_call = time.time() - t0 dt_call = time.time() - t0
theano.compile.profiling.total_fct_exec_time += dt_call
self.maker.mode.call_time += dt_call self.maker.mode.call_time += dt_call
if profile: if profile:
profile.fct_callcount += 1 profile.fct_callcount += 1
...@@ -1473,6 +1475,7 @@ class FunctionMaker(object): ...@@ -1473,6 +1475,7 @@ class FunctionMaker(object):
end_optimizer = time.time() end_optimizer = time.time()
opt_time = end_optimizer - start_optimizer opt_time = end_optimizer - start_optimizer
theano.compile.profiling.total_graph_opt_time += opt_time
if profile: if profile:
profile.optimizer_time += opt_time profile.optimizer_time += opt_time
if theano.config.profile_optimizer: if theano.config.profile_optimizer:
...@@ -1662,6 +1665,7 @@ class FunctionMaker(object): ...@@ -1662,6 +1665,7 @@ class FunctionMaker(object):
end_linker = time.time() end_linker = time.time()
linker_time = end_linker - start_linker linker_time = end_linker - start_linker
theano.compile.profiling.total_time_linker += linker_time
_logger.debug('Linker took %f seconds', linker_time) _logger.debug('Linker took %f seconds', linker_time)
if self.profile: if self.profile:
self.profile.linker_time += linker_time self.profile.linker_time += linker_time
......
...@@ -364,7 +364,7 @@ def pfunc(params, outputs=None, mode=None, updates=None, givens=None, ...@@ -364,7 +364,7 @@ def pfunc(params, outputs=None, mode=None, updates=None, givens=None,
if givens is None: if givens is None:
givens = [] givens = []
if profile is None: if profile is None:
profile = config.profile profile = config.profile or config.print_global_stats
# profile -> True or False # profile -> True or False
if profile is False: if profile is False:
profile = None profile = None
......
...@@ -36,6 +36,9 @@ from theano.gof import graph ...@@ -36,6 +36,9 @@ from theano.gof import graph
logger = logging.getLogger('theano.compile.profiling') logger = logging.getLogger('theano.compile.profiling')
theano_imported_time = time.time() theano_imported_time = time.time()
total_fct_exec_time = 0.
total_graph_opt_time = 0.
total_time_linker = 0.
config = theano.config config = theano.config
_atexit_print_list = [] _atexit_print_list = []
...@@ -47,6 +50,7 @@ def _atexit_print_fn(): ...@@ -47,6 +50,7 @@ def _atexit_print_fn():
Print ProfileStat objects in _atexit_print_list to _atexit_print_file. Print ProfileStat objects in _atexit_print_list to _atexit_print_file.
""" """
if config.profile:
to_sum = [] to_sum = []
if config.profiling.destination == 'stderr': if config.profiling.destination == 'stderr':
...@@ -108,6 +112,39 @@ def _atexit_print_fn(): ...@@ -108,6 +112,39 @@ def _atexit_print_fn():
n_ops_to_print=config.profiling.n_ops, n_ops_to_print=config.profiling.n_ops,
n_apply_to_print=config.profiling.n_apply) n_apply_to_print=config.profiling.n_apply)
if config.print_global_stats:
print_global_stats()
def print_global_stats():
    """
    Print a short summary of process-wide timing statistics.

    The summary contains:
      -- Time elapsed since Theano was imported
      -- Time spent inside Theano functions
      -- Time spent in compiling Theano functions
           -- on graph optimization
           -- on linker

    The output goes to ``config.profiling.destination``: ``'stderr'`` and
    ``'stdout'`` select the matching standard stream; any other value is
    used as a file path, which is opened in ``'w'`` mode (truncating it).
    A file opened here is closed before returning so that the report is
    flushed to disk even when this runs during interpreter shutdown.
    """
    destination = config.profiling.destination
    # Only a file opened by this function may be closed by it; the
    # standard streams belong to the interpreter.
    opened_here = False
    if destination == 'stderr':
        destination_file = sys.stderr
    elif destination == 'stdout':
        destination_file = sys.stdout
    else:
        destination_file = open(destination, 'w')
        opened_here = True

    separator = '=' * 50
    try:
        print(separator, file=destination_file)
        # NOTE: the message text is kept exactly as it was (including its
        # spelling) so that anything parsing this output keeps working.
        print('Global stats: ',
              'Time elasped since Theano import = %6.3fs, '
              'Time spent in Theano functions = %6.3fs, '
              'Time spent compiling Theano functions: '
              ' optimzation = %6.3fs, linker = %6.3fs ' %
              (time.time() - theano_imported_time,
               total_fct_exec_time,
               total_graph_opt_time,
               total_time_linker),
              file=destination_file)
        print(separator, file=destination_file)
    finally:
        if opened_here:
            destination_file.close()
class ProfileStats(object): class ProfileStats(object):
......
...@@ -126,6 +126,12 @@ AddConfigVar( ...@@ -126,6 +126,12 @@ AddConfigVar(
BoolParam(False, allow_override=False), BoolParam(False, allow_override=False),
in_c_key=False) in_c_key=False)
# Boolean flag (constructed with BoolParam(False)) that requests a summary
# of global timing statistics at the end of the run. The compile and VM
# code shown alongside this change treats it like `config.profile` when
# deciding whether to collect timing information.
AddConfigVar(
'print_global_stats',
"Print some global statistics (time spent) at the end",
BoolParam(False),
in_c_key=False)
class ContextsParam(ConfigParam): class ContextsParam(ConfigParam):
def __init__(self): def __init__(self):
......
...@@ -482,7 +482,7 @@ class Stack(VM): ...@@ -482,7 +482,7 @@ class Stack(VM):
try: try:
_, dt = self.run_thunk_of_node(current_apply) _, dt = self.run_thunk_of_node(current_apply)
del _ del _
if config.profile: if config.profile or config.print_global_stats:
current_idx = self.node_idx[current_apply] current_idx = self.node_idx[current_apply]
self.call_counts[current_idx] += 1 self.call_counts[current_idx] += 1
self.call_times[current_idx] += dt self.call_times[current_idx] += dt
...@@ -596,7 +596,7 @@ class Stack(VM): ...@@ -596,7 +596,7 @@ class Stack(VM):
if current_apply.inputs[r].owner: if current_apply.inputs[r].owner:
apply_stack.append(current_apply.inputs[r].owner) apply_stack.append(current_apply.inputs[r].owner)
else: else:
if config.profile: if config.profile or config.print_global_stats:
for (idx, o) in enumerate(thunks[ for (idx, o) in enumerate(thunks[
self.node_idx[current_apply]].outputs): self.node_idx[current_apply]].outputs):
var = self.nodes[ var = self.nodes[
...@@ -757,7 +757,7 @@ class VM_Linker(link.LocalLinker): ...@@ -757,7 +757,7 @@ class VM_Linker(link.LocalLinker):
associated to self, else, a new VM_Linker associated to fgraph. associated to self, else, a new VM_Linker associated to fgraph.
""" """
if (config.profile and if ((config.profile or config.print_global_stats) and
((hasattr(theano, 'sandbox') and ((hasattr(theano, 'sandbox') and
hasattr(theano.sandbox, 'cuda') and hasattr(theano.sandbox, 'cuda') and
theano.sandbox.cuda.cuda_enabled) or theano.sandbox.cuda.cuda_enabled) or
...@@ -856,7 +856,7 @@ class VM_Linker(link.LocalLinker): ...@@ -856,7 +856,7 @@ class VM_Linker(link.LocalLinker):
pre_call_clear = [storage_map[v] for v in self.no_recycling] pre_call_clear = [storage_map[v] for v in self.no_recycling]
if (self.callback is not None or self.callback_input is not None or if (self.callback is not None or self.callback_input is not None or
(config.profile and config.profile_memory) or ((config.profile or config.print_global_stats) and config.profile_memory) or
(self.allow_partial_eval and not self.use_cloop)): (self.allow_partial_eval and not self.use_cloop)):
if self.use_cloop and (self.callback is not None or if self.use_cloop and (self.callback is not None or
...@@ -1086,7 +1086,7 @@ class VM_Linker(link.LocalLinker): ...@@ -1086,7 +1086,7 @@ class VM_Linker(link.LocalLinker):
lazy = config.vm.lazy lazy = config.vm.lazy
if lazy is None: if lazy is None:
lazy = not all([(not th.lazy) for th in thunks]) lazy = not all([(not th.lazy) for th in thunks])
if not (lazy or (config.profile and config.profile_memory) or if not (lazy or ((config.profile or config.print_global_stats) and config.profile_memory) or
self.use_cloop or self.callback or self.callback_input): self.use_cloop or self.callback or self.callback_input):
for pair in itervalues(reallocated_info): for pair in itervalues(reallocated_info):
storage_map[pair[1]] = storage_map[pair[0]] storage_map[pair[1]] = storage_map[pair[0]]
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论