提交 4a3e11c1 作者: Saizheng Zhang

fix the argument passing problem to ProfileStats.print_extra() and corresponding…

Fix the arguments that ProfileStats.print_extra() passes to the registered profiler printers, and update the corresponding profile_printer functions in gpu and scan to match the new signature.
上级 f78cd6c2
...@@ -102,7 +102,6 @@ def _atexit_print_fn(): ...@@ -102,7 +102,6 @@ def _atexit_print_fn():
assert len(merge) == len(cum.optimizer_profile[1]) assert len(merge) == len(cum.optimizer_profile[1])
cum.optimizer_profile = (cum.optimizer_profile[0], merge) cum.optimizer_profile = (cum.optimizer_profile[0], merge)
except Exception as e: except Exception as e:
print("Got an exception while merging profile")
print(e) print(e)
cum.optimizer_profile = None cum.optimizer_profile = None
else: else:
...@@ -178,7 +177,7 @@ class ProfileStats(object): ...@@ -178,7 +177,7 @@ class ProfileStats(object):
self.apply_time = {} self.apply_time = {}
self.apply_callcount = {} self.apply_callcount = {}
# self.apply_cimpl = None # self.apply_cimpl = None
# self.messge = None # self.message = None
# #
# Note on implementation: # Note on implementation:
# Class variables are used here so that each one can be # Class variables are used here so that each one can be
...@@ -1472,7 +1471,8 @@ class ProfileStats(object): ...@@ -1472,7 +1471,8 @@ class ProfileStats(object):
print(" Sorry, no tip for today.", file=file) print(" Sorry, no tip for today.", file=file)
def print_extra(self): def print_extra(self):
params = [None, None, None, None, self.apply_time, None, None, None, None] params = [self.message, self.compile_time, self.fct_call_time,
self.apply_time, self.apply_cimpl, self.output_size]
for f in _profiler_printers: for f in _profiler_printers:
f(*params) f(*params)
......
...@@ -1540,15 +1540,18 @@ class GpuSplit(HideC, Split): ...@@ -1540,15 +1540,18 @@ class GpuSplit(HideC, Split):
@theano.compile.profiling.register_profiler_printer @theano.compile.profiling.register_profiler_printer
def profile_printer(fct_name, compile_time, fct_call_time, fct_call, def profile_printer(message, compile_time, fct_call_time,
apply_time, apply_cimpl, message, outputs_size, apply_time, apply_cimpl, outputs_size):
other_time): if any([x.op.__class__.__name__.lower().startswith("gpu")
if any([x[1].op.__class__.__name__.lower().startswith("gpu")
for x in apply_time.keys()]): for x in apply_time.keys()]):
local_time = sum(apply_time.values()) local_time = sum(apply_time.values())
print() print()
print('Some info useful for gpu:') print('Some info useful for gpu:')
fct_call = set()
for node in apply_time.keys():
fct_call.add(node.fgraph)
cpu = 0 cpu = 0
gpu = 0 gpu = 0
trans = 0 trans = 0
......
...@@ -2868,17 +2868,21 @@ gof.ops_with_inner_function[Scan] = 'fn' ...@@ -2868,17 +2868,21 @@ gof.ops_with_inner_function[Scan] = 'fn'
@theano.compile.profiling.register_profiler_printer @theano.compile.profiling.register_profiler_printer
def profile_printer(fct_name, compile_time, fct_call_time, fct_call, def profile_printer(message, compile_time, fct_call_time,
apply_time, apply_cimpl, message, outputs_size, apply_time, apply_cimpl, outputs_size):
other_time):
# Scan overhead profile # Scan overhead profile
if any([isinstance(node.op, Scan) and v > 0 for (_, node), v in if any([isinstance(node.op, Scan) and v > 0 for node, v in
apply_time.items()]): apply_time.items()]):
print() print()
print('Scan overhead:') print('Scan overhead:')
print('<Scan op time(s)> <sub scan fct time(s)> <sub scan op ' print('<Scan op time(s)> <sub scan fct time(s)> <sub scan op '
'time(s)> <sub scan fct time(% scan op time)> <sub scan ' 'time(s)> <sub scan fct time(% scan op time)> <sub scan '
'op time(% scan op time)> <node>') 'op time(% scan op time)> <node>')
fct_call = set()
for node in apply_time.keys():
fct_call.add(node.fgraph)
total_super_scan_time = 0 total_super_scan_time = 0
total_scan_fct_time = 0 total_scan_fct_time = 0
total_scan_op_time = 0 total_scan_op_time = 0
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论