提交 1fb1b270 authored 作者: Frederic Bastien's avatar Frederic Bastien

Add ProfileMode.print_diff_summary. It does not yet work with GPU code, the Apply-wise summary, or flops.

上级 5e9ca07a
import time, atexit import time, atexit, copy
from theano.gof.link import WrapLinkerMany from theano.gof.link import WrapLinkerMany
from theano.gof.cutils import run_cthunk from theano.gof.cutils import run_cthunk
...@@ -64,16 +64,74 @@ class ProfileMode(Mode): ...@@ -64,16 +64,74 @@ class ProfileMode(Mode):
param: n_ops_to_print the number of ops to print. Default 20. param: n_ops_to_print the number of ops to print. Default 20.
""" """
local_time = self.local_time[0] local_time = self.local_time[0]
compile_time = self.compile_time
apply_time = self.apply_time apply_time = self.apply_time
apply_call = self.apply_call apply_call = self.apply_call
op_time = self.op_time op_time = self.op_time
op_call = self.op_call op_call = self.op_call
op_cimpl = self.op_cimpl
self.print_summary_("print_summary",local_time, compile_time, apply_time, apply_call, op_time, op_call, op_cimpl, n_apply_to_print, n_ops_to_print)
def print_diff_summary(self, other, n_apply_to_print=15, n_ops_to_print=20):
    """Print a summary of the absolute difference between two profiles.

    Works like print_summary, but every timing and call count handed to
    print_summary_ is the absolute difference between this ProfileMode
    and `other`.

    TODO: the Apply-wise summary is not printed, as it does not work for now.
    TODO: make flops the difference of flops.
    TODO: make the comparison work with gpu code.

    param: other the other instance of ProfileMode to compare against.
    param: n_apply_to_print the number of apply to print. Default 15.
    param: n_ops_to_print the number of ops to print. Default 20.
    """

    def diff_dict(a, b_):
        """Return {key: abs(a[key] - b_[key])} over the union of keys.

        A key missing from `b_` counts as 0 there.  A key missing from `a`
        keeps its value from `b_` unchanged.  Neither input is modified.
        """
        r = {}
        for key, val in a.items():
            r[key] = abs(val - b_.get(key, 0))
        # Entries present only in b_ (i.e. missing in a).
        missing = [key for key in b_ if key not in a]
        # Single-argument form prints the same text under Python 2 and 3.
        print("missing items %d" % len(missing))
        for key in missing:
            r[key] = b_[key]
        return r

    local_time = abs(self.local_time[0] - other.local_time[0])
    compile_time = abs(self.compile_time - other.compile_time)
    apply_time = diff_dict(self.apply_time, other.apply_time)
    apply_call = diff_dict(self.apply_call, other.apply_call)
    op_time = diff_dict(self.op_time, other.op_time)
    op_call = diff_dict(self.op_call, other.op_call)

    # `self.op_cimpl and other.op_cimpl` would yield just one of the two
    # dicts, so an op profiled on only one side could be missing from it
    # while still being a key of op_time/op_call.  Build the flag per op
    # over the union of keys instead: an op is flagged only if every
    # profile that recorded it flagged it.
    op_cimpl = {}
    for cimpl in (self.op_cimpl, other.op_cimpl):
        for op, is_c in cimpl.items():
            op_cimpl[op] = op_cimpl.get(op, True) and is_c

    self.print_summary_("print_diff_summary", local_time, compile_time,
                        apply_time, apply_call, op_time, op_call, op_cimpl,
                        n_apply_to_print, n_ops_to_print, print_apply=False)
@staticmethod
def print_summary_(fct_name, local_time, compile_time, apply_time, apply_call, op_time, op_call, op_cimpl,
n_apply_to_print=15, n_ops_to_print=20, print_apply=True):
"""
do the actual printing of print_summary and print_diff_summary.
param: n_apply_to_print the number of apply to print. Default 15.
param: n_ops_to_print the number of ops to print. Default 20.
"""
print '' print ''
print 'ProfileMode.print_summary()' print 'ProfileMode.%s()'%(fct_name)
print '---------------------------' print '---------------------------'
print '' print ''
print 'local_time %fs (Time spent running thunks)'% local_time print 'local_time %fs (Time spent running thunks)'% local_time
if print_apply:
print 'Apply-wise summary: <% of local_time spent at this position> <total of local_time spent at this position> <nb_call> <Apply position> <Apply Op name>' print 'Apply-wise summary: <% of local_time spent at this position> <total of local_time spent at this position> <nb_call> <Apply position> <Apply Op name>'
atimes = [(t/local_time, t, (a[0], str(a[1])), apply_call[a]) for a, t in apply_time.items()] atimes = [(t/local_time, t, (a[0], str(a[1])), apply_call[a]) for a, t in apply_time.items()]
atimes.sort() atimes.sort()
...@@ -98,7 +156,7 @@ class ProfileMode(Mode): ...@@ -98,7 +156,7 @@ class ProfileMode(Mode):
print '\nOp-wise summary: < of local_time spent on this kind of Op> <cumulative seconds> <self seconds>%s <nb_call> <Op name>'%(flops_msg) print '\nOp-wise summary: < of local_time spent on this kind of Op> <cumulative seconds> <self seconds>%s <nb_call> <Op name>'%(flops_msg)
otimes = [(t/local_time, t, a, self.op_cimpl[a], op_call[a]) for a, t in op_time.items()] otimes = [(t/local_time, t, a, op_cimpl[a], op_call[a]) for a, t in op_time.items()]
otimes.sort() otimes.sort()
otimes.reverse() otimes.reverse()
tot=0 tot=0
...@@ -110,7 +168,7 @@ class ProfileMode(Mode): ...@@ -110,7 +168,7 @@ class ProfileMode(Mode):
msg = ' ' msg = ' '
m=-1 m=-1
if hasattr(a,'flops'): if hasattr(a,'flops'):
m=a.flops*self.op_call[a]/t/1e6 m=a.flops*op_call[a]/t/1e6
if flops: if flops:
print ' %4.1f%% %.3fs %.3fs %s %7.1f %d %s' % (f*100, tot, t, msg, m, nb_call, a) print ' %4.1f%% %.3fs %.3fs %s %7.1f %d %s' % (f*100, tot, t, msg, m, nb_call, a)
else: else:
...@@ -129,7 +187,7 @@ class ProfileMode(Mode): ...@@ -129,7 +187,7 @@ class ProfileMode(Mode):
sop_time.setdefault(type(a),0) sop_time.setdefault(type(a),0)
sop_time[type(a)]+=t sop_time[type(a)]+=t
sop_c.setdefault(type(a),True) sop_c.setdefault(type(a),True)
sop_c[type(a)]=sop_c[type(a)] and self.op_cimpl[a] sop_c[type(a)]=sop_c[type(a)] and op_cimpl[a]
sop_call[type(a)]=sop_call.get(type(a),0)+op_call[a] sop_call[type(a)]=sop_call.get(type(a),0)+op_call[a]
print '\nSingle Op-wise summary: <% of local_time spent on this kind of Op> <cumulative seconds> <self seconds> <nb_call> <Op name>' print '\nSingle Op-wise summary: <% of local_time spent on this kind of Op> <cumulative seconds> <self seconds> <nb_call> <Op name>'
sotimes = [(t/local_time, t, a, sop_c[a], sop_call[a]) for a, t in sop_time.items()] sotimes = [(t/local_time, t, a, sop_c[a], sop_call[a]) for a, t in sop_time.items()]
...@@ -148,7 +206,7 @@ class ProfileMode(Mode): ...@@ -148,7 +206,7 @@ class ProfileMode(Mode):
sum(f for f, t, a, nb_call in sotimes[n_ops_to_print:])*100, sum(f for f, t, a, nb_call in sotimes[n_ops_to_print:])*100,
sum(t for f, t, a, nb_call in sotimes[n_ops_to_print:])) sum(t for f, t, a, nb_call in sotimes[n_ops_to_print:]))
print '(*) Op is running a c implementation' print '(*) Op is running a c implementation'
print 'compile time: %.3fs'%self.compile_time print 'compile time: %.3fs'%compile_time
register_mode('PROFILE_MODE',ProfileMode()) register_mode('PROFILE_MODE',ProfileMode())
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论