提交 e07d96fc authored 作者: Frederic Bastien's avatar Frederic Bastien

implemented a hidden hack in profilemode to print the number of MFlops. Need…

Implemented a hidden hack in ProfileMode to print the number of MFlops. It needs manual intervention and a good knowledge of Theano's internals to work correctly.
上级 bd979bd5
...@@ -9,8 +9,10 @@ class ProfileMode(Mode): ...@@ -9,8 +9,10 @@ class ProfileMode(Mode):
def __init__(self, linker=OpWiseCLinker(), optimizer=None): def __init__(self, linker=OpWiseCLinker(), optimizer=None):
local_time = [0.0] local_time = [0.0]
apply_time = {} apply_time = {}
apply_call = {}
op_time = {} op_time = {}
op_cimpl = {} op_cimpl = {}
op_call = {}
def blah(i, node, th): def blah(i, node, th):
if hasattr(th, 'cthunk'): if hasattr(th, 'cthunk'):
...@@ -24,13 +26,17 @@ class ProfileMode(Mode): ...@@ -24,13 +26,17 @@ class ProfileMode(Mode):
local_time[0] += dt local_time[0] += dt
apply_time[(i,node.op)] = apply_time.get((i,node.op), 0.0) + dt apply_time[(i,node.op)] = apply_time.get((i,node.op), 0.0) + dt
apply_call[(i,node.op)] = apply_call.get((i,node.op), 0) + 1
op_time[node.op] = op_time.get(node.op, 0.0) + dt op_time[node.op] = op_time.get(node.op, 0.0) + dt
op_cimpl[node.op] = hasattr(th, 'cthunk') op_cimpl[node.op] = hasattr(th, 'cthunk')
op_call[node.op] = op_call.get(node.op,0) + 1
self.local_time = local_time self.local_time = local_time
self.apply_time = apply_time self.apply_time = apply_time
self.apply_call = apply_call
self.op_time = op_time self.op_time = op_time
self.op_cimpl = op_cimpl self.op_cimpl = op_cimpl
self.op_call = op_call
self.compile_time = 0 #time passed in function() self.compile_time = 0 #time passed in function()
if isinstance(linker, str): if isinstance(linker, str):
...@@ -49,6 +55,8 @@ class ProfileMode(Mode): ...@@ -49,6 +55,8 @@ class ProfileMode(Mode):
The Op-wise summary prints the execution time per Op: all Apply nodes executing the same Op are grouped together and the total execution time per Op is shown (so if you use dot twice, you will see only one entry there, corresponding to the sum of the time spent in each of them). If two Ops have different hash values, they will be listed separately. The Op-wise summary prints the execution time per Op: all Apply nodes executing the same Op are grouped together and the total execution time per Op is shown (so if you use dot twice, you will see only one entry there, corresponding to the sum of the time spent in each of them). If two Ops have different hash values, they will be listed separately.
The type-Op-wise summary groups the results by type of Op. So even if two Ops have different hash values, they will be merged. The type-Op-wise summary groups the results by type of Op. So even if two Ops have different hash values, they will be merged.
There is a hack in the Op-wise summary. Go see the code if you want to know more.
param: n_apply_to_print the number of apply to print. Default 15. param: n_apply_to_print the number of apply to print. Default 15.
param: n_ops_to_print the number of ops to print. Default 20. param: n_ops_to_print the number of ops to print. Default 20.
...@@ -75,16 +83,32 @@ class ProfileMode(Mode): ...@@ -75,16 +83,32 @@ class ProfileMode(Mode):
sum(f for f, t, a in atimes[n_apply_to_print:])*100, sum(f for f, t, a in atimes[n_apply_to_print:])*100,
sum(t for f, t, a in atimes[n_apply_to_print:])) sum(t for f, t, a in atimes[n_apply_to_print:]))
flops=False
flops_msg=''
for a,t in op_time.items():
if hasattr(a,'flops'):
print "if true!"
flops=True
flops_msg=' <MFlops/s>'
print '\nHACK WARNING: we print the flops for some OP, but the logic don\' always work. You need to know the internal of Theano to make it work correctly. Otherwise don\'t use!'
break
print '\nOp-wise summary: < of local_time spent on this kind of Op> <cumulative seconds> <self seconds>%s <Op name>'%(flops_msg)
print '\nOp-wise summary: <% of local_time spent on this kind of Op> <cumulative seconds> <self seconds> <Op name>'
otimes = [(t/local_time, t, a, self.op_cimpl[a]) for a, t in op_time.items()] otimes = [(t/local_time, t, a, self.op_cimpl[a]) for a, t in op_time.items()]
otimes.sort() otimes.sort()
otimes.reverse() otimes.reverse()
tot=0 tot=0
for f,t,a,ci in otimes[:n_ops_to_print]: for f,t,a,ci in otimes[:n_ops_to_print]:
tot+=t tot+=t
print ' %.2f%% %.3fs %.3fs %s %s' % (f*100, tot, t, '*' if ci else ' ', a) m=-1
print ' ... (remaining %i Ops account for %.2f%%(%.2fs) of the runtime)'\ if hasattr(a,'flops'):
m=a.flops*self.op_call[a]/t/1e6
if flops:
print ' %4.1f%% %.3fs %.3fs %s %7.1f %s' % (f*100, tot, t, '*' if ci else ' ', m,a)
else:
print ' %4.1f%% %.3fs %.3fs %s %s' % (f*100, tot, t, '*' if ci else ' ', a)
print ' ... (remaining %i Ops account for %6.2f%%(%.2fs) of the runtime)'\
%(max(0, len(otimes)-n_ops_to_print), %(max(0, len(otimes)-n_ops_to_print),
sum(f for f, t, a, ci in otimes[n_ops_to_print:])*100, sum(f for f, t, a, ci in otimes[n_ops_to_print:])*100,
sum(t for f, t, a, ci in otimes[n_ops_to_print:])) sum(t for f, t, a, ci in otimes[n_ops_to_print:]))
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论