提交 178119c9 authored 作者: Frederic Bastien's avatar Frederic Bastien

Better formatting of the ProfileMode output.

上级 7e6b33ad
...@@ -269,7 +269,8 @@ class ProfileMode(Mode): ...@@ -269,7 +269,8 @@ class ProfileMode(Mode):
print 'Rest of the time since import %.3fs %.1f%%'%(unknown_time, unknown_time/total_time*100) print 'Rest of the time since import %.3fs %.1f%%'%(unknown_time, unknown_time/total_time*100)
print print
print 'Theano fct summary: <% total fct time> <total time> <time per call> <nb call> <fct name>' print 'Theano fct summary:'
print '<% total fct time> <total time> <time per call> <nb call> <fct name>'
for key in fct_call.keys(): for key in fct_call.keys():
if fct_call[key]>0: if fct_call[key]>0:
print ' %4.1f%% %.3fs %.2es %d %s'%(fct_call_time[key]/total_fct_time*100 ,fct_call_time[key], print ' %4.1f%% %.3fs %.2es %d %s'%(fct_call_time[key]/total_fct_time*100 ,fct_call_time[key],
...@@ -312,7 +313,9 @@ class ProfileMode(Mode): ...@@ -312,7 +313,9 @@ class ProfileMode(Mode):
# Print the summary per op class. # Print the summary per op class.
print '\nSingle Op-wise summary: <% of local_time spent on this kind of Op> <cumulative %%> <self seconds> <cumulative seconds> <time per call> <nb_call> <nb_op> <Op name>' print
print 'Single Op-wise summary:'
print '<% of local_time spent on this kind of Op> <cumulative %> <self seconds> <cumulative seconds> <time per call> <nb_call> <nb_op> <Op name>'
sotimes = [(t*100/local_time, t, a, sop_c[a], sop_call[a], sop_op[a]) for a, t in sop_time.items()] sotimes = [(t*100/local_time, t, a, sop_c[a], sop_call[a], sop_op[a]) for a, t in sop_time.items()]
sotimes.sort() sotimes.sort()
sotimes.reverse() sotimes.reverse()
...@@ -345,7 +348,9 @@ class ProfileMode(Mode): ...@@ -345,7 +348,9 @@ class ProfileMode(Mode):
if op_flops: if op_flops:
flops_msg=' <MFlops/s>' flops_msg=' <MFlops/s>'
print '\nHACK WARNING: we print the flops for some OP, but the logic don\' always work. You need to know the internal of Theano to make it work correctly. Otherwise don\'t use!' print '\nHACK WARNING: we print the flops for some OP, but the logic don\' always work. You need to know the internal of Theano to make it work correctly. Otherwise don\'t use!'
print '\nOp-wise summary: <%% of local_time spent on this kind of Op> <cumulative %%> <self seconds> <cumulative seconds> <time per call> %s <nb_call> <nb called apply> <Op name>'%(flops_msg) print
print 'Op-wise summary:'
print '<%% of local_time spent on this kind of Op> <cumulative %%> <self seconds> <cumulative seconds> <time per call> %s <nb_call> <nb called apply> <Op name>'%(flops_msg)
otimes = [(t*100/local_time, t, a, op_cimpl.get(a, 0), op_call.get(a, 0), op_apply.get(a,0)) otimes = [(t*100/local_time, t, a, op_cimpl.get(a, 0), op_call.get(a, 0), op_apply.get(a,0))
for a, t in op_time.items()] for a, t in op_time.items()]
...@@ -374,7 +379,9 @@ class ProfileMode(Mode): ...@@ -374,7 +379,9 @@ class ProfileMode(Mode):
if print_apply: if print_apply:
print '\nApply-wise summary: <% of local_time spent at this position> <cumulative %%> <apply time> <cumulative seconds> <time per call> <nb_call> <Apply position> <Apply Op name>' print
print 'Apply-wise summary:'
print '<% of local_time spent at this position> <cumulative %%> <apply time> <cumulative seconds> <time per call> <nb_call> <Apply position> <Apply Op name>'
atimes = [(t*100/local_time, t, a, [v for k,v in fct_call.items() if k.maker.env is a[1].env][0]) for a, t in apply_time.items()] atimes = [(t*100/local_time, t, a, [v for k,v in fct_call.items() if k.maker.env is a[1].env][0]) for a, t in apply_time.items()]
atimes.sort() atimes.sort()
atimes.reverse() atimes.reverse()
...@@ -394,7 +401,8 @@ class ProfileMode(Mode): ...@@ -394,7 +401,8 @@ class ProfileMode(Mode):
import theano # Why we need to re-import theano here? Otherwise is crash import theano # Why we need to re-import theano here? Otherwise is crash
if any([isinstance(node.op, (theano.Scan, theano.ScanGrad)) for (_,node) in apply_time.keys()]): if any([isinstance(node.op, (theano.Scan, theano.ScanGrad)) for (_,node) in apply_time.keys()]):
print print
print "Scan overhead: <Scan op time(s)> <sub scan fct time(s)> <sub scan op time(s)> <sub scan fct time(% scan op time)> <sub scan op time(% scan op time)> <node>" print 'Scan overhead:'
print '<Scan op time(s)> <sub scan fct time(s)> <sub scan op time(s)> <sub scan fct time(% scan op time)> <sub scan op time(% scan op time)> <node>'
for (_,node),v in apply_time.items(): for (_,node),v in apply_time.items():
if isinstance(node.op, (theano.Scan, theano.ScanGrad)): if isinstance(node.op, (theano.Scan, theano.ScanGrad)):
scan_fct_time = sum(node.op.mode_instance.fct_call_time.values()) scan_fct_time = sum(node.op.mode_instance.fct_call_time.values())
...@@ -419,24 +427,25 @@ class ProfileMode(Mode): ...@@ -419,24 +427,25 @@ class ProfileMode(Mode):
sum_gpu=sum(so[1] for so in gpu) sum_gpu=sum(so[1] for so in gpu)
sum_trans=sum(so[1] for so in trans) sum_trans=sum(so[1] for so in trans)
print print
print "Spent %.3fs(%.3f%%) in cpu Op, %.3fs(%.3f%%) in gpu Op and %.3fs(%.3f%%) transfert Op"%( print " Spent %.3fs(%.3f%%) in cpu Op, %.3fs(%.3f%%) in gpu Op and %.3fs(%.3f%%) transfert Op"%(
sum_cpu, sum_cpu/local_time*100, sum_gpu, sum_gpu/local_time*100, sum_trans, sum_trans/local_time*100) sum_cpu, sum_cpu/local_time*100, sum_gpu, sum_gpu/local_time*100, sum_trans, sum_trans/local_time*100)
print print
print "Theano function input that are float64" print " Theano function input that are float64"
print "<fct name> <input name> <input type> <str input>" print " <fct name> <input name> <input type> <str input>"
for fct in fct_call.keys(): for fct in fct_call.keys():
for i in fct.input_storage: for i in fct.input_storage:
if hasattr(i.type, 'dtype') and i.type.dtype=='float64': if hasattr(i.type, 'dtype') and i.type.dtype=='float64':
print fct.name, i.name, i.type, i print ' ', fct.name, i.name, i.type, i
print print
print "List of apply that don't have float64 as input but have float64 in outputs. Useful to know if we forgot some cast when using floatX=float32 or gpu code." print " List of apply that don't have float64 as input but have float64 in outputs"
print '<Apply> <Apply position> <fct name> <inputs type> <outputs type>' print " (Useful to know if we forgot some cast when using floatX=float32 or gpu code)"
print ' <Apply> <Apply position> <fct name> <inputs type> <outputs type>'
for fct in fct_call.keys(): for fct in fct_call.keys():
for idx, node in enumerate(fct.maker.env.toposort()): for idx, node in enumerate(fct.maker.env.toposort()):
if any(hasattr(i,'dtype') and i.dtype=='float64' for i in node.outputs) and not any(hasattr(i,'dtype') and i.dtype=='float64' for i in node.inputs): if any(hasattr(i,'dtype') and i.dtype=='float64' for i in node.outputs) and not any(hasattr(i,'dtype') and i.dtype=='float64' for i in node.inputs):
print str(node), idx, fct.name, str([getattr(i,'dtype',None) for i in node.inputs]),str([getattr(i,'dtype',None) for i in node.outputs]) print ' ', str(node), idx, fct.name, str([getattr(i,'dtype',None) for i in node.inputs]),str([getattr(i,'dtype',None) for i in node.outputs])
if outputs_size: if outputs_size:
fct_memory={}#env->dict(node->(outputs size)) fct_memory={}#env->dict(node->(outputs size))
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论