提交 9dbacd88 authored 作者: Frederic Bastien's avatar Frederic Bastien

Don't count the Apply nodes of Theano functions that were compiled but never called in the profile mode.

上级 1c11f7f9
...@@ -258,8 +258,12 @@ class ProfileMode(Mode): ...@@ -258,8 +258,12 @@ class ProfileMode(Mode):
op_call.setdefault(op,0) op_call.setdefault(op,0)
op_apply.setdefault(op,0) op_apply.setdefault(op,0)
op_time[op]+=t op_time[op]+=t
op_call[op]+=[v for k,v in fct_call.items() if k.maker.env is a.env][0] nb_call = [v for k,v in fct_call.items() if k.maker.env is a.env][0]
op_apply[op]+=1 if t==0:
assert nb_call == 0
else:
op_call[op] += nb_call
op_apply[op] += 1
op_flops = {} op_flops = {}
for a,t in op_time.items(): for a,t in op_time.items():
...@@ -270,7 +274,7 @@ class ProfileMode(Mode): ...@@ -270,7 +274,7 @@ class ProfileMode(Mode):
if op_flops: if op_flops:
flops_msg=' <MFlops/s>' flops_msg=' <MFlops/s>'
print '\nHACK WARNING: we print the flops for some OP, but the logic don\' always work. You need to know the internal of Theano to make it work correctly. Otherwise don\'t use!' print '\nHACK WARNING: we print the flops for some OP, but the logic don\' always work. You need to know the internal of Theano to make it work correctly. Otherwise don\'t use!'
print '\nOp-wise summary: <%% of local_time spent on this kind of Op> <cumulative %%> <self seconds> <cumulative seconds> <time per call> %s <nb_call> <nb apply> <Op name>'%(flops_msg) print '\nOp-wise summary: <%% of local_time spent on this kind of Op> <cumulative %%> <self seconds> <cumulative seconds> <time per call> %s <nb_call> <nb called apply> <Op name>'%(flops_msg)
otimes = [(t*100/local_time, t, a, op_cimpl.get(a, 0), op_call.get(a, 0), op_apply.get(a,0)) otimes = [(t*100/local_time, t, a, op_cimpl.get(a, 0), op_call.get(a, 0), op_apply.get(a,0))
for a, t in op_time.items()] for a, t in op_time.items()]
...@@ -311,7 +315,7 @@ class ProfileMode(Mode): ...@@ -311,7 +315,7 @@ class ProfileMode(Mode):
sop_c.setdefault(typ,True) sop_c.setdefault(typ,True)
sop_c[typ]=sop_c[typ] and op_cimpl.get(a, False) sop_c[typ]=sop_c[typ] and op_cimpl.get(a, False)
sop_call[typ]=sop_call.get(typ,0)+op_call[a] sop_call[typ]=sop_call.get(typ,0)+op_call[a]
print '\nSingle Op-wise summary: <% of local_time spent on this kind of Op> <cumulative %%> <self seconds> <cumulative seconds> <time per call> <nb_call> <nb_op> <nb_op> <Op name>' print '\nSingle Op-wise summary: <% of local_time spent on this kind of Op> <cumulative %%> <self seconds> <cumulative seconds> <time per call> <nb_call> <nb_op> <Op name>'
sotimes = [(t*100/local_time, t, a, sop_c[a], sop_call[a], sop_op[a]) for a, t in sop_time.items()] sotimes = [(t*100/local_time, t, a, sop_c[a], sop_call[a], sop_op[a]) for a, t in sop_time.items()]
sotimes.sort() sotimes.sort()
sotimes.reverse() sotimes.reverse()
...@@ -501,7 +505,6 @@ class ProfileMode(Mode): ...@@ -501,7 +505,6 @@ class ProfileMode(Mode):
if s_op.__class__ in scalar_op_amdlibm_speed_up: if s_op.__class__ in scalar_op_amdlibm_speed_up:
return True return True
elif s_op.__class__ not in scalar_op_amdlibm_no_speed_up: elif s_op.__class__ not in scalar_op_amdlibm_no_speed_up:
import pdb;pdb.set_trace()
print "We don't know if amdlibm will accelerate this scalar op.", s_op print "We don't know if amdlibm will accelerate this scalar op.", s_op
return False return False
def exp_float32_op(op): def exp_float32_op(op):
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论