提交 c570b5c9 authored 作者: Frederic Bastien's avatar Frederic Bastien

Added to the profiler a list of ops that don't take float64 as input but generate…

Added to the profiler a list of ops that don't take float64 as input but generate float64 outputs. Useful for the GPU.
上级 a2fd617c
......@@ -317,6 +317,16 @@ class ProfileMode(Mode):
print 'Other time since import %.3fs %.1f%%'%(other_time,other_time/total_time*100)
print '%i Theano fct call, %.3fs per call'%(total_fct_call, time_per_call)
#imported here to break circular dependency...
from theano.tensor.basic import as_tensor_variable
print
print "List of apply that don't have float64 as input but have float64 in outputs. Usefull to know if we forgot some cast when using floatX=float32 or gpu code."
print '<Apply> <inputs type> <outputs type>'
for fct in fct_call.keys():
for node in fct.maker.env.toposort():
if any(hasattr(i,'dtype') and i.dtype=='float64' for i in node.outputs) and not any(hasattr(i,'dtype') and i.dtype=='float64' for i in node.inputs):
print str(node),str([getattr(i,'dtype',None) for i in node.inputs]),str([getattr(i,'dtype',None) for i in node.outputs])
if any([x[2].__name__.startswith("Gpu") for x in sotimes]):
cpu=[]
gpu=[]
......@@ -331,7 +341,7 @@ class ProfileMode(Mode):
sum_cpu=sum(so[1] for so in cpu)
sum_gpu=sum(so[1] for so in gpu)
sum_trans=sum(so[1] for so in trans)
print sum_cpu+sum_gpu+sum_trans,local_time
print
print "Spent %.3fs(%.3f%%) in cpu Op, %.3fs(%.3f%%) in gpu Op and %.3fs(%.3f%%) transfert Op"%(
sum_cpu, sum_cpu/local_time*100, sum_gpu, sum_gpu/local_time*100, sum_trans, sum_trans/local_time*100)
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论