提交 d74681bf authored 作者: Frederic Bastien's avatar Frederic Bastien

Make the profiler in GPU mode output more information to help find why ops are not running on the GPU.

上级 e21a5958
...@@ -311,11 +311,11 @@ class ProfileMode(Mode): ...@@ -311,11 +311,11 @@ class ProfileMode(Mode):
from theano.tensor.basic import as_tensor_variable from theano.tensor.basic import as_tensor_variable
print print
print "List of apply that don't have float64 as input but have float64 in outputs. Usefull to know if we forgot some cast when using floatX=float32 or gpu code." print "List of apply that don't have float64 as input but have float64 in outputs. Usefull to know if we forgot some cast when using floatX=float32 or gpu code."
print '<Apply> <inputs type> <outputs type>' print '<Apply> <Apply position> <fct name> <inputs type> <outputs type>'
for fct in fct_call.keys(): for fct in fct_call.keys():
for node in fct.maker.env.toposort(): for idx, node in enumerate(fct.maker.env.toposort()):
if any(hasattr(i,'dtype') and i.dtype=='float64' for i in node.outputs) and not any(hasattr(i,'dtype') and i.dtype=='float64' for i in node.inputs): if any(hasattr(i,'dtype') and i.dtype=='float64' for i in node.outputs) and not any(hasattr(i,'dtype') and i.dtype=='float64' for i in node.inputs):
print str(node),str([getattr(i,'dtype',None) for i in node.inputs]),str([getattr(i,'dtype',None) for i in node.outputs]) print str(node), idx, fct.name, str([getattr(i,'dtype',None) for i in node.inputs]),str([getattr(i,'dtype',None) for i in node.outputs])
if any([x[2].__name__.startswith("Gpu") for x in sotimes]): if any([x[2].__name__.startswith("Gpu") for x in sotimes]):
cpu=[] cpu=[]
...@@ -336,6 +336,13 @@ class ProfileMode(Mode): ...@@ -336,6 +336,13 @@ class ProfileMode(Mode):
print "Spent %.3fs(%.3f%%) in cpu Op, %.3fs(%.3f%%) in gpu Op and %.3fs(%.3f%%) transfert Op"%( print "Spent %.3fs(%.3f%%) in cpu Op, %.3fs(%.3f%%) in gpu Op and %.3fs(%.3f%%) transfert Op"%(
sum_cpu, sum_cpu/local_time*100, sum_gpu, sum_gpu/local_time*100, sum_trans, sum_trans/local_time*100) sum_cpu, sum_cpu/local_time*100, sum_gpu, sum_gpu/local_time*100, sum_trans, sum_trans/local_time*100)
print "Theano function input that are float64"
print "<fct name> <input name> <input type> <str input>"
for fct in fct_call.keys():
for i in fct.input_storage:
if i.type.dtype=='float64':
print fct.name, i.name, i.type, i
register_mode('PROFILE_MODE',ProfileMode()) register_mode('PROFILE_MODE',ProfileMode())
#needed to print the profile at the end automatically #needed to print the profile at the end automatically
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论