提交 27cb3e25 authored 作者: Frederic Bastien's avatar Frederic Bastien

Fix the new gpu profile printing

上级 171346e8
...@@ -1549,46 +1549,45 @@ def profile_printer(message, compile_time, fct_call_time, ...@@ -1549,46 +1549,45 @@ def profile_printer(message, compile_time, fct_call_time,
print() print()
print('Some info useful for gpu:') print('Some info useful for gpu:')
fct_call = set() fgraphs = set()
for node in apply_time.keys(): for node in apply_time.keys():
fct_call.add(node.fgraph) fgraphs.add(node.fgraph)
cpu = 0 cpu = 0
gpu = 0 gpu = 0
trans = 0 trans = 0
for (_, node), t in iteritems(apply_time): for node, t in iteritems(apply_time):
if isinstance(node.op.__class__.__name__, if isinstance(node.op, (HostFromGpu, GpuFromHost)):
(HostFromGpu, GpuFromHost)):
trans += t trans += t
elif node.op.__class__.__name__.lower().startswith("gpu"): elif node.op.__class__.__name__.lower().startswith("gpu"):
gpu += t gpu += t
else: else:
cpu += t cpu += t
print() print()
print(" Spent %.3fs(%.3f%%) in cpu Op, %.3fs(%.3f%%) in gpu Op and %.3fs(%.3f%%) transfert Op" % ( print(" Spent %.3fs(%.2f%%) in cpu Op, %.3fs(%.2f%%) in gpu Op and %.3fs(%.2f%%) transfert Op" % (
cpu, cpu / local_time * 100, gpu, gpu / local_time * 100, cpu, cpu / local_time * 100, gpu, gpu / local_time * 100,
trans, trans / local_time * 100)) trans, trans / local_time * 100))
print() print()
print(" Theano function input that are float64") print(" Theano function input that are float64")
print(" <fct name> <input name> <input type> <str input>") print(" <fct name> <input name> <input type> <str input>")
for fct in fct_call: for fg in fgraphs:
for i in fct.input_storage: for i in fg.inputs:
if hasattr(i.type, 'dtype') and i.type.dtype == 'float64': if hasattr(i.type, 'dtype') and i.type.dtype == 'float64':
print(' ', fct.name, i.name, i.type, i) print(' ', fg.name, i.name, i.type, i)
print() print()
print(" List of apply that don't have float64 as input but have float64 in outputs") print(" List of apply that don't have float64 as input but have float64 in outputs")
print(" (Useful to know if we forgot some cast when using floatX=float32 or gpu code)") print(" (Useful to know if we forgot some cast when using floatX=float32 or gpu code)")
print(' <Apply> <Apply position> <fct name> <inputs type> <outputs type>') print(' <Apply> <Apply position> <fct name> <inputs type> <outputs type>')
for fct in fct_call: for fg in fgraphs:
for idx, node in enumerate(fct.maker.fgraph.toposort()): for idx, node in enumerate(fg.toposort()):
if (any(hasattr(i, 'dtype') and i.dtype == 'float64' if (any(hasattr(i, 'dtype') and i.dtype == 'float64'
for i in node.outputs) and for i in node.outputs) and
not any(hasattr(i, 'dtype') and i.dtype == 'float64' not any(hasattr(i, 'dtype') and i.dtype == 'float64'
for i in node.inputs)): for i in node.inputs)):
print(' ', str(node), idx, fct.name, end=' ') print(' ', str(node), idx, fg.name, end=' ')
print(str([getattr(i, 'dtype', None) print(str([getattr(i, 'dtype', None)
for i in node.inputs]), end=' ') for i in node.inputs]), end=' ')
print(str([getattr(i, 'dtype', None) print(str([getattr(i, 'dtype', None)
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论