提交 81211317 authored 作者: Saizheng Zhang's avatar Saizheng Zhang

5055:Port ProfileMode GPU to new back-end and profiling.py

上级 acd4a90f
...@@ -147,6 +147,12 @@ def print_global_stats(): ...@@ -147,6 +147,12 @@ def print_global_stats():
print('=' * 50, file=destination_file) print('=' * 50, file=destination_file)
# Registry of extra printer callbacks added via register_profiler_printer.
_profiler_printers = []


def register_profiler_printer(fct):
    """Add ``fct`` to the module-level printer registry and return it.

    Returning ``fct`` unchanged lets this be used as a decorator.
    """
    # Must use the same (underscore-prefixed) name as the list defined above;
    # the previous spelling `profiler_printers` was an undefined name.
    _profiler_printers.append(fct)
    return fct
class ProfileStats(object): class ProfileStats(object):
""" """
......
...@@ -1538,6 +1538,58 @@ class GpuSplit(HideC, Split): ...@@ -1538,6 +1538,58 @@ class GpuSplit(HideC, Split):
return main_code % locals() return main_code % locals()
@theano.compile.profiling.register_profiler_printer
def profile_printer(fct_name, compile_time, fct_call_time, fct_call,
                    apply_time, apply_cimpl, message, outputs_size,
                    other_time):
    """Print GPU-related profiling hints to standard output.

    Nothing is printed unless at least one profiled node has an op whose
    class name starts with "gpu" (case-insensitive).

    Only two arguments are read:

    - ``apply_time``: mapping whose keys are 2-tuples with the apply node in
      second position and whose values are the time spent in that node.
    - ``fct_call``: iterable of compiled functions; each must expose
      ``input_storage``, ``name`` and ``maker.fgraph.toposort()``.

    The remaining parameters are accepted for signature compatibility with
    the other registered printers and are not used here.
    """
    if not any(node.op.__class__.__name__.lower().startswith("gpu")
               for (_, node) in apply_time):
        return

    local_time = sum(apply_time.values())
    print()
    print('Some info useful for gpu:')

    cpu = 0
    gpu = 0
    trans = 0
    for (_, node), t in iteritems(apply_time):
        # Check the op instance itself. Checking the class *name* (a str)
        # against the transfer Op classes can never match, which made every
        # transfer be counted as generic gpu time.
        if isinstance(node.op, (HostFromGpu, GpuFromHost)):
            trans += t
        elif node.op.__class__.__name__.lower().startswith("gpu"):
            gpu += t
        else:
            cpu += t

    def percent(t):
        # Guard against a zero total so the report never raises
        # ZeroDivisionError.
        return t / local_time * 100 if local_time else 0.0

    print()
    print(" Spent %.3fs(%.3f%%) in cpu Op, %.3fs(%.3f%%) in gpu Op and %.3fs(%.3f%%) transfert Op" % (
        cpu, percent(cpu), gpu, percent(gpu),
        trans, percent(trans)))

    print()
    print(" Theano function input that are float64")
    print(" <fct name> <input name> <input type> <str input>")
    for fct in fct_call:
        for i in fct.input_storage:
            if hasattr(i.type, 'dtype') and i.type.dtype == 'float64':
                print(' ', fct.name, i.name, i.type, i)

    print()
    print(" List of apply that don't have float64 as input but have float64 in outputs")
    print(" (Useful to know if we forgot some cast when using floatX=float32 or gpu code)")
    print(' <Apply> <Apply position> <fct name> <inputs type> <outputs type>')
    for fct in fct_call:
        for idx, node in enumerate(fct.maker.fgraph.toposort()):
            if (any(hasattr(i, 'dtype') and i.dtype == 'float64'
                    for i in node.outputs) and
                not any(hasattr(i, 'dtype') and i.dtype == 'float64'
                        for i in node.inputs)):
                print(' ', str(node), idx, fct.name, end=' ')
                print(str([getattr(i, 'dtype', None)
                           for i in node.inputs]), end=' ')
                print(str([getattr(i, 'dtype', None)
                           for i in node.outputs]))
class GpuEye(GpuKernelBase, Op): class GpuEye(GpuKernelBase, Op):
""" """
Eye for GPU. Eye for GPU.
......
...@@ -4092,59 +4092,6 @@ def tensor4(name=None, dtype=None): ...@@ -4092,59 +4092,6 @@ def tensor4(name=None, dtype=None):
ftensor4 = CudaNdarrayType(dtype='float32', broadcastable=(False,) * 4) ftensor4 = CudaNdarrayType(dtype='float32', broadcastable=(False,) * 4)
# TODO: move that to the new back-end and new profiling.py print_tips
# @theano.compile.profilemode.register_profiler_printer
def profile_printer(fct_name, compile_time, fct_call_time, fct_call,
                    apply_time, apply_cimpl, message, outputs_size,
                    other_time):
    """Print GPU-related profiling hints to standard output.

    Nothing is printed unless at least one profiled node has an op whose
    class name starts with "gpu" (case-insensitive).

    Only two arguments are read:

    - ``apply_time``: mapping whose keys are 2-tuples with the apply node in
      second position and whose values are the time spent in that node.
    - ``fct_call``: iterable of compiled functions; each must expose
      ``input_storage``, ``name`` and ``maker.fgraph.toposort()``.

    The remaining parameters are accepted for signature compatibility and
    are not used here.
    """
    if not any(node.op.__class__.__name__.lower().startswith("gpu")
               for (_, node) in apply_time):
        return

    local_time = sum(apply_time.values())
    print()
    print('Some info useful for gpu:')

    cpu = 0
    gpu = 0
    trans = 0
    for (_, node), t in iteritems(apply_time):
        # Check the op instance itself. Checking the class *name* (a str)
        # against the transfer Op classes can never match, which made every
        # transfer be counted as generic gpu time.
        if isinstance(node.op, (HostFromGpu, GpuFromHost)):
            trans += t
        elif node.op.__class__.__name__.lower().startswith("gpu"):
            gpu += t
        else:
            cpu += t

    def percent(t):
        # Guard against a zero total so the report never raises
        # ZeroDivisionError.
        return t / local_time * 100 if local_time else 0.0

    print()
    print(" Spent %.3fs(%.3f%%) in cpu Op, %.3fs(%.3f%%) in gpu Op and %.3fs(%.3f%%) transfert Op" % (
        cpu, percent(cpu), gpu, percent(gpu),
        trans, percent(trans)))

    print()
    print(" Theano function input that are float64")
    print(" <fct name> <input name> <input type> <str input>")
    for fct in fct_call:
        for i in fct.input_storage:
            if hasattr(i.type, 'dtype') and i.type.dtype == 'float64':
                print(' ', fct.name, i.name, i.type, i)

    print()
    print(" List of apply that don't have float64 as input but have float64 in outputs")
    print(" (Useful to know if we forgot some cast when using floatX=float32 or gpu code)")
    print(' <Apply> <Apply position> <fct name> <inputs type> <outputs type>')
    for fct in fct_call:
        for idx, node in enumerate(fct.maker.fgraph.toposort()):
            if (any(hasattr(i, 'dtype') and i.dtype == 'float64'
                    for i in node.outputs) and
                not any(hasattr(i, 'dtype') and i.dtype == 'float64'
                        for i in node.inputs)):
                print(' ', str(node), idx, fct.name, end=' ')
                print(str([getattr(i, 'dtype', None)
                           for i in node.inputs]), end=' ')
                print(str([getattr(i, 'dtype', None)
                           for i in node.outputs]))
class GpuEye(GpuOp): class GpuEye(GpuOp):
def __init__(self, dtype=None): def __init__(self, dtype=None):
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论