提交 b594140c authored 作者: James Bergstra's avatar James Bergstra

moved profilemode out of sandbox

上级 ae90c5a2
...@@ -26,7 +26,7 @@ To learn more, check out: ...@@ -26,7 +26,7 @@ To learn more, check out:
__docformat__ = "restructuredtext en" __docformat__ = "restructuredtext en"
from gof import \ from gof import \
CLinker, OpWiseCLinker, DualLinker, Linker, LocalLinker, PerformLinker, Profiler, \ CLinker, OpWiseCLinker, DualLinker, Linker, LocalLinker, PerformLinker, \
Container, \ Container, \
InconsistencyError, Env, \ InconsistencyError, Env, \
Apply, Result, Constant, Value, \ Apply, Result, Constant, Value, \
...@@ -44,7 +44,9 @@ from compile import \ ...@@ -44,7 +44,9 @@ from compile import \
predefined_modes, predefined_linkers, predefined_optimizers, \ predefined_modes, predefined_linkers, predefined_optimizers, \
FunctionMaker, function, OpFromGraph, \ FunctionMaker, function, OpFromGraph, \
Component, External, Member, KitComponent, Method, \ Component, External, Member, KitComponent, Method, \
Composite, ComponentList, ComponentDict, Module Composite, ComponentList, ComponentDict, Module, \
ProfileMode
FancyModule = Module FancyModule = Module
......
...@@ -14,3 +14,5 @@ from builders import * ...@@ -14,3 +14,5 @@ from builders import *
import module import module
from module import * from module import *
from profilemode import ProfileMode
import time
from ..gof.link import WrapLinkerMany
from ..gof.cutils import run_cthunk
from ..compile.mode import Mode
class ProfileMode(Mode):
def __init__(self, linker, optimizer=None):
local_time = [0.0]
apply_time = {}
op_time = {}
op_cimpl = {}
def blah(i, node, *thunks):
if 0:
t0 = time.time()
for th in thunks:
th()
dt = time.time() - t0
elif 0: #more precise timing
for th in thunks:
t0 = time.time()
th()
dt = time.time() - t0
elif 1:
for th in thunks:
if hasattr(th, 'cthunk'):
t0 = time.time()
run_cthunk(th.cthunk)
dt = time.time() - t0
else:
t0 = time.time()
th()
dt = time.time() - t0
elif 1:
pass
else:
raise Exception('one of the cases has to run the thunks!')
local_time[0] += dt
apply_time[(i,node.op)] = apply_time.get((i,node.op), 0.0) + dt
op_time[node.op] = op_time.get(node.op, 0.0) + dt
op_cimpl[node.op] = hasattr(thunks[0], 'cthunk')
self.local_time = local_time
self.apply_time = apply_time
self.op_time = op_time
self.op_cimpl = op_cimpl
wrap_linker = WrapLinkerMany([linker], [blah])
if optimizer:
super(ProfileMode, self).__init__(wrap_linker, optimizer)
else:
super(ProfileMode, self).__init__(wrap_linker)
def print_summary(self):
local_time = self.local_time[0]
apply_time = self.apply_time
op_time = self.op_time
print ''
print 'ProfileMode.print_summary()'
print '---------------------------'
print ''
print 'local_time', local_time, '(Time spent running thunks)'
print 'Apply-wise summary: <fraction of local_time spent at this position> (<Apply position>, <Apply Op name>)'
atimes = [(t/local_time, (a[0], str(a[1]))) for a, t in apply_time.items()]
atimes.sort()
atimes.reverse()
for t,a in atimes[:15]:
print '\t%.3f\t%i\t%s' % (t, a[0], a[1])
print ' ... (remaining %i Apply instances account for %.2f of the runtime)'\
%(max(0, len(atimes)-15), sum(t for t, a in atimes[15:]))
n_ops_to_print = 20
print 'Op-wise summary: <fraction of local_time spent on this kind of Op> <Op name>'
otimes = [(t/local_time, a, self.op_cimpl[a]) for a, t in op_time.items()]
otimes.sort()
otimes.reverse()
for t,a,ci in otimes[:n_ops_to_print]:
print '\t%.3f\t%s %s' % (t, '*' if ci else ' ', a)
print ' ... (remaining %i Ops account for %.2f of the runtime)'\
%(max(0, len(otimes)-n_ops_to_print), sum(t for t, a, ci in
otimes[n_ops_to_print:]))
print '(*) Op is running a c implementation'
...@@ -15,7 +15,7 @@ from graph import \ ...@@ -15,7 +15,7 @@ from graph import \
Apply, Result, Constant, Value, view_roots Apply, Result, Constant, Value, view_roots
from link import \ from link import \
Container, Linker, LocalLinker, PerformLinker, WrapLinker, Profiler Container, Linker, LocalLinker, PerformLinker, WrapLinker, WrapLinkerMany
from op import \ from op import \
Op Op
......
...@@ -220,14 +220,6 @@ def map_storage(env, order, input_storage, output_storage): ...@@ -220,14 +220,6 @@ def map_storage(env, order, input_storage, output_storage):
return input_storage, output_storage, storage_map return input_storage, output_storage, storage_map
def clear_storage_thunk(stg):
"""This is useful for inserting thunks that zero-out storage, which allows memory to be freed by gc."""
def thunk():
stg[0] = None
thunk.outputs = []
thunk.inputs = [stg]
return thunk
def streamline(env, thunks, order, post_thunk_old_storage = None, no_recycling = [], profiler = None, nice_errors = True): def streamline(env, thunks, order, post_thunk_old_storage = None, no_recycling = [], profiler = None, nice_errors = True):
"""WRITEME """WRITEME
...@@ -379,8 +371,8 @@ class PerformLinker(LocalLinker): ...@@ -379,8 +371,8 @@ class PerformLinker(LocalLinker):
input_storage, output_storage, storage_map = map_storage(env, order, input_storage, output_storage) input_storage, output_storage, storage_map = map_storage(env, order, input_storage, output_storage)
computed, last_user = gc_helper(order)
if self.allow_gc: if self.allow_gc:
computed, last_user = gc_helper(order)
post_thunk_old_storage = [] post_thunk_old_storage = []
else: else:
post_thunk_old_storage = None post_thunk_old_storage = None
...@@ -403,18 +395,6 @@ class PerformLinker(LocalLinker): ...@@ -403,18 +395,6 @@ class PerformLinker(LocalLinker):
for input in node.inputs for input in node.inputs
if (input in computed) and (input not in env.outputs) and node == last_user[input]]) if (input in computed) and (input not in env.outputs) and node == last_user[input]])
if 0: # -JB 20081202
if self.allow_gc:
for idx, input in enumerate(node.inputs):
if input not in computed:
continue
if input in env.outputs:
continue
if (node, idx) == last_user[input]:
#print '... zeroing', id(storage_map[input])
thunks.append(clear_storage_thunk(storage_map[input]))
new_order.append(node)
if no_recycling is True: if no_recycling is True:
# True seems like some special code for *everything*?? -JB # True seems like some special code for *everything*?? -JB
# FunctionMaker always passes a list I think -JB # FunctionMaker always passes a list I think -JB
...@@ -427,11 +407,19 @@ class PerformLinker(LocalLinker): ...@@ -427,11 +407,19 @@ class PerformLinker(LocalLinker):
f = streamline(env, thunks, order, post_thunk_old_storage, no_recycling = no_recycling, profiler = profiler) f = streamline(env, thunks, order, post_thunk_old_storage, no_recycling = no_recycling, profiler = profiler)
f.allow_gc = self.allow_gc #HACK: this is a way of passing an arg to Function.__call__ f.allow_gc = self.allow_gc #HACK: this is a way of passing an arg to Function.__call__
add_clear_storage(f, computed, storage_map)
return f, [Container(input, storage) for input, storage in zip(env.inputs, input_storage)], \ return f, [Container(input, storage) for input, storage in zip(env.inputs, input_storage)], \
[Container(output, storage, True) for output, storage in zip(env.outputs, output_storage)], \ [Container(output, storage, True) for output, storage in zip(env.outputs, output_storage)], \
thunks, order thunks, order
def add_clear_storage(f, computed, storage_map):
def clear_storage():
for c in computed:
storage_map[c][0] = None
f.clear_storage = clear_storage
class WrapLinker(Linker): class WrapLinker(Linker):
""" WRITEME """ WRITEME
This class makes it easier to run several L{LocalLinker}s in parallel, and This class makes it easier to run several L{LocalLinker}s in parallel, and
...@@ -541,113 +529,13 @@ class WrapLinker(Linker): ...@@ -541,113 +529,13 @@ class WrapLinker(Linker):
return f, inputs0, outputs0 return f, inputs0, outputs0
def WrapLinkerMany(linkers, wrappers):
""" Variant on WrapLinker that runs a series of wrapper functions instead of
import time just one.
class Stats:
"""WRITEME"""
def __init__(self):
self.ncalls = 0
self.time = 0
self.nfailures = 0
self.time_failures = 0
def inc_ncalls(self, v): self.ncalls += v
def inc_time(self, v): self.time += v
def inc_nfailures(self, v): self.nfailures += v
def inc_time_failures(self, v): self.time_failures += v
class Profiler:
"""WRITEME
Collects performance statistics on a function on a per-L{Op}
or per-L{Op}-class basis.
""" """
def wrapper(*args):
def __init__(self, ignore = [], by_class = True): for f in wrappers:
""" f(*args)
Creates a L{Profiler}. If by_class is True, stats will return WrapLinker(linkers, wrapper)
be collected for each L{Op} class, adding the totals for
each occurrence of that L{Op} in the computation. If
by_class is False, each node will be timed individually.
All L{Op} classes or L{Op}s (depending on the value of by_class)
listed in ignore will not be timed.
"""
self.ignore = ignore
self.stats = {}
self.by_class = by_class
def profile_env(self, f, env):
"""WRITEME"""
stats = self.stats.setdefault('TOTAL', Stats())
n, t = stats.inc_ncalls, stats.inc_time
failed = False
start = time.time()
try:
f()
end = time.time()
except:
end = time.time()
n, t = stats.inc_nfailures, stats.inc_times_failures
failed = True
ety, eva, etr = sys.exc_info()
n(1)
t(end - start)
if failed:
raise ety, eva, etr
def profile_op(self, f, op):
"""WRITEME"""
if self.by_class:
entry = op.__class__
else:
entry = op
stats = self.stats.setdefault(entry, Stats())
n, t = stats.inc_ncalls, stats.inc_time
failed = False
start = time.time()
try:
f()
end = time.time()
except:
end = time.time()
n, t = stats.inc_nfailures, stats.inc_times_failures
failed = True
exc = sys.exc_info()
if entry not in self.ignore:
n(1)
t(end - start)
if failed:
raise_with_op(op, exc)
def print_stats(self, sort_by = 'time'):
"""WRITEME"""
def compare_fn((op1, stat1), (op2, stat2)):
x1 = getattr(stat2, sort_by)
x2 = getattr(stat1, sort_by)
if x1 > x2:
return 1
elif x1 < x2:
return -1
else:
return 0
totals = self.stats['TOTAL']
print 'CPU usage statistics'
print " %-25s %9s %12s %12s %12s" % (("Op%s" % (self.by_class and ' class' or '')), 'NCALLS', 'PER_CALL', 'TOTAL', 'CPU%')
for op, stat in sorted(self.stats.items(), compare_fn):
if op == 'TOTAL': continue
to_print = self.by_class and (op.__module__ + "." + op.__name__) or str(op)
print " %-25s %9i %12.5f %12.5f %12.5f" % (to_print, stat.ncalls, stat.time / stat.ncalls, stat.time, stat.time / totals.time)
stat = self.stats['TOTAL']
print " %-25s %9i %12.5f %12.5f %12.5f" % ('TOTAL (includes overhead)', stat.ncalls, stat.time / stat.ncalls, stat.time, stat.time / totals.time)
import unittest import unittest
from theano.gof.link import PerformLinker, Profiler from theano.gof.link import PerformLinker
from theano.gof.cc import * from theano.gof.cc import *
from theano.gof.type import Type from theano.gof.type import Type
from theano.gof.graph import Result, Apply, Constant from theano.gof.graph import Result, Apply, Constant
......
...@@ -89,97 +89,127 @@ def run_all(i, node, *thunks): ...@@ -89,97 +89,127 @@ def run_all(i, node, *thunks):
def WrapLinkerMany(linkers, wrappers):
""" Variant on WrapLinker that runs a series of wrapper functions instead of
just one.
"""
def wrapper(*args):
for f in wrappers:
f(*args)
return WrapLinker(linkers, wrapper)
def DualLinker(linkers): def DualLinker(linkers):
#still in sandbox pending ticket 247
# when value_cmp is implemented, then cmp_outputs can be rewritten in a solid way, and the
# DualLinker can be this simple.
return WrapLinkerMany(linkers, [run_all, cmp_outputs]) return WrapLinkerMany(linkers, [run_all, cmp_outputs])
class ProfileMode(Mode):
def __init__(self, linker, optimizer=None): ####
local_time = [0.0] #
apply_time = {} # The Stats and Profiler classes used to be in gof/link.
op_time = {} # But Stats was not used I think, and Profiler has been implemented using the wraplinker.
op_cimpl = {} #
# -JB20090119
def blah(i, node, *thunks): ###
if 0: import time
t0 = time.time()
for th in thunks: class Stats:
th() """WRITEME"""
dt = time.time() - t0 def __init__(self):
elif 0: #more precise timing self.ncalls = 0
for th in thunks: self.time = 0
t0 = time.time() self.nfailures = 0
th() self.time_failures = 0
dt = time.time() - t0 def inc_ncalls(self, v): self.ncalls += v
elif 1: def inc_time(self, v): self.time += v
for th in thunks: def inc_nfailures(self, v): self.nfailures += v
if hasattr(th, 'cthunk'): def inc_time_failures(self, v): self.time_failures += v
t0 = time.time()
run_cthunk(th.cthunk) class Profiler:
dt = time.time() - t0 """WRITEME
else: Collects performance statistics on a function on a per-L{Op}
t0 = time.time() or per-L{Op}-class basis.
th() """
dt = time.time() - t0
elif 1: def __init__(self, ignore = [], by_class = True):
pass """
else: Creates a L{Profiler}. If by_class is True, stats will
raise Exception('one of the cases has to run the thunks!') be collected for each L{Op} class, adding the totals for
local_time[0] += dt each occurrence of that L{Op} in the computation. If
apply_time[(i,node.op)] = apply_time.get((i,node.op), 0.0) + dt by_class is False, each node will be timed individually.
op_time[node.op] = op_time.get(node.op, 0.0) + dt
op_cimpl[node.op] = hasattr(thunks[0], 'cthunk') All L{Op} classes or L{Op}s (depending on the value of by_class)
listed in ignore will not be timed.
self.local_time = local_time """
self.apply_time = apply_time self.ignore = ignore
self.op_time = op_time self.stats = {}
self.op_cimpl = op_cimpl self.by_class = by_class
wrap_linker = WrapLinkerMany([linker], [blah]) def profile_env(self, f, env):
if optimizer: """WRITEME"""
super(ProfileMode, self).__init__(wrap_linker, optimizer) stats = self.stats.setdefault('TOTAL', Stats())
n, t = stats.inc_ncalls, stats.inc_time
failed = False
start = time.time()
try:
f()
end = time.time()
except:
end = time.time()
n, t = stats.inc_nfailures, stats.inc_times_failures
failed = True
ety, eva, etr = sys.exc_info()
n(1)
t(end - start)
if failed:
raise ety, eva, etr
def profile_op(self, f, op):
"""WRITEME"""
if self.by_class:
entry = op.__class__
else: else:
super(ProfileMode, self).__init__(wrap_linker) entry = op
stats = self.stats.setdefault(entry, Stats())
def print_summary(self): n, t = stats.inc_ncalls, stats.inc_time
local_time = self.local_time[0] failed = False
apply_time = self.apply_time
op_time = self.op_time start = time.time()
try:
print '' f()
print 'ProfileMode.print_summary()' end = time.time()
print '---------------------------' except:
print '' end = time.time()
print 'local_time', local_time, '(Time spent running thunks)' n, t = stats.inc_nfailures, stats.inc_times_failures
print 'Apply-wise summary: <fraction of local_time spent at this position> (<Apply position>, <Apply Op name>)' failed = True
atimes = [(t/local_time, (a[0], str(a[1]))) for a, t in apply_time.items()] exc = sys.exc_info()
atimes.sort()
atimes.reverse() if entry not in self.ignore:
for t,a in atimes[:15]: n(1)
print '\t%.3f\t%i\t%s' % (t, a[0], a[1]) t(end - start)
print ' ... (remaining %i Apply instances account for %.2f of the runtime)'\ if failed:
%(max(0, len(atimes)-15), sum(t for t, a in atimes[15:])) raise_with_op(op, exc)
n_ops_to_print = 20 def print_stats(self, sort_by = 'time'):
print 'Op-wise summary: <fraction of local_time spent on this kind of Op> <Op name>' """WRITEME"""
otimes = [(t/local_time, a, self.op_cimpl[a]) for a, t in op_time.items()]
otimes.sort() def compare_fn((op1, stat1), (op2, stat2)):
otimes.reverse() x1 = getattr(stat2, sort_by)
for t,a,ci in otimes[:n_ops_to_print]: x2 = getattr(stat1, sort_by)
print '\t%.3f\t%s %s' % (t, '*' if ci else ' ', a) if x1 > x2:
print ' ... (remaining %i Ops account for %.2f of the runtime)'\ return 1
%(max(0, len(otimes)-n_ops_to_print), sum(t for t, a, ci in elif x1 < x2:
otimes[n_ops_to_print:])) return -1
print '(*) Op is running a c implementation' else:
return 0
totals = self.stats['TOTAL']
print 'CPU usage statistics'
print " %-25s %9s %12s %12s %12s" % (("Op%s" % (self.by_class and ' class' or '')), 'NCALLS', 'PER_CALL', 'TOTAL', 'CPU%')
for op, stat in sorted(self.stats.items(), compare_fn):
if op == 'TOTAL': continue
to_print = self.by_class and (op.__module__ + "." + op.__name__) or str(op)
print " %-25s %9i %12.5f %12.5f %12.5f" % (to_print, stat.ncalls, stat.time / stat.ncalls, stat.time, stat.time / totals.time)
stat = self.stats['TOTAL']
print " %-25s %9i %12.5f %12.5f %12.5f" % ('TOTAL (includes overhead)', stat.ncalls, stat.time / stat.ncalls, stat.time, stat.time / totals.time)
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论