提交 3f2298bd 作者: James Bergstra

better profiling

上级 7f45d400
......@@ -9,6 +9,7 @@ import theano.tensor as T
import theano.sandbox
import theano.sandbox.wraplinker
from theano.compile import module, Mode
from theano.sandbox.wraplinker import ProfileMode
if 0:
class Opt(object):
......@@ -184,7 +185,8 @@ class M(module.Module):
mod = M()
#m = mod.make(mode='FAST_RUN')
m = mod.make(mode=Mode(optimizer='fast_run', linker=linker()))
mode = ProfileMode(optimizer='fast_run', linker=theano.gof.OpWiseCLinker())
m = mod.make(mode=mode)
neg, nout, nhid, niter = [int(a) for a in sys.argv[1:]]
rng = numpy.random.RandomState(342)
......@@ -198,4 +200,5 @@ t = time.time()
for i in xrange(niter):
err = m.step(x)
print 'time: ',time.time() - t, 'err: ', err
mode.print_summary()
......@@ -686,7 +686,15 @@ class CLinker(link.Linker):
instantiate.customize.add_support_code(support_code)
instantiate.customize.add_support_code(self.struct_code)
instantiate.customize.add_support_code(static)
instantiate.customize.add_extra_compile_arg("-w")
for extra_arg in ("-w", #-w means suppress all warnings
):
#"-O3",
#"-ffast-math",
#"-fprefetch-loop-arrays",
#"-ftree-vect-loop-version",
#"-ftree-loop-optimize",
#"-ftree-vectorize"):
instantiate.customize.add_extra_compile_arg(extra_arg)
for arg in self.compile_args():
instantiate.customize.add_extra_compile_arg(arg)
for header in self.headers():
......
......@@ -103,7 +103,7 @@ def DualLinker(linkers):
class ProfileMode(Mode):
def __init__(self, local_linker, optimizer=None):
def __init__(self, linker, optimizer=None):
local_time = [0.0]
apply_time = {}
op_time = {}
......@@ -121,31 +121,34 @@ class ProfileMode(Mode):
self.apply_time = apply_time
self.op_time = op_time
linker = WrapLinkerMany([local_linker], [blah])
wrap_linker = WrapLinkerMany([linker], [blah])
if optimizer:
Mode.__init__(self, linker, optimizer)
super(ProfileMode, self).__init__(wrap_linker, optimizer)
else:
Mode.__init__(self, linker)
super(ProfileMode, self).__init__(wrap_linker)
def print_summary(self):
local_time = self.local_time[0]
apply_time = self.apply_time
op_time = self.op_time
print 'local_time', local_time
print 'apply-wise times'
print ''
print 'ProfileMode.print_summary()'
print '---------------------------'
print ''
print 'local_time', local_time, '(Time spent running thunks)'
print 'Apply-wise summary: <fraction of local_time spent at this position> (<Apply position>, <Apply Op name>)'
atimes = [(t/local_time, (a[0], str(a[1]))) for a, t in apply_time.items()]
atimes.sort()
atimes.reverse()
for t,a in atimes[:15]:
print ' ', t, a
print ' ...' #show that we are ignoring applies that don't take much time
print 'op-wise times'
print ' ... (ignoring %i other Apply instances)'%max(0, len(atimes)-15)
print 'Op-wise summary: <fraction of local_time spent on this kind of Op> <Op name>'
otimes = [(t/local_time, a) for a, t in op_time.items()]
otimes.sort()
otimes.reverse()
for t,a in otimes[:15]:
print ' ', t, a
print ' ...' #show that we are ignoring applies that don't take much time
print sum(t for a,t in op_time.items())
print ' ... (ignoring %i other kinds Ops)'%max(0, len(otimes)-15)
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论