提交 3f2298bd 作者: James Bergstra

better profiling

上级 7f45d400
...@@ -9,6 +9,7 @@ import theano.tensor as T ...@@ -9,6 +9,7 @@ import theano.tensor as T
import theano.sandbox import theano.sandbox
import theano.sandbox.wraplinker import theano.sandbox.wraplinker
from theano.compile import module, Mode from theano.compile import module, Mode
from theano.sandbox.wraplinker import ProfileMode
if 0: if 0:
class Opt(object): class Opt(object):
...@@ -184,7 +185,8 @@ class M(module.Module): ...@@ -184,7 +185,8 @@ class M(module.Module):
mod = M() mod = M()
#m = mod.make(mode='FAST_RUN') #m = mod.make(mode='FAST_RUN')
m = mod.make(mode=Mode(optimizer='fast_run', linker=linker())) mode = ProfileMode(optimizer='fast_run', linker=theano.gof.OpWiseCLinker())
m = mod.make(mode=mode)
neg, nout, nhid, niter = [int(a) for a in sys.argv[1:]] neg, nout, nhid, niter = [int(a) for a in sys.argv[1:]]
rng = numpy.random.RandomState(342) rng = numpy.random.RandomState(342)
...@@ -198,4 +200,5 @@ t = time.time() ...@@ -198,4 +200,5 @@ t = time.time()
for i in xrange(niter): for i in xrange(niter):
err = m.step(x) err = m.step(x)
print 'time: ',time.time() - t, 'err: ', err print 'time: ',time.time() - t, 'err: ', err
mode.print_summary()
...@@ -686,7 +686,15 @@ class CLinker(link.Linker): ...@@ -686,7 +686,15 @@ class CLinker(link.Linker):
instantiate.customize.add_support_code(support_code) instantiate.customize.add_support_code(support_code)
instantiate.customize.add_support_code(self.struct_code) instantiate.customize.add_support_code(self.struct_code)
instantiate.customize.add_support_code(static) instantiate.customize.add_support_code(static)
instantiate.customize.add_extra_compile_arg("-w") for extra_arg in ("-w", #-w means suppress all warnings
):
#"-O3",
#"-ffast-math",
#"-fprefetch-loop-arrays",
#"-ftree-vect-loop-version",
#"-ftree-loop-optimize",
#"-ftree-vectorize"):
instantiate.customize.add_extra_compile_arg(extra_arg)
for arg in self.compile_args(): for arg in self.compile_args():
instantiate.customize.add_extra_compile_arg(arg) instantiate.customize.add_extra_compile_arg(arg)
for header in self.headers(): for header in self.headers():
......
...@@ -103,7 +103,7 @@ def DualLinker(linkers): ...@@ -103,7 +103,7 @@ def DualLinker(linkers):
class ProfileMode(Mode): class ProfileMode(Mode):
def __init__(self, local_linker, optimizer=None): def __init__(self, linker, optimizer=None):
local_time = [0.0] local_time = [0.0]
apply_time = {} apply_time = {}
op_time = {} op_time = {}
...@@ -121,31 +121,34 @@ class ProfileMode(Mode): ...@@ -121,31 +121,34 @@ class ProfileMode(Mode):
self.apply_time = apply_time self.apply_time = apply_time
self.op_time = op_time self.op_time = op_time
linker = WrapLinkerMany([local_linker], [blah]) wrap_linker = WrapLinkerMany([linker], [blah])
if optimizer: if optimizer:
Mode.__init__(self, linker, optimizer) super(ProfileMode, self).__init__(wrap_linker, optimizer)
else: else:
Mode.__init__(self, linker) super(ProfileMode, self).__init__(wrap_linker)
def print_summary(self): def print_summary(self):
local_time = self.local_time[0] local_time = self.local_time[0]
apply_time = self.apply_time apply_time = self.apply_time
op_time = self.op_time op_time = self.op_time
print 'local_time', local_time print ''
print 'apply-wise times' print 'ProfileMode.print_summary()'
print '---------------------------'
print ''
print 'local_time', local_time, '(Time spent running thunks)'
print 'Apply-wise summary: <fraction of local_time spent at this position> (<Apply position>, <Apply Op name>)'
atimes = [(t/local_time, (a[0], str(a[1]))) for a, t in apply_time.items()] atimes = [(t/local_time, (a[0], str(a[1]))) for a, t in apply_time.items()]
atimes.sort() atimes.sort()
atimes.reverse() atimes.reverse()
for t,a in atimes[:15]: for t,a in atimes[:15]:
print ' ', t, a print ' ', t, a
print ' ...' #show that we are ignoring applies that don't take much time print ' ... (ignoring %i other Apply instances)'%max(0, len(atimes)-15)
print 'op-wise times' print 'Op-wise summary: <fraction of local_time spent on this kind of Op> <Op name>'
otimes = [(t/local_time, a) for a, t in op_time.items()] otimes = [(t/local_time, a) for a, t in op_time.items()]
otimes.sort() otimes.sort()
otimes.reverse() otimes.reverse()
for t,a in otimes[:15]: for t,a in otimes[:15]:
print ' ', t, a print ' ', t, a
print ' ...' #show that we are ignoring applies that don't take much time print ' ... (ignoring %i other kinds Ops)'%max(0, len(otimes)-15)
print sum(t for a,t in op_time.items())
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论