提交 e689a202 authored 作者: Frederic's avatar Frederic

come pep8

上级 a2f5f3b6
import time, atexit, copy import atexit
import copy
import time
from theano.gof.link import WrapLinker from theano.gof.link import WrapLinker
from theano.compile.mode import Mode, register_mode, predefined_modes, predefined_linkers, predefined_optimizers from theano.compile.mode import (Mode, register_mode,
predefined_modes, predefined_linkers,
predefined_optimizers)
from theano.gof.python25 import any from theano.gof.python25 import any
from theano import gof from theano import gof
from theano.configparser import config, AddConfigVar, IntParam, BoolParam from theano.configparser import config, AddConfigVar, IntParam, BoolParam
...@@ -33,9 +37,10 @@ AddConfigVar('ProfileMode.profile_memory', ...@@ -33,9 +37,10 @@ AddConfigVar('ProfileMode.profile_memory',
BoolParam(False), BoolParam(False),
in_c_key=False) in_c_key=False)
class Profile_Maker(FunctionMaker): class Profile_Maker(FunctionMaker):
def create(self, input_storage=None, trustme=False): def create(self, input_storage=None, trustme=False):
ret = super(Profile_Maker,self).create(input_storage, trustme) ret = super(Profile_Maker, self).create(input_storage, trustme)
# create a function-specific storage container for profiling info # create a function-specific storage container for profiling info
profile = ProfileStats(atexit_print=False) profile = ProfileStats(atexit_print=False)
...@@ -44,12 +49,12 @@ class Profile_Maker(FunctionMaker): ...@@ -44,12 +49,12 @@ class Profile_Maker(FunctionMaker):
#initialize the timers #initialize the timers
for i, node in enumerate(ret.maker.env.toposort()): for i, node in enumerate(ret.maker.env.toposort()):
profile.apply_time[node]=0.0 profile.apply_time[node] = 0.0
profile.outputs_size[node]=[0.0] * len(node.outputs) profile.outputs_size[node] = [0.0] * len(node.outputs)
# a thunk_group is a list of the thunks from each linker # a thunk_group is a list of the thunks from each linker
# corresponding to the i'th position in the toposort. # corresponding to the i'th position in the toposort.
assert len(ret.fn.thunk_groups[i])==1 assert len(ret.fn.thunk_groups[i]) == 1
profile.apply_cimpl[node] = hasattr( profile.apply_cimpl[node] = hasattr(
ret.fn.thunk_groups[i][0], ret.fn.thunk_groups[i][0],
'cthunk') 'cthunk')
...@@ -62,6 +67,7 @@ class Profile_Maker(FunctionMaker): ...@@ -62,6 +67,7 @@ class Profile_Maker(FunctionMaker):
#capture old fn in closure. This is important since new_fn is about to #capture old fn in closure. This is important since new_fn is about to
#take its place as ret.fn. #take its place as ret.fn.
ret_fn = ret.fn ret_fn = ret.fn
def new_fn(): def new_fn():
self.mode.apply_time = self.mode.profile_stats[ret].apply_time self.mode.apply_time = self.mode.profile_stats[ret].apply_time
self.mode.outputs_size = self.mode.profile_stats[ret].outputs_size self.mode.outputs_size = self.mode.profile_stats[ret].outputs_size
...@@ -81,6 +87,7 @@ class Profile_Maker(FunctionMaker): ...@@ -81,6 +87,7 @@ class Profile_Maker(FunctionMaker):
return ret return ret
class ProfileMode(Mode): class ProfileMode(Mode):
def __init__(self, linker=None, optimizer='default'): def __init__(self, linker=None, optimizer='default'):
if linker is None: if linker is None:
...@@ -94,7 +101,7 @@ class ProfileMode(Mode): ...@@ -94,7 +101,7 @@ class ProfileMode(Mode):
message, message,
profile_stats)) profile_stats))
def function_maker(self, i,o,m, *args, **kwargs): def function_maker(self, i, o, m, *args, **kwargs):
"""Return an instance of `Profiler_Maker` which init the count""" """Return an instance of `Profiler_Maker` which init the count"""
assert m is self assert m is self
...@@ -128,9 +135,10 @@ class ProfileMode(Mode): ...@@ -128,9 +135,10 @@ class ProfileMode(Mode):
failure = run_cthunk(th.cthunk) failure = run_cthunk(th.cthunk)
dt = time.time() - t0 dt = time.time() - t0
if failure: if failure:
raise RuntimeError(('A C Op raised an exception. ProfileMode cannot' raise RuntimeError(
' tell you what it was though. Use a standard mode such as' ('A C Op raised an exception. ProfileMode cannot'
' FAST_RUN to correct the problem.')) ' tell you what it was though. Use a standard mode'
' such as FAST_RUN to correct the problem.'))
else: else:
t0 = time.time() t0 = time.time()
th() th()
...@@ -140,7 +148,6 @@ class ProfileMode(Mode): ...@@ -140,7 +148,6 @@ class ProfileMode(Mode):
# insufficient to measure it. So we add an epsilon. # insufficient to measure it. So we add an epsilon.
self.apply_time[node] += max(dt, 1e-14) self.apply_time[node] += max(dt, 1e-14)
def profile_thunk2(i, node, th): def profile_thunk2(i, node, th):
""" Profile the execution time and the memory size. """ Profile the execution time and the memory size.
""" """
...@@ -150,25 +157,27 @@ class ProfileMode(Mode): ...@@ -150,25 +157,27 @@ class ProfileMode(Mode):
failure = run_cthunk(th.cthunk) failure = run_cthunk(th.cthunk)
dt = time.time() - t0 dt = time.time() - t0
if failure: if failure:
raise RuntimeError(('A C Op raised an exception. ProfileMode cannot' raise RuntimeError(
' tell you what it was though. Use a standard mode such as' ('A C Op raised an exception. ProfileMode cannot'
' FAST_RUN to correct the problem.')) ' tell you what it was though. Use a standard mode'
' such as FAST_RUN to correct the problem.'))
else: else:
t0 = time.time() t0 = time.time()
th() th()
dt = time.time() - t0 dt = time.time() - t0
size=[] size = []
for o in th.outputs: for o in th.outputs:
if not hasattr(o[0],'size'): if not hasattr(o[0], 'size'):
#if the output type don't have a size attribute, set -1 #if the output type don't have a size attribute, set -1
#to signify we can't evaluate it. #to signify we can't evaluate it.
#This happen at least for mtrand.RandomState type(in numpy) #This happen at least for mtrand.RandomState type(in numpy)
size.append(-1) size.append(-1)
continue continue
s=o[0].size s = o[0].size
#can't use o[0].dtype.itemsize as dtype is a str for CudaNdarray #can't use o[0].dtype.itemsize as dtype is a str for
#CudaNdarray
dtype = str(o[0].dtype) dtype = str(o[0].dtype)
dtype2=dtype[-2:] dtype2 = dtype[-2:]
if dtype2 == '32': if dtype2 == '32':
s *= 4 s *= 4
elif dtype2 == '64': elif dtype2 == '64':
...@@ -180,12 +189,12 @@ class ProfileMode(Mode): ...@@ -180,12 +189,12 @@ class ProfileMode(Mode):
elif dtype[-3:] == '128': elif dtype[-3:] == '128':
s *= 16 s *= 16
else: else:
raise Exception("Can't determine the memory size of dtype",o[0].dtype) raise Exception("Can't determine the memory size of dtype",
o[0].dtype)
size.append(s) size.append(s)
self.outputs_size[node]=size self.outputs_size[node] = size
self.apply_time[node] += max(dt, 1e-14) self.apply_time[node] += max(dt, 1e-14)
self.provided_linker = linker self.provided_linker = linker
self.provided_optimizer = optimizer self.provided_optimizer = optimizer
if isinstance(linker, basestring) or linker is None: if isinstance(linker, basestring) or linker is None:
...@@ -207,7 +216,7 @@ class ProfileMode(Mode): ...@@ -207,7 +216,7 @@ class ProfileMode(Mode):
self.optimizer_time = 0 self.optimizer_time = 0
self.linker_time = 0 self.linker_time = 0
def print_summary(self,**kwargs): def print_summary(self, **kwargs):
""" Print 3 summary that show where the time is spend. The first show an Apply-wise summary, the second show an Op-wise summary, the third show an type-Op-wise summary. """ Print 3 summary that show where the time is spend. The first show an Apply-wise summary, the second show an Op-wise summary, the third show an type-Op-wise summary.
The Apply-wise summary print the timing information for the worst offending Apply nodes. This corresponds to individual Op applications within your graph which take the longest to execute (so if you use dot twice, you will see two entries there). The Apply-wise summary print the timing information for the worst offending Apply nodes. This corresponds to individual Op applications within your graph which take the longest to execute (so if you use dot twice, you will see two entries there).
...@@ -220,7 +229,8 @@ class ProfileMode(Mode): ...@@ -220,7 +229,8 @@ class ProfileMode(Mode):
Currently there is n_apply_to_print, n_ops_to_print and min_memory_size Currently there is n_apply_to_print, n_ops_to_print and min_memory_size
that are accepted. that are accepted.
""" """
compile_time = sum([ps.compile_time for ps in self.profile_stats.values()]) compile_time = sum([ps.compile_time for ps
in self.profile_stats.values()])
fct_call = dict([(fn, ps.fct_callcount) fct_call = dict([(fn, ps.fct_callcount)
for (fn, ps) in self.profile_stats.items()]) for (fn, ps) in self.profile_stats.items()])
...@@ -232,7 +242,7 @@ class ProfileMode(Mode): ...@@ -232,7 +242,7 @@ class ProfileMode(Mode):
for fn, ps in self.profile_stats.items(): for fn, ps in self.profile_stats.items():
for (i, node) in enumerate(fn.maker.env.toposort()): for (i, node) in enumerate(fn.maker.env.toposort()):
apply_time[(i, node)] = ps.apply_time[node] apply_time[(i, node)] = ps.apply_time[node]
for (i,n),t in apply_time.items(): for (i, n), t in apply_time.items():
if t == 0: if t == 0:
print i, n print i, n
...@@ -248,12 +258,13 @@ class ProfileMode(Mode): ...@@ -248,12 +258,13 @@ class ProfileMode(Mode):
outputs_size.update(ps.outputs_size) outputs_size.update(ps.outputs_size)
other_time = dict( other_time = dict(
linker_time = sum( linker_time=sum(
[ps.linker_time for ps in self.profile_stats.values()]), [ps.linker_time for ps in self.profile_stats.values()]),
optimizer_time = sum( optimizer_time=sum(
[ps.optimizer_time for ps in self.profile_stats.values()])) [ps.optimizer_time for ps in self.profile_stats.values()]))
self.print_summary_("print_summary", compile_time, fct_call_time, fct_call, self.print_summary_("print_summary",
compile_time, fct_call_time, fct_call,
apply_time, apply_cimpl, message, outputs_size, apply_time, apply_cimpl, message, outputs_size,
self.local_time, other_time, self.local_time, other_time,
**kwargs) **kwargs)
...@@ -269,30 +280,32 @@ class ProfileMode(Mode): ...@@ -269,30 +280,32 @@ class ProfileMode(Mode):
that are accepted. that are accepted.
""" """
def diff_dict(a_time, b_time_):
    """Return the per-key difference ``a_time - b_time_``.

    :param a_time: dict mapping keys to values that support subtraction
        (the "self" side of the comparison).
    :param b_time_: dict of the same kind (the "other" side). It is
        only read, never modified.
    :return: a new dict holding ``a_time[k] - b_time_[k]`` for every key
        found in either input; a key missing on one side counts as 0
        there.
    """
    r = {}
    for key, ta in a_time.items():
        # A key missing from b contributes 0 on the b side.
        r[key] = ta - b_time_.get(key, 0)
    for key, tb in b_time_.items():
        if key not in a_time:
            # Present only in b: the difference is 0 - tb, so it must be
            # negative (it was previously added with the wrong sign).
            r[key] = -tb
    return r
compile_time = self.compile_time-other.compile_time compile_time = self.compile_time - other.compile_time
fct_call_time = diff_dict(self.fct_call_time,other.fct_call_time) fct_call_time = diff_dict(self.fct_call_time, other.fct_call_time)
fct_call = diff_dict(self.fct_call,other.fct_call) fct_call = diff_dict(self.fct_call, other.fct_call)
apply_time = diff_dict(self.apply_time, other.apply_time) apply_time = diff_dict(self.apply_time, other.apply_time)
apply_cimpl = self.apply_cimpl and other.apply_cimpl apply_cimpl = self.apply_cimpl and other.apply_cimpl
message = self.message message = self.message
outputs_size = diff_dict(self.outputs_size,other.outputs_size) outputs_size = diff_dict(self.outputs_size, other.outputs_size)
other_time = {'linker_time':self.linker_time-other.linker_time, other_time = {'linker_time': self.linker_time - other.linker_time,
'optimizer_time':self.optimizer_time-other.optimizer_time} 'optimizer_time': self.optimizer_time -
self.print_summary_("print_diff_summary", compile_time, fct_call_time, fct_call, other.optimizer_time}
self.print_summary_("print_diff_summary", compile_time,
fct_call_time, fct_call,
apply_time, apply_cimpl, message, outputs_size, apply_time, apply_cimpl, message, outputs_size,
print_apply=False, other_time=other_time, print_apply=False, other_time=other_time,
**kwargs) **kwargs)
...@@ -321,17 +334,18 @@ class ProfileMode(Mode): ...@@ -321,17 +334,18 @@ class ProfileMode(Mode):
total_fct_call = sum(fct_call.values()) total_fct_call = sum(fct_call.values())
unknown_time = total_time - total_fct_time - compile_time unknown_time = total_time - total_fct_time - compile_time
overhead_time = total_fct_time - local_time overhead_time = total_fct_time - local_time
if total_fct_time>0: if total_fct_time > 0:
time_pr_in_fct = local_time/total_fct_time*100 time_pr_in_fct = local_time / total_fct_time * 100
overhead_time_pourcent_fct_time = overhead_time/total_fct_time*100 overhead_time_pourcent_fct_time = (overhead_time / total_fct_time *
time_per_call = total_fct_time/total_fct_call 100)
time_per_call = total_fct_time / total_fct_call
else: else:
time_pr_in_fct = 0 time_pr_in_fct = 0
overhead_time_pourcent_fct_time = 0 overhead_time_pourcent_fct_time = 0
time_per_call = 0 time_per_call = 0
print print
print 'ProfileMode.%s(%s)'%(fct_name,message) print 'ProfileMode.%s(%s)' % (fct_name,message)
print '---------------------------' print '---------------------------'
print print
print 'Time since import %.3fs'%(total_time) print 'Time since import %.3fs'%(total_time)
...@@ -587,20 +601,40 @@ Test them first, as they are not guaranteed to always provide a speedup.""" ...@@ -587,20 +601,40 @@ Test them first, as they are not guaranteed to always provide a speedup."""
from theano.tensor.raw_random import RandomFunction from theano.tensor.raw_random import RandomFunction
import theano import theano
import theano.scalar as scal import theano.scalar as scal
scalar_op_amdlibm_no_speed_up = [scal.LT, scal.GT, scal.LE, scal.GE, scal.EQ, scal.NEQ, scal.InRange, scal.Switch, scal.OR, scal.XOR, scal.AND, scal.Invert, scal.Maximum, scal.Minimum, scal.Add, scal.Mul, scal.Sub, scal.TrueDiv, scal.IntDiv, scal.Clip, scal.Second, scal.Identity, scal.Cast, scal.Sgn, scal.Neg, scal.Inv, scal.Sqr ] scalar_op_amdlibm_no_speed_up = [scal.LT, scal.GT, scal.LE, scal.GE,
scalar_op_amdlibm_speed_up = [scal.Mod, scal.Pow, scal.Ceil, scal.Floor, scal.RoundHalfToEven, scal.RoundHalfAwayFromZero, scal.Log, scal.Log2, scal.Log10, scal.Log1p, scal.Exp, scal.Sqrt, scal.Abs, scal.Cos, scal.Sin, scal.Tan, scal.Tanh, scal.Cosh, scal.Sinh, T.nnet.sigm.ScalarSigmoid, T.nnet.sigm.ScalarSoftplus ]#Abs, Mod in float{32,64} only scal.EQ, scal.NEQ, scal.InRange,
scal.Switch, scal.OR, scal.XOR,
scal.AND, scal.Invert, scal.Maximum,
scal.Minimum, scal.Add, scal.Mul,
scal.Sub, scal.TrueDiv, scal.IntDiv,
scal.Clip, scal.Second, scal.Identity,
scal.Cast, scal.Sgn, scal.Neg,
scal.Inv, scal.Sqr]
scalar_op_amdlibm_speed_up = [scal.Mod, scal.Pow, scal.Ceil,
scal.Floor, scal.RoundHalfToEven,
scal.RoundHalfAwayFromZero, scal.Log,
scal.Log2, scal.Log10, scal.Log1p,
scal.Exp, scal.Sqrt, scal.Abs, scal.Cos,
scal.Sin, scal.Tan, scal.Tanh,
scal.Cosh, scal.Sinh,
T.nnet.sigm.ScalarSigmoid,
T.nnet.sigm.ScalarSoftplus]
# Abs, Mod in float{32,64} only
def get_scalar_ops(s):
    """Return the flat list of scalar ops contained in ``s``.

    When ``s`` is a ``theano.scalar.Composite``, the nodes of its inner
    graph (``s.env.toposort()``) are visited in order and the op of each
    node is expanded recursively. Any other op is returned as a
    one-element list.
    """
    if not isinstance(s, theano.scalar.Composite):
        return [s]
    flattened = []
    for inner_node in s.env.toposort():
        flattened.extend(get_scalar_ops(inner_node.op))
    return flattened
def list_scalar_op(op):
    """Return the list of scalar ops behind ``op.scalar_op``.

    A ``theano.scalar.Composite`` scalar op is expanded through
    ``get_scalar_ops``; any other scalar op is returned as a one-element
    list.
    """
    scalar_op = op.scalar_op
    if not isinstance(scalar_op, theano.scalar.Composite):
        return [scalar_op]
    return get_scalar_ops(scalar_op)
def amdlibm_speed_up(op): def amdlibm_speed_up(op):
if not isinstance(op, T.Elemwise): if not isinstance(op, T.Elemwise):
...@@ -613,6 +647,7 @@ Test them first, as they are not guaranteed to always provide a speedup.""" ...@@ -613,6 +647,7 @@ Test them first, as they are not guaranteed to always provide a speedup."""
elif s_op.__class__ not in scalar_op_amdlibm_no_speed_up: elif s_op.__class__ not in scalar_op_amdlibm_no_speed_up:
print "We don't know if amdlibm will accelerate this scalar op.", s_op print "We don't know if amdlibm will accelerate this scalar op.", s_op
return False return False
def exp_float32_op(op): def exp_float32_op(op):
if not isinstance(op, T.Elemwise): if not isinstance(op, T.Elemwise):
return False return False
...@@ -622,17 +657,20 @@ Test them first, as they are not guaranteed to always provide a speedup.""" ...@@ -622,17 +657,20 @@ Test them first, as they are not guaranteed to always provide a speedup."""
printed_tip = False printed_tip = False
#tip 1 #tip 1
if config.floatX=='float64': if config.floatX == 'float64':
print " - Try the Theano flag floatX=float32" print " - Try the Theano flag floatX=float32"
printed_tip = True printed_tip = True
#tip 2 #tip 2
if not config.lib.amdlibm and any([amdlibm_speed_up(a.op) for i,a in apply_time]): if not config.lib.amdlibm and any([amdlibm_speed_up(a.op) for i, a
in apply_time]):
print " - Try installing amdlibm and set the Theano flag lib.amdlibm=True. This speeds up only some Elemwise operation." print " - Try installing amdlibm and set the Theano flag lib.amdlibm=True. This speeds up only some Elemwise operation."
printed_tip = True printed_tip = True
#tip 3 #tip 3
if not config.lib.amdlibm and any([exp_float32_op(a.op) and a.inputs[0].dtype=='float32' for i,a in apply_time]): if not config.lib.amdlibm and any([exp_float32_op(a.op) and
a.inputs[0].dtype == 'float32'
for i, a in apply_time]):
print " - With the default gcc libm, exp in float32 is slower then in float64! Try Theano flag floatX=float64, or install amdlibm and set the theano flags lib.amdlibm=True" print " - With the default gcc libm, exp in float32 is slower then in float64! Try Theano flag floatX=float64, or install amdlibm and set the theano flags lib.amdlibm=True"
printed_tip = True printed_tip = True
...@@ -656,10 +694,12 @@ Test them first, as they are not guaranteed to always provide a speedup.""" ...@@ -656,10 +694,12 @@ Test them first, as they are not guaranteed to always provide a speedup."""
if not printed_tip: if not printed_tip:
print " Sorry, no tip for today." print " Sorry, no tip for today."
register_mode('PROFILE_MODE',ProfileMode()) register_mode('PROFILE_MODE', ProfileMode())
#needed to print the profile at the end automatically #needed to print the profile at the end automatically
prof_mode_instance_to_print=[predefined_modes["PROFILE_MODE"]] prof_mode_instance_to_print = [predefined_modes["PROFILE_MODE"]]
def atexit_print_default_profile_mode(): def atexit_print_default_profile_mode():
"""Print the summary of the predefined mode PROFILE_MODE if used. """Print the summary of the predefined mode PROFILE_MODE if used.
...@@ -668,7 +708,7 @@ def atexit_print_default_profile_mode(): ...@@ -668,7 +708,7 @@ def atexit_print_default_profile_mode():
config.mode=PROFILE_MODE config.mode=PROFILE_MODE
""" """
for prof_mode in prof_mode_instance_to_print: for prof_mode in prof_mode_instance_to_print:
if prof_mode.local_time>0: if prof_mode.local_time > 0:
prof_mode.print_summary() prof_mode.print_summary()
#Register atexit_print_default_profile_mode to have the summary of the #Register atexit_print_default_profile_mode to have the summary of the
...@@ -678,6 +718,8 @@ atexit.register(atexit_print_default_profile_mode) ...@@ -678,6 +718,8 @@ atexit.register(atexit_print_default_profile_mode)
# Hook registry: callables appended here can print extra profiling
# information.
profiler_printers = []


def register_profiler_printer(fct):
    """Append ``fct`` to ``profiler_printers`` and return it unchanged.

    Returning ``fct`` lets this function be used as a decorator.
    """
    profiler_printers.append(fct)
    return fct
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论