提交 2677e15f authored 作者: James Bergstra's avatar James Bergstra

rewrite of ProfileMode to use ProfileStats

上级 5ccdcace
......@@ -15,7 +15,7 @@ from theano import gof
from theano.gof.python25 import partial
import mode as mode_module
from io import In, SymbolicInput, SymbolicInputKit, SymbolicOutput
from theano.configdefaults import config
import logging
_logger = logging.getLogger('theano.compile.function_module')
......@@ -495,6 +495,7 @@ class Function(object):
return cpy
def __call__(self, *args, **kwargs):
profile = self.profile
t0 = time.time()
# Reinitialize each container's 'provided' counter
......@@ -536,8 +537,7 @@ class Function(object):
for k, arg in kwargs.iteritems():
self[k] = arg
if ( not hasattr(self, '_check_for_aliased_inputs') or
if (not hasattr(self, '_check_for_aliased_inputs') or
self._check_for_aliased_inputs):
## Collect aliased inputs among the storage space
args_share_memory = []
......@@ -592,9 +592,21 @@ class Function(object):
self.inv_finder[c]))
# Do the actual work
t0_fn = time.time()
self.fn()
dt_fn = time.time() - t0_fn
if profile:
t0_fn = time.time()
try:
self.fn()
except:
if hasattr(self.fn, 'position_of_error'):
# this is a new vm-provided function
# the C VM needs this because the exception manipulation
# done by raise_with_op is not implemented in C.
gof.vm.raise_with_op(self.fn.nodes[self.fn.position_of_error])
else:
# old-style linkers raise their own exceptions
raise
if profile:
profile.vm_call_time += time.time() - t0_fn
# Retrieve the values that were computed
outputs = [x.data for x in self.output_storage]
......@@ -626,20 +638,18 @@ class Function(object):
if isinstance(value, gof.Container):
value = value.storage[0]
self[i] = value
#
# NOTE: This logic needs to be replicated in
# scan.
# grep for 'PROFILE_CODE'
#
dt_call=time.time()-t0
if hasattr(self.maker.mode,'fct_call_time'):
self.maker.mode.fct_call_time[self] += dt_call
self.maker.mode.fct_call[self] += 1
self.maker.mode.call_time += dt_call
self.maker.mode.fn_time += dt_fn
if profile:
dt_call=time.time()-t0
profile.fct_callcount += 1
profile.fct_call_time += dt_call
if hasattr(self.fn, 'update_profile'):
self.fn.update_profile(profile)
if self.return_none:
return None
......@@ -687,9 +697,10 @@ def _pickle_Function(f):
if (i < j) and isinstance(d_i, numpy.ndarray) and isinstance(d_j, numpy.ndarray):
if numpy.may_share_memory(d_i, d_j):
if f.pickle_aliased_memory_strategy == 'warn':
_logger.warning('aliased relationship between Function arguments '
'will not be preserved by un-pickling operation')
#_logger.debug(str([d_i, d_j, id(d_i), id(d_j)]))
_logger.warning(('aliased relationship between'
' Function arguments %s, %s'
' will not be preserved by un-pickling'
' operation') %(str(d_i), str(d_j)))
else:
raise AliasedMemoryError(d_i, d_j)
......@@ -893,7 +904,8 @@ class FunctionMaker(object):
raise TypeError("Unknown output type: %s (%s)", type(output), output)
def __init__(self, inputs, outputs,
mode = None, accept_inplace = False, function_builder = Function):
mode = None, accept_inplace = False, function_builder = Function,
profile=None):
"""
:type inputs: a list of SymbolicInput instances
......@@ -908,9 +920,18 @@ class FunctionMaker(object):
:param accept_inplace: True iff it is acceptable to have inplace operations
in the graph from the inputs to the outputs
"""
mode = mode_module.get_mode(mode)
# figure out which profile object to use (if any)
# to help with forward-porting ProfileMode,
# we allow ProfileMode to provide a ProfileStats object
# using this somewhat awkward mechanism.
mode_profile = getattr(mode, 'profile', None)
if (profile is not None) and (mode_profile is not None):
raise TypeError(
'profile passed via both "mode" and "profile" arguments')
self.profile = profile = profile or mode_profile
# Handle the case where inputs and/or outputs is a single Variable (not in a list)
unpack_single = False
return_none = False
......@@ -951,7 +972,8 @@ class FunctionMaker(object):
end_optimizer = time.time()
finally:
theano.config.compute_test_value = compute_test_value_orig
mode.optimizer_time += end_optimizer - start_optimizer
if profile:
profile.optimizer_time += end_optimizer - start_optimizer
_logger.debug('Optimizing took %f seconds' % (end_optimizer - start_optimizer))
#Add deep copy to respect the memory interface
......@@ -1031,7 +1053,9 @@ class FunctionMaker(object):
_fn, _i, _o = self.linker.make_thunk(input_storage = input_storage_lists)
end_linker = time.time()
_logger.debug('Linker took %f seconds' % (end_linker - start_linker))
self.mode.linker_time += end_linker - start_linker
if self.profile:
self.profile.linker_time += end_linker - start_linker
_fn.time_thunks = profile.flag_time_thunks
fn = self.function_builder(_fn, _i, _o, self.indices, self.outputs, defaults, self.unpack_single, self.return_none, self)
return fn
......@@ -1077,7 +1101,7 @@ def check_equal(x, y):
def register_checker(checker):
__checkers.insert(0, checker)
def orig_function(inputs, outputs, mode=None, accept_inplace = False, name=None):
def orig_function(inputs, outputs, mode=None, accept_inplace = False, name=None, profile=None):
"""
Return a Function that will calculate the outputs from the inputs.
......@@ -1105,6 +1129,8 @@ def orig_function(inputs, outputs, mode=None, accept_inplace = False, name=None)
:param accept_inplace: True iff the graph can contain inplace operations prior to the
optimization phase (default is False)
:param profile: None or ProfileStats instance
"""
#Every element of the input list will be upgraded to an `In` instance if necessary,
......@@ -1130,8 +1156,16 @@ def orig_function(inputs, outputs, mode=None, accept_inplace = False, name=None)
if not mode:
raise ValueError("Please provide at least one mode.")
elif len(mode) == 1:
fn = FunctionMaker(inputs, outputs, mode[0], accept_inplace = accept_inplace).create(defaults)
fn = FunctionMaker(
inputs,
outputs,
mode[0],
accept_inplace = accept_inplace,
profile=profile).create(
defaults)
else:
if profile:
raise NotImplementedError('profiling not implemented in this kind of mode')
#return a different kind of function
def dup_defaults():
# TODO This may need to be changed to use containers as defaults.
......@@ -1153,19 +1187,18 @@ def orig_function(inputs, outputs, mode=None, accept_inplace = False, name=None)
fn = maker1.create(defaults)
else:
Maker = getattr(mode, 'function_maker', FunctionMaker)
fn = Maker(inputs, outputs, mode, accept_inplace = accept_inplace).create(defaults)
fn = Maker(inputs,
outputs,
mode,
accept_inplace = accept_inplace,
profile=profile).create(
defaults)
t2 = time.time()
if hasattr(mode, 'compile_time'):
mode.compile_time+=t2-t1
if profile:
profile.compile_time+=t2-t1
fn.name = name
if hasattr(mode,'fct_call_time'):
mode.fct_call_time.setdefault(fn,0)
if hasattr(mode,'fct_call'):
mode.fct_call.setdefault(fn,0)
return fn
......
......@@ -8,6 +8,8 @@ from theano.configparser import config, AddConfigVar, IntParam, BoolParam
from theano.compile.function_module import FunctionMaker
run_cthunk = None # Will be imported only when needed.
from profiling import ProfileStats
import_time = time.time()
AddConfigVar('ProfileMode.n_apply_to_print',
......@@ -34,24 +36,53 @@ AddConfigVar('ProfileMode.profile_memory',
class Profile_Maker(FunctionMaker):
def create(self, input_storage=None, trustme=False):
ret = super(Profile_Maker,self).create(input_storage, trustme)
# create a function-specific storage container for profiling info
profile = ProfileStats(atexit_print=False)
self.mode.profile_stats[ret] = profile
ret.profile = profile
#initialize the timers
for i, node in enumerate(ret.maker.env.toposort()):
self.mode.apply_time[(i,node)]=0.0
assert len(ret.fn.thunk_groups[i])==1
self.mode.op_cimpl[node.op] = hasattr(ret.fn.thunk_groups[i][0],'cthunk')
profile.apply_time[node]=0.0
profile.outputs_size[node]=[0.0] * len(node.outputs)
# a thunk_group is a list of the thunks from each linker
# corresponding to the i'th position in the toposort.
assert len(ret.fn.thunk_groups[i])==1
profile.apply_cimpl[node] = hasattr(
ret.fn.thunk_groups[i][0],
'cthunk')
# Here we replace the linker function.
# This ugliness makes WrapLinker (an object that *generates*
# functions and is not function-specific) work with ProfileStats
# objects which are function-specific.
#capture old fn in closure. This is important since new_fn is about to
#take its place as ret.fn.
ret_fn = ret.fn
def new_fn():
self.mode.apply_time = self.mode.profile_stats[ret].apply_time
self.mode.outputs_size = self.mode.profile_stats[ret].outputs_size
ret_fn()
# delete the old apply_time variable
# because it doesn't mean the same thing anymore.
# This prevents old code from looking like it still works.
del self.mode.apply_time
del self.mode.outputs_size
ret.fn = new_fn
return ret
class ProfileMode(Mode):
def __init__(self, linker=config.linker, optimizer=config.optimizer):
apply_time = {}
op_cimpl = {}
compile_time = 0 #time passed in theano.function()
fct_call_time = {}#time passed inside theano fct call including op time.
fct_call = {}
message=""
outputs_size={}
self.__setstate__((linker, optimizer, apply_time, op_cimpl,
compile_time, fct_call_time, fct_call, message, outputs_size))
profile_stats={}
self.__setstate__((linker,
optimizer,
message,
profile_stats))
def function_maker(self, i,o,m, *args, **kwargs):
"""Return an instance of `Profiler_Maker` which init the count"""
......@@ -59,28 +90,24 @@ class ProfileMode(Mode):
assert m is self
return Profile_Maker(i, o, self, *args, **kwargs)
local_time = property(lambda self: [sum(self.apply_time.values())])
def __get_local_time(self):
rval = 0
for ps in self.profile_stats.values():
rval += sum(ps.apply_time.values())
return rval
local_time = property(__get_local_time)
def __getstate__(self):
#print "__getstate__",self.provided_linker,self.provided_optimizer
return (self.provided_linker, self.provided_optimizer, self.apply_time,
self.op_cimpl, self.compile_time, self.fct_call_time,
self.fct_call, self.message, self.outputs_size)
return (self.provided_linker,
self.provided_optimizer,
self.message,
self.profile_stats)
def __setstate__(self, state):
linker, optimizer, apply_time, op_cimpl, compile_time, \
fct_call_time, fct_call, message, outputs_size = state
self.apply_time = apply_time
self.op_cimpl = op_cimpl
self.compile_time = compile_time
self.fct_call_time = fct_call_time
self.fct_call = fct_call
self.call_time = 0
self.fn_time = 0
self.optimizer_time = 0
self.linker_time = 0
self.message = ""
self.outputs_size = outputs_size
linker, optimizer, message, profile_stats = state
self.message = message
self.profile_stats = profile_stats
def profile_thunk(i, node, th):
""" Profile only the execution time
......@@ -102,7 +129,7 @@ class ProfileMode(Mode):
th()
dt = time.time() - t0
apply_time[(i,node)] += dt
self.apply_time[node] += max(dt, 1e-14)
def profile_thunk2(i, node, th):
......@@ -149,8 +176,8 @@ class ProfileMode(Mode):
else:
raise Exception("Can't determine the memory size of dtype",o[0].dtype)
size.append(s)
outputs_size[node]=size
apply_time[(i,node)] += dt
self.outputs_size[node]=size
self.apply_time[node] += max(dt, 1e-14)
self.provided_linker = linker
......@@ -182,21 +209,43 @@ class ProfileMode(Mode):
Currently there is n_apply_to_print, n_ops_to_print and min_memory_size
that are accepted.
"""
compile_time = sum([ps.compile_time for ps in self.profile_stats.values()])
fct_call = dict([(fn, ps.fct_callcount)
for (fn, ps) in self.profile_stats.items()])
fct_call_time = dict([(fn, ps.fct_call_time)
for (fn, ps) in self.profile_stats.items()])
apply_time = {}
for fn, ps in self.profile_stats.items():
for (i, node) in enumerate(fn.maker.env.toposort()):
apply_time[(i, node)] = ps.apply_time[node]
for (i,n),t in apply_time.items():
if t == 0:
print i, n
op_cimpl = {}
outputs_size = {}
for fn, ps in self.profile_stats.items():
op_cimpl.update(ps.apply_cimpl)
compile_time = self.compile_time
fct_call_time = self.fct_call_time
fct_call = self.fct_call
apply_time = self.apply_time
op_cimpl = self.op_cimpl
message = self.message
outputs_size = self.outputs_size
other_time = {'linker_time':self.linker_time,
'optimizer_time':self.optimizer_time}
self.print_summary_("print_summary", compile_time, fct_call_time, fct_call,
apply_time, op_cimpl, message, outputs_size, other_time,
**kwargs)
outputs_size = {}
for fn, ps in self.profile_stats.items():
outputs_size.update(ps.outputs_size)
other_time = dict(
linker_time = sum(
[ps.linker_time for ps in self.profile_stats.values()]),
optimizer_time = sum(
[ps.optimizer_time for ps in self.profile_stats.values()]))
self.print_summary_("print_summary", compile_time, fct_call_time, fct_call,
apply_time, op_cimpl, message, outputs_size,
self.local_time, other_time,
**kwargs)
def print_diff_summary(self, other, **kwargs):
""" As print_summary, but print the difference on two different profile mode.
......@@ -240,7 +289,7 @@ class ProfileMode(Mode):
@staticmethod
def print_summary_(fct_name, compile_time, fct_call_time, fct_call,
apply_time, op_cimpl, message, outputs_size,
other_time,
local_time, other_time,
n_apply_to_print=config.ProfileMode.n_apply_to_print,
n_ops_to_print=config.ProfileMode.n_ops_to_print,
print_apply=True,
......@@ -256,7 +305,6 @@ class ProfileMode(Mode):
whose outputs memory size is lower then that.
"""
local_time = sum(apply_time.values())
total_time = time.time() - import_time
total_fct_time = sum(fct_call_time.values())
total_fct_call = sum(fct_call.values())
......@@ -312,7 +360,7 @@ class ProfileMode(Mode):
op_time[op]+=t
nb_call = [v for k,v in fct_call.items() if k.maker.env is a.env][0]
if t==0:
assert nb_call == 0
assert nb_call == 0, nb_call
else:
op_call[op] += nb_call
op_apply[op] += 1
......@@ -429,8 +477,8 @@ class ProfileMode(Mode):
else:
fct_memory={}#env->dict(node->(outputs size))
var_mem = {}
for node,val in outputs_size.items():
fct_memory.setdefault(node.env,{})
for node, val in outputs_size.items():
fct_memory.setdefault(node.env, {})
fct_memory[node.env][node]=val
for out,v in zip(node.outputs,val):
var_mem[out]=v
......@@ -600,7 +648,7 @@ def atexit_print_default_profile_mode():
config.mode=PROFILE_MODE
"""
for prof_mode in prof_mode_instance_to_print:
if sum(prof_mode.apply_time.values())>0:
if prof_mode.local_time>0:
prof_mode.print_summary()
#Register atexit_print_default_profile_mode to have the summary of the
......
......@@ -17,11 +17,17 @@ __docformat__ = "restructuredtext en"
import atexit
import sys
import theano
from theano.configparser import AddConfigVar, StrParam, BoolParam
config = theano.config
_atexit_print_list = []
_atexit_print_file = sys.stderr
AddConfigVar('profiling.time_thunks',
"""Time individual thunks when profiling""",
BoolParam(True))
def _atexit_print_fn():
"""Print ProfileStat objects in _atexit_print_list to _atexit_print_file
"""
......@@ -88,7 +94,9 @@ class ProfileStats(object):
linker_time = 0.0
# time spent linking graph (FunctionMaker.create)
def __init__(self, atexit_print=True, **kwargs):
# param is called flag_time_thunks because most other attributes with time
# in the name are times *of* something, rather than configuration flags.
def __init__(self, atexit_print=True, flag_time_thunks=None, **kwargs):
"""
atexit_print - bool. True means that this object will be printed to
stderr (using .summary()) at the end of the program.
......@@ -99,6 +107,11 @@ class ProfileStats(object):
self.output_size = {}
self.apply_time = {}
self.apply_cimpl = {}
self.outputs_size = {}
if flag_time_thunks is None:
self.flag_time_thunks = config.profiling.time_thunks
else:
self.flag_time_thunks = flag_time_thunks
self.__dict__.update(kwargs)
#print >> sys.stderr, "self.message", self.message
if atexit_print:
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论