提交 2677e15f authored 作者: James Bergstra's avatar James Bergstra

rewrite of ProfileMode to use ProfileStats

上级 5ccdcace
...@@ -15,7 +15,7 @@ from theano import gof ...@@ -15,7 +15,7 @@ from theano import gof
from theano.gof.python25 import partial from theano.gof.python25 import partial
import mode as mode_module import mode as mode_module
from io import In, SymbolicInput, SymbolicInputKit, SymbolicOutput from io import In, SymbolicInput, SymbolicInputKit, SymbolicOutput
from theano.configdefaults import config
import logging import logging
_logger = logging.getLogger('theano.compile.function_module') _logger = logging.getLogger('theano.compile.function_module')
...@@ -495,6 +495,7 @@ class Function(object): ...@@ -495,6 +495,7 @@ class Function(object):
return cpy return cpy
def __call__(self, *args, **kwargs): def __call__(self, *args, **kwargs):
profile = self.profile
t0 = time.time() t0 = time.time()
# Reinitialize each container's 'provided' counter # Reinitialize each container's 'provided' counter
...@@ -536,8 +537,7 @@ class Function(object): ...@@ -536,8 +537,7 @@ class Function(object):
for k, arg in kwargs.iteritems(): for k, arg in kwargs.iteritems():
self[k] = arg self[k] = arg
if (not hasattr(self, '_check_for_aliased_inputs') or
if ( not hasattr(self, '_check_for_aliased_inputs') or
self._check_for_aliased_inputs): self._check_for_aliased_inputs):
## Collect aliased inputs among the storage space ## Collect aliased inputs among the storage space
args_share_memory = [] args_share_memory = []
...@@ -592,9 +592,21 @@ class Function(object): ...@@ -592,9 +592,21 @@ class Function(object):
self.inv_finder[c])) self.inv_finder[c]))
# Do the actual work # Do the actual work
t0_fn = time.time() if profile:
self.fn() t0_fn = time.time()
dt_fn = time.time() - t0_fn try:
self.fn()
except:
if hasattr(self.fn, 'position_of_error'):
# this is a new vm-provided function
# the C VM needs this because the exception manipulation
# done by raise_with_op is not implemented in C.
gof.vm.raise_with_op(self.fn.nodes[self.fn.position_of_error])
else:
# old-style linkers raise their own exceptions
raise
if profile:
profile.vm_call_time += time.time() - t0_fn
# Retrieve the values that were computed # Retrieve the values that were computed
outputs = [x.data for x in self.output_storage] outputs = [x.data for x in self.output_storage]
...@@ -626,20 +638,18 @@ class Function(object): ...@@ -626,20 +638,18 @@ class Function(object):
if isinstance(value, gof.Container): if isinstance(value, gof.Container):
value = value.storage[0] value = value.storage[0]
self[i] = value self[i] = value
# #
# NOTE: This logic needs to be replicated in # NOTE: This logic needs to be replicated in
# scan. # scan.
# grep for 'PROFILE_CODE' # grep for 'PROFILE_CODE'
# #
dt_call=time.time()-t0 if profile:
if hasattr(self.maker.mode,'fct_call_time'): dt_call=time.time()-t0
self.maker.mode.fct_call_time[self] += dt_call profile.fct_callcount += 1
self.maker.mode.fct_call[self] += 1 profile.fct_call_time += dt_call
if hasattr(self.fn, 'update_profile'):
self.maker.mode.call_time += dt_call self.fn.update_profile(profile)
self.maker.mode.fn_time += dt_fn
if self.return_none: if self.return_none:
return None return None
...@@ -687,9 +697,10 @@ def _pickle_Function(f): ...@@ -687,9 +697,10 @@ def _pickle_Function(f):
if (i < j) and isinstance(d_i, numpy.ndarray) and isinstance(d_j, numpy.ndarray): if (i < j) and isinstance(d_i, numpy.ndarray) and isinstance(d_j, numpy.ndarray):
if numpy.may_share_memory(d_i, d_j): if numpy.may_share_memory(d_i, d_j):
if f.pickle_aliased_memory_strategy == 'warn': if f.pickle_aliased_memory_strategy == 'warn':
_logger.warning('aliased relationship between Function arguments ' _logger.warning(('aliased relationship between'
'will not be preserved by un-pickling operation') ' Function arguments %s, %s'
#_logger.debug(str([d_i, d_j, id(d_i), id(d_j)])) ' will not be preserved by un-pickling'
' operation') %(str(d_i), str(d_j)))
else: else:
raise AliasedMemoryError(d_i, d_j) raise AliasedMemoryError(d_i, d_j)
...@@ -893,7 +904,8 @@ class FunctionMaker(object): ...@@ -893,7 +904,8 @@ class FunctionMaker(object):
raise TypeError("Unknown output type: %s (%s)", type(output), output) raise TypeError("Unknown output type: %s (%s)", type(output), output)
def __init__(self, inputs, outputs, def __init__(self, inputs, outputs,
mode = None, accept_inplace = False, function_builder = Function): mode = None, accept_inplace = False, function_builder = Function,
profile=None):
""" """
:type inputs: a list of SymbolicInput instances :type inputs: a list of SymbolicInput instances
...@@ -908,9 +920,18 @@ class FunctionMaker(object): ...@@ -908,9 +920,18 @@ class FunctionMaker(object):
:param accept_inplace: True iff it is acceptable to have inplace operations :param accept_inplace: True iff it is acceptable to have inplace operations
in the graph from the inputs to the outputs in the graph from the inputs to the outputs
""" """
mode = mode_module.get_mode(mode) mode = mode_module.get_mode(mode)
# figure out which profile object to use (if any)
# to help with forward-porting ProfileMode,
# we allow ProfileMode to provide a ProfileStats object
# using this somewhat awkward mechanism.
mode_profile = getattr(mode, 'profile', None)
if (profile is not None) and (mode_profile is not None):
raise TypeError(
'profile passed via both "mode" and "profile" arguments')
self.profile = profile = profile or mode_profile
# Handle the case where inputs and/or outputs is a single Variable (not in a list) # Handle the case where inputs and/or outputs is a single Variable (not in a list)
unpack_single = False unpack_single = False
return_none = False return_none = False
...@@ -951,7 +972,8 @@ class FunctionMaker(object): ...@@ -951,7 +972,8 @@ class FunctionMaker(object):
end_optimizer = time.time() end_optimizer = time.time()
finally: finally:
theano.config.compute_test_value = compute_test_value_orig theano.config.compute_test_value = compute_test_value_orig
mode.optimizer_time += end_optimizer - start_optimizer if profile:
profile.optimizer_time += end_optimizer - start_optimizer
_logger.debug('Optimizing took %f seconds' % (end_optimizer - start_optimizer)) _logger.debug('Optimizing took %f seconds' % (end_optimizer - start_optimizer))
#Add deep copy to respect the memory interface #Add deep copy to respect the memory interface
...@@ -1031,7 +1053,9 @@ class FunctionMaker(object): ...@@ -1031,7 +1053,9 @@ class FunctionMaker(object):
_fn, _i, _o = self.linker.make_thunk(input_storage = input_storage_lists) _fn, _i, _o = self.linker.make_thunk(input_storage = input_storage_lists)
end_linker = time.time() end_linker = time.time()
_logger.debug('Linker took %f seconds' % (end_linker - start_linker)) _logger.debug('Linker took %f seconds' % (end_linker - start_linker))
self.mode.linker_time += end_linker - start_linker if self.profile:
self.profile.linker_time += end_linker - start_linker
_fn.time_thunks = profile.flag_time_thunks
fn = self.function_builder(_fn, _i, _o, self.indices, self.outputs, defaults, self.unpack_single, self.return_none, self) fn = self.function_builder(_fn, _i, _o, self.indices, self.outputs, defaults, self.unpack_single, self.return_none, self)
return fn return fn
...@@ -1077,7 +1101,7 @@ def check_equal(x, y): ...@@ -1077,7 +1101,7 @@ def check_equal(x, y):
def register_checker(checker): def register_checker(checker):
__checkers.insert(0, checker) __checkers.insert(0, checker)
def orig_function(inputs, outputs, mode=None, accept_inplace = False, name=None): def orig_function(inputs, outputs, mode=None, accept_inplace = False, name=None, profile=None):
""" """
Return a Function that will calculate the outputs from the inputs. Return a Function that will calculate the outputs from the inputs.
...@@ -1105,6 +1129,8 @@ def orig_function(inputs, outputs, mode=None, accept_inplace = False, name=None) ...@@ -1105,6 +1129,8 @@ def orig_function(inputs, outputs, mode=None, accept_inplace = False, name=None)
:param accept_inplace: True iff the graph can contain inplace operations prior to the :param accept_inplace: True iff the graph can contain inplace operations prior to the
optimization phase (default is False) optimization phase (default is False)
:param profile: None or ProfileStats instance
""" """
#Every element of the input list will be upgraded to an `In` instance if necessary, #Every element of the input list will be upgraded to an `In` instance if necessary,
...@@ -1130,8 +1156,16 @@ def orig_function(inputs, outputs, mode=None, accept_inplace = False, name=None) ...@@ -1130,8 +1156,16 @@ def orig_function(inputs, outputs, mode=None, accept_inplace = False, name=None)
if not mode: if not mode:
raise ValueError("Please provide at least one mode.") raise ValueError("Please provide at least one mode.")
elif len(mode) == 1: elif len(mode) == 1:
fn = FunctionMaker(inputs, outputs, mode[0], accept_inplace = accept_inplace).create(defaults) fn = FunctionMaker(
inputs,
outputs,
mode[0],
accept_inplace = accept_inplace,
profile=profile).create(
defaults)
else: else:
if profile:
raise NotImplementedError('profiling not implemented in this kind of mode')
#return a different kind of function #return a different kind of function
def dup_defaults(): def dup_defaults():
# TODO This may need to be changed to use containers as defaults. # TODO This may need to be changed to use containers as defaults.
...@@ -1153,19 +1187,18 @@ def orig_function(inputs, outputs, mode=None, accept_inplace = False, name=None) ...@@ -1153,19 +1187,18 @@ def orig_function(inputs, outputs, mode=None, accept_inplace = False, name=None)
fn = maker1.create(defaults) fn = maker1.create(defaults)
else: else:
Maker = getattr(mode, 'function_maker', FunctionMaker) Maker = getattr(mode, 'function_maker', FunctionMaker)
fn = Maker(inputs, outputs, mode, accept_inplace = accept_inplace).create(defaults) fn = Maker(inputs,
outputs,
mode,
accept_inplace = accept_inplace,
profile=profile).create(
defaults)
t2 = time.time() t2 = time.time()
if hasattr(mode, 'compile_time'): if profile:
mode.compile_time+=t2-t1 profile.compile_time+=t2-t1
fn.name = name fn.name = name
if hasattr(mode,'fct_call_time'):
mode.fct_call_time.setdefault(fn,0)
if hasattr(mode,'fct_call'):
mode.fct_call.setdefault(fn,0)
return fn return fn
......
...@@ -8,6 +8,8 @@ from theano.configparser import config, AddConfigVar, IntParam, BoolParam ...@@ -8,6 +8,8 @@ from theano.configparser import config, AddConfigVar, IntParam, BoolParam
from theano.compile.function_module import FunctionMaker from theano.compile.function_module import FunctionMaker
run_cthunk = None # Will be imported only when needed. run_cthunk = None # Will be imported only when needed.
from profiling import ProfileStats
import_time = time.time() import_time = time.time()
AddConfigVar('ProfileMode.n_apply_to_print', AddConfigVar('ProfileMode.n_apply_to_print',
...@@ -34,24 +36,53 @@ AddConfigVar('ProfileMode.profile_memory', ...@@ -34,24 +36,53 @@ AddConfigVar('ProfileMode.profile_memory',
class Profile_Maker(FunctionMaker): class Profile_Maker(FunctionMaker):
def create(self, input_storage=None, trustme=False): def create(self, input_storage=None, trustme=False):
ret = super(Profile_Maker,self).create(input_storage, trustme) ret = super(Profile_Maker,self).create(input_storage, trustme)
# create a function-specific storage container for profiling info
profile = ProfileStats(atexit_print=False)
self.mode.profile_stats[ret] = profile
ret.profile = profile
#initialize the timers
for i, node in enumerate(ret.maker.env.toposort()): for i, node in enumerate(ret.maker.env.toposort()):
self.mode.apply_time[(i,node)]=0.0 profile.apply_time[node]=0.0
assert len(ret.fn.thunk_groups[i])==1 profile.outputs_size[node]=[0.0] * len(node.outputs)
self.mode.op_cimpl[node.op] = hasattr(ret.fn.thunk_groups[i][0],'cthunk')
# a thunk_group is a list of the thunks from each linker
# corresponding to the i'th position in the toposort.
assert len(ret.fn.thunk_groups[i])==1
profile.apply_cimpl[node] = hasattr(
ret.fn.thunk_groups[i][0],
'cthunk')
# Here we replace the linker function.
# This ugliness makes WrapLinker (an object that *generates*
# functions and is not function-specific) work with ProfileStats
# objects which are function-specific.
#capture old fn in closure. This is important since new_fn is about to
#take its place as ret.fn.
ret_fn = ret.fn
def new_fn():
self.mode.apply_time = self.mode.profile_stats[ret].apply_time
self.mode.outputs_size = self.mode.profile_stats[ret].outputs_size
ret_fn()
# delete the old apply_time variable
# because it doesn't mean the same thing anymore.
# This prevents old code from looking like it still works.
del self.mode.apply_time
del self.mode.outputs_size
ret.fn = new_fn
return ret return ret
class ProfileMode(Mode): class ProfileMode(Mode):
def __init__(self, linker=config.linker, optimizer=config.optimizer): def __init__(self, linker=config.linker, optimizer=config.optimizer):
apply_time = {}
op_cimpl = {}
compile_time = 0 #time passed in theano.function()
fct_call_time = {}#time passed inside theano fct call including op time.
fct_call = {}
message="" message=""
outputs_size={} profile_stats={}
self.__setstate__((linker, optimizer, apply_time, op_cimpl, self.__setstate__((linker,
compile_time, fct_call_time, fct_call, message, outputs_size)) optimizer,
message,
profile_stats))
def function_maker(self, i,o,m, *args, **kwargs): def function_maker(self, i,o,m, *args, **kwargs):
"""Return an instance of `Profiler_Maker` which init the count""" """Return an instance of `Profiler_Maker` which init the count"""
...@@ -59,28 +90,24 @@ class ProfileMode(Mode): ...@@ -59,28 +90,24 @@ class ProfileMode(Mode):
assert m is self assert m is self
return Profile_Maker(i, o, self, *args, **kwargs) return Profile_Maker(i, o, self, *args, **kwargs)
local_time = property(lambda self: [sum(self.apply_time.values())]) def __get_local_time(self):
rval = 0
for ps in self.profile_stats.values():
rval += sum(ps.apply_time.values())
return rval
local_time = property(__get_local_time)
def __getstate__(self): def __getstate__(self):
#print "__getstate__",self.provided_linker,self.provided_optimizer #print "__getstate__",self.provided_linker,self.provided_optimizer
return (self.provided_linker, self.provided_optimizer, self.apply_time, return (self.provided_linker,
self.op_cimpl, self.compile_time, self.fct_call_time, self.provided_optimizer,
self.fct_call, self.message, self.outputs_size) self.message,
self.profile_stats)
def __setstate__(self, state): def __setstate__(self, state):
linker, optimizer, apply_time, op_cimpl, compile_time, \ linker, optimizer, message, profile_stats = state
fct_call_time, fct_call, message, outputs_size = state self.message = message
self.apply_time = apply_time self.profile_stats = profile_stats
self.op_cimpl = op_cimpl
self.compile_time = compile_time
self.fct_call_time = fct_call_time
self.fct_call = fct_call
self.call_time = 0
self.fn_time = 0
self.optimizer_time = 0
self.linker_time = 0
self.message = ""
self.outputs_size = outputs_size
def profile_thunk(i, node, th): def profile_thunk(i, node, th):
""" Profile only the execution time """ Profile only the execution time
...@@ -102,7 +129,7 @@ class ProfileMode(Mode): ...@@ -102,7 +129,7 @@ class ProfileMode(Mode):
th() th()
dt = time.time() - t0 dt = time.time() - t0
apply_time[(i,node)] += dt self.apply_time[node] += max(dt, 1e-14)
def profile_thunk2(i, node, th): def profile_thunk2(i, node, th):
...@@ -149,8 +176,8 @@ class ProfileMode(Mode): ...@@ -149,8 +176,8 @@ class ProfileMode(Mode):
else: else:
raise Exception("Can't determine the memory size of dtype",o[0].dtype) raise Exception("Can't determine the memory size of dtype",o[0].dtype)
size.append(s) size.append(s)
outputs_size[node]=size self.outputs_size[node]=size
apply_time[(i,node)] += dt self.apply_time[node] += max(dt, 1e-14)
self.provided_linker = linker self.provided_linker = linker
...@@ -182,21 +209,43 @@ class ProfileMode(Mode): ...@@ -182,21 +209,43 @@ class ProfileMode(Mode):
Currently there is n_apply_to_print, n_ops_to_print and min_memory_size Currently there is n_apply_to_print, n_ops_to_print and min_memory_size
that are accepted. that are accepted.
""" """
compile_time = sum([ps.compile_time for ps in self.profile_stats.values()])
fct_call = dict([(fn, ps.fct_callcount)
for (fn, ps) in self.profile_stats.items()])
fct_call_time = dict([(fn, ps.fct_call_time)
for (fn, ps) in self.profile_stats.items()])
apply_time = {}
for fn, ps in self.profile_stats.items():
for (i, node) in enumerate(fn.maker.env.toposort()):
apply_time[(i, node)] = ps.apply_time[node]
for (i,n),t in apply_time.items():
if t == 0:
print i, n
op_cimpl = {}
outputs_size = {}
for fn, ps in self.profile_stats.items():
op_cimpl.update(ps.apply_cimpl)
compile_time = self.compile_time
fct_call_time = self.fct_call_time
fct_call = self.fct_call
apply_time = self.apply_time
op_cimpl = self.op_cimpl
message = self.message message = self.message
outputs_size = self.outputs_size
other_time = {'linker_time':self.linker_time,
'optimizer_time':self.optimizer_time}
self.print_summary_("print_summary", compile_time, fct_call_time, fct_call, outputs_size = {}
apply_time, op_cimpl, message, outputs_size, other_time, for fn, ps in self.profile_stats.items():
**kwargs) outputs_size.update(ps.outputs_size)
other_time = dict(
linker_time = sum(
[ps.linker_time for ps in self.profile_stats.values()]),
optimizer_time = sum(
[ps.optimizer_time for ps in self.profile_stats.values()]))
self.print_summary_("print_summary", compile_time, fct_call_time, fct_call,
apply_time, op_cimpl, message, outputs_size,
self.local_time, other_time,
**kwargs)
def print_diff_summary(self, other, **kwargs): def print_diff_summary(self, other, **kwargs):
""" As print_summary, but print the difference on two different profile mode. """ As print_summary, but print the difference on two different profile mode.
...@@ -240,7 +289,7 @@ class ProfileMode(Mode): ...@@ -240,7 +289,7 @@ class ProfileMode(Mode):
@staticmethod @staticmethod
def print_summary_(fct_name, compile_time, fct_call_time, fct_call, def print_summary_(fct_name, compile_time, fct_call_time, fct_call,
apply_time, op_cimpl, message, outputs_size, apply_time, op_cimpl, message, outputs_size,
other_time, local_time, other_time,
n_apply_to_print=config.ProfileMode.n_apply_to_print, n_apply_to_print=config.ProfileMode.n_apply_to_print,
n_ops_to_print=config.ProfileMode.n_ops_to_print, n_ops_to_print=config.ProfileMode.n_ops_to_print,
print_apply=True, print_apply=True,
...@@ -256,7 +305,6 @@ class ProfileMode(Mode): ...@@ -256,7 +305,6 @@ class ProfileMode(Mode):
whose outputs memory size is lower then that. whose outputs memory size is lower then that.
""" """
local_time = sum(apply_time.values())
total_time = time.time() - import_time total_time = time.time() - import_time
total_fct_time = sum(fct_call_time.values()) total_fct_time = sum(fct_call_time.values())
total_fct_call = sum(fct_call.values()) total_fct_call = sum(fct_call.values())
...@@ -312,7 +360,7 @@ class ProfileMode(Mode): ...@@ -312,7 +360,7 @@ class ProfileMode(Mode):
op_time[op]+=t op_time[op]+=t
nb_call = [v for k,v in fct_call.items() if k.maker.env is a.env][0] nb_call = [v for k,v in fct_call.items() if k.maker.env is a.env][0]
if t==0: if t==0:
assert nb_call == 0 assert nb_call == 0, nb_call
else: else:
op_call[op] += nb_call op_call[op] += nb_call
op_apply[op] += 1 op_apply[op] += 1
...@@ -429,8 +477,8 @@ class ProfileMode(Mode): ...@@ -429,8 +477,8 @@ class ProfileMode(Mode):
else: else:
fct_memory={}#env->dict(node->(outputs size)) fct_memory={}#env->dict(node->(outputs size))
var_mem = {} var_mem = {}
for node,val in outputs_size.items(): for node, val in outputs_size.items():
fct_memory.setdefault(node.env,{}) fct_memory.setdefault(node.env, {})
fct_memory[node.env][node]=val fct_memory[node.env][node]=val
for out,v in zip(node.outputs,val): for out,v in zip(node.outputs,val):
var_mem[out]=v var_mem[out]=v
...@@ -600,7 +648,7 @@ def atexit_print_default_profile_mode(): ...@@ -600,7 +648,7 @@ def atexit_print_default_profile_mode():
config.mode=PROFILE_MODE config.mode=PROFILE_MODE
""" """
for prof_mode in prof_mode_instance_to_print: for prof_mode in prof_mode_instance_to_print:
if sum(prof_mode.apply_time.values())>0: if prof_mode.local_time>0:
prof_mode.print_summary() prof_mode.print_summary()
#Register atexit_print_default_profile_mode to have the summary of the #Register atexit_print_default_profile_mode to have the summary of the
......
...@@ -17,11 +17,17 @@ __docformat__ = "restructuredtext en" ...@@ -17,11 +17,17 @@ __docformat__ = "restructuredtext en"
import atexit import atexit
import sys import sys
import theano import theano
from theano.configparser import AddConfigVar, StrParam, BoolParam
config = theano.config config = theano.config
_atexit_print_list = [] _atexit_print_list = []
_atexit_print_file = sys.stderr _atexit_print_file = sys.stderr
AddConfigVar('profiling.time_thunks',
"""Time individual thunks when profiling""",
BoolParam(True))
def _atexit_print_fn(): def _atexit_print_fn():
"""Print ProfileStat objects in _atexit_print_list to _atexit_print_file """Print ProfileStat objects in _atexit_print_list to _atexit_print_file
""" """
...@@ -88,7 +94,9 @@ class ProfileStats(object): ...@@ -88,7 +94,9 @@ class ProfileStats(object):
linker_time = 0.0 linker_time = 0.0
# time spent linking graph (FunctionMaker.create) # time spent linking graph (FunctionMaker.create)
def __init__(self, atexit_print=True, **kwargs): # param is called flag_time_thunks because most other attributes with time
# in the name are times *of* something, rather than configuration flags.
def __init__(self, atexit_print=True, flag_time_thunks=None, **kwargs):
""" """
atexit_print - bool. True means that this object will be printed to atexit_print - bool. True means that this object will be printed to
stderr (using .summary()) at the end of the program. stderr (using .summary()) at the end of the program.
...@@ -99,6 +107,11 @@ class ProfileStats(object): ...@@ -99,6 +107,11 @@ class ProfileStats(object):
self.output_size = {} self.output_size = {}
self.apply_time = {} self.apply_time = {}
self.apply_cimpl = {} self.apply_cimpl = {}
self.outputs_size = {}
if flag_time_thunks is None:
self.flag_time_thunks = config.profiling.time_thunks
else:
self.flag_time_thunks = flag_time_thunks
self.__dict__.update(kwargs) self.__dict__.update(kwargs)
#print >> sys.stderr, "self.message", self.message #print >> sys.stderr, "self.message", self.message
if atexit_print: if atexit_print:
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论