提交 bb096349 authored 作者: lamblin's avatar lamblin

Merge pull request #666 from nouiz/time_opt

Time opt
......@@ -994,11 +994,16 @@ class FunctionMaker(object):
# we allow ProfileMode to provide a ProfileStats object
# using this somewhat awkward mechanism.
mode_profile = getattr(mode, 'profile', None)
if (profile is not None) and (mode_profile is not None):
if (profile is not None and
profile is not False and
mode_profile is not None):
raise TypeError(
'profile passed via both "mode" and "profile" arguments')
self.profile = profile = profile or mode_profile
if profile:
# We preload the cache here to avoid having its timing
# included in the optimization time of the compiled function.
theano.gof.cc.get_module_cache()
# Handle the case where inputs and/or outputs is a single Variable (not in a list)
self.orig_outputs = outputs
unpack_single = False
......@@ -1030,6 +1035,8 @@ class FunctionMaker(object):
# make the env (copies the graph, creates NEW INPUT AND OUTPUT VARIABLES)
env, additional_outputs = std_env(expanded_inputs, outputs, accept_inplace)
env.profile = profile
self.env = env
# Fetch the optimizer and linker
......@@ -1042,13 +1049,15 @@ class FunctionMaker(object):
theano.config.compute_test_value = "off"
gof.Op.add_stack_trace_on_call = False
start_optimizer = time.time()
optimizer(env)
optimizer_profile = optimizer(env)
end_optimizer = time.time()
opt_time = end_optimizer - start_optimizer
mode.optimizer_time += opt_time
if profile:
profile.optimizer_time += opt_time
if theano.config.profile_optimizer:
profile.optimizer_profile = (optimizer, optimizer_profile)
_logger.debug('Optimizing took %f seconds', opt_time)
# Add a deep copy to respect the memory interface
......
......@@ -88,6 +88,10 @@ OPT_FAST_RUN_STABLE = OPT_FAST_RUN.requiring('stable')
OPT_FAST_COMPILE = gof.Query(include=['fast_compile'])
OPT_STABILIZE = gof.Query(include=['fast_run'])
OPT_STABILIZE.position_cutoff = 1.5000001
OPT_FAST_RUN.name = 'OPT_FAST_RUN'
OPT_FAST_RUN_STABLE.name = 'OPT_FAST_RUN_STABLE'
OPT_FAST_COMPILE.name = 'OPT_FAST_COMPILE'
OPT_STABILIZE.name = 'OPT_STABILIZE'
predefined_optimizers = {
None: (lambda env: None),
......
......@@ -38,12 +38,14 @@ AddConfigVar('profiling.time_thunks',
def _atexit_print_fn():
"""Print ProfileStat objects in _atexit_print_list to _atexit_print_file
"""
printed = 0
for ps in _atexit_print_list:
if ps.fct_callcount or ps.compile_time > 0:
ps.summary(file=_atexit_print_file)
printed += 1
else:
print 'Skipping empty Profile'
if len(_atexit_print_list) > 1:
if printed > 1:
# Make a global profile
cum = copy.copy(_atexit_print_list[0])
cum.message = "Sum of all printed profiles at exit"
......@@ -51,14 +53,26 @@ def _atexit_print_fn():
# for ps in [ps for ps in _atexit_print_list[1:]
# if not isinstance(ps, ScanProfileStats)]:
for attr in ["compile_time", "fct_call_time", "fct_callcount",
"vm_call_time", "optimizer_time", "linker_time"]:
"vm_call_time", "optimizer_time", "linker_time",
"validate_time"]:
setattr(cum, attr, getattr(cum, attr) + getattr(ps, attr))
# merge dictionaries
for attr in ["apply_time", "apply_callcount",
"apply_cimpl", "outputs_size"]:
cum_attr = getattr(cum, attr)
for key, val in getattr(ps, attr).iteritems():
assert key not in cum_attr
cum_attr[key] = val
if cum.optimizer_profile and ps.optimizer_profile:
merge = cum.optimizer_profile[0].merge_profile(
cum.optimizer_profile[1],
ps.optimizer_profile[1])
cum.optimizer_profile = (cum.optimizer_profile[0], merge)
else:
cum.optimizer_profile = None
cum.summary(file=_atexit_print_file)
......@@ -118,11 +132,19 @@ class ProfileStats(object):
optimizer_time = 0.0
# time spent optimizing graph (FunctionMaker.__init__)
validate_time = 0.0
# time spent in env.validate
# This is a subset of optimizer_time that is dominated by toposort()
# when the destroy map feature is included.
linker_time = 0.0
# time spent linking graph (FunctionMaker.create)
line_width = 140
optimizer_profile = None
# None or tuple (the optimizer, the profile it returned)
# param is called flag_time_thunks because most other attributes with time
# in the name are times *of* something, rather than configuration flags.
def __init__(self, atexit_print=True, flag_time_thunks=None, **kwargs):
......@@ -390,11 +412,15 @@ class ProfileStats(object):
local_time, 100*local_time / self.fct_call_time)
print >> file, ' Total compile time: %es' % self.compile_time
print >> file, ' Theano Optimizer time: %es' % self.optimizer_time
print >> file, ' Theano validate time: %es' % self.validate_time
print >> file, (' Theano Linker time (includes C,'
' CUDA code generation/compiling): %es' %
self.linker_time)
print >> file, ''
# The validation time is a subset of optimizer_time
assert self.validate_time < self.optimizer_time
def summary(self, file=sys.stderr, n_ops_to_print=20,
n_applies_to_print=20):
self.summary_function(file)
......@@ -402,9 +428,13 @@ class ProfileStats(object):
if local_time > 0:
self.summary_ops(file, n_ops_to_print)
self.summary_nodes(file, n_applies_to_print)
else:
elif self.fct_callcount > 0:
print >> file, (" No node time accumulated "
"(hint: try config profiling.time_thunks=1)")
if self.optimizer_profile:
print "Optimizer Profile"
print "-----------------"
self.optimizer_profile[0].print_profile(file, self.optimizer_profile[1])
if 0: # old code still to be ported from ProfileMode
......
......@@ -129,6 +129,7 @@ class Env(utils.object2):
self.node_locks = {}
self.variable_locks = {}
self.profile = None
### Setup a Variable ###
......
......@@ -567,7 +567,7 @@ def clone(i, o, copy_inputs = True):
:type o: list
:param o: output L{Variable}s
:type copy_inputs: bool
:param copy_inputs: if True, the inputs will be copied (defaults to False)
:param copy_inputs: if True, the inputs will be copied (defaults to True)
Returns the inputs and outputs of that copy.
"""
......
差异被折叠。
......@@ -229,7 +229,10 @@ class SequenceDB(DB):
opts = [o for o in opts if self.__position__[o.name] < position_cutoff]
opts.sort(key=lambda obj: self.__position__[obj.name])
return opt.SeqOptimizer(opts, failure_callback=self.failure_callback)
ret = opt.SeqOptimizer(opts, failure_callback=self.failure_callback)
if hasattr(tags[0], 'name'):
ret.name = tags[0].name
return ret
def print_summary(self, stream=sys.stdout):
print >> stream, "SequenceDB (id %i)" % id(self)
......
import sys
import time
from theano.gof.python25 import partial
......@@ -71,10 +72,20 @@ class History:
class Validator:
def on_attach(self, env):
if hasattr(env, 'validate'):
raise AlreadyThere("Validator feature is already present or in"
" conflict with another plugin.")
env.validate = lambda: env.execute_callbacks('validate')
for attr in ('validate', 'validate_time'):
if hasattr(env, attr):
raise AlreadyThere("Validator feature is already present or in"
" conflict with another plugin.")
def validate():
t0 = time.time()
ret = env.execute_callbacks('validate')
t1 = time.time()
if env.profile:
env.profile.validate_time += t1 - t0
return ret
env.validate = validate
def consistent():
try:
......
......@@ -17,6 +17,9 @@ logger = logging.getLogger(__name__)
AddConfigVar('profile',
"If VM should collect profile information",
BoolParam(False))
AddConfigVar('profile_optimizer',
"If VM should collect optimizer profile information",
BoolParam(False))
raise_with_op = link.raise_with_op
......
......@@ -24,7 +24,8 @@ def test_nvidia_driver1():
"""
a = numpy.random.rand(10000).astype("float32")
A = cuda.shared_constructor(a)
f = theano.function(inputs=[], outputs=A.sum(), mode=mode_with_gpu)
f = theano.function(inputs=[], outputs=A.sum(), mode=mode_with_gpu,
profile=False)
topo = f.maker.env.toposort()
assert len(topo) == 2
assert sum(isinstance(node.op, B.GpuSum) for node in topo) == 1
......@@ -56,7 +57,8 @@ def test_nvidia_driver3():
of the gpu device
"""
var = cuda.fvector()
f = theano.function([var], var + 1, mode=mode_with_gpu)
f = theano.function([var], var + 1, mode=mode_with_gpu,
profile=False)
topo = f.maker.env.toposort()
assert any([isinstance(node.op, cuda.GpuElemwise) for node in topo])
assert theano.sandbox.cuda.use.device_number is not None
......
......@@ -794,7 +794,8 @@ def scan(fn,
updates=updates,
mode=compile.mode.Mode(linker='py',
optimizer=None),
on_unused_input='ignore')
on_unused_input='ignore',
profile=False)
##
### Step 5. Re-arange inputs of scan into a more strict order
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论