提交 bb096349 authored 作者: lamblin's avatar lamblin

Merge pull request #666 from nouiz/time_opt

Time opt
...@@ -994,11 +994,16 @@ class FunctionMaker(object): ...@@ -994,11 +994,16 @@ class FunctionMaker(object):
# we allow ProfileMode to provide a ProfileStats object # we allow ProfileMode to provide a ProfileStats object
# using this somewhat awkward mechanism. # using this somewhat awkward mechanism.
mode_profile = getattr(mode, 'profile', None) mode_profile = getattr(mode, 'profile', None)
if (profile is not None) and (mode_profile is not None): if (profile is not None and
profile is not False and
mode_profile is not None):
raise TypeError( raise TypeError(
'profile passed via both "mode" and "profile" arguments') 'profile passed via both "mode" and "profile" arguments')
self.profile = profile = profile or mode_profile self.profile = profile = profile or mode_profile
if profile:
# We preload the cache here to don't have its timming
# included in optimization that compile function.
theano.gof.cc.get_module_cache()
# Handle the case where inputs and/or outputs is a single Variable (not in a list) # Handle the case where inputs and/or outputs is a single Variable (not in a list)
self.orig_outputs = outputs self.orig_outputs = outputs
unpack_single = False unpack_single = False
...@@ -1030,6 +1035,8 @@ class FunctionMaker(object): ...@@ -1030,6 +1035,8 @@ class FunctionMaker(object):
# make the env (copies the graph, creates NEW INPUT AND OUTPUT VARIABLES) # make the env (copies the graph, creates NEW INPUT AND OUTPUT VARIABLES)
env, additional_outputs = std_env(expanded_inputs, outputs, accept_inplace) env, additional_outputs = std_env(expanded_inputs, outputs, accept_inplace)
env.profile = profile
self.env = env self.env = env
# Fetch the optimizer and linker # Fetch the optimizer and linker
...@@ -1042,13 +1049,15 @@ class FunctionMaker(object): ...@@ -1042,13 +1049,15 @@ class FunctionMaker(object):
theano.config.compute_test_value = "off" theano.config.compute_test_value = "off"
gof.Op.add_stack_trace_on_call = False gof.Op.add_stack_trace_on_call = False
start_optimizer = time.time() start_optimizer = time.time()
optimizer(env) optimizer_profile = optimizer(env)
end_optimizer = time.time() end_optimizer = time.time()
opt_time = end_optimizer - start_optimizer opt_time = end_optimizer - start_optimizer
mode.optimizer_time += opt_time mode.optimizer_time += opt_time
if profile: if profile:
profile.optimizer_time += opt_time profile.optimizer_time += opt_time
if theano.config.profile_optimizer:
profile.optimizer_profile = (optimizer, optimizer_profile)
_logger.debug('Optimizing took %f seconds', opt_time) _logger.debug('Optimizing took %f seconds', opt_time)
#Add deep copy to respect the memory interface #Add deep copy to respect the memory interface
......
...@@ -88,6 +88,10 @@ OPT_FAST_RUN_STABLE = OPT_FAST_RUN.requiring('stable') ...@@ -88,6 +88,10 @@ OPT_FAST_RUN_STABLE = OPT_FAST_RUN.requiring('stable')
OPT_FAST_COMPILE = gof.Query(include=['fast_compile']) OPT_FAST_COMPILE = gof.Query(include=['fast_compile'])
OPT_STABILIZE = gof.Query(include=['fast_run']) OPT_STABILIZE = gof.Query(include=['fast_run'])
OPT_STABILIZE.position_cutoff = 1.5000001 OPT_STABILIZE.position_cutoff = 1.5000001
OPT_FAST_RUN.name = 'OPT_FAST_RUN'
OPT_FAST_RUN_STABLE.name = 'OPT_FAST_RUN_STABLE'
OPT_FAST_COMPILE.name = 'OPT_FAST_COMPILE'
OPT_STABILIZE.name = 'OPT_STABILIZE'
predefined_optimizers = { predefined_optimizers = {
None: (lambda env: None), None: (lambda env: None),
......
...@@ -38,12 +38,14 @@ AddConfigVar('profiling.time_thunks', ...@@ -38,12 +38,14 @@ AddConfigVar('profiling.time_thunks',
def _atexit_print_fn(): def _atexit_print_fn():
"""Print ProfileStat objects in _atexit_print_list to _atexit_print_file """Print ProfileStat objects in _atexit_print_list to _atexit_print_file
""" """
printed = 0
for ps in _atexit_print_list: for ps in _atexit_print_list:
if ps.fct_callcount or ps.compile_time > 0: if ps.fct_callcount or ps.compile_time > 0:
ps.summary(file=_atexit_print_file) ps.summary(file=_atexit_print_file)
printed += 1
else: else:
print 'Skipping empty Profile' print 'Skipping empty Profile'
if len(_atexit_print_list) > 1: if printed > 1:
# Make a global profile # Make a global profile
cum = copy.copy(_atexit_print_list[0]) cum = copy.copy(_atexit_print_list[0])
cum.message = "Sum of all printed profiles at exit" cum.message = "Sum of all printed profiles at exit"
...@@ -51,14 +53,26 @@ def _atexit_print_fn(): ...@@ -51,14 +53,26 @@ def _atexit_print_fn():
# for ps in [ps for ps in _atexit_print_list[1:] # for ps in [ps for ps in _atexit_print_list[1:]
# if not isinstance(ps, ScanProfileStats)]: # if not isinstance(ps, ScanProfileStats)]:
for attr in ["compile_time", "fct_call_time", "fct_callcount", for attr in ["compile_time", "fct_call_time", "fct_callcount",
"vm_call_time", "optimizer_time", "linker_time"]: "vm_call_time", "optimizer_time", "linker_time",
"validate_time"]:
setattr(cum, attr, getattr(cum, attr) + getattr(ps, attr)) setattr(cum, attr, getattr(cum, attr) + getattr(ps, attr))
#merge dictonary
for attr in ["apply_time", "apply_callcount", for attr in ["apply_time", "apply_callcount",
"apply_cimpl", "outputs_size"]: "apply_cimpl", "outputs_size"]:
cum_attr = getattr(cum, attr) cum_attr = getattr(cum, attr)
for key, val in getattr(ps, attr).iteritems(): for key, val in getattr(ps, attr).iteritems():
assert key not in cum_attr assert key not in cum_attr
cum_attr[key] = val cum_attr[key] = val
if cum.optimizer_profile and ps.optimizer_profile:
merge = cum.optimizer_profile[0].merge_profile(
cum.optimizer_profile[1],
ps.optimizer_profile[1])
cum.optimizer_profile = (cum.optimizer_profile[0], merge)
else:
cum.optimizer_profile = None
cum.summary(file=_atexit_print_file) cum.summary(file=_atexit_print_file)
...@@ -118,11 +132,19 @@ class ProfileStats(object): ...@@ -118,11 +132,19 @@ class ProfileStats(object):
optimizer_time = 0.0 optimizer_time = 0.0
# time spent optimizing graph (FunctionMaker.__init__) # time spent optimizing graph (FunctionMaker.__init__)
validate_time = 0.0
# time spent in env.validate
# This is a subset of optimizer_time that is dominated by toposort()
# when the destorymap feature is included.
linker_time = 0.0 linker_time = 0.0
# time spent linking graph (FunctionMaker.create) # time spent linking graph (FunctionMaker.create)
line_width = 140 line_width = 140
optimizer_profile = None
# None or tuple (the optimizer, the profile it returned)
# param is called flag_time_thunks because most other attributes with time # param is called flag_time_thunks because most other attributes with time
# in the name are times *of* something, rather than configuration flags. # in the name are times *of* something, rather than configuration flags.
def __init__(self, atexit_print=True, flag_time_thunks=None, **kwargs): def __init__(self, atexit_print=True, flag_time_thunks=None, **kwargs):
...@@ -390,11 +412,15 @@ class ProfileStats(object): ...@@ -390,11 +412,15 @@ class ProfileStats(object):
local_time, 100*local_time / self.fct_call_time) local_time, 100*local_time / self.fct_call_time)
print >> file, ' Total compile time: %es' % self.compile_time print >> file, ' Total compile time: %es' % self.compile_time
print >> file, ' Theano Optimizer time: %es' % self.optimizer_time print >> file, ' Theano Optimizer time: %es' % self.optimizer_time
print >> file, ' Theano validate time: %es' % self.validate_time
print >> file, (' Theano Linker time (includes C,' print >> file, (' Theano Linker time (includes C,'
' CUDA code generation/compiling): %es' % ' CUDA code generation/compiling): %es' %
self.linker_time) self.linker_time)
print >> file, '' print >> file, ''
# The validation time is a subset of optimizer_time
assert self.validate_time < self.optimizer_time
def summary(self, file=sys.stderr, n_ops_to_print=20, def summary(self, file=sys.stderr, n_ops_to_print=20,
n_applies_to_print=20): n_applies_to_print=20):
self.summary_function(file) self.summary_function(file)
...@@ -402,9 +428,13 @@ class ProfileStats(object): ...@@ -402,9 +428,13 @@ class ProfileStats(object):
if local_time > 0: if local_time > 0:
self.summary_ops(file, n_ops_to_print) self.summary_ops(file, n_ops_to_print)
self.summary_nodes(file, n_applies_to_print) self.summary_nodes(file, n_applies_to_print)
else: elif self.fct_callcount > 0:
print >> file, (" No node time accumulated " print >> file, (" No node time accumulated "
"(hint: try config profiling.time_thunks=1)") "(hint: try config profiling.time_thunks=1)")
if self.optimizer_profile:
print "Optimizer Profile"
print "-----------------"
self.optimizer_profile[0].print_profile(file, self.optimizer_profile[1])
if 0: # old code still to be ported from ProfileMode if 0: # old code still to be ported from ProfileMode
......
...@@ -129,6 +129,7 @@ class Env(utils.object2): ...@@ -129,6 +129,7 @@ class Env(utils.object2):
self.node_locks = {} self.node_locks = {}
self.variable_locks = {} self.variable_locks = {}
self.profile = None
### Setup a Variable ### ### Setup a Variable ###
......
...@@ -567,7 +567,7 @@ def clone(i, o, copy_inputs = True): ...@@ -567,7 +567,7 @@ def clone(i, o, copy_inputs = True):
:type o: list :type o: list
:param o: output L{Variable}s :param o: output L{Variable}s
:type copy_inputs: bool :type copy_inputs: bool
:param copy_inputs: if True, the inputs will be copied (defaults to False) :param copy_inputs: if True, the inputs will be copied (defaults to True)
Returns the inputs and outputs of that copy. Returns the inputs and outputs of that copy.
""" """
......
...@@ -75,7 +75,7 @@ class Optimizer(object): ...@@ -75,7 +75,7 @@ class Optimizer(object):
opt.apply(env) opt.apply(env)
""" """
self.add_requirements(env) self.add_requirements(env)
self.apply(env, *args, **kwargs) return self.apply(env, *args, **kwargs)
def __call__(self, env): def __call__(self, env):
"""WRITEME """WRITEME
...@@ -98,6 +98,12 @@ class Optimizer(object): ...@@ -98,6 +98,12 @@ class Optimizer(object):
print >> stream, "%s%s %s id=%i" % ( print >> stream, "%s%s %s id=%i" % (
(' ' * level), self.__class__.__name__, name, id(self)) (' ' * level), self.__class__.__name__, name, id(self))
def print_profile(self, prof):
    """Print the profiling information `prof` returned by apply().

    The base implementation accepts only ``None`` (i.e. the optimizer
    produced no profile). Any optimizer whose apply() returns profiling
    data must override this method to display it.

    :param prof: profiling object returned by this optimizer's apply(),
        or None when no profile was collected.
    :raises NotImplementedError: if `prof` is not None, since the base
        class does not know how to render subclass-specific profiles.
    """
    if prof is not None:
        # Fixed grammar of the original message ("overrided"/"return").
        raise NotImplementedError(
            "The function print_profile must be overridden if the"
            " optimizer returns profiling information.")
class FromFunctionOptimizer(Optimizer): class FromFunctionOptimizer(Optimizer):
"""WRITEME""" """WRITEME"""
...@@ -154,12 +160,16 @@ class SeqOptimizer(Optimizer, list): ...@@ -154,12 +160,16 @@ class SeqOptimizer(Optimizer, list):
Applies each L{Optimizer} in self in turn. Applies each L{Optimizer} in self in turn.
""" """
l = [] l = []
if env.profile:
validate_before = env.profile.validate_time
nb_node_before = len(env.nodes) nb_node_before = len(env.nodes)
sub_profs = []
for optimizer in self: for optimizer in self:
try: try:
t0 = time.time() t0 = time.time()
optimizer.optimize(env) sub_prof = optimizer.optimize(env)
l.append(float(time.time() - t0)) l.append(float(time.time() - t0))
sub_profs.append(sub_prof)
except AssertionError: except AssertionError:
# do not catch Assertion failures # do not catch Assertion failures
raise raise
...@@ -169,12 +179,14 @@ class SeqOptimizer(Optimizer, list): ...@@ -169,12 +179,14 @@ class SeqOptimizer(Optimizer, list):
continue continue
else: else:
raise raise
if config.time_seq_optimizer: if config.time_seq_optimizer:
print "SeqOptimizer", print "SeqOptimizer",
if hasattr(self,"name"): print self.name, if hasattr(self,"name"): print self.name,
elif hasattr(self,"__name__"): print self.__name__, elif hasattr(self,"__name__"): print self.__name__,
print " time %.3fs for %d/%d nodes before/after optimization"%(sum(l),nb_node_before,len(env.nodes)) print " time %.3fs for %d/%d nodes before/after optimization"%(sum(l),nb_node_before,len(env.nodes))
print " time %.3fs for validate " % (
env.profile.validate_time - validate_before)
ll=[] ll=[]
for opt in self: for opt in self:
if hasattr(opt,"__name__"): if hasattr(opt,"__name__"):
...@@ -191,6 +203,12 @@ class SeqOptimizer(Optimizer, list): ...@@ -191,6 +203,12 @@ class SeqOptimizer(Optimizer, list):
for (t, opt) in lll[::-1]: for (t, opt) in lll[::-1]:
print ' %.6fs - %s' % (t, opt) print ' %.6fs - %s' % (t, opt)
print print
if env.profile:
validate_time = env.profile.validate_time - validate_before
else:
validate_time = None
return (self, l, validate_time, nb_node_before,
len(env.nodes), sub_profs)
def __eq__(self, other): def __eq__(self, other):
#added to override the list's __eq__ implementation #added to override the list's __eq__ implementation
...@@ -216,6 +234,115 @@ class SeqOptimizer(Optimizer, list): ...@@ -216,6 +234,115 @@ class SeqOptimizer(Optimizer, list):
for opt in self: for opt in self:
opt.print_summary(stream, level=(level + 2), depth=depth) opt.print_summary(stream, level=(level + 2), depth=depth)
@staticmethod
def print_profile(stream, prof, level=0):
    # Print the profile tuple returned by SeqOptimizer.apply():
    # (opts, per-opt times, validate_time, nb_node_before,
    #  nb_node_after, sub_profs). Recurses into sub-optimizer
    # profiles via their own print_profile.
    # NOTE: rebinds `prof` to the per-optimizer timing list.
    (opts, prof, validate_time, nb_node_before,
     nb_node_after, sub_profs) = prof
    # Indentation prefix grows with the recursion level.
    blanc = (' ' * level)
    print >> stream, blanc, "SeqOptimizer",
    if hasattr(opts, "name"):
        print >> stream, blanc, opts.name,
    elif hasattr(opts, "__name__"):
        print >> stream, blanc, opts.__name__,
    print >> stream, (" time %.3fs for %d/%d nodes"
                      " before/after optimization" % (
                          sum(prof), nb_node_before, nb_node_after))
    print >> stream, blanc, " %.3fs for env.validate()" % (validate_time)
    if level == 0:
        # Column header only once, at the outermost level.
        print >> stream, blanc, " time - (name, class, index)"
    # Build (name, class name, position) descriptors for each optimizer.
    ll = []
    for opt in opts:
        if hasattr(opt, "__name__"):
            ll.append((opt.__name__, opt.__class__.__name__,
                       opts.index(opt)))
        else:
            ll.append((opt.name, opt.__class__.__name__,
                       opts.index(opt)))
    lll = zip(prof, ll)

    # Python 2 comparator: order by elapsed time only.
    def cmp(a, b):
        if a[0] == b[0]:
            return 0
        elif a[0] < b[0]:
            return -1
        return 1
    lll.sort(cmp)

    # Print slowest-first; descend into sub-profiles when present
    # (opt[-1] is the optimizer's index within `opts`).
    for (t, opt) in lll[::-1]:
        #if t < 1:
        # continue
        print >> stream, blanc, ' %.6fs - %s' % (t, opt)
        if sub_profs[opt[-1]]:
            opts[opt[-1]].print_profile(stream, sub_profs[opt[-1]],
                                        level=level + 1)
    print >> stream
@staticmethod
def merge_profile(prof1, prof2):
    """Merge two profiles returned by SeqOptimizer.apply().

    Each profile is the tuple (seq_opt, times, validate_time,
    nb_node_before, nb_node_after, sub_profs). Per-optimizer times are
    summed, sub-profiles are merged recursively, and validate times are
    added. Node counts cannot be combined meaningfully across separate
    graphs, so -1 placeholders are returned for them.
    """
    import StringIO  # Python 2 stdlib; imported locally as in the original

    new_t = []            # merged per-optimizer times
    new_l = []            # merged optimizer objects
    new_sub_profile = []  # merged sub-profiles (None when absent)

    # Merge optimizers that are the *same object* in both profiles.
    for l in set(prof1[0]).intersection(set(prof2[0])):
        idx1 = prof1[0].index(l)
        idx2 = prof2[0].index(l)
        new_t.append(prof1[1][idx1] +
                     prof2[1][idx2])
        new_l.append(l)
        if hasattr(l, 'merge_profile'):
            # BUG FIX: prof2 must be indexed with its own idx2, not idx1.
            assert len(prof1[5][idx1]) == len(prof2[5][idx2])
            new_sub_profile.append(l.merge_profile(prof1[5][idx1],
                                                   prof2[5][idx2]))
        else:
            new_sub_profile.append(None)

    # Merge optimizers present in only one of the two profiles. The set
    # intersection above matches only identical objects, so equivalent
    # but distinct optimizers are matched here by name and by comparing
    # their printed summaries.
    for l in set(prof1[0]).symmetric_difference(set(prof2[0])):
        new_l_names = [o.name for o in new_l]
        if l.name in new_l_names:
            idx = new_l_names.index(l.name)
            io1 = StringIO.StringIO()
            io2 = StringIO.StringIO()
            l.print_summary(io1)
            new_l[idx].print_summary(io2)
            # BUG FIX: use getvalue() -- read() after writing returns ''
            # because the stream position is at the end, so the old
            # comparison was always true and merged unequal optimizers.
            if io1.getvalue() == io2.getvalue():
                if l in prof1[0]:
                    p = prof1
                else:
                    p = prof2
                new_t[idx] += p[1][p[0].index(l)]
                if hasattr(l, 'merge_profile'):
                    assert (len(p[5][p[0].index(l)]) ==
                            len(new_sub_profile[idx]))
                    new_sub_profile[idx] = l.merge_profile(
                        new_sub_profile[idx], p[5][p[0].index(l)])
                else:
                    new_sub_profile[idx] = None
                continue
        # No equivalent already merged: carry the entry over unchanged.
        if l in prof1[0]:
            p = prof1
        else:
            p = prof2
        new_t.append(p[1][p[0].index(l)])
        idx = p[0].index(l)
        new_l.append(l)
        new_sub_profile.append(p[5][idx])

    new_opt = SeqOptimizer(*new_l)
    assert set(prof1[0]).issubset(set(new_l))
    # assert set(prof2[0]).issubset(set(new_l))
    assert len(new_t) == len(new_opt) == len(new_sub_profile)
    # -1, -1: before/after node counts are undefined for a merged profile.
    return (new_opt, new_t, prof1[2] + prof2[2],
            -1, -1, new_sub_profile)
class _metadict: class _metadict:
"""WRITEME""" """WRITEME"""
...@@ -500,7 +627,9 @@ def MergeOptMerge(opt): ...@@ -500,7 +627,9 @@ def MergeOptMerge(opt):
opt introduced additional similarities. opt introduced additional similarities.
""" """
merger = merge_optimizer merger = merge_optimizer
return SeqOptimizer([merger, opt, merger]) opt = SeqOptimizer([merger, opt, merger])
opt.name = "MergeOptMerge"
return opt
def pre_constant_merge(vars): def pre_constant_merge(vars):
...@@ -1314,7 +1443,12 @@ class EquilibriumOptimizer(NavigatorOptimizer): ...@@ -1314,7 +1443,12 @@ class EquilibriumOptimizer(NavigatorOptimizer):
loop_timing = [] loop_timing = []
global_opt_timing = [] global_opt_timing = []
time_lopts = {}
io_toposort_timing = []
nb_nodes = [] nb_nodes = []
for lopt in self.local_optimizers:
process_count.setdefault(lopt, 0)
time_lopts.setdefault(lopt, 0)
while changed and not max_use_abort: while changed and not max_use_abort:
t0 = time.time() t0 = time.time()
...@@ -1333,7 +1467,9 @@ class EquilibriumOptimizer(NavigatorOptimizer): ...@@ -1333,7 +1467,9 @@ class EquilibriumOptimizer(NavigatorOptimizer):
for node in start_from: for node in start_from:
assert node in env.outputs assert node in env.outputs
topo_t0 = time.time()
q = deque(graph.io_toposort(env.inputs, start_from)) q = deque(graph.io_toposort(env.inputs, start_from))
io_toposort_timing.append(time.time() - topo_t0)
nb_nodes.append(len(q)) nb_nodes.append(len(q))
max_nb_nodes = max(max_nb_nodes, len(q)) max_nb_nodes = max(max_nb_nodes, len(q))
...@@ -1355,9 +1491,11 @@ class EquilibriumOptimizer(NavigatorOptimizer): ...@@ -1355,9 +1491,11 @@ class EquilibriumOptimizer(NavigatorOptimizer):
while q: while q:
node = q.pop() node = q.pop()
current_node = node current_node = node
for lopt in self.local_optimizers: for lopt in self.local_optimizers:
process_count.setdefault(lopt, 0) t_lopt = time.time()
lopt_change = self.process_node(env, node, lopt) lopt_change = self.process_node(env, node, lopt)
time_lopts[lopt] += time.time() - t_lopt
if lopt_change: if lopt_change:
process_count[lopt] += 1 process_count[lopt] += 1
changed = True changed = True
...@@ -1402,6 +1540,9 @@ class EquilibriumOptimizer(NavigatorOptimizer): ...@@ -1402,6 +1540,9 @@ class EquilibriumOptimizer(NavigatorOptimizer):
print ' %d - %s' % (count, opt) print ' %d - %s' % (count, opt)
print print
return (self, loop_timing, process_count, max_nb_nodes,
global_opt_timing, nb_nodes, time_lopts, io_toposort_timing)
def print_summary(self, stream=sys.stdout, level=0, depth=-1): def print_summary(self, stream=sys.stdout, level=0, depth=-1):
name = getattr(self, 'name', None) name = getattr(self, 'name', None)
print >> stream, "%s%s %s id=%i" % ( print >> stream, "%s%s %s id=%i" % (
...@@ -1411,6 +1552,95 @@ class EquilibriumOptimizer(NavigatorOptimizer): ...@@ -1411,6 +1552,95 @@ class EquilibriumOptimizer(NavigatorOptimizer):
lopt.print_summary(stream, level=(level + 2), lopt.print_summary(stream, level=(level + 2),
depth=(depth - 1)) depth=(depth - 1))
@staticmethod
def print_profile(stream, prof, level=0):
    # Print the profile tuple returned by EquilibriumOptimizer.apply():
    # (opt, loop_timing, process_count, max_nb_nodes,
    #  global_opt_timing, nb_nodes, time_lopts, io_toposort_timing)
    (opt, loop_timing, process_count, max_nb_nodes,
     global_opt_timing, nb_nodes, time_lopts, io_toposort_timing) = prof
    # Indentation prefix grows with the recursion level.
    blanc = (' ' * level)
    print >> stream, blanc, "EquilibriumOptimizer",
    print >> stream, blanc, getattr(opt, "name",
                                    getattr(opt, "__name__", ""))
    print >> stream, blanc, " time %.3fs for %d passes, %d nodes max" % (
        sum(loop_timing), len(loop_timing), max_nb_nodes)
    print >> stream, blanc, " time io_toposort %.3fs" % sum(
        io_toposort_timing)
    # One line per equilibrium pass: total, global-opt and toposort time.
    for i in range(len(loop_timing)):
        print >> stream, blanc, ('%d - %.3fs (%.3fs in global opts, '
                                 '%.3fs io_toposort) - %d nodes' % (
                                     i, loop_timing[i],
                                     global_opt_timing[i],
                                     io_toposort_timing[i], nb_nodes[i]))
    # Collect (time, count, optimizer) for local optimizers that fired.
    # NOTE(review): this loop variable shadows the `opt` unpacked above;
    # harmless here since the outer `opt` is no longer used, but fragile.
    count_opt = []
    for opt, count in process_count.iteritems():
        if count > 0:
            count_opt.append((time_lopts[opt], count, opt))
    if count_opt:
        print >> stream, blanc, 'times applied - optimizer (only those applied):'
        count_opt.sort()
        # Slowest / most-applied first.
        for (t, count, opt) in count_opt[::-1]:
            print >> stream, blanc, ' %.3fs - %d - %s' % (
                t, count, opt)
        print >> stream
@staticmethod
def merge_profile(prof1, prof2):
    # Merge two profiles returned by EquilibriumOptimizer.apply().
    # Tuple layout, for reference:
    #(opt, loop_timing, process_count, max_nb_nodes,
    # global_opt_timing, nb_nodes, time_lopts, io_toposort_timing) = prof1
    # Build a new optimizer covering the union of both optimizer sets.
    local_optimizers = set(prof1[0].local_optimizers).union(
        prof2[0].local_optimizers)
    global_optimizers = set(prof1[0].global_optimizers).union(
        prof2[0].global_optimizers)
    new_opt = EquilibriumOptimizer(
        local_optimizers.union(global_optimizers),
        max_use_ratio=1)

    def merge_list(l1, l2):
        # Element-wise sum of two lists that may differ in length;
        # the tail of the longer list is kept as-is.
        l = copy.copy(l1)
        for idx, nb in enumerate(l2):
            if idx < len(l):
                l[idx] += nb
            else:
                l.append(nb)
        return l

    loop_timing = merge_list(prof1[1], prof2[1])
    # Sum per-optimizer application counts across the two profiles.
    process_count = prof1[2].copy()
    for process, count in prof2[2].iteritems():
        if process in process_count:
            process_count[process] += count
        else:
            process_count[process] = count
    max_nb_nodes = max(prof1[3], prof2[3])
    global_opt_timing = merge_list(prof1[4], prof2[4])
    nb_nodes = merge_list(prof1[5], prof2[5])
    # Sum per-local-optimizer times across the two profiles.
    time_lopts = prof1[6].copy()
    for opt, t in prof2[6].iteritems():
        if opt in time_lopts:
            time_lopts[opt] += t
        else:
            time_lopts[opt] = t
    io_toposort_timing = merge_list(prof1[7], prof2[7])
    # Per-pass lists must stay aligned after merging.
    assert (len(loop_timing) == len(global_opt_timing) ==
            len(io_toposort_timing) == len(nb_nodes))
    assert len(loop_timing) == max(len(prof1[1]), len(prof2[1]))
    return (new_opt,
            loop_timing,
            process_count,
            max_nb_nodes,
            global_opt_timing,
            nb_nodes,
            time_lopts,
            io_toposort_timing)
################# #################
### Utilities ### ### Utilities ###
......
...@@ -229,7 +229,10 @@ class SequenceDB(DB): ...@@ -229,7 +229,10 @@ class SequenceDB(DB):
opts = [o for o in opts if self.__position__[o.name] < position_cutoff] opts = [o for o in opts if self.__position__[o.name] < position_cutoff]
opts.sort(key=lambda obj: self.__position__[obj.name]) opts.sort(key=lambda obj: self.__position__[obj.name])
return opt.SeqOptimizer(opts, failure_callback=self.failure_callback) ret = opt.SeqOptimizer(opts, failure_callback=self.failure_callback)
if hasattr(tags[0], 'name'):
ret.name = tags[0].name
return ret
def print_summary(self, stream=sys.stdout): def print_summary(self, stream=sys.stdout):
print >> stream, "SequenceDB (id %i)" % id(self) print >> stream, "SequenceDB (id %i)" % id(self)
......
import sys import sys
import time
from theano.gof.python25 import partial from theano.gof.python25 import partial
...@@ -71,10 +72,20 @@ class History: ...@@ -71,10 +72,20 @@ class History:
class Validator: class Validator:
def on_attach(self, env): def on_attach(self, env):
if hasattr(env, 'validate'): for attr in ('validate', 'validate_time'):
raise AlreadyThere("Validator feature is already present or in" if hasattr(env, attr):
" conflict with another plugin.") raise AlreadyThere("Validator feature is already present or in"
env.validate = lambda: env.execute_callbacks('validate') " conflict with another plugin.")
def validate():
    # Run all registered 'validate' callbacks; when the env carries a
    # profile, accumulate the wall-clock time spent validating into
    # env.profile.validate_time (a subset of the optimizer time).
    t0 = time.time()
    ret = env.execute_callbacks('validate')
    t1 = time.time()
    if env.profile:
        env.profile.validate_time += t1 - t0
    return ret
# Install the timing wrapper as the env's validate entry point.
env.validate = validate
def consistent(): def consistent():
try: try:
......
...@@ -17,6 +17,9 @@ logger = logging.getLogger(__name__) ...@@ -17,6 +17,9 @@ logger = logging.getLogger(__name__)
AddConfigVar('profile', AddConfigVar('profile',
"If VM should collect profile information", "If VM should collect profile information",
BoolParam(False)) BoolParam(False))
AddConfigVar('profile_optimizer',
"If VM should collect optimizer profile information",
BoolParam(False))
raise_with_op = link.raise_with_op raise_with_op = link.raise_with_op
......
...@@ -24,7 +24,8 @@ def test_nvidia_driver1(): ...@@ -24,7 +24,8 @@ def test_nvidia_driver1():
""" """
a = numpy.random.rand(10000).astype("float32") a = numpy.random.rand(10000).astype("float32")
A = cuda.shared_constructor(a) A = cuda.shared_constructor(a)
f = theano.function(inputs=[], outputs=A.sum(), mode=mode_with_gpu) f = theano.function(inputs=[], outputs=A.sum(), mode=mode_with_gpu,
profile=False)
topo = f.maker.env.toposort() topo = f.maker.env.toposort()
assert len(topo) == 2 assert len(topo) == 2
assert sum(isinstance(node.op, B.GpuSum) for node in topo) == 1 assert sum(isinstance(node.op, B.GpuSum) for node in topo) == 1
...@@ -56,7 +57,8 @@ def test_nvidia_driver3(): ...@@ -56,7 +57,8 @@ def test_nvidia_driver3():
of the gpu device of the gpu device
""" """
var = cuda.fvector() var = cuda.fvector()
f = theano.function([var], var + 1, mode=mode_with_gpu) f = theano.function([var], var + 1, mode=mode_with_gpu,
profile=False)
topo = f.maker.env.toposort() topo = f.maker.env.toposort()
assert any([isinstance(node.op, cuda.GpuElemwise) for node in topo]) assert any([isinstance(node.op, cuda.GpuElemwise) for node in topo])
assert theano.sandbox.cuda.use.device_number is not None assert theano.sandbox.cuda.use.device_number is not None
......
...@@ -794,7 +794,8 @@ def scan(fn, ...@@ -794,7 +794,8 @@ def scan(fn,
updates=updates, updates=updates,
mode=compile.mode.Mode(linker='py', mode=compile.mode.Mode(linker='py',
optimizer=None), optimizer=None),
on_unused_input='ignore') on_unused_input='ignore',
profile=False)
## ##
### Step 5. Re-arange inputs of scan into a more strict order ### Step 5. Re-arange inputs of scan into a more strict order
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论