提交 557fe6fa authored 作者: Frédéric Bastien's avatar Frédéric Bastien

Merge pull request #2994 from t13m/opt_eqoptimizer_less_pass

Opt EquilibriumOptimizer to do fewer passes
...@@ -402,6 +402,9 @@ class FunctionGraph(utils.object2): ...@@ -402,6 +402,9 @@ class FunctionGraph(utils.object2):
if var.clients]): if var.clients]):
self.variables.remove(variable) self.variables.remove(variable)
# This allow to quickly know if a var is still in the fgraph
# or not.
del variable.fgraph
def __prune__(self, apply_node, reason=None): def __prune__(self, apply_node, reason=None):
"""Always called on owner of pruned variable from the graph. """Always called on owner of pruned variable from the graph.
...@@ -481,7 +484,7 @@ class FunctionGraph(utils.object2): ...@@ -481,7 +484,7 @@ class FunctionGraph(utils.object2):
verbose = config.optimizer_verbose verbose = config.optimizer_verbose
if verbose: if verbose:
print(reason, r, new_r) print(reason, r, new_r)
if r.fgraph is not self: if hasattr(r, 'fgraph') and r.fgraph is not self:
raise Exception("Cannot replace %s because it does not belong " raise Exception("Cannot replace %s because it does not belong "
"to this FunctionGraph" % r, str(reason)) "to this FunctionGraph" % r, str(reason))
if r.type != new_r.type: if r.type != new_r.type:
......
...@@ -597,10 +597,13 @@ class MergeOptimizer(Optimizer): ...@@ -597,10 +597,13 @@ class MergeOptimizer(Optimizer):
# doing the full cycle check. The full cycle check is # doing the full cycle check. The full cycle check is
# skipped by validate() if the graph don't contain # skipped by validate() if the graph don't contain
# destroyers. # destroyers.
node = pairs[0][0] var = pairs[0][0]
candidate = pairs[0][1] candidate = pairs[0][1]
if node.owner and candidate.owner: if (not hasattr(var, 'fgraph') or
node = node.owner not hasattr(candidate, 'fgraph')):
continue
if var.owner and candidate.owner:
node = var.owner
candidate = candidate.owner candidate = candidate.owner
inputs_match = all(node_in is cand_in inputs_match = all(node_in is cand_in
for node_in, cand_in in zip( for node_in, cand_in in zip(
...@@ -1690,7 +1693,8 @@ class EquilibriumOptimizer(NavigatorOptimizer): ...@@ -1690,7 +1693,8 @@ class EquilibriumOptimizer(NavigatorOptimizer):
optimizers, optimizers,
failure_callback=None, failure_callback=None,
ignore_newtrees=True, ignore_newtrees=True,
max_use_ratio=None): max_use_ratio=None,
final_optimizers=None):
""" Apply optimizations until equilibrium point. """ Apply optimizations until equilibrium point.
:param optimizers: list or set of local or global optimizations to :param optimizers: list or set of local or global optimizations to
...@@ -1710,6 +1714,7 @@ class EquilibriumOptimizer(NavigatorOptimizer): ...@@ -1710,6 +1714,7 @@ class EquilibriumOptimizer(NavigatorOptimizer):
self.local_optimizers_map = dict() self.local_optimizers_map = dict()
self.local_optimizers_all = [] self.local_optimizers_all = []
self.global_optimizers = [] self.global_optimizers = []
self.final_optimizers = []
for opt in optimizers: for opt in optimizers:
if isinstance(opt, LocalOptimizer): if isinstance(opt, LocalOptimizer):
...@@ -1720,6 +1725,8 @@ class EquilibriumOptimizer(NavigatorOptimizer): ...@@ -1720,6 +1725,8 @@ class EquilibriumOptimizer(NavigatorOptimizer):
self.local_optimizers_map.setdefault(c, []).append(opt) self.local_optimizers_map.setdefault(c, []).append(opt)
else: else:
self.global_optimizers.append(opt) self.global_optimizers.append(opt)
if final_optimizers:
self.final_optimizers = final_optimizers
self.max_use_ratio = max_use_ratio self.max_use_ratio = max_use_ratio
assert self.max_use_ratio is not None, ( assert self.max_use_ratio is not None, (
'max_use_ratio has to be a number') 'max_use_ratio has to be a number')
...@@ -1741,6 +1748,8 @@ class EquilibriumOptimizer(NavigatorOptimizer): ...@@ -1741,6 +1748,8 @@ class EquilibriumOptimizer(NavigatorOptimizer):
opt.add_requirements(fgraph) opt.add_requirements(fgraph)
for opt in self.global_optimizers: for opt in self.global_optimizers:
opt.add_requirements(fgraph) opt.add_requirements(fgraph)
for opt in self.final_optimizers:
opt.add_requirements(fgraph)
def apply(self, fgraph, start_from=None): def apply(self, fgraph, start_from=None):
change_tracker = ChangeTracker() change_tracker = ChangeTracker()
...@@ -1766,7 +1775,9 @@ class EquilibriumOptimizer(NavigatorOptimizer): ...@@ -1766,7 +1775,9 @@ class EquilibriumOptimizer(NavigatorOptimizer):
io_toposort_timing = [] io_toposort_timing = []
nb_nodes = [] nb_nodes = []
node_created = {} node_created = {}
for opt in self.global_optimizers + list(self.get_local_optimizers()): for opt in (self.global_optimizers +
list(self.get_local_optimizers()) +
self.final_optimizers):
global_process_count.setdefault(opt, 0) global_process_count.setdefault(opt, 0)
time_opts.setdefault(opt, 0) time_opts.setdefault(opt, 0)
node_created.setdefault(opt, 0) node_created.setdefault(opt, 0)
...@@ -1845,6 +1856,27 @@ class EquilibriumOptimizer(NavigatorOptimizer): ...@@ -1845,6 +1856,27 @@ class EquilibriumOptimizer(NavigatorOptimizer):
finally: finally:
self.detach_updater(fgraph, u) self.detach_updater(fgraph, u)
# Apply final optimizers
t_before_final_opt = time.time()
for gopt in self.final_optimizers:
change_tracker.reset()
nb = change_tracker.nb_imported
t_opt = time.time()
gopt.apply(fgraph)
time_opts[gopt] += time.time() - t_opt
if change_tracker.changed:
process_count.setdefault(gopt, 0)
process_count[gopt] += 1
global_process_count[gopt] += 1
changed = True
node_created[gopt] += change_tracker.nb_imported - nb
if global_process_count[gopt] > max_use:
max_use_abort = True
opt_name = (getattr(gopt, "name", None)
or getattr(gopt, "__name__", ""))
global_opt_timing[-1] += time.time() - t_before_final_opt
loop_process_count.append(process_count) loop_process_count.append(process_count)
loop_timing.append(float(time.time() - t0)) loop_timing.append(float(time.time() - t0))
...@@ -1912,7 +1944,9 @@ class EquilibriumOptimizer(NavigatorOptimizer): ...@@ -1912,7 +1944,9 @@ class EquilibriumOptimizer(NavigatorOptimizer):
not_used = [] not_used = []
not_used_time = 0 not_used_time = 0
process_count = {} process_count = {}
for o in opt.global_optimizers + list(opt.get_local_optimizers()): for o in (opt.global_optimizers +
list(opt.get_local_optimizers()) +
opt.final_optimizers):
process_count.setdefault(o, 0) process_count.setdefault(o, 0)
for count in loop_process_count: for count in loop_process_count:
for o, v in count.iteritems(): for o, v in count.iteritems():
...@@ -1950,9 +1984,15 @@ class EquilibriumOptimizer(NavigatorOptimizer): ...@@ -1950,9 +1984,15 @@ class EquilibriumOptimizer(NavigatorOptimizer):
prof2[0].get_local_optimizers()) prof2[0].get_local_optimizers())
global_optimizers = set(prof1[0].global_optimizers).union( global_optimizers = set(prof1[0].global_optimizers).union(
prof2[0].global_optimizers) prof2[0].global_optimizers)
if len(prof1[0].final_optimizers) > 0 or len(prof2[0].final_optimizers) > 0:
final_optimizers = set(prof1[0].final_optimizers).union(
prof2[0].final_optimizers)
else:
final_optimizers = None
new_opt = EquilibriumOptimizer( new_opt = EquilibriumOptimizer(
local_optimizers.union(global_optimizers), local_optimizers.union(global_optimizers),
max_use_ratio=1) max_use_ratio=1,
final_optimizers=final_optimizers)
def merge_list(l1, l2): def merge_list(l1, l2):
l = copy.copy(l1) l = copy.copy(l1)
......
...@@ -225,14 +225,31 @@ class EquilibriumDB(DB): ...@@ -225,14 +225,31 @@ class EquilibriumDB(DB):
def __init__(self, ignore_newtrees=True): def __init__(self, ignore_newtrees=True):
super(EquilibriumDB, self).__init__() super(EquilibriumDB, self).__init__()
self.ignore_newtrees = ignore_newtrees self.ignore_newtrees = ignore_newtrees
self.__final__ = {}
def register(self, name, obj, *tags, **kwtags):
# if name == 'cut_gpua_constant_transfers':
# import ipdb;ipdb.set_trace()
if 'final_opt' in kwtags:
final_opt = kwtags['final_opt']
kwtags.pop('final_opt', None)
else:
final_opt = False
super(EquilibriumDB, self).register(name, obj, *tags, **kwtags)
self.__final__[name] = final_opt
def query(self, *tags, **kwtags): def query(self, *tags, **kwtags):
opts = super(EquilibriumDB, self).query(*tags, **kwtags) _opts = super(EquilibriumDB, self).query(*tags, **kwtags)
final_opts = [o for o in _opts if self.__final__.get(o.name, False)]
opts = [o for o in _opts if o not in final_opts]
if len(final_opts) == 0:
final_opts = None
return opt.EquilibriumOptimizer( return opt.EquilibriumOptimizer(
opts, opts,
max_use_ratio=config.optdb.max_use_ratio, max_use_ratio=config.optdb.max_use_ratio,
ignore_newtrees=self.ignore_newtrees, ignore_newtrees=self.ignore_newtrees,
failure_callback=opt.NavigatorOptimizer.warn_inplace) failure_callback=opt.NavigatorOptimizer.warn_inplace,
final_optimizers=final_opts)
class SequenceDB(DB): class SequenceDB(DB):
......
...@@ -27,10 +27,11 @@ def register_opt(*tags, **kwargs): ...@@ -27,10 +27,11 @@ def register_opt(*tags, **kwargs):
if any([not isinstance(t, str) for t in tags]): if any([not isinstance(t, str) for t in tags]):
raise RuntimeError("Bad call to register_opt." raise RuntimeError("Bad call to register_opt."
" All tags must be strings.", tags) " All tags must be strings.", tags)
def f(local_opt): def f(local_opt):
name = (kwargs and kwargs.pop('name')) or local_opt.__name__ name = (kwargs and kwargs.pop('name')) or local_opt.__name__
gpu_optimizer.register(name, local_opt, 'fast_run', 'fast_compile', gpu_optimizer.register(name, local_opt, 'fast_run', 'fast_compile',
'gpu', *tags) 'gpu', *tags, **kwargs)
return local_opt return local_opt
return f return f
......
...@@ -94,13 +94,13 @@ optdb.register('gpu_after_fusion', ...@@ -94,13 +94,13 @@ optdb.register('gpu_after_fusion',
# Register merge_optimizer as a global opt # Register merge_optimizer as a global opt
gpu_optimizer.register('gpu_merge', theano.gof.opt.merge_optimizer, gpu_optimizer.register('gpu_merge', theano.gof.opt.merge_optimizer,
'fast_run', 'fast_compile') 'fast_run', 'fast_compile', final_opt=True)
# register local_track_shape_i at this level too # register local_track_shape_i at this level too
# to make multi-level lift of shape work. # to make multi-level lift of shape work.
register_opt()(theano.tensor.opt.local_track_shape_i) register_opt()(theano.tensor.opt.local_track_shape_i)
register_opt(name='gpu_constant_folding')( register_opt(final_opt=True, name='gpu_constant_folding')(
tensor.opt.constant_folding) tensor.opt.constant_folding)
register_opt()(theano.tensor.opt.local_subtensor_make_vector) register_opt()(theano.tensor.opt.local_subtensor_make_vector)
......
...@@ -380,7 +380,7 @@ def register_specialize_device(lopt, *tags, **kwargs): ...@@ -380,7 +380,7 @@ def register_specialize_device(lopt, *tags, **kwargs):
# Register merge_optimizer as a global opt during canonicalize # Register merge_optimizer as a global opt during canonicalize
compile.optdb['canonicalize'].register( compile.optdb['canonicalize'].register(
'canon_merge', merge_optimizer, 'fast_run') 'canon_merge', merge_optimizer, 'fast_run', final_opt=True)
##################### #####################
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论