Commit 53ba24bb, authored by abergeron, committed by GitHub

Merge pull request #5037 from nouiz/inplace_profile

Inplace profile and profile merge crash fix.
......@@ -2623,7 +2623,7 @@ class EquilibriumOptimizer(NavigatorOptimizer):
final_optimizers=final_optimizers,
cleanup_optimizers=cleanup_optimizers)
def merge_list(l1, l2):
def add_append_list(l1, l2):
l = copy.copy(l1)
for idx, nb in enumerate(l2):
if idx < len(l):
......@@ -2632,7 +2632,7 @@ class EquilibriumOptimizer(NavigatorOptimizer):
l.append(nb)
return l
loop_timing = merge_list(prof1[1], prof2[1])
loop_timing = add_append_list(prof1[1], prof2[1])
loop_process_count = list(prof1[2])
global_sub_profs = []
......@@ -2668,23 +2668,30 @@ class EquilibriumOptimizer(NavigatorOptimizer):
final_sub_profs.append(merge(final_optimizers, 'final_optimizers', 10))
cleanup_sub_profs.append(merge(cleanup_optimizers, 'cleanup_optimizers', 11))
loop_process_count.extend(prof2[2][len(loop_process_count):])
# Add the iteration done by only one of the profile.
loop_process_count.extend(prof1[2][len(loop_process_count):])
global_sub_profs.extend(prof1[9][len(global_sub_profs):])
final_sub_profs.extend(prof1[10][len(final_sub_profs):])
cleanup_sub_profs.extend(prof1[11][len(cleanup_sub_profs):])
global_sub_profs.extend(prof2[9][len(loop_process_count):])
final_sub_profs.extend(prof2[10][len(loop_process_count):])
cleanup_sub_profs.extend(prof2[11][len(loop_process_count):])
max_nb_nodes = max(prof1[3], prof2[3])
global_opt_timing = merge_list(prof1[4], prof2[4])
global_opt_timing = add_append_list(prof1[4], prof2[4])
nb_nodes = merge_list(prof1[5], prof2[5])
nb_nodes = add_append_list(prof1[5], prof2[5])
time_opts = merge_dict(prof1[6], prof2[6])
io_toposort_timing = merge_list(prof1[7], prof2[7])
io_toposort_timing = add_append_list(prof1[7], prof2[7])
assert (len(loop_timing) == len(global_opt_timing) ==
len(global_sub_profs) ==
len(io_toposort_timing) == len(nb_nodes))
assert len(loop_timing) == max(len(prof1[1]), len(prof2[1]))
node_created = merge_dict(prof1[8], prof2[8])
return (new_opt,
loop_timing,
loop_process_count,
......
......@@ -743,7 +743,7 @@ optdb.register('gpua_elemwise_fusion',
tensor.opt.FusionOptimizer(gpu_local_elemwise_fusion), 49,
'fast_run', 'fusion', 'local_elemwise_fusion', 'gpuarray')
inplace_gpu_elemwise_opt = tensor.opt.inplace_elemwise_optimizer_op(
inplace_gpu_elemwise_opt = tensor.opt.InplaceElemwiseOptimizer(
GpuElemwise)
optdb.register('gpua_inplace_opt', inplace_gpu_elemwise_opt, 75,
'inplace_elemwise_optimizer', 'fast_run', 'inplace', 'gpuarray')
......
......@@ -482,7 +482,7 @@ class IgnorePrinter:
class DefaultPrinter:
def __init__(self):
pass
self.leaf_printer = LeafPrinter()
def process(self, output, pstate):
if output in pstate.memo:
......@@ -490,7 +490,7 @@ class DefaultPrinter:
pprinter = pstate.pprinter
node = output.owner
if node is None:
return LeafPrinter().process(output, pstate)
return self.leaf_printer.process(output, pstate)
r = "%s(%s)" % (str(node.op), ", ".join(
[pprinter.process(input, pstate.clone(precedence=-1000))
for input in node.inputs]))
......@@ -513,12 +513,13 @@ class LeafPrinter:
class PPrinter:
def __init__(self):
    # Ordered list of (condition, printer) pairs; `assign` inserts at the
    # front, so the most recently registered condition is tried first.
    self.printers = []
    # Fast-path lookup: Op instance or Op class -> printer.  `process`
    # consults this dict before falling back to the condition list.
    self.printers_dict = {}
def assign(self, condition, printer):
if isinstance(condition, gof.Op):
op = condition
condition = (lambda pstate, r: r.owner is not None and
r.owner.op == op)
# condition can be a class or an instance of an Op.
if isinstance(condition, (gof.Op, type)):
self.printers_dict[condition] = printer
return
self.printers.insert(0, (condition, printer))
def process(self, r, pstate=None):
......@@ -526,6 +527,11 @@ class PPrinter:
pstate = PrinterState(pprinter=self)
elif isinstance(pstate, dict):
pstate = PrinterState(pprinter=self, **pstate)
if getattr(r, 'owner', None) is not None:
if r.owner.op in self.printers_dict:
return self.printers_dict[r.owner.op].process(r, pstate)
if type(r.owner.op) in self.printers_dict:
return self.printers_dict[type(r.owner.op)].process(r, pstate)
for condition, printer in self.printers:
if condition(pstate, r):
return printer.process(r, pstate)
......@@ -533,6 +539,7 @@ class PPrinter:
def clone(self):
    # Shallow-copy the pretty-printer, but give the clone its own
    # printer containers so later `assign` calls on either object do
    # not leak registrations into the other.
    cp = copy(self)
    cp.printers = list(self.printers)
    cp.printers_dict = dict(self.printers_dict)
    return cp
def clone_assign(self, condition, printer):
......
......@@ -2181,7 +2181,7 @@ else:
71.00, 'fusion', 'local_elemwise_fusion')
# GpuElemwise inplace
gpu_inplace_elemwise_optimizer = tensor.opt.inplace_elemwise_optimizer_op(
gpu_inplace_elemwise_optimizer = tensor.opt.InplaceElemwiseOptimizer(
GpuElemwise)
# DO NOT PLACE add a 'gpu' tag here! This would enable it in fast_compile.
# It still will be run in fast_run with device=gpu with the current tag.
......
......@@ -4113,8 +4113,7 @@ class Join(Op):
join = Join()
pprint.assign(lambda pstate, r: r.owner and isinstance(r.owner.op, Join),
printing.FunctionPrinter('join'))
pprint.assign(Join, printing.FunctionPrinter('join'))
def roll(x, shift, axis=None):
......
......@@ -446,8 +446,7 @@ class DimShufflePrinter:
else:
raise TypeError("Can only print DimShuffle.")
pprint.assign(lambda pstate, r: r.owner and isinstance(r.owner.op, DimShuffle),
DimShufflePrinter())
pprint.assign(DimShuffle, DimShufflePrinter())
################
......
......@@ -26,12 +26,7 @@ def _scal_inplace(symbol):
rval.__epydoc_asRoutine = symbol
rval.__module__ = 'theano.tensor.inplace'
def chk(pstate, r):
    # Predicate for pprint.assign: match only variables produced by this
    # particular in-place op (`rval` is the op built by the enclosing
    # `_scal_inplace` factory, not visible in this excerpt).
    if not r.owner:
        # Graph inputs and constants have no owner, so they can never match.
        return False
    return r.owner.op == rval
pprint.assign(chk, printing.FunctionPrinter(symbolname.replace('_inplace', '=')))
pprint.assign(rval, printing.FunctionPrinter(symbolname.replace('_inplace', '=')))
return rval
......
......@@ -5,6 +5,7 @@ Tensor optimizations addressing the ops in basic.py.
# TODO: intelligent merge for mul/add
# TODO: 0*x -> 0
from collections import defaultdict
import logging
import itertools
import operator
......@@ -146,14 +147,34 @@ def broadcast_like(value, template, fgraph, dtype=None):
return rval
def inplace_elemwise_optimizer_op(OP):
class InplaceElemwiseOptimizer(Optimizer):
"""
We parametrise it to make it work for Elemwise and GpuElemwise op.
"""
@gof.inplace_optimizer
def inplace_elemwise_optimizer(fgraph):
def __init__(self, OP):
    # The Elemwise-like Op class this optimizer targets (e.g. Elemwise or
    # GpuElemwise); `apply` rewrites only nodes whose op is exactly this
    # type (see the `type(op) == self.op` check there).
    self.op = OP
def add_requirements(self, fgraph):
    # Attach a DestroyHandler so the fgraph can track and validate the
    # destructive (in-place) replacements this optimizer introduces.
    fgraph.attach_feature(theano.gof.destroyhandler.DestroyHandler())
@staticmethod
def print_profile(stream, prof, level=0):
blanc = (' ' * level)
print(blanc, "InplaceElemwiseOptimizer ", prof['opt'].op, file=stream)
for k in ['node_before',
'nb_call_replace',
'nb_call_validate',
'nb_inconsistent']:
print(blanc, k, prof[k], file=stream)
ndim = prof['ndim']
if ndim:
print(blanc, "ndim", "nb", file=stream)
for n in sorted(ndim.keys()):
print(blanc, n, ndim[n], file=stream)
def apply(self, fgraph):
"""
Usage: inplace_elemwise_optimizer.optimize(fgraph)
Usage: InplaceElemwiseOptimizer(op).optimize(fgraph)
Attempts to replace all Broadcast ops by versions of them
that operate inplace. It operates greedily: for each Broadcast
......@@ -163,8 +184,10 @@ def inplace_elemwise_optimizer_op(OP):
Examples
--------
x + y + z -> x += y += z
(x + y) * (x * y) -> (x += y) *= (x * y) or (x + y) *= (x *= y)
`x + y + z -> x += y += z`
`(x + y) * (x * y) -> (x += y) *= (x * y) or (x + y) *= (x *= y)`
"""
# We should not validate too often as this takes too much time to
......@@ -187,6 +210,13 @@ def inplace_elemwise_optimizer_op(OP):
# the solution is also applicable there.
# We execute `validate` after this number of change.
prof = {'opt': self,
'node_before': len(fgraph.apply_nodes),
'nb_call_replace': 0,
'nb_call_validate': 0,
'nb_inconsistent': 0,
'ndim': defaultdict(lambda: 0)}
check_each_change = config.tensor.insert_inplace_optimizer_validate_nb
if check_each_change == -1:
if len(fgraph.apply_nodes) > 500:
......@@ -210,7 +240,7 @@ def inplace_elemwise_optimizer_op(OP):
for node in list(graph.io_toposort(fgraph.inputs, fgraph.outputs)):
op = node.op
# gpuarray GpuElemwise inherit from Elemwise
if not type(op) == OP:
if not type(op) == self.op:
continue
# If big graph and the outputs are scalar, do not make it
# inplace.
......@@ -327,19 +357,23 @@ def inplace_elemwise_optimizer_op(OP):
scalar.transfer_type(
*[inplace_pattern.get(i, None)
for i in xrange(len(node.outputs))]))
new_outputs = OP(new_scal, inplace_pattern)(
new_outputs = self.op(new_scal, inplace_pattern)(
*node.inputs, **dict(return_list=True))
new_node = new_outputs[0].owner
for r, new_r in zip(node.outputs, new_outputs):
prof['nb_call_replace'] += 1
fgraph.replace(r, new_r,
reason="inplace_elemwise_optimizer")
nb_change_no_validate += 1
prof['ndim'][candidate_out_var.ndim] += 1
if nb_change_no_validate >= check_each_change:
prof['nb_call_validate'] += 1
fgraph.validate()
chk = fgraph.checkpoint()
nb_change_no_validate = 0
except (ValueError, InconsistencyError) as e:
prof['nb_inconsistent'] += 1
if check_each_change != 1 and not raised_warning:
print(("Some inplace optimization was not "
"performed due to unexpected error:"),
......@@ -362,9 +396,14 @@ def inplace_elemwise_optimizer_op(OP):
"performed due to unexpected error"),
file=sys.stderr)
fgraph.revert(chk)
return prof
def print_summary(self, stream=sys.stdout, level=0, depth=-1):
    # One-line summary for the optimizer-printing machinery:
    # "<indent>InplaceElemwiseOptimizer (<wrapped Op>)".
    # `depth` is accepted for interface compatibility but unused here.
    print("%s%s (%s)" % (
        (' ' * level), self.__class__.__name__, self.op), file=stream)
return inplace_elemwise_optimizer
inplace_elemwise_optimizer = inplace_elemwise_optimizer_op(T.Elemwise)
inplace_elemwise_optimizer = InplaceElemwiseOptimizer(T.Elemwise)
compile.optdb.register('inplace_elemwise_opt', inplace_elemwise_optimizer, 75,
'inplace_opt', # for historic reason
'inplace_elemwise_optimizer',
......@@ -830,8 +869,7 @@ class MakeVectorPrinter:
else:
raise TypeError("Can only print make_vector.")
T.pprint.assign(lambda pstate, r: r.owner and
isinstance(r.owner.op, MakeVector), MakeVectorPrinter())
T.pprint.assign(MakeVector, MakeVectorPrinter())
class ShapeFeature(object):
......
......@@ -1002,8 +1002,7 @@ class SubtensorPrinter:
else:
raise TypeError("Can only print Subtensor.")
pprint.assign(lambda pstate, r: r.owner and isinstance(r.owner.op, Subtensor),
SubtensorPrinter())
pprint.assign(Subtensor, SubtensorPrinter())
def set_subtensor(x, y, inplace=False,
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论