提交 53ba24bb authored 作者: abergeron's avatar abergeron 提交者: GitHub

Merge pull request #5037 from nouiz/inplace_profile

Inplace profile and profile merge crash fix.
...@@ -2623,7 +2623,7 @@ class EquilibriumOptimizer(NavigatorOptimizer): ...@@ -2623,7 +2623,7 @@ class EquilibriumOptimizer(NavigatorOptimizer):
final_optimizers=final_optimizers, final_optimizers=final_optimizers,
cleanup_optimizers=cleanup_optimizers) cleanup_optimizers=cleanup_optimizers)
def merge_list(l1, l2): def add_append_list(l1, l2):
l = copy.copy(l1) l = copy.copy(l1)
for idx, nb in enumerate(l2): for idx, nb in enumerate(l2):
if idx < len(l): if idx < len(l):
...@@ -2632,7 +2632,7 @@ class EquilibriumOptimizer(NavigatorOptimizer): ...@@ -2632,7 +2632,7 @@ class EquilibriumOptimizer(NavigatorOptimizer):
l.append(nb) l.append(nb)
return l return l
loop_timing = merge_list(prof1[1], prof2[1]) loop_timing = add_append_list(prof1[1], prof2[1])
loop_process_count = list(prof1[2]) loop_process_count = list(prof1[2])
global_sub_profs = [] global_sub_profs = []
...@@ -2668,23 +2668,30 @@ class EquilibriumOptimizer(NavigatorOptimizer): ...@@ -2668,23 +2668,30 @@ class EquilibriumOptimizer(NavigatorOptimizer):
final_sub_profs.append(merge(final_optimizers, 'final_optimizers', 10)) final_sub_profs.append(merge(final_optimizers, 'final_optimizers', 10))
cleanup_sub_profs.append(merge(cleanup_optimizers, 'cleanup_optimizers', 11)) cleanup_sub_profs.append(merge(cleanup_optimizers, 'cleanup_optimizers', 11))
loop_process_count.extend(prof2[2][len(loop_process_count):]) # Add the iteration done by only one of the profile.
loop_process_count.extend(prof1[2][len(loop_process_count):])
global_sub_profs.extend(prof1[9][len(global_sub_profs):])
final_sub_profs.extend(prof1[10][len(final_sub_profs):])
cleanup_sub_profs.extend(prof1[11][len(cleanup_sub_profs):])
global_sub_profs.extend(prof2[9][len(loop_process_count):])
final_sub_profs.extend(prof2[10][len(loop_process_count):])
cleanup_sub_profs.extend(prof2[11][len(loop_process_count):])
max_nb_nodes = max(prof1[3], prof2[3]) max_nb_nodes = max(prof1[3], prof2[3])
global_opt_timing = merge_list(prof1[4], prof2[4]) global_opt_timing = add_append_list(prof1[4], prof2[4])
nb_nodes = merge_list(prof1[5], prof2[5]) nb_nodes = add_append_list(prof1[5], prof2[5])
time_opts = merge_dict(prof1[6], prof2[6]) time_opts = merge_dict(prof1[6], prof2[6])
io_toposort_timing = merge_list(prof1[7], prof2[7]) io_toposort_timing = add_append_list(prof1[7], prof2[7])
assert (len(loop_timing) == len(global_opt_timing) == assert (len(loop_timing) == len(global_opt_timing) ==
len(global_sub_profs) ==
len(io_toposort_timing) == len(nb_nodes)) len(io_toposort_timing) == len(nb_nodes))
assert len(loop_timing) == max(len(prof1[1]), len(prof2[1])) assert len(loop_timing) == max(len(prof1[1]), len(prof2[1]))
node_created = merge_dict(prof1[8], prof2[8]) node_created = merge_dict(prof1[8], prof2[8])
return (new_opt, return (new_opt,
loop_timing, loop_timing,
loop_process_count, loop_process_count,
......
...@@ -743,7 +743,7 @@ optdb.register('gpua_elemwise_fusion', ...@@ -743,7 +743,7 @@ optdb.register('gpua_elemwise_fusion',
tensor.opt.FusionOptimizer(gpu_local_elemwise_fusion), 49, tensor.opt.FusionOptimizer(gpu_local_elemwise_fusion), 49,
'fast_run', 'fusion', 'local_elemwise_fusion', 'gpuarray') 'fast_run', 'fusion', 'local_elemwise_fusion', 'gpuarray')
inplace_gpu_elemwise_opt = tensor.opt.inplace_elemwise_optimizer_op( inplace_gpu_elemwise_opt = tensor.opt.InplaceElemwiseOptimizer(
GpuElemwise) GpuElemwise)
optdb.register('gpua_inplace_opt', inplace_gpu_elemwise_opt, 75, optdb.register('gpua_inplace_opt', inplace_gpu_elemwise_opt, 75,
'inplace_elemwise_optimizer', 'fast_run', 'inplace', 'gpuarray') 'inplace_elemwise_optimizer', 'fast_run', 'inplace', 'gpuarray')
......
...@@ -482,7 +482,7 @@ class IgnorePrinter: ...@@ -482,7 +482,7 @@ class IgnorePrinter:
class DefaultPrinter: class DefaultPrinter:
def __init__(self): def __init__(self):
pass self.leaf_printer = LeafPrinter()
def process(self, output, pstate): def process(self, output, pstate):
if output in pstate.memo: if output in pstate.memo:
...@@ -490,7 +490,7 @@ class DefaultPrinter: ...@@ -490,7 +490,7 @@ class DefaultPrinter:
pprinter = pstate.pprinter pprinter = pstate.pprinter
node = output.owner node = output.owner
if node is None: if node is None:
return LeafPrinter().process(output, pstate) return self.leaf_printer.process(output, pstate)
r = "%s(%s)" % (str(node.op), ", ".join( r = "%s(%s)" % (str(node.op), ", ".join(
[pprinter.process(input, pstate.clone(precedence=-1000)) [pprinter.process(input, pstate.clone(precedence=-1000))
for input in node.inputs])) for input in node.inputs]))
...@@ -513,12 +513,13 @@ class LeafPrinter: ...@@ -513,12 +513,13 @@ class LeafPrinter:
class PPrinter: class PPrinter:
def __init__(self): def __init__(self):
self.printers = [] self.printers = []
self.printers_dict = {}
def assign(self, condition, printer): def assign(self, condition, printer):
if isinstance(condition, gof.Op): # condition can be a class or an instance of an Op.
op = condition if isinstance(condition, (gof.Op, type)):
condition = (lambda pstate, r: r.owner is not None and self.printers_dict[condition] = printer
r.owner.op == op) return
self.printers.insert(0, (condition, printer)) self.printers.insert(0, (condition, printer))
def process(self, r, pstate=None): def process(self, r, pstate=None):
...@@ -526,6 +527,11 @@ class PPrinter: ...@@ -526,6 +527,11 @@ class PPrinter:
pstate = PrinterState(pprinter=self) pstate = PrinterState(pprinter=self)
elif isinstance(pstate, dict): elif isinstance(pstate, dict):
pstate = PrinterState(pprinter=self, **pstate) pstate = PrinterState(pprinter=self, **pstate)
if getattr(r, 'owner', None) is not None:
if r.owner.op in self.printers_dict:
return self.printers_dict[r.owner.op].process(r, pstate)
if type(r.owner.op) in self.printers_dict:
return self.printers_dict[type(r.owner.op)].process(r, pstate)
for condition, printer in self.printers: for condition, printer in self.printers:
if condition(pstate, r): if condition(pstate, r):
return printer.process(r, pstate) return printer.process(r, pstate)
...@@ -533,6 +539,7 @@ class PPrinter: ...@@ -533,6 +539,7 @@ class PPrinter:
def clone(self): def clone(self):
cp = copy(self) cp = copy(self)
cp.printers = list(self.printers) cp.printers = list(self.printers)
cp.printers_dict = dict(self.printers_dict)
return cp return cp
def clone_assign(self, condition, printer): def clone_assign(self, condition, printer):
......
...@@ -2181,7 +2181,7 @@ else: ...@@ -2181,7 +2181,7 @@ else:
71.00, 'fusion', 'local_elemwise_fusion') 71.00, 'fusion', 'local_elemwise_fusion')
# GpuElemwise inplace # GpuElemwise inplace
gpu_inplace_elemwise_optimizer = tensor.opt.inplace_elemwise_optimizer_op( gpu_inplace_elemwise_optimizer = tensor.opt.InplaceElemwiseOptimizer(
GpuElemwise) GpuElemwise)
# DO NOT PLACE add a 'gpu' tag here! This would enable it in fast_compile. # DO NOT PLACE add a 'gpu' tag here! This would enable it in fast_compile.
# It still will be run in fast_run with device=gpu with the current tag. # It still will be run in fast_run with device=gpu with the current tag.
......
...@@ -4113,8 +4113,7 @@ class Join(Op): ...@@ -4113,8 +4113,7 @@ class Join(Op):
join = Join() join = Join()
pprint.assign(lambda pstate, r: r.owner and isinstance(r.owner.op, Join), pprint.assign(Join, printing.FunctionPrinter('join'))
printing.FunctionPrinter('join'))
def roll(x, shift, axis=None): def roll(x, shift, axis=None):
......
...@@ -446,8 +446,7 @@ class DimShufflePrinter: ...@@ -446,8 +446,7 @@ class DimShufflePrinter:
else: else:
raise TypeError("Can only print DimShuffle.") raise TypeError("Can only print DimShuffle.")
pprint.assign(lambda pstate, r: r.owner and isinstance(r.owner.op, DimShuffle), pprint.assign(DimShuffle, DimShufflePrinter())
DimShufflePrinter())
################ ################
......
...@@ -26,12 +26,7 @@ def _scal_inplace(symbol): ...@@ -26,12 +26,7 @@ def _scal_inplace(symbol):
rval.__epydoc_asRoutine = symbol rval.__epydoc_asRoutine = symbol
rval.__module__ = 'theano.tensor.inplace' rval.__module__ = 'theano.tensor.inplace'
def chk(pstate, r): pprint.assign(rval, printing.FunctionPrinter(symbolname.replace('_inplace', '=')))
if not r.owner:
return False
return r.owner.op == rval
pprint.assign(chk, printing.FunctionPrinter(symbolname.replace('_inplace', '=')))
return rval return rval
......
...@@ -5,6 +5,7 @@ Tensor optimizations addressing the ops in basic.py. ...@@ -5,6 +5,7 @@ Tensor optimizations addressing the ops in basic.py.
# TODO: intelligent merge for mul/add # TODO: intelligent merge for mul/add
# TODO: 0*x -> 0 # TODO: 0*x -> 0
from collections import defaultdict
import logging import logging
import itertools import itertools
import operator import operator
...@@ -146,14 +147,34 @@ def broadcast_like(value, template, fgraph, dtype=None): ...@@ -146,14 +147,34 @@ def broadcast_like(value, template, fgraph, dtype=None):
return rval return rval
def inplace_elemwise_optimizer_op(OP): class InplaceElemwiseOptimizer(Optimizer):
""" """
We parametrise it to make it work for Elemwise and GpuElemwise op. We parametrise it to make it work for Elemwise and GpuElemwise op.
""" """
@gof.inplace_optimizer def __init__(self, OP):
def inplace_elemwise_optimizer(fgraph): self.op = OP
def add_requirements(self, fgraph):
fgraph.attach_feature(theano.gof.destroyhandler.DestroyHandler())
@staticmethod
def print_profile(stream, prof, level=0):
blanc = (' ' * level)
print(blanc, "InplaceElemwiseOptimizer ", prof['opt'].op, file=stream)
for k in ['node_before',
'nb_call_replace',
'nb_call_validate',
'nb_inconsistent']:
print(blanc, k, prof[k], file=stream)
ndim = prof['ndim']
if ndim:
print(blanc, "ndim", "nb", file=stream)
for n in sorted(ndim.keys()):
print(blanc, n, ndim[n], file=stream)
def apply(self, fgraph):
""" """
Usage: inplace_elemwise_optimizer.optimize(fgraph) Usage: InplaceElemwiseOptimizer(op).optimize(fgraph)
Attempts to replace all Broadcast ops by versions of them Attempts to replace all Broadcast ops by versions of them
that operate inplace. It operates greedily: for each Broadcast that operate inplace. It operates greedily: for each Broadcast
...@@ -163,8 +184,10 @@ def inplace_elemwise_optimizer_op(OP): ...@@ -163,8 +184,10 @@ def inplace_elemwise_optimizer_op(OP):
Examples Examples
-------- --------
x + y + z -> x += y += z
(x + y) * (x * y) -> (x += y) *= (x * y) or (x + y) *= (x *= y) `x + y + z -> x += y += z`
`(x + y) * (x * y) -> (x += y) *= (x * y) or (x + y) *= (x *= y)`
""" """
# We should not validate too often as this takes too much time to # We should not validate too often as this takes too much time to
...@@ -187,6 +210,13 @@ def inplace_elemwise_optimizer_op(OP): ...@@ -187,6 +210,13 @@ def inplace_elemwise_optimizer_op(OP):
# the solution is also applicable there. # the solution is also applicable there.
# We execute `validate` after this number of change. # We execute `validate` after this number of change.
prof = {'opt': self,
'node_before': len(fgraph.apply_nodes),
'nb_call_replace': 0,
'nb_call_validate': 0,
'nb_inconsistent': 0,
'ndim': defaultdict(lambda: 0)}
check_each_change = config.tensor.insert_inplace_optimizer_validate_nb check_each_change = config.tensor.insert_inplace_optimizer_validate_nb
if check_each_change == -1: if check_each_change == -1:
if len(fgraph.apply_nodes) > 500: if len(fgraph.apply_nodes) > 500:
...@@ -210,7 +240,7 @@ def inplace_elemwise_optimizer_op(OP): ...@@ -210,7 +240,7 @@ def inplace_elemwise_optimizer_op(OP):
for node in list(graph.io_toposort(fgraph.inputs, fgraph.outputs)): for node in list(graph.io_toposort(fgraph.inputs, fgraph.outputs)):
op = node.op op = node.op
# gpuarray GpuElemwise inherit from Elemwise # gpuarray GpuElemwise inherit from Elemwise
if not type(op) == OP: if not type(op) == self.op:
continue continue
# If big graph and the outputs are scalar, do not make it # If big graph and the outputs are scalar, do not make it
# inplace. # inplace.
...@@ -327,19 +357,23 @@ def inplace_elemwise_optimizer_op(OP): ...@@ -327,19 +357,23 @@ def inplace_elemwise_optimizer_op(OP):
scalar.transfer_type( scalar.transfer_type(
*[inplace_pattern.get(i, None) *[inplace_pattern.get(i, None)
for i in xrange(len(node.outputs))])) for i in xrange(len(node.outputs))]))
new_outputs = OP(new_scal, inplace_pattern)( new_outputs = self.op(new_scal, inplace_pattern)(
*node.inputs, **dict(return_list=True)) *node.inputs, **dict(return_list=True))
new_node = new_outputs[0].owner new_node = new_outputs[0].owner
for r, new_r in zip(node.outputs, new_outputs): for r, new_r in zip(node.outputs, new_outputs):
prof['nb_call_replace'] += 1
fgraph.replace(r, new_r, fgraph.replace(r, new_r,
reason="inplace_elemwise_optimizer") reason="inplace_elemwise_optimizer")
nb_change_no_validate += 1 nb_change_no_validate += 1
prof['ndim'][candidate_out_var.ndim] += 1
if nb_change_no_validate >= check_each_change: if nb_change_no_validate >= check_each_change:
prof['nb_call_validate'] += 1
fgraph.validate() fgraph.validate()
chk = fgraph.checkpoint() chk = fgraph.checkpoint()
nb_change_no_validate = 0 nb_change_no_validate = 0
except (ValueError, InconsistencyError) as e: except (ValueError, InconsistencyError) as e:
prof['nb_inconsistent'] += 1
if check_each_change != 1 and not raised_warning: if check_each_change != 1 and not raised_warning:
print(("Some inplace optimization was not " print(("Some inplace optimization was not "
"performed due to unexpected error:"), "performed due to unexpected error:"),
...@@ -362,9 +396,14 @@ def inplace_elemwise_optimizer_op(OP): ...@@ -362,9 +396,14 @@ def inplace_elemwise_optimizer_op(OP):
"performed due to unexpected error"), "performed due to unexpected error"),
file=sys.stderr) file=sys.stderr)
fgraph.revert(chk) fgraph.revert(chk)
return prof
def print_summary(self, stream=sys.stdout, level=0, depth=-1):
print("%s%s (%s)" % (
(' ' * level), self.__class__.__name__, self.op), file=stream)
return inplace_elemwise_optimizer return inplace_elemwise_optimizer
inplace_elemwise_optimizer = inplace_elemwise_optimizer_op(T.Elemwise) inplace_elemwise_optimizer = InplaceElemwiseOptimizer(T.Elemwise)
compile.optdb.register('inplace_elemwise_opt', inplace_elemwise_optimizer, 75, compile.optdb.register('inplace_elemwise_opt', inplace_elemwise_optimizer, 75,
'inplace_opt', # for historic reason 'inplace_opt', # for historic reason
'inplace_elemwise_optimizer', 'inplace_elemwise_optimizer',
...@@ -830,8 +869,7 @@ class MakeVectorPrinter: ...@@ -830,8 +869,7 @@ class MakeVectorPrinter:
else: else:
raise TypeError("Can only print make_vector.") raise TypeError("Can only print make_vector.")
T.pprint.assign(lambda pstate, r: r.owner and T.pprint.assign(MakeVector, MakeVectorPrinter())
isinstance(r.owner.op, MakeVector), MakeVectorPrinter())
class ShapeFeature(object): class ShapeFeature(object):
......
...@@ -1002,8 +1002,7 @@ class SubtensorPrinter: ...@@ -1002,8 +1002,7 @@ class SubtensorPrinter:
else: else:
raise TypeError("Can only print Subtensor.") raise TypeError("Can only print Subtensor.")
pprint.assign(lambda pstate, r: r.owner and isinstance(r.owner.op, Subtensor), pprint.assign(Subtensor, SubtensorPrinter())
SubtensorPrinter())
def set_subtensor(x, y, inplace=False, def set_subtensor(x, y, inplace=False,
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论