提交 1b1a8505 authored 作者: sentient07's avatar sentient07

Removed certain params from print_profile

上级 b5772416
...@@ -4,8 +4,8 @@ import numpy ...@@ -4,8 +4,8 @@ import numpy
import logging import logging
import pdb import pdb
import time import time
from six import itervalues, iteritems
from six.moves import xrange from six.moves import xrange
from collections import deque
import theano import theano
from theano.compat import OrderedDict from theano.compat import OrderedDict
...@@ -264,19 +264,26 @@ class GraphToGPU(NavigatorOptimizer): ...@@ -264,19 +264,26 @@ class GraphToGPU(NavigatorOptimizer):
self.local_optimizers_all = local_optimizers_all self.local_optimizers_all = local_optimizers_all
self.local_optimizers_map = local_optimizers_map self.local_optimizers_map = local_optimizers_map
self.failure_callback = None self.failure_callback = None
self.new_opts = []
def add_requirements(self, fgraph): def add_requirements(self, fgraph):
fgraph.attach_feature(toolbox.ReplaceValidate()) fgraph.attach_feature(toolbox.ReplaceValidate())
def get_local_optimizers(self):
for opt in self.local_optimizers_all:
yield opt
# if repeat is not a problem we can drop the set
s = set()
for lopt in itervalues(self.local_optimizers_map):
for opt in lopt:
if opt not in s:
yield opt
s.add(opt)
def apply(self, fgraph): def apply(self, fgraph):
change_tracker = ChangeTracker()
mapping = {} mapping = {}
global_process_count = {}
start_nb_nodes = len(fgraph.apply_nodes) start_nb_nodes = len(fgraph.apply_nodes)
max_nb_nodes = len(fgraph.apply_nodes) max_nb_nodes = len(fgraph.apply_nodes)
loop_timing = []
loop_process_count = []
local_opt_timing = []
io_toposort_timing = [] io_toposort_timing = []
nb_nodes = [] nb_nodes = []
time_opts = {} time_opts = {}
...@@ -297,15 +304,12 @@ class GraphToGPU(NavigatorOptimizer): ...@@ -297,15 +304,12 @@ class GraphToGPU(NavigatorOptimizer):
self.local_optimizers_map.get(type(node.op), []) + self.local_optimizers_map.get(type(node.op), []) +
self.local_optimizers_map.get(node.op, [])): self.local_optimizers_map.get(node.op, [])):
process_count.setdefault(lopt, 0) process_count.setdefault(lopt, 0)
global_process_count.setdefault(lopt, 0)
time_opts.setdefault(lopt, 0) time_opts.setdefault(lopt, 0)
node_created.setdefault(lopt, 0) node_created.setdefault(lopt, 0)
topo_t0 = time.time() t_topo = time.time()
q = deque(graph.io_toposort(fgraph.inputs, fgraph.outputs)) topo = fgraph.toposort()
io_toposort_timing.append(time.time() - topo_t0) time_topo = time.time() - t_topo
nb_nodes.append(len(q))
max_nb_nodes = max(max_nb_nodes, len(q))
for node in fgraph.toposort(): for node in fgraph.toposort():
...@@ -342,11 +346,10 @@ class GraphToGPU(NavigatorOptimizer): ...@@ -342,11 +346,10 @@ class GraphToGPU(NavigatorOptimizer):
self.local_optimizers_map.get(type(node.op), []) + self.local_optimizers_map.get(type(node.op), []) +
self.local_optimizers_map.get(node.op, [])): self.local_optimizers_map.get(node.op, [])):
nb = change_tracker.nb_imported
process_count[lopt] += 1 process_count[lopt] += 1
global_process_count[lopt] += 1
node_created[lopt] += change_tracker.nb_imported - nb
if move_to_GPU: if move_to_GPU:
node_created[lopt] += len(theano.gof.graph.ops([mapping[i] for i in node.inputs], node.outputs))
t_opt = time.time()
try: try:
new_ops = lopt.transform( new_ops = lopt.transform(
node.op, context_name, node.op, context_name,
...@@ -355,9 +358,11 @@ class GraphToGPU(NavigatorOptimizer): ...@@ -355,9 +358,11 @@ class GraphToGPU(NavigatorOptimizer):
new_ops = lopt.transform(node.op, context_name, new_ops = lopt.transform(node.op, context_name,
[mapping[i] for i in node.inputs], [mapping[i] for i in node.inputs],
out_clients) out_clients)
finally:
time_opts[lopt] += time.time() - t_opt
self.new_opts.append(lopt)
if new_ops: if new_ops:
break break
local_opt_timing.append(float(time.time() - t0))
if not new_ops: if not new_ops:
newnode = node.clone_with_new_inputs([mapping.get(i) newnode = node.clone_with_new_inputs([mapping.get(i)
for i in node.inputs]) for i in node.inputs])
...@@ -380,9 +385,6 @@ class GraphToGPU(NavigatorOptimizer): ...@@ -380,9 +385,6 @@ class GraphToGPU(NavigatorOptimizer):
for new_o, old_o in zip(outputs, node.outputs): for new_o, old_o in zip(outputs, node.outputs):
mapping[old_o] = new_o mapping[old_o] = new_o
loop_process_count.append(process_count)
loop_timing.append(float(time.time() - t0))
new_nodes = [] new_nodes = []
for o in fgraph.outputs: for o in fgraph.outputs:
new_o = mapping[o] new_o = mapping[o]
...@@ -393,53 +395,35 @@ class GraphToGPU(NavigatorOptimizer): ...@@ -393,53 +395,35 @@ class GraphToGPU(NavigatorOptimizer):
new_nodes.append(new_o) new_nodes.append(new_o)
fgraph.replace_all_validate(zip(fgraph.outputs, new_nodes)) fgraph.replace_all_validate(zip(fgraph.outputs, new_nodes))
end_nb_nodes = len(fgraph.apply_nodes)
return (self, start_nb_nodes, end_nb_nodes, max_nb_nodes, io_toposort_timing,
nb_nodes, time_opts, node_created)
@staticmethod @staticmethod
def print_profile(stream, prof, level=0): def print_profile(stream, prof, level=0):
(opt, loop_timing, loop_process_count, (opt, start_nb_nodes, end_nb_nodes, max_nb_nodes, io_toposort_timing,
(start_nb_nodes, end_nb_nodes, max_nb_nodes), nb_nodes, time_opts, node_created) = prof
local_opt_timing, nb_nodes, time_opts, io_toposort_timing,
node_created) = prof
blanc = (' ' * level) blanc = (' ' * level)
print(blanc, "GraphToGPUOptimizer", end=' ', file=stream) print(blanc, "GraphToGPUOptimizer", end=' ', file=stream)
print(blanc, getattr(opt, "name", print(blanc, getattr(opt, "name",
getattr(opt, "__name__", "")), file=stream) getattr(opt, "__name__", "")), file=stream)
print(blanc, " time %.3fs for %d passes" % (
sum(loop_timing), len(loop_timing)), file=stream)
print(blanc, " nb nodes (start, end, max) %d %d %d" % ( print(blanc, " nb nodes (start, end, max) %d %d %d" % (
start_nb_nodes, end_nb_nodes, max_nb_nodes), file=stream) start_nb_nodes, end_nb_nodes, max_nb_nodes), file=stream)
print(blanc, " time io_toposort %.3fs" % sum( print(blanc, " time io_toposort %.3fs" % sum(
io_toposort_timing), file=stream) io_toposort_timing), file=stream)
s = sum([time_opts[o] for o in opt.local_optimizers_all])
print(blanc, " time in local optimizers %.3fs" % s, file=stream) s = sum([time_opts[o] for o in opt.new_opts])
for i in range(len(loop_timing)): print(blanc, " time in local optimizers %.3fs" % s, file=stream)
lopt = ""
if loop_process_count[i]:
d = list(reversed(sorted(iteritems(loop_process_count[i]),
key=lambda a: a[1])))
lopt = " ".join([str((str(k), v)) for k, v
in d[:5]])
if len(d) > 5:
lopt += " ..."
print(blanc, (' %2d - %.3fs %d (%.3fs in global opts, '
'%.3fs io_toposort) - %d nodes - %s' % (
i, loop_timing[i],
sum(loop_process_count[i].values()),
local_opt_timing[i],
io_toposort_timing[i], nb_nodes[i],
lopt)), file=stream)
count_opt = [] count_opt = []
not_used = [] not_used = []
not_used_time = 0 not_used_time = 0
process_count = {} process_count = {}
for o in (opt.local_optimizers_all + for o in (opt.new_opts):
list(opt.local_optimizers_map.get(type(node.op), [])) +
list(opt.local_optimizers_map.get(node.op, []))):
process_count.setdefault(o, 0) process_count.setdefault(o, 0)
for count in loop_process_count:
for o, v in iteritems(count):
process_count[o] += v
for o, count in iteritems(process_count): for o, count in iteritems(process_count):
if count > 0: if count > 0:
count_opt.append((time_opts[o], count, count_opt.append((time_opts[o], count,
...@@ -497,37 +481,22 @@ class GraphToGPU(NavigatorOptimizer): ...@@ -497,37 +481,22 @@ class GraphToGPU(NavigatorOptimizer):
l[idx] += nb l[idx] += nb
else: else:
l.append(nb) l.append(nb)
return l return l
loop_timing = merge_list(prof1[1], prof2[1])
loop_process_count = list(prof1[2])
for i in range(min(len(loop_process_count), len(prof2[2]))):
process_count = loop_process_count[i]
for process, count in iteritems(prof2[2][i]):
if process in process_count:
process_count[process] += count
else:
process_count[process] = count
loop_process_count.extend(prof2[2][len(loop_process_count):])
max_nb_nodes = max(prof1[3], prof2[3]) max_nb_nodes = max(prof1[3], prof2[3])
nb_nodes = merge_list(prof1[4], prof2[4]) io_toposort_timing = merge_list(prof1[4], prof2[4])
time_opts = merge_dict(prof1[5], prof2[5]) nb_nodes = merge_list(prof1[5], prof2[5])
io_toposort_timing = merge_list(prof1[6], prof2[6])
assert len(loop_timing) == max(len(prof1[1]), len(prof2[1])) time_opts = merge_dict(prof1[6], prof2[6])
node_created = merge_dict(prof1[7], prof2[7]) node_created = merge_dict(prof1[7], prof2[7])
return (new_opt, return (new_opt,
loop_timing,
loop_process_count,
max_nb_nodes, max_nb_nodes,
io_toposort_timing,
nb_nodes, nb_nodes,
time_opts, time_opts,
io_toposort_timing,
node_created) node_created)
...@@ -624,7 +593,7 @@ def local_gpuaalloc(op, context_name, inputs): ...@@ -624,7 +593,7 @@ def local_gpuaalloc(op, context_name, inputs):
def local_gpuaallocempty(op, context_name, inputs): def local_gpuaallocempty(op, context_name, inputs):
# We use _props_dict() to make sure that the GPU op know all the # We use _props_dict() to make sure that the GPU op know all the
# CPU op props. # CPU op props.
return gpu_alloc_empty(context_name=context_name, return GpuAllocEmpty(context_name=context_name,
**op._props_dict())(*inputs) **op._props_dict())(*inputs)
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论