提交 c2a6a650 作者: sentient07

Addressed comments

上级 c8ba3f55
...@@ -2413,7 +2413,7 @@ class EquilibriumOptimizer(NavigatorOptimizer): ...@@ -2413,7 +2413,7 @@ class EquilibriumOptimizer(NavigatorOptimizer):
for (t, count, n_created, o) in count_opt[::-1]: for (t, count, n_created, o) in count_opt[::-1]:
print(blanc, ' %.3fs - %d - %d - %s' % ( print(blanc, ' %.3fs - %d - %d - %s' % (
t, count, n_created, o), file=stream) t, count, n_created, o), file=stream)
print(blanc, ' %.3fs - in %d optimization that where not used (display only those with a runtime > 0)' % ( print(blanc, ' %.3fs - in %d optimization that were not used (display only those with a runtime > 0)' % (
not_used_time, len(not_used)), file=stream) not_used_time, len(not_used)), file=stream)
not_used.sort(key=lambda nu: (nu[0], str(nu[1]))) not_used.sort(key=lambda nu: (nu[0], str(nu[1])))
for (t, o) in not_used[::-1]: for (t, o) in not_used[::-1]:
......
...@@ -4,7 +4,7 @@ import numpy ...@@ -4,7 +4,7 @@ import numpy
import logging import logging
import pdb import pdb
import time import time
from six import itervalues, iteritems from six import iteritems
from six.moves import xrange from six.moves import xrange
import theano import theano
...@@ -265,23 +265,11 @@ class GraphToGPU(NavigatorOptimizer): ...@@ -265,23 +265,11 @@ class GraphToGPU(NavigatorOptimizer):
def add_requirements(self, fgraph): def add_requirements(self, fgraph):
fgraph.attach_feature(toolbox.ReplaceValidate()) fgraph.attach_feature(toolbox.ReplaceValidate())
def get_local_optimizers(self):
for opt in self.local_optimizers_all:
yield opt
# if repeat is not a problem we can drop the set
s = set()
for lopt in itervalues(self.local_optimizers_map):
for opt in lopt:
if opt not in s:
yield opt
s.add(opt)
def apply(self, fgraph): def apply(self, fgraph):
mapping = {} mapping = {}
time_opts = {} time_opts = {}
node_created = {} node_created = {}
process_count = {} process_count = {}
io_toposort_timing = []
# Building a new graph # Building a new graph
# Iterating through inputs of graph # Iterating through inputs of graph
for i in fgraph.inputs: for i in fgraph.inputs:
...@@ -293,19 +281,19 @@ class GraphToGPU(NavigatorOptimizer): ...@@ -293,19 +281,19 @@ class GraphToGPU(NavigatorOptimizer):
if isinstance(i, theano.Constant): if isinstance(i, theano.Constant):
mapping[i] = i mapping[i] = i
for node in fgraph.toposort(): for node in fgraph.toposort():
for lopt in (self.local_optimizers_all + for lopt in (self.local_optimizers_map.get(node.op, []) +
self.local_optimizers_map.get(type(node.op), []) + self.local_optimizers_map.get(type(node.op), []) +
self.local_optimizers_map.get(node.op, [])): self.local_optimizers_all):
process_count.setdefault(lopt, 0) process_count.setdefault(lopt, 0)
time_opts.setdefault(lopt, 0) time_opts.setdefault(lopt, 0)
node_created.setdefault(lopt, 0) node_created.setdefault(lopt, 0)
t_topo = time.time() t_topo = time.time()
fgraph.toposort() topo = fgraph.toposort()
time_topo = time.time() - t_topo time_topo = time.time() - t_topo
io_toposort_timing.append(time_topo - t_topo) toposort_timing = time_topo - t_topo
for node in fgraph.toposort(): for node in topo:
if isinstance(node.op, HostFromGpu): if isinstance(node.op, HostFromGpu):
mapping[node.outputs[0]] = node.inputs[0] mapping[node.outputs[0]] = node.inputs[0]
...@@ -334,9 +322,9 @@ class GraphToGPU(NavigatorOptimizer): ...@@ -334,9 +322,9 @@ class GraphToGPU(NavigatorOptimizer):
new_ops = None new_ops = None
outputs = [] outputs = []
# Apply the lifter # Apply the lifter
for lopt in (self.local_optimizers_all + for lopt in (self.local_optimizers_map.get(node.op, []) +
self.local_optimizers_map.get(type(node.op), []) + self.local_optimizers_map.get(type(node.op), []) +
self.local_optimizers_map.get(node.op, [])): self.local_optimizers_all):
if move_to_GPU: if move_to_GPU:
t_opt = time.time() t_opt = time.time()
...@@ -392,11 +380,11 @@ class GraphToGPU(NavigatorOptimizer): ...@@ -392,11 +380,11 @@ class GraphToGPU(NavigatorOptimizer):
new_nodes.append(new_o) new_nodes.append(new_o)
fgraph.replace_all_validate(zip(fgraph.outputs, new_nodes)) fgraph.replace_all_validate(zip(fgraph.outputs, new_nodes))
return (self, io_toposort_timing, time_opts, node_created, process_count) return (self, toposort_timing, time_opts, node_created, process_count)
@staticmethod @staticmethod
def print_profile(stream, prof, level=0): def print_profile(stream, prof, level=0):
(opt, io_toposort_timing, time_opts, node_created, process_count) = prof (opt, toposort_timing, time_opts, node_created, process_count) = prof
blanc = (' ' * level) blanc = (' ' * level)
print(blanc, "GraphToGPUOptimizer", end=' ', file=stream) print(blanc, "GraphToGPUOptimizer", end=' ', file=stream)
...@@ -404,7 +392,7 @@ class GraphToGPU(NavigatorOptimizer): ...@@ -404,7 +392,7 @@ class GraphToGPU(NavigatorOptimizer):
getattr(opt, "__name__", "")), file=stream) getattr(opt, "__name__", "")), file=stream)
print(blanc, " time io_toposort %.3fs" % sum( print(blanc, " time io_toposort %.3fs" % sum(
io_toposort_timing), file=stream) toposort_timing), file=stream)
s = sum([v for k, v in time_opts.iteritems()]) s = sum([v for k, v in time_opts.iteritems()])
print(blanc, "Total time taken by local optimizers %.3fs " % s, file=stream) print(blanc, "Total time taken by local optimizers %.3fs " % s, file=stream)
...@@ -440,8 +428,7 @@ class GraphToGPU(NavigatorOptimizer): ...@@ -440,8 +428,7 @@ class GraphToGPU(NavigatorOptimizer):
@staticmethod @staticmethod
def merge_profile(prof1, prof2): def merge_profile(prof1, prof2):
# (opt, loop_timing, loop_process_count, max_nb_nodes, # (opt, toposort_timing, time_opts, node_created, process_count) = prof1
# global_opt_timing, nb_nodes, time_opts, io_toposort_timing) = prof1
local_optimizers = OrderedSet(prof1[0].local_optimizers_all).union( local_optimizers = OrderedSet(prof1[0].local_optimizers_all).union(
prof2[0].local_optimizers_all) prof2[0].local_optimizers_all)
...@@ -470,12 +457,12 @@ class GraphToGPU(NavigatorOptimizer): ...@@ -470,12 +457,12 @@ class GraphToGPU(NavigatorOptimizer):
l.append(nb) l.append(nb)
return l return l
io_toposort_timing = merge_list(prof1[1], prof2[1]) toposort_timing = prof1[1] + prof2[1]
time_opts = merge_dict(prof1[2], prof2[2]) time_opts = merge_dict(prof1[2], prof2[2])
node_created = merge_dict(prof1[3], prof2[3]) node_created = merge_dict(prof1[3], prof2[3])
process_count = merge_dict(prof1[4], prof2[4]) process_count = merge_dict(prof1[4], prof2[4])
return (new_opt, return (new_opt,
io_toposort_timing, toposort_timing,
time_opts, time_opts,
node_created, node_created,
process_count) process_count)
...@@ -848,6 +835,7 @@ def local_gpu_pdbbreakpoint_op(node): ...@@ -848,6 +835,7 @@ def local_gpu_pdbbreakpoint_op(node):
def local_gpua_lazy_ifelse(op, context_name, inputs): def local_gpua_lazy_ifelse(op, context_name, inputs):
if op.gpu: if op.gpu:
return return
# this node is already on GPU, so don't change the graph
if isinstance(inputs[0].type, GpuArrayType): if isinstance(inputs[0].type, GpuArrayType):
return return
c = inputs[0] c = inputs[0]
...@@ -1193,11 +1181,10 @@ def local_gpua_softmaxwithbias(node, context_name): ...@@ -1193,11 +1181,10 @@ def local_gpua_softmaxwithbias(node, context_name):
@register_opt2([theano.tensor.opt.Assert], 'fast_compile') @register_opt2([theano.tensor.opt.Assert], 'fast_compile')
def local_assert(op, context_name, inputs): def local_assert(op, context_name, inputs):
# Check if input nodes are already on the GPU # Check if input nodes are already on the GPU
if isinstance(node.inputs[0].type, GpuArrayType): if isinstance(inputs[0].type, GpuArrayType):
return return
return [host_from_gpu(op(as_gpuarray_variable(inputs[0], return [op(as_gpuarray_variable(inputs[0], context_name),
context_name), *inputs[1:])]
*inputs[1:]))]
@register_opt('fast_compile') @register_opt('fast_compile')
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论