提交 539ca7eb authored 作者: abergeron's avatar abergeron 提交者: GitHub

Merge pull request #4831 from nouiz/compiledir_format

Add device in the compiledir_format
......@@ -1769,12 +1769,13 @@ class _Linker(gof.link.LocalLinker):
if schedule:
self.schedule = schedule
def accept(self, fgraph, no_recycling=None):
def accept(self, fgraph, no_recycling=None, profile=None):
if no_recycling is None:
no_recycling = []
if self.fgraph is not None and self.fgraph is not fgraph:
assert type(self) is _Linker
return type(self)(maker=self.maker).accept(fgraph, no_recycling)
return type(self)(maker=self.maker).accept(
fgraph, no_recycling, profile)
self.fgraph = fgraph
self.no_recycling = no_recycling
return self
......
......@@ -1500,9 +1500,10 @@ class FunctionMaker(object):
if not spec.borrow]
if no_borrow:
self.linker = linker.accept(
fgraph, no_recycling=infer_reuse_pattern(fgraph, no_borrow))
fgraph, no_recycling=infer_reuse_pattern(fgraph, no_borrow),
profile=profile)
else:
self.linker = linker.accept(fgraph)
self.linker = linker.accept(fgraph, profile=profile)
if hasattr(linker, 'accept_var_updates'):
# hacky thing so VMLinker knows about updates
......
......@@ -72,7 +72,8 @@ def _atexit_print_fn():
for ps in to_sum[1:]:
for attr in ["compile_time", "fct_call_time", "fct_callcount",
"vm_call_time", "optimizer_time", "linker_time",
"validate_time", "import_time"]:
"validate_time", "import_time",
"linker_node_make_thunks"]:
setattr(cum, attr, getattr(cum, attr) + getattr(ps, attr))
# merge dictionary
......@@ -190,6 +191,8 @@ class ProfileStats(object):
import_time = 0.0
# time spent in importing compiled python module.
linker_node_make_thunks = 0.0
line_width = config.profiling.output_line_width
nb_nodes = -1
......@@ -665,6 +668,8 @@ class ProfileStats(object):
print(' Theano Linker time (includes C, CUDA code '
'generation/compiling): %es' % self.linker_time, file=file)
print(' Import time %es' % self.import_time, file=file)
print(' Node make_thunk time %es' % self.linker_node_make_thunks,
file=file)
print('', file=file)
# The validation time is a subset of optimizer_time
......
......@@ -1630,6 +1630,8 @@ def short_platform(r=None, p=None):
return p
compiledir_format_dict['short_platform'] = short_platform()
# Allow to have easily one compiledir per device.
compiledir_format_dict['device'] = config.device
compiledir_format_keys = ", ".join(sorted(compiledir_format_dict.keys()))
default_compiledir_format = ("compiledir_%(short_platform)s-%(processor)s-"
"%(python_version)s-%(python_bitwidth)s")
......
......@@ -548,7 +548,7 @@ class CLinker(link.Linker):
if schedule:
self.schedule = schedule
def accept(self, fgraph, no_recycling=None):
def accept(self, fgraph, no_recycling=None, profile=None):
"""
Associate linker with fgraph
......@@ -557,7 +557,8 @@ class CLinker(link.Linker):
no_recycling = []
if self.fgraph is not None and self.fgraph is not fgraph:
# A linker can be tied to only one FunctionGraph.
return type(self)(self.schedule).accept(fgraph, no_recycling)
return type(self)(self.schedule).accept(
fgraph, no_recycling, profile)
self.fgraph = fgraph
self.fetch_variables()
self.no_recycling = no_recycling
......@@ -1737,7 +1738,7 @@ class OpWiseCLinker(link.LocalLinker):
if schedule:
self.schedule = schedule
def accept(self, fgraph, no_recycling=None):
def accept(self, fgraph, no_recycling=None, profile=None):
"""
Associate linker with fgraph
"""
......@@ -1750,7 +1751,7 @@ class OpWiseCLinker(link.LocalLinker):
allow_gc=self.allow_gc,
nice_errors=self.nice_errors,
schedule=self.schedule,
).accept(fgraph, no_recycling)
).accept(fgraph, no_recycling, profile)
self.fgraph = fgraph
self.no_recycling = no_recycling
return self
......@@ -1897,7 +1898,7 @@ class DualLinker(link.Linker):
if schedule:
self.schedule = schedule
def accept(self, fgraph, no_recycling=None):
def accept(self, fgraph, no_recycling=None, profile=None):
"""
Update/tie self with fgraph
"""
......@@ -1905,7 +1906,7 @@ class DualLinker(link.Linker):
no_recycling = []
if self.fgraph is not None and self.fgraph is not fgraph:
return type(self)(self.checker, self.schedule).accept(
fgraph, no_recycling)
fgraph, no_recycling, profile)
self.fgraph = fgraph
self.no_recycling = no_recycling
return self
......
......@@ -762,7 +762,7 @@ class PerformLinker(LocalLinker):
if schedule:
self.schedule = schedule
def accept(self, fgraph, no_recycling=None):
def accept(self, fgraph, no_recycling=None, profile=None):
"""
Parameters
......@@ -781,7 +781,8 @@ class PerformLinker(LocalLinker):
if no_recycling is None:
no_recycling = []
if self.fgraph is not None and self.fgraph is not fgraph:
return type(self)(allow_gc=self.allow_gc).accept(fgraph, no_recycling)
return type(self)(allow_gc=self.allow_gc).accept(
fgraph, no_recycling, profile)
# raise Exception("Cannot accept from a Linker that is already tied to another FunctionGraph.")
self.fgraph = fgraph
self.no_recycling = no_recycling
......@@ -944,7 +945,7 @@ class WrapLinker(Linker):
linkers=[l.clone(allow_gc=allow_gc) for l in self.linkers],
wrapper=self.wrapper)
def accept(self, fgraph, no_recycling=None):
def accept(self, fgraph, no_recycling=None, profile=None):
"""
Parameters
......
......@@ -731,7 +731,7 @@ class VM_Linker(link.LocalLinker):
if schedule:
self.schedule = schedule
def accept(self, fgraph, no_recycling=None):
def accept(self, fgraph, no_recycling=None, profile=None):
"""
Check if fgraph is the first FunctionGraph that has ever been
associated to self, else, create a new VM_Linker
......@@ -779,9 +779,11 @@ class VM_Linker(link.LocalLinker):
schedule=self.schedule,
c_thunks=self.c_thunks,
allow_partial_eval=self.allow_partial_eval
).accept(fgraph, no_recycling)
).accept(fgraph, no_recycling, profile)
self.fgraph = fgraph
self.no_recycling = no_recycling
self.profile = profile
return self
def accept_var_updates(self, updated_vars):
......@@ -1038,7 +1040,7 @@ class VM_Linker(link.LocalLinker):
reallocated_info = calculate_reallocate_info(
order, fgraph, storage_map, compute_map_re, dependencies)
t0 = time.time()
for node in order:
try:
if self.c_thunks is False:
......@@ -1056,6 +1058,11 @@ class VM_Linker(link.LocalLinker):
e.args = ("The following error happened while"
" compiling the node", node, "\n") + e.args
raise
t1 = time.time()
if self.profile:
self.profile.linker_node_make_thunks += t1 - t0
for node, thunk in zip(order, thunks):
thunk.inputs = [storage_map[v] for v in node.inputs]
thunk.outputs = [storage_map[v] for v in node.outputs]
......
......@@ -6,6 +6,7 @@ import pdb
import time
from six import iteritems
from six.moves import xrange
import sys
import theano
from theano import tensor, scalar, gof, config
......@@ -13,7 +14,6 @@ from theano.compile import optdb
from theano.compile.ops import shape_i
from theano.gof import (local_optimizer, EquilibriumDB, TopoOptimizer,
SequenceDB, Optimizer, DB, toolbox, graph)
from theano.gof.opt import NavigatorOptimizer
from theano.ifelse import IfElse
from theano.misc.ordered_set import OrderedSet
......@@ -262,7 +262,7 @@ gpu_seqopt.register('InputToGpuArrayOptimizer', InputToGpuOptimizer(),
0, 'fast_run', 'fast_compile', 'merge')
class GraphToGPU(NavigatorOptimizer):
class GraphToGPU(Optimizer):
"""
Transfer the graph as a whole to GPU instead of transferring node by node.
......@@ -485,6 +485,16 @@ class GraphToGPU(NavigatorOptimizer):
node_created,
process_count)
def print_summary(self, stream=sys.stdout, level=0, depth=-1):
print("%s%s (%i)" % (
(' ' * level), self.__class__.__name__, id(self)), file=stream)
if depth != 0:
map_values = []
for opts in self.local_optimizers_map.values():
map_values += opts
for opt in self.local_optimizers_all + map_values:
opt.print_summary(stream, level=(level + 2), depth=(depth - 1))
@local_optimizer([GpuFromHost, GpuToGpu, HostFromGpu])
def local_cut_gpu_transfers(node):
......
......@@ -700,6 +700,8 @@ def local_gpu_solve(node):
CpuSolve(host_from_gpu) -> host_from_gpu(GpuSolve)
"""
if node.outputs[0].dtype != 'float32':
return
if isinstance(node.op, GpuFromHost):
host_input = node.inputs[0]
if (host_input.owner and
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论