提交 539ca7eb 作者: abergeron 提交者: GitHub

Merge pull request #4831 from nouiz/compiledir_format

Add device in the compiledir_format
...@@ -1769,12 +1769,13 @@ class _Linker(gof.link.LocalLinker): ...@@ -1769,12 +1769,13 @@ class _Linker(gof.link.LocalLinker):
if schedule: if schedule:
self.schedule = schedule self.schedule = schedule
def accept(self, fgraph, no_recycling=None): def accept(self, fgraph, no_recycling=None, profile=None):
if no_recycling is None: if no_recycling is None:
no_recycling = [] no_recycling = []
if self.fgraph is not None and self.fgraph is not fgraph: if self.fgraph is not None and self.fgraph is not fgraph:
assert type(self) is _Linker assert type(self) is _Linker
return type(self)(maker=self.maker).accept(fgraph, no_recycling) return type(self)(maker=self.maker).accept(
fgraph, no_recycling, profile)
self.fgraph = fgraph self.fgraph = fgraph
self.no_recycling = no_recycling self.no_recycling = no_recycling
return self return self
......
...@@ -1500,9 +1500,10 @@ class FunctionMaker(object): ...@@ -1500,9 +1500,10 @@ class FunctionMaker(object):
if not spec.borrow] if not spec.borrow]
if no_borrow: if no_borrow:
self.linker = linker.accept( self.linker = linker.accept(
fgraph, no_recycling=infer_reuse_pattern(fgraph, no_borrow)) fgraph, no_recycling=infer_reuse_pattern(fgraph, no_borrow),
profile=profile)
else: else:
self.linker = linker.accept(fgraph) self.linker = linker.accept(fgraph, profile=profile)
if hasattr(linker, 'accept_var_updates'): if hasattr(linker, 'accept_var_updates'):
# hacky thing so VMLinker knows about updates # hacky thing so VMLinker knows about updates
......
...@@ -72,7 +72,8 @@ def _atexit_print_fn(): ...@@ -72,7 +72,8 @@ def _atexit_print_fn():
for ps in to_sum[1:]: for ps in to_sum[1:]:
for attr in ["compile_time", "fct_call_time", "fct_callcount", for attr in ["compile_time", "fct_call_time", "fct_callcount",
"vm_call_time", "optimizer_time", "linker_time", "vm_call_time", "optimizer_time", "linker_time",
"validate_time", "import_time"]: "validate_time", "import_time",
"linker_node_make_thunks"]:
setattr(cum, attr, getattr(cum, attr) + getattr(ps, attr)) setattr(cum, attr, getattr(cum, attr) + getattr(ps, attr))
# merge dictionary # merge dictionary
...@@ -190,6 +191,8 @@ class ProfileStats(object): ...@@ -190,6 +191,8 @@ class ProfileStats(object):
import_time = 0.0 import_time = 0.0
# time spent in importing compiled python module. # time spent in importing compiled python module.
linker_node_make_thunks = 0.0
line_width = config.profiling.output_line_width line_width = config.profiling.output_line_width
nb_nodes = -1 nb_nodes = -1
...@@ -665,6 +668,8 @@ class ProfileStats(object): ...@@ -665,6 +668,8 @@ class ProfileStats(object):
print(' Theano Linker time (includes C, CUDA code ' print(' Theano Linker time (includes C, CUDA code '
'generation/compiling): %es' % self.linker_time, file=file) 'generation/compiling): %es' % self.linker_time, file=file)
print(' Import time %es' % self.import_time, file=file) print(' Import time %es' % self.import_time, file=file)
print(' Node make_thunk time %es' % self.linker_node_make_thunks,
file=file)
print('', file=file) print('', file=file)
# The validation time is a subset of optimizer_time # The validation time is a subset of optimizer_time
......
...@@ -1630,6 +1630,8 @@ def short_platform(r=None, p=None): ...@@ -1630,6 +1630,8 @@ def short_platform(r=None, p=None):
return p return p
compiledir_format_dict['short_platform'] = short_platform() compiledir_format_dict['short_platform'] = short_platform()
# Make it easy to have one compiledir per device.
compiledir_format_dict['device'] = config.device
compiledir_format_keys = ", ".join(sorted(compiledir_format_dict.keys())) compiledir_format_keys = ", ".join(sorted(compiledir_format_dict.keys()))
default_compiledir_format = ("compiledir_%(short_platform)s-%(processor)s-" default_compiledir_format = ("compiledir_%(short_platform)s-%(processor)s-"
"%(python_version)s-%(python_bitwidth)s") "%(python_version)s-%(python_bitwidth)s")
......
...@@ -548,7 +548,7 @@ class CLinker(link.Linker): ...@@ -548,7 +548,7 @@ class CLinker(link.Linker):
if schedule: if schedule:
self.schedule = schedule self.schedule = schedule
def accept(self, fgraph, no_recycling=None): def accept(self, fgraph, no_recycling=None, profile=None):
""" """
Associate linker with fgraph Associate linker with fgraph
...@@ -557,7 +557,8 @@ class CLinker(link.Linker): ...@@ -557,7 +557,8 @@ class CLinker(link.Linker):
no_recycling = [] no_recycling = []
if self.fgraph is not None and self.fgraph is not fgraph: if self.fgraph is not None and self.fgraph is not fgraph:
# A linker can be tied to only one FunctionGraph. # A linker can be tied to only one FunctionGraph.
return type(self)(self.schedule).accept(fgraph, no_recycling) return type(self)(self.schedule).accept(
fgraph, no_recycling, profile)
self.fgraph = fgraph self.fgraph = fgraph
self.fetch_variables() self.fetch_variables()
self.no_recycling = no_recycling self.no_recycling = no_recycling
...@@ -1737,7 +1738,7 @@ class OpWiseCLinker(link.LocalLinker): ...@@ -1737,7 +1738,7 @@ class OpWiseCLinker(link.LocalLinker):
if schedule: if schedule:
self.schedule = schedule self.schedule = schedule
def accept(self, fgraph, no_recycling=None): def accept(self, fgraph, no_recycling=None, profile=None):
""" """
Associate linker with fgraph Associate linker with fgraph
""" """
...@@ -1750,7 +1751,7 @@ class OpWiseCLinker(link.LocalLinker): ...@@ -1750,7 +1751,7 @@ class OpWiseCLinker(link.LocalLinker):
allow_gc=self.allow_gc, allow_gc=self.allow_gc,
nice_errors=self.nice_errors, nice_errors=self.nice_errors,
schedule=self.schedule, schedule=self.schedule,
).accept(fgraph, no_recycling) ).accept(fgraph, no_recycling, profile)
self.fgraph = fgraph self.fgraph = fgraph
self.no_recycling = no_recycling self.no_recycling = no_recycling
return self return self
...@@ -1897,7 +1898,7 @@ class DualLinker(link.Linker): ...@@ -1897,7 +1898,7 @@ class DualLinker(link.Linker):
if schedule: if schedule:
self.schedule = schedule self.schedule = schedule
def accept(self, fgraph, no_recycling=None): def accept(self, fgraph, no_recycling=None, profile=None):
""" """
Update/tie self with fgraph Update/tie self with fgraph
""" """
...@@ -1905,7 +1906,7 @@ class DualLinker(link.Linker): ...@@ -1905,7 +1906,7 @@ class DualLinker(link.Linker):
no_recycling = [] no_recycling = []
if self.fgraph is not None and self.fgraph is not fgraph: if self.fgraph is not None and self.fgraph is not fgraph:
return type(self)(self.checker, self.schedule).accept( return type(self)(self.checker, self.schedule).accept(
fgraph, no_recycling) fgraph, no_recycling, profile)
self.fgraph = fgraph self.fgraph = fgraph
self.no_recycling = no_recycling self.no_recycling = no_recycling
return self return self
......
...@@ -762,7 +762,7 @@ class PerformLinker(LocalLinker): ...@@ -762,7 +762,7 @@ class PerformLinker(LocalLinker):
if schedule: if schedule:
self.schedule = schedule self.schedule = schedule
def accept(self, fgraph, no_recycling=None): def accept(self, fgraph, no_recycling=None, profile=None):
""" """
Parameters Parameters
...@@ -781,7 +781,8 @@ class PerformLinker(LocalLinker): ...@@ -781,7 +781,8 @@ class PerformLinker(LocalLinker):
if no_recycling is None: if no_recycling is None:
no_recycling = [] no_recycling = []
if self.fgraph is not None and self.fgraph is not fgraph: if self.fgraph is not None and self.fgraph is not fgraph:
return type(self)(allow_gc=self.allow_gc).accept(fgraph, no_recycling) return type(self)(allow_gc=self.allow_gc).accept(
fgraph, no_recycling, profile)
# raise Exception("Cannot accept from a Linker that is already tied to another FunctionGraph.") # raise Exception("Cannot accept from a Linker that is already tied to another FunctionGraph.")
self.fgraph = fgraph self.fgraph = fgraph
self.no_recycling = no_recycling self.no_recycling = no_recycling
...@@ -944,7 +945,7 @@ class WrapLinker(Linker): ...@@ -944,7 +945,7 @@ class WrapLinker(Linker):
linkers=[l.clone(allow_gc=allow_gc) for l in self.linkers], linkers=[l.clone(allow_gc=allow_gc) for l in self.linkers],
wrapper=self.wrapper) wrapper=self.wrapper)
def accept(self, fgraph, no_recycling=None): def accept(self, fgraph, no_recycling=None, profile=None):
""" """
Parameters Parameters
......
...@@ -731,7 +731,7 @@ class VM_Linker(link.LocalLinker): ...@@ -731,7 +731,7 @@ class VM_Linker(link.LocalLinker):
if schedule: if schedule:
self.schedule = schedule self.schedule = schedule
def accept(self, fgraph, no_recycling=None): def accept(self, fgraph, no_recycling=None, profile=None):
""" """
Check if fgraph is the first FunctionGraph that has ever been Check if fgraph is the first FunctionGraph that has ever been
associated to self, else, create a new VM_Linker associated to self, else, create a new VM_Linker
...@@ -779,9 +779,11 @@ class VM_Linker(link.LocalLinker): ...@@ -779,9 +779,11 @@ class VM_Linker(link.LocalLinker):
schedule=self.schedule, schedule=self.schedule,
c_thunks=self.c_thunks, c_thunks=self.c_thunks,
allow_partial_eval=self.allow_partial_eval allow_partial_eval=self.allow_partial_eval
).accept(fgraph, no_recycling) ).accept(fgraph, no_recycling, profile)
self.fgraph = fgraph self.fgraph = fgraph
self.no_recycling = no_recycling self.no_recycling = no_recycling
self.profile = profile
return self return self
def accept_var_updates(self, updated_vars): def accept_var_updates(self, updated_vars):
...@@ -1038,7 +1040,7 @@ class VM_Linker(link.LocalLinker): ...@@ -1038,7 +1040,7 @@ class VM_Linker(link.LocalLinker):
reallocated_info = calculate_reallocate_info( reallocated_info = calculate_reallocate_info(
order, fgraph, storage_map, compute_map_re, dependencies) order, fgraph, storage_map, compute_map_re, dependencies)
t0 = time.time()
for node in order: for node in order:
try: try:
if self.c_thunks is False: if self.c_thunks is False:
...@@ -1056,6 +1058,11 @@ class VM_Linker(link.LocalLinker): ...@@ -1056,6 +1058,11 @@ class VM_Linker(link.LocalLinker):
e.args = ("The following error happened while" e.args = ("The following error happened while"
" compiling the node", node, "\n") + e.args " compiling the node", node, "\n") + e.args
raise raise
t1 = time.time()
if self.profile:
self.profile.linker_node_make_thunks += t1 - t0
for node, thunk in zip(order, thunks): for node, thunk in zip(order, thunks):
thunk.inputs = [storage_map[v] for v in node.inputs] thunk.inputs = [storage_map[v] for v in node.inputs]
thunk.outputs = [storage_map[v] for v in node.outputs] thunk.outputs = [storage_map[v] for v in node.outputs]
......
...@@ -6,6 +6,7 @@ import pdb ...@@ -6,6 +6,7 @@ import pdb
import time import time
from six import iteritems from six import iteritems
from six.moves import xrange from six.moves import xrange
import sys
import theano import theano
from theano import tensor, scalar, gof, config from theano import tensor, scalar, gof, config
...@@ -13,7 +14,6 @@ from theano.compile import optdb ...@@ -13,7 +14,6 @@ from theano.compile import optdb
from theano.compile.ops import shape_i from theano.compile.ops import shape_i
from theano.gof import (local_optimizer, EquilibriumDB, TopoOptimizer, from theano.gof import (local_optimizer, EquilibriumDB, TopoOptimizer,
SequenceDB, Optimizer, DB, toolbox, graph) SequenceDB, Optimizer, DB, toolbox, graph)
from theano.gof.opt import NavigatorOptimizer
from theano.ifelse import IfElse from theano.ifelse import IfElse
from theano.misc.ordered_set import OrderedSet from theano.misc.ordered_set import OrderedSet
...@@ -262,7 +262,7 @@ gpu_seqopt.register('InputToGpuArrayOptimizer', InputToGpuOptimizer(), ...@@ -262,7 +262,7 @@ gpu_seqopt.register('InputToGpuArrayOptimizer', InputToGpuOptimizer(),
0, 'fast_run', 'fast_compile', 'merge') 0, 'fast_run', 'fast_compile', 'merge')
class GraphToGPU(NavigatorOptimizer): class GraphToGPU(Optimizer):
""" """
Transfer the graph as a whole to GPU instead of transferring node by node. Transfer the graph as a whole to GPU instead of transferring node by node.
...@@ -485,6 +485,16 @@ class GraphToGPU(NavigatorOptimizer): ...@@ -485,6 +485,16 @@ class GraphToGPU(NavigatorOptimizer):
node_created, node_created,
process_count) process_count)
def print_summary(self, stream=sys.stdout, level=0, depth=-1):
    """
    Print a one-line header for this optimizer, then its local optimizers.

    Parameters
    ----------
    stream
        File-like object the summary is written to.
    level
        Number of spaces printed before the header line; each nested
        optimizer is printed two levels deeper.
    depth
        When 0, only the header line is printed. Otherwise every local
        optimizer is asked to print its own summary with ``depth - 1``.

    """
    indent = ' ' * level
    header = "%s%s (%i)" % (indent, self.__class__.__name__, id(self))
    print(header, file=stream)

    # Nothing below the header is printed once the depth budget is used up.
    if depth == 0:
        return

    # Flatten the per-key optimizer lists into one list, keeping the
    # iteration order of the map's values.
    mapped_opts = [opt
                   for opt_group in self.local_optimizers_map.values()
                   for opt in opt_group]
    for child in self.local_optimizers_all + mapped_opts:
        child.print_summary(stream, level=(level + 2), depth=(depth - 1))
@local_optimizer([GpuFromHost, GpuToGpu, HostFromGpu]) @local_optimizer([GpuFromHost, GpuToGpu, HostFromGpu])
def local_cut_gpu_transfers(node): def local_cut_gpu_transfers(node):
......
...@@ -700,6 +700,8 @@ def local_gpu_solve(node): ...@@ -700,6 +700,8 @@ def local_gpu_solve(node):
CpuSolve(host_from_gpu) -> host_from_gpu(GpuSolve) CpuSolve(host_from_gpu) -> host_from_gpu(GpuSolve)
""" """
if node.outputs[0].dtype != 'float32':
return
if isinstance(node.op, GpuFromHost): if isinstance(node.op, GpuFromHost):
host_input = node.inputs[0] host_input = node.inputs[0]
if (host_input.owner and if (host_input.owner and
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论