提交 67927672 authored 作者: Frédéric Bastien's avatar Frédéric Bastien 提交者: GitHub

Merge pull request #5794 from ReyhaneAskari/faster_topo

Faster topo
......@@ -225,6 +225,20 @@ import theano and print the config variable, as in:
If True, we will print extra scan debug information.
.. attribute:: cycle_detection
String value, either ``regular`` or ``fast``
Default: ``regular``
If :attr:`cycle_detection` is set to ``regular``, most inplace operations
are allowed, but compilation is slower. If :attr:`cycle_detection` is set to
``fast``, fewer inplace operations are allowed, but compilation is faster.
Which of the two settings gives the lower peak memory usage is complicated and
not predictable, so if you are close to the peak memory usage, trying both
could give you a small gain.
.. attribute:: openmp
Bool value: either ``True`` or ``False``
......
......@@ -830,7 +830,6 @@ class ProfileStats(object):
"""
from theano.gpuarray import GpuArrayType
# Initial Mem info values [CPU, GPU]
node_memory_size = [0, 0]
running_memory_size = [0, 0]
......
......@@ -1476,6 +1476,17 @@ AddConfigVar('compile.wait',
IntParam(5, lambda i: i > 0, allow_override=False),
in_c_key=False)
# Register the ``cycle_detection`` option. The only accepted values are the
# ones listed in the EnumStr below: 'regular' and 'fast'.
# NOTE: the help text is assembled from adjacent string literals, which
# Python joins with no separator, so every fragment except the last must
# end with an explicit space.
AddConfigVar('cycle_detection',
             "If cycle_detection is set to regular, most inplaces are "
             "allowed, but it is slower. If cycle_detection is set to fast, "
             "less inplaces are allowed, but it makes the compilation "
             "faster. The interaction of which one gives the lower peak "
             "memory usage is complicated and not predictable, so if you "
             "are close to the peak memory usage, trying both could give "
             "you a small gain.",
             EnumStr('regular', 'fast'),
             in_c_key=False)
def _timeout_default():
    """Return 24 times the current ``theano.config.compile.wait`` value."""
    wait = theano.config.compile.wait
    return wait * 24
......
......@@ -654,8 +654,9 @@ class FunctionGraph(utils.object2):
take care of computing dependencies by itself.
"""
ords = OrderedDict()
assert isinstance(self._features, list)
all_orderings = []
for feature in self._features:
if hasattr(feature, 'orderings'):
orderings = feature.orderings(self)
......@@ -664,16 +665,23 @@ class FunctionGraph(utils.object2):
str(feature.orderings) +
". Nondeterministic object is " +
str(orderings))
if len(orderings) > 0:
all_orderings.append(orderings)
for node, prereqs in iteritems(orderings):
if not isinstance(prereqs, (list, OrderedSet)):
raise TypeError(
"prereqs must be a type with a "
"deterministic iteration order, or toposort "
" will be non-deterministic.")
if len(all_orderings) == 1:
# If there is only 1 ordering, we reuse it directly.
return all_orderings[0].copy()
else:
# If there is more than 1 ordering, combine them.
ords = OrderedDict()
for orderings in all_orderings:
for node, prereqs in iteritems(orderings):
ords.setdefault(node, []).extend(prereqs)
# eliminate duplicate prereqs
for (node, prereqs) in iteritems(ords):
ords[node] = list(OrderedSet(prereqs))
return ords
def check_integrity(self):
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论