提交 d89bd8ec authored 作者: Frédéric Bastien's avatar Frédéric Bastien 提交者: GitHub

Merge pull request #5502 from ReyhaneAskari/io_toposort_5042

IO_toposort
......@@ -53,9 +53,9 @@ can lead to errors. Consider this example:
>>> theano.printing.debugprint(f) # doctest: +NORMALIZE_WHITESPACE
MakeVector{dtype='int64'} [id A] '' 4
|Elemwise{Add}[(0, 0)] [id B] '' 3
| |Shape_i{0} [id C] '' 1
| |Shape_i{0} [id C] '' 2
| | |x [id D]
| |Shape_i{0} [id E] '' 2
| |Shape_i{0} [id E] '' 1
| |y [id F]
|Shape_i{1} [id G] '' 0
|x [id D]
......
......@@ -56,8 +56,8 @@ class Test_profiling(unittest.TestCase):
lines1 = [l for l in the_string.split("\n") if "Max if linker" in l]
lines2 = [l for l in the_string.split("\n") if "Minimum peak" in l]
if theano.config.device == 'cpu':
assert "CPU: 4112KB (8204KB)" in the_string, (lines1, lines2)
assert "CPU: 8204KB (12296KB)" in the_string, (lines1, lines2)
assert "CPU: 4112KB (4104KB)" in the_string, (lines1, lines2)
assert "CPU: 8204KB (8196KB)" in the_string, (lines1, lines2)
assert "CPU: 8208KB" in the_string, (lines1, lines2)
assert "Minimum peak from all valid apply node order is 4104KB" in the_string, (
lines1, lines2)
......
......@@ -608,6 +608,8 @@ def stack_search(start, expand, mode='bfs', build_inv=False):
expand : callable
When we get to a node, add expand(node) to the list of nodes to visit.
This function should return a list, or None.
mode : string
'bfs' or 'dfs' for breadth first search or depth first search.
Returns
-------
......@@ -632,7 +634,7 @@ def stack_search(start, expand, mode='bfs', build_inv=False):
start_pop = start.popleft
else:
start_pop = start.pop
expand_inv = {}
expand_inv = {} # var: clients
while start:
l = start_pop()
if id(l) not in rval_set:
......@@ -878,7 +880,7 @@ def clone_get_equiv(inputs, outputs, copy_inputs_and_orphans=True, memo=None):
return memo
def general_toposort(r_out, deps, debug_print=False,
def general_toposort(outputs, deps, debug_print=False,
compute_deps_cache=None, deps_cache=None,
clients=None):
"""
......@@ -932,9 +934,9 @@ def general_toposort(r_out, deps, debug_print=False,
return deps_cache[io]
assert deps_cache is not None
assert isinstance(r_out, (tuple, list, deque))
assert isinstance(outputs, (tuple, list, deque))
reachable, _clients = stack_search(deque(r_out), compute_deps_cache,
reachable, _clients = stack_search(deque(outputs), compute_deps_cache,
'dfs', True)
if clients is not None:
clients.update(_clients)
......@@ -948,9 +950,9 @@ def general_toposort(r_out, deps, debug_print=False,
rlist.append(node)
rset.add(node)
for client in _clients.get(node, []):
deps_cache[client] = [a for a in deps_cache[client]
if a is not node]
if not deps_cache[client]:
d = [a for a in deps_cache[client] if a is not node]
deps_cache[client] = d
if not d:
sources.append(client)
if len(rlist) != len(reachable):
......@@ -980,17 +982,37 @@ def io_toposort(inputs, outputs, orderings=None, clients=None):
node->clients for each node in the subgraph that is sorted
"""
# the inputs are used only here in the function that decides what 'predecessors' to explore
iset = set(inputs)
if not orderings and clients is None: # ordering can be None or empty dict
# Specialized function that is faster when more than ~10 nodes
# when no ordering.
# We build 2 functions as a speed up
deps_cache = {}
# Do a new stack implementation with the vm algo.
# This will change the order returned.
computed = set(inputs)
todo = [o.owner for o in reversed(outputs) if o.owner]
order = []
while todo:
cur = todo.pop()
# We suppose that all outputs are always computed
if cur.outputs[0] in computed:
continue
if all([i in computed or i.owner is None for i in cur.inputs]):
computed.update(cur.outputs)
order.append(cur)
else:
todo.append(cur)
todo.extend(i.owner for i in cur.inputs if i.owner)
return order
compute_deps = None
compute_deps_cache = None
if not orderings: # can be None or empty dict
iset = set(inputs)
deps_cache = {}
if not orderings: # ordering can be None or empty dict
# Specialized function that is faster when no ordering.
# Also include the cache in the function itself for speed up.
def compute_deps_cache(obj):
if obj in deps_cache:
return deps_cache[obj]
......@@ -1013,6 +1035,9 @@ def io_toposort(inputs, outputs, orderings=None, clients=None):
deps_cache[obj] = rval
return rval
else:
# the inputs are used only here in the function that decides what
# 'predecessors' to explore
def compute_deps(obj):
rval = []
if obj not in iset:
......@@ -1023,7 +1048,7 @@ def io_toposort(inputs, outputs, orderings=None, clients=None):
rval = list(obj.inputs)
rval.extend(orderings.get(obj, []))
else:
assert not orderings.get(obj, [])
assert not orderings.get(obj, None)
return rval
topo = general_toposort(outputs, deps=compute_deps,
......
......@@ -212,7 +212,7 @@ class TestToposort:
o0 = MyOp.make_node(r1, r2)
o1 = MyOp.make_node(r3, r4)
all = io_toposort([r1, r2, r3, r4], o0.outputs + o1.outputs)
assert all == [o1, o0]
assert all == [o1, o0] or all == [o0, o1]
def test_4(self):
"""Test inputs and outputs mixed together in a chain graph"""
......
......@@ -153,7 +153,7 @@ class TestWrapLinker(unittest.TestCase):
i[0].data = 1
i[1].data = 2
fn()
assert nodes == [div, add, mul]
assert nodes == [div, add, mul] or nodes == [add, div, mul]
assert o[0].data is None
def test_1(self):
......@@ -171,7 +171,7 @@ class TestWrapLinker(unittest.TestCase):
i[0].data = 1
i[1].data = 2
fn()
assert nodes == [div, add, mul]
assert nodes == [div, add, mul] or nodes == [add, div, mul]
assert o[0].data == 1.5
......
......@@ -1572,12 +1572,13 @@ class UsmmTests(unittest.TestCase):
# Usmm is tested at the same time in debugmode
# Check if the optimization local_usmm and local_usmm_csx is
# applied
assert isinstance(topo[0].op,
theano.sparse.basic.CSMProperties)
assert isinstance(topo[1].op, theano.tensor.DimShuffle)
assert isinstance(topo[2].op, theano.tensor.Subtensor)
assert topo[3].op == theano.tensor.neg
assert isinstance(topo[4].op, UsmmCscDense)
def check_once(x):
assert sum([isinstance(n.op, x) for n in topo]) == 1
check_once(theano.sparse.basic.CSMProperties)
check_once(theano.tensor.DimShuffle)
check_once(theano.tensor.Subtensor)
check_once(UsmmCscDense)
check_once(theano.tensor.Elemwise)
if inplace:
assert topo[4].op.inplace
elif not fast_compile:
......
......@@ -1629,7 +1629,7 @@ for(int i=0;i<PyArray_NDIM(%(iname)s);i++){
def c_code_cache_version_apply(self, node):
# the version corresponding to the c code in this Op
version = [6]
version = [7]
# now we insert versions for the ops on which we depend...
scalar_node = Apply(
......
......@@ -100,13 +100,13 @@ def make_checks(loop_orders, dtypes, sub):
check += """
if (%%(lv%(j0)s)s_n%(x0)s != %%(lv%(j)s)s_n%(x)s)
{
PyErr_Format(PyExc_ValueError, "Input dimension mis-match. (input[%%%%i].shape[%%%%i] = %%%%i, input[%%%%i].shape[%%%%i] = %%%%i)",
PyErr_Format(PyExc_ValueError, "Input dimension mis-match. (input[%%%%i].shape[%%%%i] = %%%%lli, input[%%%%i].shape[%%%%i] = %%%%lli)",
%(j0)s,
%(x0)s,
%%(lv%(j0)s)s_n%(x0)s,
(long long int) %%(lv%(j0)s)s_n%(x0)s,
%(j)s,
%(x)s,
%%(lv%(j)s)s_n%(x)s
(long long int) %%(lv%(j)s)s_n%(x)s
);
%%(fail)s
}
......
......@@ -256,8 +256,10 @@ class T_sigmoid_opts(unittest.TestCase):
[x, y],
(sigmoid(x) * sigmoid(-y) * -tensor.exp(-x) *
tensor.exp(x * y) * tensor.exp(y)), mode=m)
match(f, [sigmoid, tensor.mul, tensor.neg, tensor.exp, sigmoid,
tensor.mul])
topo = f.maker.fgraph.toposort()
for op, nb in [(sigmoid, 2), (tensor.mul, 2),
(tensor.neg, 1), (tensor.exp, 1)]:
assert sum([n.op == op for n in topo]) == nb
# assert check_stack_trace(f, ops_to_check=[sigmoid, tensor.mul,
# tensor.exp])
......
......@@ -1568,14 +1568,19 @@ def test_log1p():
y = fmatrix()
f = function([x, y], T.log(tensor.fill(y, 1) + (x)), mode=m)
# the first three ops are Shape_i, Shape_i, and Dimshuffle
assert [node.op for node in f.maker.fgraph.toposort()][3:] == [
T.log1p, tensor.alloc]
topo = f.maker.fgraph.toposort()
assert topo[-1].op == tensor.alloc
assert T.log1p in [node.op for node in topo]
f = function([x, y], T.log(0 + (x) + tensor.fill(y, 1.0)), mode=m)
assert [node.op for node in f.maker.fgraph.toposort()][3:] == [
T.log1p, tensor.alloc]
topo = f.maker.fgraph.toposort()
assert topo[-1].op == tensor.alloc
assert T.log1p in [node.op for node in topo]
f = function([x, y], T.log(2 + (x) - tensor.fill(y, 1.0)), mode=m)
assert ([node.op for node in f.maker.fgraph.toposort()][3:] ==
[T.log1p, tensor.alloc])
topo = f.maker.fgraph.toposort()
assert topo[-1].op == tensor.alloc
assert T.log1p in [node.op for node in topo]
f([1e-7, 10], [[0, 0], [0, 0]]) # debugmode will verify values
......@@ -2207,8 +2212,9 @@ class test_local_subtensor_lift(unittest.TestCase):
assert isinstance(prog[0].op, tensor.DimShuffle)
assert isinstance(prog[1].op.scalar_op, theano.scalar.
Composite) # Composite{add,exp}
assert prog[2].op == tensor.add
assert isinstance(prog[3].op, tensor.Subtensor) # first subtensor
assert prog[2].op == tensor.add or prog[3].op == tensor.add
# first subtensor
assert isinstance(prog[2].op, tensor.Subtensor) or isinstance(prog[3].op, tensor.Subtensor)
assert len(prog) == 4
f([[0, 1], [2, 3]], [4, 5]) # let debugmode test something
......
......@@ -252,11 +252,11 @@ def test_debugprint():
s = s.getvalue()
# The additional white space are needed!
reference = '\n'.join([
"Elemwise{add,no_inplace} [id A] '' 0 clients:[('[id B]', 1), ('output', '')]",
"Elemwise{add,no_inplace} [id A] '' 0 clients:[('output', ''), ('[id C]', 1)]",
" |A [id D]",
" |B [id E]",
"Elemwise{sub,no_inplace} [id B] '' 1",
" |Elemwise{add,no_inplace} [id A] '' 0 clients:[('[id B]', 1), ('output', '')]",
"Elemwise{sub,no_inplace} [id C] '' 1",
" |Elemwise{add,no_inplace} [id A] '' 0 clients:[('output', ''), ('[id C]', 1)]",
" |D [id F]",
]) + '\n'
if s != reference:
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论