提交 d89bd8ec authored 作者: Frédéric Bastien's avatar Frédéric Bastien 提交者: GitHub

Merge pull request #5502 from ReyhaneAskari/io_toposort_5042

IO_toposort
......@@ -53,9 +53,9 @@ can lead to errors. Consider this example:
>>> theano.printing.debugprint(f) # doctest: +NORMALIZE_WHITESPACE
MakeVector{dtype='int64'} [id A] '' 4
|Elemwise{Add}[(0, 0)] [id B] '' 3
| |Shape_i{0} [id C] '' 1
| |Shape_i{0} [id C] '' 2
| | |x [id D]
| |Shape_i{0} [id E] '' 2
| |Shape_i{0} [id E] '' 1
| |y [id F]
|Shape_i{1} [id G] '' 0
|x [id D]
......
......@@ -56,8 +56,8 @@ class Test_profiling(unittest.TestCase):
lines1 = [l for l in the_string.split("\n") if "Max if linker" in l]
lines2 = [l for l in the_string.split("\n") if "Minimum peak" in l]
if theano.config.device == 'cpu':
assert "CPU: 4112KB (8204KB)" in the_string, (lines1, lines2)
assert "CPU: 8204KB (12296KB)" in the_string, (lines1, lines2)
assert "CPU: 4112KB (4104KB)" in the_string, (lines1, lines2)
assert "CPU: 8204KB (8196KB)" in the_string, (lines1, lines2)
assert "CPU: 8208KB" in the_string, (lines1, lines2)
assert "Minimum peak from all valid apply node order is 4104KB" in the_string, (
lines1, lines2)
......
......@@ -608,6 +608,8 @@ def stack_search(start, expand, mode='bfs', build_inv=False):
expand : callable
When we get to a node, add expand(node) to the list of nodes to visit.
This function should return a list, or None.
mode : string
'bfs' or 'dfs' for breadth first search or depth first search.
Returns
-------
......@@ -632,7 +634,7 @@ def stack_search(start, expand, mode='bfs', build_inv=False):
start_pop = start.popleft
else:
start_pop = start.pop
expand_inv = {}
expand_inv = {} # var: clients
while start:
l = start_pop()
if id(l) not in rval_set:
......@@ -878,7 +880,7 @@ def clone_get_equiv(inputs, outputs, copy_inputs_and_orphans=True, memo=None):
return memo
def general_toposort(r_out, deps, debug_print=False,
def general_toposort(outputs, deps, debug_print=False,
compute_deps_cache=None, deps_cache=None,
clients=None):
"""
......@@ -932,9 +934,9 @@ def general_toposort(r_out, deps, debug_print=False,
return deps_cache[io]
assert deps_cache is not None
assert isinstance(r_out, (tuple, list, deque))
assert isinstance(outputs, (tuple, list, deque))
reachable, _clients = stack_search(deque(r_out), compute_deps_cache,
reachable, _clients = stack_search(deque(outputs), compute_deps_cache,
'dfs', True)
if clients is not None:
clients.update(_clients)
......@@ -948,9 +950,9 @@ def general_toposort(r_out, deps, debug_print=False,
rlist.append(node)
rset.add(node)
for client in _clients.get(node, []):
deps_cache[client] = [a for a in deps_cache[client]
if a is not node]
if not deps_cache[client]:
d = [a for a in deps_cache[client] if a is not node]
deps_cache[client] = d
if not d:
sources.append(client)
if len(rlist) != len(reachable):
......@@ -980,17 +982,37 @@ def io_toposort(inputs, outputs, orderings=None, clients=None):
node->clients for each node in the subgraph that is sorted
"""
# the inputs are used only here in the function that decides what 'predecessors' to explore
iset = set(inputs)
if not orderings and clients is None: # ordering can be None or empty dict
# Specialized function that is faster when more than ~10 nodes
# when no ordering.
# We build 2 functions as a speed up
deps_cache = {}
# Do a new stack implementation with the vm algo.
# This will change the order returned.
computed = set(inputs)
todo = [o.owner for o in reversed(outputs) if o.owner]
order = []
while todo:
cur = todo.pop()
# We suppose that all outputs are always computed
if cur.outputs[0] in computed:
continue
if all([i in computed or i.owner is None for i in cur.inputs]):
computed.update(cur.outputs)
order.append(cur)
else:
todo.append(cur)
todo.extend(i.owner for i in cur.inputs if i.owner)
return order
compute_deps = None
compute_deps_cache = None
if not orderings: # can be None or empty dict
iset = set(inputs)
deps_cache = {}
if not orderings: # ordering can be None or empty dict
# Specialized function that is faster when no ordering.
# Also include the cache in the function itself for speed up.
def compute_deps_cache(obj):
if obj in deps_cache:
return deps_cache[obj]
......@@ -1013,6 +1035,9 @@ def io_toposort(inputs, outputs, orderings=None, clients=None):
deps_cache[obj] = rval
return rval
else:
# the inputs are used only here in the function that decides what
# 'predecessors' to explore
def compute_deps(obj):
rval = []
if obj not in iset:
......@@ -1023,7 +1048,7 @@ def io_toposort(inputs, outputs, orderings=None, clients=None):
rval = list(obj.inputs)
rval.extend(orderings.get(obj, []))
else:
assert not orderings.get(obj, [])
assert not orderings.get(obj, None)
return rval
topo = general_toposort(outputs, deps=compute_deps,
......
......@@ -212,7 +212,7 @@ class TestToposort:
o0 = MyOp.make_node(r1, r2)
o1 = MyOp.make_node(r3, r4)
all = io_toposort([r1, r2, r3, r4], o0.outputs + o1.outputs)
assert all == [o1, o0]
assert all == [o1, o0] or all == [o0, o1]
def test_4(self):
"""Test inputs and outputs mixed together in a chain graph"""
......
......@@ -153,7 +153,7 @@ class TestWrapLinker(unittest.TestCase):
i[0].data = 1
i[1].data = 2
fn()
assert nodes == [div, add, mul]
assert nodes == [div, add, mul] or nodes == [add, div, mul]
assert o[0].data is None
def test_1(self):
......@@ -171,7 +171,7 @@ class TestWrapLinker(unittest.TestCase):
i[0].data = 1
i[1].data = 2
fn()
assert nodes == [div, add, mul]
assert nodes == [div, add, mul] or nodes == [add, div, mul]
assert o[0].data == 1.5
......
......@@ -1572,12 +1572,13 @@ class UsmmTests(unittest.TestCase):
# Usmm is tested at the same time in debugmode
# Check if the optimization local_usmm and local_usmm_csx is
# applied
assert isinstance(topo[0].op,
theano.sparse.basic.CSMProperties)
assert isinstance(topo[1].op, theano.tensor.DimShuffle)
assert isinstance(topo[2].op, theano.tensor.Subtensor)
assert topo[3].op == theano.tensor.neg
assert isinstance(topo[4].op, UsmmCscDense)
def check_once(x):
assert sum([isinstance(n.op, x) for n in topo]) == 1
check_once(theano.sparse.basic.CSMProperties)
check_once(theano.tensor.DimShuffle)
check_once(theano.tensor.Subtensor)
check_once(UsmmCscDense)
check_once(theano.tensor.Elemwise)
if inplace:
assert topo[4].op.inplace
elif not fast_compile:
......
......@@ -1629,7 +1629,7 @@ for(int i=0;i<PyArray_NDIM(%(iname)s);i++){
def c_code_cache_version_apply(self, node):
# the version corresponding to the c code in this Op
version = [6]
version = [7]
# now we insert versions for the ops on which we depend...
scalar_node = Apply(
......
......@@ -100,13 +100,13 @@ def make_checks(loop_orders, dtypes, sub):
check += """
if (%%(lv%(j0)s)s_n%(x0)s != %%(lv%(j)s)s_n%(x)s)
{
PyErr_Format(PyExc_ValueError, "Input dimension mis-match. (input[%%%%i].shape[%%%%i] = %%%%i, input[%%%%i].shape[%%%%i] = %%%%i)",
PyErr_Format(PyExc_ValueError, "Input dimension mis-match. (input[%%%%i].shape[%%%%i] = %%%%lli, input[%%%%i].shape[%%%%i] = %%%%lli)",
%(j0)s,
%(x0)s,
%%(lv%(j0)s)s_n%(x0)s,
(long long int) %%(lv%(j0)s)s_n%(x0)s,
%(j)s,
%(x)s,
%%(lv%(j)s)s_n%(x)s
(long long int) %%(lv%(j)s)s_n%(x)s
);
%%(fail)s
}
......
......@@ -256,8 +256,10 @@ class T_sigmoid_opts(unittest.TestCase):
[x, y],
(sigmoid(x) * sigmoid(-y) * -tensor.exp(-x) *
tensor.exp(x * y) * tensor.exp(y)), mode=m)
match(f, [sigmoid, tensor.mul, tensor.neg, tensor.exp, sigmoid,
tensor.mul])
topo = f.maker.fgraph.toposort()
for op, nb in [(sigmoid, 2), (tensor.mul, 2),
(tensor.neg, 1), (tensor.exp, 1)]:
assert sum([n.op == op for n in topo]) == nb
# assert check_stack_trace(f, ops_to_check=[sigmoid, tensor.mul,
# tensor.exp])
......
......@@ -1568,14 +1568,19 @@ def test_log1p():
y = fmatrix()
f = function([x, y], T.log(tensor.fill(y, 1) + (x)), mode=m)
# the first three ops are Shape_i, Shape_i, and Dimshuffle
assert [node.op for node in f.maker.fgraph.toposort()][3:] == [
T.log1p, tensor.alloc]
topo = f.maker.fgraph.toposort()
assert topo[-1].op == tensor.alloc
assert T.log1p in [node.op for node in topo]
f = function([x, y], T.log(0 + (x) + tensor.fill(y, 1.0)), mode=m)
assert [node.op for node in f.maker.fgraph.toposort()][3:] == [
T.log1p, tensor.alloc]
topo = f.maker.fgraph.toposort()
assert topo[-1].op == tensor.alloc
assert T.log1p in [node.op for node in topo]
f = function([x, y], T.log(2 + (x) - tensor.fill(y, 1.0)), mode=m)
assert ([node.op for node in f.maker.fgraph.toposort()][3:] ==
[T.log1p, tensor.alloc])
topo = f.maker.fgraph.toposort()
assert topo[-1].op == tensor.alloc
assert T.log1p in [node.op for node in topo]
f([1e-7, 10], [[0, 0], [0, 0]]) # debugmode will verify values
......@@ -2207,8 +2212,9 @@ class test_local_subtensor_lift(unittest.TestCase):
assert isinstance(prog[0].op, tensor.DimShuffle)
assert isinstance(prog[1].op.scalar_op, theano.scalar.
Composite) # Composite{add,exp}
assert prog[2].op == tensor.add
assert isinstance(prog[3].op, tensor.Subtensor) # first subtensor
assert prog[2].op == tensor.add or prog[3].op == tensor.add
# first subtensor
assert isinstance(prog[2].op, tensor.Subtensor) or isinstance(prog[3].op, tensor.Subtensor)
assert len(prog) == 4
f([[0, 1], [2, 3]], [4, 5]) # let debugmode test something
......
......@@ -252,11 +252,11 @@ def test_debugprint():
s = s.getvalue()
# The additional white space are needed!
reference = '\n'.join([
"Elemwise{add,no_inplace} [id A] '' 0 clients:[('[id B]', 1), ('output', '')]",
"Elemwise{add,no_inplace} [id A] '' 0 clients:[('output', ''), ('[id C]', 1)]",
" |A [id D]",
" |B [id E]",
"Elemwise{sub,no_inplace} [id B] '' 1",
" |Elemwise{add,no_inplace} [id A] '' 0 clients:[('[id B]', 1), ('output', '')]",
"Elemwise{sub,no_inplace} [id C] '' 1",
" |Elemwise{add,no_inplace} [id A] '' 0 clients:[('output', ''), ('[id C]', 1)]",
" |D [id F]",
]) + '\n'
if s != reference:
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论