Merge pull request #4420 from nouiz/small_dup

Small stuff

Merge pull request #4420 from nouiz/small_dup
beefa939 · abergeron · 02548d27 · 77443932 · beefa939 · beefa939
--- a/theano/compile/debugmode.py
+++ b/theano/compile/debugmode.py
@@ -512,7 +512,8 @@ def debugprint(r, prefix='', depth=-1, done=None, print_type=False,
               print_view_map=False, order=None, ids='CHAR',
               stop_on_name=False, prefix_child=None,
               scan_ops=None, profile=None,
-               scan_inner_to_outer_inputs=None, smap=None):
+               scan_inner_to_outer_inputs=None, smap=None,
+               used_ids=None, print_clients=False):
    """
    Print the graph leading to `r` to given depth.
@@ -525,7 +526,8 @@ def debugprint(r, prefix='', depth=-1, done=None, print_type=False,
    depth
        Maximum recursion depth (Default -1 for unlimited).
    done
-        dict of Apply instances that have already been printed and their
+        Internal. Used to pass information when recursing.
+        Dict of Apply instances that have already been printed and their
        associated printed ids.
    print_type
        Whether to print the Variable type after the other infos.
@@ -554,6 +556,12 @@ def debugprint(r, prefix='', depth=-1, done=None, print_type=False,
        inputs (outer inputs) for printing purposes.
    smap
        None or the storage_map when printing an Theano function.
+    used_ids
+        Internal. Used to pass information when recursing.
+        It is a dict from obj to the id used for it.
+        It wasn't always printed, but at least a reference to it was printed.
+    print_clients
+        If True, we will print the clients of nodes when they have more than one client.
    """
    if depth == 0:
        return
@@ -575,19 +583,25 @@ def debugprint(r, prefix='', depth=-1, done=None, print_type=False,
    if prefix_child is None:
        prefix_child = prefix
-    def get_id_str(obj):
+    if used_ids is None:
-        if obj in done:
+        used_ids = dict()
-            id_str = done[obj]
+    def get_id_str(obj, get_printed=True):
+        if obj in used_ids:
+            id_str = used_ids[obj]
+        elif obj == 'output':
+            id_str = 'output'
        elif ids == "id":
            id_str = "[id %s]" % str(id(r))
        elif ids == "int":
-            id_str = "[id %s]" % str(len(done))
+            id_str = "[id %s]" % str(len(used_ids))
        elif ids == "CHAR":
-            id_str = "[id %s]" % char_from_number(len(done))
+            id_str = "[id %s]" % char_from_number(len(used_ids))
        elif ids == "":
            id_str = ""
+        if get_printed:
            done[obj] = id_str
+        used_ids[obj] = id_str
        return id_str
    if hasattr(r.owner, 'op'):
@@ -629,14 +643,23 @@ def debugprint(r, prefix='', depth=-1, done=None, print_type=False,
        data = ""
        if smap:
            data = " " + str(smap.get(a.outputs[0], ''))
+        clients = ''
+        if print_clients and len(getattr(r, 'clients', [])) > 1:
+            def get_index(c):
+                try:
+                    return order.index(c)
+                except ValueError:
+                    return ""
+            clients = " clients:" + str([(get_id_str(c, False), get_index(c))
+                                         for c, i in r.clients])
        if profile is None or a not in profile.apply_time:
-            print('%s%s%s %s%s \'%s\' %s %s %s%s' % (prefix, a.op,
+            print('%s%s%s %s%s \'%s\' %s %s %s%s%s' % (prefix, a.op,
                                                       idx,
                                                       id_str, type_str,
                                                       r_name,
                                                       destroy_map_str,
                                                       view_map_str,
-                                                     o, data), file=file)
+                                                       o, data, clients), file=file)
        else:
            op_time = profile.apply_time[a]
            op_time_percent = (op_time / profile.fct_call_time) * 100
@@ -648,7 +671,7 @@ def debugprint(r, prefix='', depth=-1, done=None, print_type=False,
                idx = ""
            else:
                idx = ".%i" % a.outputs.index(r)
-            print("%s%s%s %s%s '%s' %s %s %s%s --> "
+            print("%s%s%s %s%s '%s' %s %s %s%s%s --> "
                  "%8.2es %4.1f%% %8.2es %4.1f%%"
                  % (prefix, a.op,
                     idx,
@@ -656,7 +679,7 @@ def debugprint(r, prefix='', depth=-1, done=None, print_type=False,
                     r_name,
                     destroy_map_str,
                     view_map_str,
-                     o, data,
+                     o, data, clients,
                     op_time,
                     op_time_percent,
                     tot_time,
@@ -684,7 +707,7 @@ def debugprint(r, prefix='', depth=-1, done=None, print_type=False,
                        prefix_child=new_prefix_child, scan_ops=scan_ops,
                        profile=profile,
                        scan_inner_to_outer_inputs=scan_inner_to_outer_inputs,
-                        smap=smap)
+                        smap=smap, used_ids=used_ids, print_clients=print_clients)
    else:
        if scan_inner_to_outer_inputs is not None and\
           r in scan_inner_to_outer_inputs:
@@ -1672,13 +1695,16 @@ class _VariableEquivalenceTracker(object):
            # N.B. compute the debugprint now, because future
            # optimizations will change the graph
            done = dict()
+            used_ids = dict()
            self.reasons[new_r].append(
                (reason,
                 r,
                 debugprint(r, prefix='  ', depth=6,
-                            file=StringIO(), done=done).getvalue(),
+                            file=StringIO(), done=done,
+                            used_ids=used_ids).getvalue(),
                 debugprint(new_r, prefix='  ', depth=6,
-                            file=StringIO(), done=done).getvalue()))
+                            file=StringIO(), done=done,
+                            used_ids=used_ids).getvalue()))
            self.replaced_by[r].append((reason, new_r))
        if r in self.equiv:

--- a/theano/compile/profiling.py
+++ b/theano/compile/profiling.py
@@ -822,7 +822,7 @@ class ProfileStats(object):
                                                 running_memory_size[1])
                # Mimic the combination of Theano and Python gc
-                for ins in node.inputs:
+                for ins in set(node.inputs):
                    assert not (ins in view_of and viewed_by[ins])
                    # we trac the original var, so this shouldn't happen
                    if isinstance(ins.type, CudaNdarrayType):

--- a/theano/gof/graph.py
+++ b/theano/gof/graph.py
@@ -819,7 +819,7 @@ def clone_get_equiv(inputs, outputs, copy_inputs_and_orphans=True, memo=None):
    original graph to a new node (a clone) in a new graph.
    This function works by recursively cloning inputs... rebuilding a directed
-    graph from the bottom (inputs) up to eventually building new outputs.
+    graph from the inputs up to eventually building new outputs.
    Parameters
    ----------

--- a/theano/gof/vm.py
+++ b/theano/gof/vm.py
@@ -1066,6 +1066,7 @@ class VM_Linker(link.LocalLinker):
                          )
        vm.storage_map = storage_map
+        vm.compute_map = compute_map
        return (vm,
                [link.Container(input, storage)

--- a/theano/misc/check_blas.py
+++ b/theano/misc/check_blas.py
@@ -232,7 +232,7 @@ if __name__ == "__main__":
        GT 610            2.38s
        GTX 550 Ti                                                  0.57s
        GT 520                                        2.68s                3.06s
-        520M                                   2.44s                       3.19s        # with bumblebee on Ubuntu 12.04
+        GT 520M                                2.44s                       3.19s        # with bumblebee on Ubuntu 12.04
        GT 220                                                             3.80s
        GT 210                                                      6.35s
        8500 GT                                                                   10.68s

--- a/theano/printing.py
+++ b/theano/printing.py
@@ -23,19 +23,26 @@ from theano.compile import Function, debugmode, SharedVariable
 from theano.compile.profilemode import ProfileMode
 pydot_imported = False
+pydot_imported_msg = ""
 try:
    # pydot-ng is a fork of pydot that is better maintained
    import pydot_ng as pd
    if pd.find_graphviz():
        pydot_imported = True
+    else:
+        pydot_imported_msg = "pydot-ng can't find graphviz"
 except ImportError:
    try:
        # fall back on pydot if necessary
        import pydot as pd
        if pd.find_graphviz():
            pydot_imported = True
+        else:
+            pydot_imported_msg = "pydot can't find graphviz"
    except ImportError:
-        pass  # tests should not fail on optional dependency
+        # tests should not fail on optional dependency
+        pydot_imported_msg = "Install the python package pydot or pydot-ng."
 _logger = logging.getLogger("theano.printing")
 VALID_ASSOC = set(['left', 'right', 'either'])
@@ -43,7 +50,8 @@ VALID_ASSOC = set(['left', 'right', 'either'])
 def debugprint(obj, depth=-1, print_type=False,
               file=None, ids='CHAR', stop_on_name=False,
-               done=None, print_storage=False):
+               done=None, print_storage=False, print_clients=False,
+               used_ids=None):
    """Print a computation graph as text to stdout or a file.
    :type obj: Variable, Apply, or Function instance
@@ -69,6 +77,13 @@ def debugprint(obj, depth=-1, print_type=False,
    :param print_storage: If True, this will print the storage map
        for Theano functions. Combined with allow_gc=False, after the
        execution of a Theano function, we see the intermediate result.
+    :type print_clients: bool
+    :param print_clients: If True, this will print, for each Apply node
+         that has more than 1 client, its clients. This helps find who
+         uses an Apply node.
+    :type used_ids: dict or None
+    :param used_ids: the id to use for some objects; an object may
+         only have been referred to so far, without being printed yet.
    :returns: string if `file` == 'str', else file arg
@@ -98,6 +113,9 @@ def debugprint(obj, depth=-1, print_type=False,
        _file = file
    if done is None:
        done = dict()
+    if used_ids is None:
+        used_ids = dict()
+    used_ids = dict()
    results_to_print = []
    profile_list = []
    order = []  # Toposort
@@ -178,7 +196,8 @@ N.B.:
        debugmode.debugprint(r, depth=depth, done=done, print_type=print_type,
                             file=_file, order=o, ids=ids,
                             scan_ops=scan_ops, stop_on_name=stop_on_name,
-                             profile=p, smap=s)
+                             profile=p, smap=s, used_ids=used_ids,
+                             print_clients=print_clients)
    if len(scan_ops) > 0:
        print("", file=_file)
@@ -208,7 +227,8 @@ N.B.:
                file=_file, ids=ids,
                scan_ops=scan_ops,
                stop_on_name=stop_on_name,
-                scan_inner_to_outer_inputs=inner_to_outer_inputs)
+                scan_inner_to_outer_inputs=inner_to_outer_inputs,
+                print_clients=print_clients, used_ids=used_ids)
            if hasattr(s.owner.op, 'fn'):
                # If the op was compiled, print the optimized version.
                outputs = s.owner.op.fn.maker.fgraph.outputs
@@ -227,7 +247,8 @@ N.B.:
                    ids=ids, stop_on_name=stop_on_name,
                    prefix_child=new_prefix_child,
                    scan_ops=scan_ops,
-                    scan_inner_to_outer_inputs=inner_to_outer_inputs)
+                    scan_inner_to_outer_inputs=inner_to_outer_inputs,
+                    print_clients=print_clients, used_ids=used_ids)
    if file is _file:
        return file
@@ -727,7 +748,8 @@ def pydotprint(fct, outfile=None,
        topo = fct.toposort()
    if not pydot_imported:
        raise RuntimeError("Failed to import pydot. You must install pydot"
-                           " and graphviz for `pydotprint` to work.")
+                           " and graphviz for `pydotprint` to work.",
+                           pydot_imported_msg)
    g = pd.Dot()
@@ -1062,7 +1084,8 @@ def pydotprint_variables(vars,
                               config.device + '.' + format)
    if not pydot_imported:
        raise RuntimeError("Failed to import pydot. You must install pydot"
-                           " and graphviz for `pydotprint_variables` to work.")
+                           " and graphviz for `pydotprint_variables` to work.",
+                           pydot_imported_msg)
    if pd.__name__ == "pydot_ng":
        raise RuntimeError("pydotprint_variables do not support pydot_ng."
                           "pydotprint_variables is also deprecated, "

--- a/theano/sandbox/gpuarray/opt.py
+++ b/theano/sandbox/gpuarray/opt.py
@@ -209,7 +209,7 @@ gpu_seqopt.register('InputToGpuArrayOptimizer', InputToGpuOptimizer(),
                    0, 'fast_run', 'fast_compile', 'merge')
-@local_optimizer([GpuFromHost, GpuToGpu, host_from_gpu])
+@local_optimizer([GpuFromHost, GpuToGpu, HostFromGpu])
 def local_cut_gpu_transfers(node):
    # gpu[ab] -> host -> gpub
    if (isinstance(node.op, GpuFromHost) and

--- a/theano/scan_module/tests/test_scan.py
+++ b/theano/scan_module/tests/test_scan.py
@@ -4829,7 +4829,7 @@ class ScanGpuTests:
        # The grad scan is always the 2nd one according to toposort. If the
        # optimization has been applied, it has 2 outputs, otherwise 3.
        grad_scan_node = scan_nodes[1]
-        assert len(grad_scan_node.outputs) == 2
+        assert len(grad_scan_node.outputs) == 2, len(grad_scan_node.outputs)
        # Call the theano function to ensure the absence of a memory error
        feval_backprop(numpy.zeros((mb_length, mb_size, n_in),

--- a/theano/tensor/opt.py
+++ b/theano/tensor/opt.py
@@ -1577,9 +1577,8 @@ local_elemwise_alloc = register_specialize(
 @gof.local_optimizer([T.Elemwise])
 def local_fill_sink(node):
    """
-    f(fill(a, b), fill(c, d), e) -> fill(a, fill(c, f(b, d, e)))
+    f(fill(a, b), fill(c, d), e) -> fill(c, fill(a, f(b, d, e)))
+    f needs to be an elemwise that isn't a fill.
-    f need to be an elemwise
    """
    if (not hasattr(node, 'op') or
            not isinstance(node.op, T.Elemwise) or

--- a/theano/tests/test_printing.py
+++ b/theano/tests/test_printing.py
@@ -169,7 +169,8 @@ def test_debugprint():
    g = theano.function([A, B, D, E], G, mode=mode)
    # just test that it work
-    debugprint(G)
+    s = StringIO()
+    debugprint(G, file=s)
    # test ids=int
    s = StringIO()
@@ -270,6 +271,28 @@ def test_debugprint():
    assert s == reference
+    # test clients
+    s = StringIO()
+    # We must force the mode as otherwise it can change the clients order
+    f = theano.function([A, B, D], [A + B, A + B - D],
+                        mode='FAST_COMPILE')
+    debugprint(f, file=s, print_clients=True)
+    s = s.getvalue()
+    # The additional white spaces are needed!
+    reference = '\n'.join([
+        "Elemwise{add,no_inplace} [id A] ''   0 clients:[('[id B]', 1), ('output', '')]",
+        " |A [id D]",
+        " |B [id E]",
+        "Elemwise{sub,no_inplace} [id B] ''   1",
+        " |Elemwise{add,no_inplace} [id A] ''   0 clients:[('[id B]', 1), ('output', '')]",
+        " |D [id F]",
+    ]) + '\n'
+    if s != reference:
+        print('--' + s + '--')
+        print('--' + reference + '--')
+    assert s == reference
 def test_scan_debugprint1():
    k = tensor.iscalar("k")