Add comments

4abd8af7 · Roy Xue · 9d264092 · 4abd8af7
--- a/theano/compile/profiling.py
+++ b/theano/compile/profiling.py
@@ -671,15 +671,20 @@ class ProfileStats(object):
            node_memory_saved_by_inplace = 0
            dependencies = fgraph.profile.dependencies

+            # two data structure used to mimic Python gc
+            view_of = {} # {var1: original var viewed by var1}
+            # The orignal mean that we don't keep trac of all the intermediate relationship in the view.
+            viewed_by = {}# {var1: [vars that view var1]} 
+            # The len of the list is the value of python ref count. But we use a list, not just the ref count value. 
+            # This is more safe to help detect potential bug  in the algo
+            for var in fgraph.variable
+
+
            for node in order:
                idx = 0
                dmap = getattr(node.op, 'destroy_map', None)
                vmap = getattr(node.op, 'view_map', None)
                val = nodes_mem[node]
-                viewed_by = {}
-                for i in node.inputs:
-                    viewed_by[i] = []
-                view_of = {}

                for v in val:
                    # TODO check the op returned a view
@@ -690,19 +695,27 @@ class ProfileStats(object):
                        node_memory_saved_by_view += v
                    idx += 1

+                # Update the Python emulating dicts and add the memory allocated by the node
+                idx2 = 0
                for out in node.outputs:
-                        if (dmap or vmap):
-                            for i in node.inputs:
-                                view_of[out] = view_of.get(i, i)
-                                viewed_by[i].append(out)  
+                        if (dmap and idx2 in dmap) or (vmap and idx2 in vmap):
+                            # This is needed for destroy_map in case it return a partial view that is destroyed.
+                            # So the output could be different then the input.
+                            for ins in node.inputs:
+                                assert len[ins] == 1
+                                view_of[out] = view_of.get(ins, ins)# This get make that we keep trac of view only again the original
+                                viewed_by[ins].append(out)  
                        else:
                            running_memory_size += var_mem[out]
                            node_memory_size += var_mem[out]
+                        idx2 += 1

                running_max_memory_size = max(running_max_memory_size, running_memory_size)

+                # Mimic the combination of Theano and Python gc
                for ins in node.inputs:
                    assert not (ins in view_of and i in viewed_by)
+                    # we keep trac of the original var, so this shouldn't happen
                    if dependencies[ins] and ins not in fgraph.outputs:
                        if ins not in view_of and not viewed_by.get(ins, []):
                            running_memory_size -= var_mem[ins]
@@ -711,9 +724,11 @@ class ProfileStats(object):
                            viewed_by[origin].remove(ins)
                            if not viewed_by[origin]:
                                running_memory_size -= var_mem[origin]
+                    else:
+                        # ins is viewed_by something else, so its memory isn't freed
+                        pass

-            return [node_memory_size, running_memory_size, running_max_memory_size,
-                 node_memory_saved_by_inplace, node_memory_saved_by_view]
+            return [node_memory_size, running_memory_size, running_max_memory_size, node_memory_saved_by_inplace, node_memory_saved_by_view]

        def count_minimum_peak(node_list, fgraph, nodes_mem):
            global maybe_executed, mem_count, mem_bound, max_mem_count
@@ -754,10 +769,18 @@ class ProfileStats(object):
                    if c != "output" and check_node_state(c):
                        executable_nodes.add(c)

+            viewed_by = {}# {var1:[vars that view var1]}
+            # The len of the list is the value of python ref count. But we use a list, not just the ref count value.
+            # This is more safe to help detect potential bug  in the algo
+            for i in node.inputs:
+                viewed_by[i] = []
+            view_of = {}# {var1: original var viewed by var1}
+            # The orignal mean that we don't keep trac of all the intermediate relationship in the view.
+
            def min_memory_generator(executable_nodes):
                """
                Generate all valid node order from node_list
-                and compute its memory peaf
+                and compute its memory peak

                :param executable_nodes: Set of executable nodes
                """
@@ -778,29 +801,30 @@ class ProfileStats(object):
                    mem_freed = 0
                    max_storage = max_mem_count

-                    # {var1:[vars that view var1]}
-                    viewed_by = {}
-                    for i in node.inputs:
-                        viewed_by[i] = []
-                    # {var1: original var viewed by var1}
-                    view_of = {}
                    dmap = getattr(node.op, 'destroy_map', None)
                    vmap = getattr(node.op, 'view_map', None)

+                    idx = 0
+                    # Update the Python emulating dicts and add the memory allocated by the node
                    for out in node.outputs:
-                        if (dmap or vmap):
-                            for i in node.inputs:
-                                view_of[out] = view_of.get(i, i)
-                                viewed_by[i].append(out)  
+                        if (dmap and idx in dmap) or (vmap and idx in vmap):
+                        # This is needed for destroy_map in case it return a partial view that is destroyed.
+                        # So the output could be different then the input.
+                            for ins in node.inputs:
+                                view_of[out] = view_of.get(ins, ins)# This get make that we keep trac of view only again the original
+                                viewed_by[ins].append(out)  
                        else:
                            mem_created += var_mem[out]
+                        idx += 1

                    mem_count += mem_created
                    max_mem_count = max(max_mem_count, mem_count)

+                    # Mimic the combination of Theano and Python gc.
                    for ins in node.inputs:
-                        assert not (ins in view_of and i in viewed_by)
-                        if dependencies[ins] and ins not in fgraph.outputs:
+                        assert not (ins in view_of and ins in viewed_by)
+                        # we keep track of the original var, so this shouldn't happen
+                        if dependencies[ins] and ins not in fgraph.outputs and ins.owner:
                            if all(compute_map[v] for v in dependencies[ins]):
                                if ins not in view_of and not viewed_by.get(ins, []):
                                    mem_freed += var_mem[ins]
@@ -810,6 +834,7 @@ class ProfileStats(object):
                                    if not viewed_by[origin]:
                                        mem_freed += var_mem[origin]
                        else:
+                            # ins is viewed_by something else, so its memory isn't freed
                            pass

                    mem_count -= mem_freed