Fix memory_profile when multiple functions (fct) share the same profile.

Also fix the printing.

Fix memory_profile when multiple functions (fct) share the same profile.
49d1e2f8 · Frederic · 96548df9 · 49d1e2f8
--- a/theano/compile/profiling.py
+++ b/theano/compile/profiling.py
@@ -580,7 +580,8 @@ class ProfileStats(object):
    def summary_memory(self, file, N=None):
        fct_memory = {}  # fgraph->dict(node->(outputs size))
        fct_shapes = {}  # fgraph->dict(node->[outputs shapes]))
-        var_mem = {}  # varible->size in bytes, ignore the input variable
+        var_mem = {}  # variable->size in bytes; don't include input variables
+        node_mem = {}  # node->total outputs size
        for node in self.apply_callcount.keys():
            fct_memory.setdefault(node.fgraph, {})
@@ -597,6 +598,7 @@ class ProfileStats(object):
                var_mem[out] = v
                fct_memory[node.fgraph][node].append(v)
                fct_shapes[node.fgraph][node].append(sh)
+            node_mem[node] = sum([var_mem[var] for var in node.outputs])
        #Find the function that used the most memory
        max_sum_size = 0
@@ -657,57 +659,27 @@ class ProfileStats(object):
                                                node_memory_saved_by_view)
            max_node_memory_saved_by_inplace = max(
                max_node_memory_saved_by_inplace, node_memory_saved_by_inplace)
+        del fgraph, nodes_mem, items, post_thunk_old_storage, node
-        print
        if len(fct_memory) > 1:
-            print "Memory Profile (the max between all function in that profile)"
+            print >> file,  ("Memory Profile "
+                             "(the max between all function in that profile)")
        else:
-            print "Memory Profile"
+            print >> file,  "Memory Profile"
-        print "---"
+        print >> file,  "---"
-        size_sum = sum(var_mem.values())
-        print "    Max without gc, inplace and view: %dKB" % int(
+        print >> file,  "    Max without gc, inplace and view: %dKB" % int(
-            round(size_sum / 1024))
+            round(max_sum_size / 1024))
-        order = fgraph.toposort()
+        print >> file,  "    Max allow_gc=False: %dKB" % int(round(
-        computed, last_user = theano.gof.link.gc_helper(order)
-        for node in order:
-            post_thunk_old_storage.append([
-                input_idx
-                for input_idx, input in enumerate(node.inputs)
-                if (input in computed) and
-                (input not in fgraph.outputs) and
-                node == last_user[input]])
-        for node, val in items:
-            dmap = getattr(node.op, 'destroy_map', None)
-            vmap = getattr(node.op, 'view_map', None)
-            for idx, v in enumerate(val):
-                # TODO check the op returned a view
-                if dmap and idx in dmap:
-                    node_memory_saved_by_inplace += v
-                # TODO check the op returned a view
-                elif vmap and idx in vmap:
-                    node_memory_saved_by_view += v
-                else:
-                    node_memory_size += v
-                    running_memory_size += v
-                    if running_memory_size > running_max_memory_size:
-                        running_max_memory_size = running_memory_size
-                    old_storage = post_thunk_old_storage[order.index(node)]
-                    for old_s in old_storage:
-                        running_memory_size -= var_mem[node.inputs[old_s]]
-                        pass
-            pass
-        print "    Max allow_gc=False: %dKB" % int(round(
            max_node_memory_size / 1024.))
-        print "    Max linker=c|py: %dKB" % int(round(
+        print >> file,  "    Max linker=c|py: %dKB" % int(round(
            max_running_max_memory_size / 1024.))
-        print "    Memory saved by view: %dKB" % int(round(
+        print >> file,  "    Memory saved by view: %dKB" % int(round(
            max_node_memory_saved_by_view / 1024.))
-        print "    Memory saved by inplace: %dKB" % int(round(
+        print >> file,  "    Memory saved by inplace: %dKB" % int(round(
            max_node_memory_saved_by_inplace / 1024.))
-        print "    Memory saved by GC: %dKB" % int(round((
+        print >> file,  "    Memory saved by GC: %dKB" % int(round((
            max_node_memory_size - max_running_max_memory_size) / 1024.))
        if (hasattr(theano, 'sandbox') and
            hasattr(theano.sandbox, 'cuda') and
@@ -715,31 +687,45 @@ class ProfileStats(object):
            hasattr(theano.sandbox.cuda.cuda_ndarray.cuda_ndarray,
                    'theano_allocated')):
            _, gpu_max = theano.sandbox.cuda.cuda_ndarray.cuda_ndarray.theano_allocated()
-            print "    Max Memory allocated on the GPU(for all functions): %dKB" % int(round(
+            print >> file,  ("    Max Memory allocated on the GPU"
-                gpu_max / 1024.))
+                             "(for all functions): %dKB" %
+                             int(round(gpu_max / 1024.)))
-        print
+        print >> file, ""
        if len(fct_memory) > 1:
-            print "    This list is based on all functions in the profile"
+            print >> file,  (
-        print "    <Sum apply outputs (bytes)> <Apply outputs shape> <created/inplace/view> <Apply node>"
+                "    This list is based on all functions in the profile")
-        print "    <created/inplace/view> is taked from the op declaration."
+        print >> file,  ("    <Sum apply outputs (bytes)>"
-        print "    Use DebugMode for warnings about inplace/view declaration being respected."
+                         " <Apply outputs shape>"
-        print
+                         " <created/inplace/view>"
-        for key, val in items[:N]:
+                         " <Apply node>")
+        print >> file,  (
+            "    <created/inplace/view> is taked from the op declaration.")
+        print >> file,  ("    Use DebugMode for warnings about inplace/view"
+                         " declaration being respected.")
+        print >> file, ""
+        items = node_mem.items()
+        items.sort(key=lambda a: a[1])
+        items.reverse()
+        for node, node_outputs_size in items[:N]:
            code = ['c'] * len(node.outputs)
-            for out, inp in getattr(key.op, 'destroy_map', {}).iteritems():
+            for out, inp in getattr(node.op, 'destroy_map', {}).iteritems():
                code[out] = "i"
-            for out, inp in getattr(key.op, 'view_map', {}).iteritems():
+            for out, inp in getattr(node.op, 'view_map', {}).iteritems():
                code[out] = "v"
-            shapes = str(fct_shapes[fgraph][key])
+            shapes = str(fct_shapes[node.fgraph][node])
-            print '     %9dB  %s %s %s' % (sum(val), shapes,
+            print >> file,  '     %9dB  %s %s %s' % (node_outputs_size,
-                                           ' '.join(code), key)
+                                                     shapes,
+                                                     ' '.join(code), node)
-        sum_remaining = sum(sum(shapes) for key, shapes in items[N:])
-        print ('   ... (remaining %i Apply account for %.2f%%(%.2fs) of'
+        sum_remaining = sum(size for _, size in items[N:])
-               ' the runtime)') % (max(0, len(nodes_mem) - N),
+        size_sum = sum(node_mem.values())
-                                   sum_remaining,
+        print >> file,  (
-                                   sum_remaining / size_sum)
+            '   ... (remaining %i Apply account for %.2f%%(%.2fs) of'
+            ' the runtime)') % (max(0, len(node_mem) - N),
+                                sum_remaining,
+                                sum_remaining / size_sum)
+        print >> file, ''
    def summary(self, file=sys.stderr, n_ops_to_print=20,
                n_applies_to_print=20):