提交 96548df9 authored 作者: Frederic's avatar Frederic

Make the memory profiler work when the profile contains multiple functions.

上级 db97d511
...@@ -580,7 +580,7 @@ class ProfileStats(object): ...@@ -580,7 +580,7 @@ class ProfileStats(object):
def summary_memory(self, file, N=None): def summary_memory(self, file, N=None):
fct_memory = {} # fgraph->dict(node->(outputs size)) fct_memory = {} # fgraph->dict(node->(outputs size))
fct_shapes = {} # fgraph->dict(node->[outputs shapes])) fct_shapes = {} # fgraph->dict(node->[outputs shapes]))
var_mem = {} var_mem = {} # varible->size in bytes, ignore the input variable
for node in self.apply_callcount.keys(): for node in self.apply_callcount.keys():
fct_memory.setdefault(node.fgraph, {}) fct_memory.setdefault(node.fgraph, {})
...@@ -598,15 +598,16 @@ class ProfileStats(object): ...@@ -598,15 +598,16 @@ class ProfileStats(object):
fct_memory[node.fgraph][node].append(v) fct_memory[node.fgraph][node].append(v)
fct_shapes[node.fgraph][node].append(sh) fct_shapes[node.fgraph][node].append(sh)
assert len(fct_memory) == 1 #Find the function that used the most memory
print max_sum_size = 0
print " Memory Profile" max_node_memory_size = 0
max_running_memory_size = 0
max_running_max_memory_size = 0
max_node_memory_saved_by_view = 0
max_node_memory_saved_by_inplace = 0
for fgraph, nodes_mem in fct_memory.iteritems(): for fgraph, nodes_mem in fct_memory.iteritems():
size_sum = sum([sum(val) sum_size = sum([sum(val)
for key, val in nodes_mem.iteritems()]) for key, val in nodes_mem.iteritems()])
print " Max without gc, inplace and view: %dKB" % int(
round(size_sum / 1024))
node_memory_size = 0 node_memory_size = 0
node_memory_saved_by_view = 0 node_memory_saved_by_view = 0
...@@ -618,6 +619,55 @@ class ProfileStats(object): ...@@ -618,6 +619,55 @@ class ProfileStats(object):
items.sort(key=lambda a: a[1]) items.sort(key=lambda a: a[1])
items.reverse() items.reverse()
order = fgraph.toposort()
computed, last_user = theano.gof.link.gc_helper(order)
for node in order:
post_thunk_old_storage.append([
input_idx
for input_idx, input in enumerate(node.inputs)
if (input in computed) and
(input not in fgraph.outputs) and
node == last_user[input]])
for node, val in items:
dmap = getattr(node.op, 'destroy_map', None)
vmap = getattr(node.op, 'view_map', None)
for idx, v in enumerate(val):
# TODO check the op returned a view
if dmap and idx in dmap:
node_memory_saved_by_inplace += v
# TODO check the op returned a view
elif vmap and idx in vmap:
node_memory_saved_by_view += v
else:
node_memory_size += v
running_memory_size += v
if running_memory_size > running_max_memory_size:
running_max_memory_size = running_memory_size
old_storage = post_thunk_old_storage[order.index(node)]
for old_s in old_storage:
running_memory_size -= var_mem[node.inputs[old_s]]
max_sum_size = max(max_sum_size, sum_size)
max_node_memory_size = max(max_node_memory_size, node_memory_size)
max_running_memory_size = max(max_running_memory_size,
running_memory_size)
max_running_max_memory_size = max(max_running_max_memory_size,
running_max_memory_size)
max_node_memory_saved_by_view = max(max_node_memory_saved_by_view,
node_memory_saved_by_view)
max_node_memory_saved_by_inplace = max(
max_node_memory_saved_by_inplace, node_memory_saved_by_inplace)
print
if len(fct_memory) > 1:
print "Memory Profile (the max between all function in that profile)"
else:
print "Memory Profile"
print "---"
size_sum = sum(var_mem.values())
print " Max without gc, inplace and view: %dKB" % int(
round(size_sum / 1024))
order = fgraph.toposort() order = fgraph.toposort()
computed, last_user = theano.gof.link.gc_helper(order) computed, last_user = theano.gof.link.gc_helper(order)
for node in order: for node in order:
...@@ -650,15 +700,15 @@ class ProfileStats(object): ...@@ -650,15 +700,15 @@ class ProfileStats(object):
pass pass
print " Max allow_gc=False: %dKB" % int(round( print " Max allow_gc=False: %dKB" % int(round(
node_memory_size / 1024.)) max_node_memory_size / 1024.))
print " Max linker=c|py: %dKB" % int(round( print " Max linker=c|py: %dKB" % int(round(
running_max_memory_size / 1024.)) max_running_max_memory_size / 1024.))
print " Memory saved by view: %dKB" % int(round( print " Memory saved by view: %dKB" % int(round(
node_memory_saved_by_view / 1024.)) max_node_memory_saved_by_view / 1024.))
print " Memory saved by inplace: %dKB" % int(round( print " Memory saved by inplace: %dKB" % int(round(
node_memory_saved_by_inplace / 1024.)) max_node_memory_saved_by_inplace / 1024.))
print " Memory saved by GC: %dKB" % int(round(( print " Memory saved by GC: %dKB" % int(round((
node_memory_size - running_max_memory_size) / 1024.)) max_node_memory_size - max_running_max_memory_size) / 1024.))
if (hasattr(theano, 'sandbox') and if (hasattr(theano, 'sandbox') and
hasattr(theano.sandbox, 'cuda') and hasattr(theano.sandbox, 'cuda') and
hasattr(theano.sandbox.cuda, 'cuda_ndarray') and hasattr(theano.sandbox.cuda, 'cuda_ndarray') and
...@@ -669,6 +719,8 @@ class ProfileStats(object): ...@@ -669,6 +719,8 @@ class ProfileStats(object):
gpu_max / 1024.)) gpu_max / 1024.))
print print
if len(fct_memory) > 1:
print " This list is based on all functions in the profile"
print " <Sum apply outputs (bytes)> <Apply outputs shape> <created/inplace/view> <Apply node>" print " <Sum apply outputs (bytes)> <Apply outputs shape> <created/inplace/view> <Apply node>"
print " <created/inplace/view> is taked from the op declaration." print " <created/inplace/view> is taked from the op declaration."
print " Use DebugMode for warnings about inplace/view declaration being respected." print " Use DebugMode for warnings about inplace/view declaration being respected."
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论