提交 96548df9 authored 作者: Frederic's avatar Frederic

Make the memory profiler work when the profile contain multiple function.

上级 db97d511
...@@ -580,7 +580,7 @@ class ProfileStats(object): ...@@ -580,7 +580,7 @@ class ProfileStats(object):
def summary_memory(self, file, N=None): def summary_memory(self, file, N=None):
fct_memory = {} # fgraph->dict(node->(outputs size)) fct_memory = {} # fgraph->dict(node->(outputs size))
fct_shapes = {} # fgraph->dict(node->[outputs shapes])) fct_shapes = {} # fgraph->dict(node->[outputs shapes]))
var_mem = {} var_mem = {} # varible->size in bytes, ignore the input variable
for node in self.apply_callcount.keys(): for node in self.apply_callcount.keys():
fct_memory.setdefault(node.fgraph, {}) fct_memory.setdefault(node.fgraph, {})
...@@ -598,15 +598,16 @@ class ProfileStats(object): ...@@ -598,15 +598,16 @@ class ProfileStats(object):
fct_memory[node.fgraph][node].append(v) fct_memory[node.fgraph][node].append(v)
fct_shapes[node.fgraph][node].append(sh) fct_shapes[node.fgraph][node].append(sh)
assert len(fct_memory) == 1 #Find the function that used the most memory
print max_sum_size = 0
print " Memory Profile" max_node_memory_size = 0
max_running_memory_size = 0
max_running_max_memory_size = 0
max_node_memory_saved_by_view = 0
max_node_memory_saved_by_inplace = 0
for fgraph, nodes_mem in fct_memory.iteritems(): for fgraph, nodes_mem in fct_memory.iteritems():
size_sum = sum([sum(val) sum_size = sum([sum(val)
for key, val in nodes_mem.iteritems()]) for key, val in nodes_mem.iteritems()])
print " Max without gc, inplace and view: %dKB" % int(
round(size_sum / 1024))
node_memory_size = 0 node_memory_size = 0
node_memory_saved_by_view = 0 node_memory_saved_by_view = 0
...@@ -646,48 +647,99 @@ class ProfileStats(object): ...@@ -646,48 +647,99 @@ class ProfileStats(object):
old_storage = post_thunk_old_storage[order.index(node)] old_storage = post_thunk_old_storage[order.index(node)]
for old_s in old_storage: for old_s in old_storage:
running_memory_size -= var_mem[node.inputs[old_s]] running_memory_size -= var_mem[node.inputs[old_s]]
pass max_sum_size = max(max_sum_size, sum_size)
pass max_node_memory_size = max(max_node_memory_size, node_memory_size)
max_running_memory_size = max(max_running_memory_size,
print " Max allow_gc=False: %dKB" % int(round( running_memory_size)
node_memory_size / 1024.)) max_running_max_memory_size = max(max_running_max_memory_size,
print " Max linker=c|py: %dKB" % int(round( running_max_memory_size)
running_max_memory_size / 1024.)) max_node_memory_saved_by_view = max(max_node_memory_saved_by_view,
print " Memory saved by view: %dKB" % int(round( node_memory_saved_by_view)
node_memory_saved_by_view / 1024.)) max_node_memory_saved_by_inplace = max(
print " Memory saved by inplace: %dKB" % int(round( max_node_memory_saved_by_inplace, node_memory_saved_by_inplace)
node_memory_saved_by_inplace / 1024.))
print " Memory saved by GC: %dKB" % int(round((
node_memory_size - running_max_memory_size) / 1024.))
if (hasattr(theano, 'sandbox') and
hasattr(theano.sandbox, 'cuda') and
hasattr(theano.sandbox.cuda, 'cuda_ndarray') and
hasattr(theano.sandbox.cuda.cuda_ndarray.cuda_ndarray,
'theano_allocated')):
_, gpu_max = theano.sandbox.cuda.cuda_ndarray.cuda_ndarray.theano_allocated()
print " Max Memory allocated on the GPU(for all functions): %dKB" % int(round(
gpu_max / 1024.))
print print
print " <Sum apply outputs (bytes)> <Apply outputs shape> <created/inplace/view> <Apply node>" if len(fct_memory) > 1:
print " <created/inplace/view> is taked from the op declaration." print "Memory Profile (the max between all function in that profile)"
print " Use DebugMode for warnings about inplace/view declaration being respected." else:
print print "Memory Profile"
for key, val in items[:N]: print "---"
code = ['c'] * len(node.outputs) size_sum = sum(var_mem.values())
for out, inp in getattr(key.op, 'destroy_map', {}).iteritems(): print " Max without gc, inplace and view: %dKB" % int(
code[out] = "i" round(size_sum / 1024))
for out, inp in getattr(key.op, 'view_map', {}).iteritems():
code[out] = "v" order = fgraph.toposort()
shapes = str(fct_shapes[fgraph][key]) computed, last_user = theano.gof.link.gc_helper(order)
print ' %9dB %s %s %s' % (sum(val), shapes, for node in order:
' '.join(code), key) post_thunk_old_storage.append([
input_idx
sum_remaining = sum(sum(shapes) for key, shapes in items[N:]) for input_idx, input in enumerate(node.inputs)
print (' ... (remaining %i Apply account for %.2f%%(%.2fs) of' if (input in computed) and
' the runtime)') % (max(0, len(nodes_mem) - N), (input not in fgraph.outputs) and
sum_remaining, node == last_user[input]])
sum_remaining / size_sum) for node, val in items:
dmap = getattr(node.op, 'destroy_map', None)
vmap = getattr(node.op, 'view_map', None)
for idx, v in enumerate(val):
# TODO check the op returned a view
if dmap and idx in dmap:
node_memory_saved_by_inplace += v
# TODO check the op returned a view
elif vmap and idx in vmap:
node_memory_saved_by_view += v
else:
node_memory_size += v
running_memory_size += v
if running_memory_size > running_max_memory_size:
running_max_memory_size = running_memory_size
old_storage = post_thunk_old_storage[order.index(node)]
for old_s in old_storage:
running_memory_size -= var_mem[node.inputs[old_s]]
pass
pass
print " Max allow_gc=False: %dKB" % int(round(
max_node_memory_size / 1024.))
print " Max linker=c|py: %dKB" % int(round(
max_running_max_memory_size / 1024.))
print " Memory saved by view: %dKB" % int(round(
max_node_memory_saved_by_view / 1024.))
print " Memory saved by inplace: %dKB" % int(round(
max_node_memory_saved_by_inplace / 1024.))
print " Memory saved by GC: %dKB" % int(round((
max_node_memory_size - max_running_max_memory_size) / 1024.))
if (hasattr(theano, 'sandbox') and
hasattr(theano.sandbox, 'cuda') and
hasattr(theano.sandbox.cuda, 'cuda_ndarray') and
hasattr(theano.sandbox.cuda.cuda_ndarray.cuda_ndarray,
'theano_allocated')):
_, gpu_max = theano.sandbox.cuda.cuda_ndarray.cuda_ndarray.theano_allocated()
print " Max Memory allocated on the GPU(for all functions): %dKB" % int(round(
gpu_max / 1024.))
print
if len(fct_memory) > 1:
print " This list is based on all functions in the profile"
print " <Sum apply outputs (bytes)> <Apply outputs shape> <created/inplace/view> <Apply node>"
print " <created/inplace/view> is taked from the op declaration."
print " Use DebugMode for warnings about inplace/view declaration being respected."
print
for key, val in items[:N]:
code = ['c'] * len(node.outputs)
for out, inp in getattr(key.op, 'destroy_map', {}).iteritems():
code[out] = "i"
for out, inp in getattr(key.op, 'view_map', {}).iteritems():
code[out] = "v"
shapes = str(fct_shapes[fgraph][key])
print ' %9dB %s %s %s' % (sum(val), shapes,
' '.join(code), key)
sum_remaining = sum(sum(shapes) for key, shapes in items[N:])
print (' ... (remaining %i Apply account for %.2f%%(%.2fs) of'
' the runtime)') % (max(0, len(nodes_mem) - N),
sum_remaining,
sum_remaining / size_sum)
def summary(self, file=sys.stderr, n_ops_to_print=20, def summary(self, file=sys.stderr, n_ops_to_print=20,
n_applies_to_print=20): n_applies_to_print=20):
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论