提交 49d1e2f8 authored 作者: Frederic's avatar Frederic

Fix the memory profile when multiple functions share the same profile.

Also fix the printing so that the memory summary is written to the given file instead of stdout.
上级 96548df9
...@@ -580,7 +580,8 @@ class ProfileStats(object): ...@@ -580,7 +580,8 @@ class ProfileStats(object):
def summary_memory(self, file, N=None): def summary_memory(self, file, N=None):
fct_memory = {} # fgraph->dict(node->(outputs size)) fct_memory = {} # fgraph->dict(node->(outputs size))
fct_shapes = {} # fgraph->dict(node->[outputs shapes])) fct_shapes = {} # fgraph->dict(node->[outputs shapes]))
var_mem = {} # varible->size in bytes, ignore the input variable var_mem = {} # varible->size in bytes; don't include input variables
node_mem = {} # node->total outputs size
for node in self.apply_callcount.keys(): for node in self.apply_callcount.keys():
fct_memory.setdefault(node.fgraph, {}) fct_memory.setdefault(node.fgraph, {})
...@@ -597,6 +598,7 @@ class ProfileStats(object): ...@@ -597,6 +598,7 @@ class ProfileStats(object):
var_mem[out] = v var_mem[out] = v
fct_memory[node.fgraph][node].append(v) fct_memory[node.fgraph][node].append(v)
fct_shapes[node.fgraph][node].append(sh) fct_shapes[node.fgraph][node].append(sh)
node_mem[node] = sum([var_mem[var] for var in node.outputs])
#Find the function that used the most memory #Find the function that used the most memory
max_sum_size = 0 max_sum_size = 0
...@@ -657,57 +659,27 @@ class ProfileStats(object): ...@@ -657,57 +659,27 @@ class ProfileStats(object):
node_memory_saved_by_view) node_memory_saved_by_view)
max_node_memory_saved_by_inplace = max( max_node_memory_saved_by_inplace = max(
max_node_memory_saved_by_inplace, node_memory_saved_by_inplace) max_node_memory_saved_by_inplace, node_memory_saved_by_inplace)
del fgraph, nodes_mem, items, post_thunk_old_storage, node
print
if len(fct_memory) > 1: if len(fct_memory) > 1:
print "Memory Profile (the max between all function in that profile)" print >> file, ("Memory Profile "
"(the max between all function in that profile)")
else: else:
print "Memory Profile" print >> file, "Memory Profile"
print "---" print >> file, "---"
size_sum = sum(var_mem.values())
print " Max without gc, inplace and view: %dKB" % int(
round(size_sum / 1024))
order = fgraph.toposort() print >> file, " Max without gc, inplace and view: %dKB" % int(
computed, last_user = theano.gof.link.gc_helper(order) round(max_sum_size / 1024))
for node in order:
post_thunk_old_storage.append([
input_idx
for input_idx, input in enumerate(node.inputs)
if (input in computed) and
(input not in fgraph.outputs) and
node == last_user[input]])
for node, val in items:
dmap = getattr(node.op, 'destroy_map', None)
vmap = getattr(node.op, 'view_map', None)
for idx, v in enumerate(val): print >> file, " Max allow_gc=False: %dKB" % int(round(
# TODO check the op returned a view
if dmap and idx in dmap:
node_memory_saved_by_inplace += v
# TODO check the op returned a view
elif vmap and idx in vmap:
node_memory_saved_by_view += v
else:
node_memory_size += v
running_memory_size += v
if running_memory_size > running_max_memory_size:
running_max_memory_size = running_memory_size
old_storage = post_thunk_old_storage[order.index(node)]
for old_s in old_storage:
running_memory_size -= var_mem[node.inputs[old_s]]
pass
pass
print " Max allow_gc=False: %dKB" % int(round(
max_node_memory_size / 1024.)) max_node_memory_size / 1024.))
print " Max linker=c|py: %dKB" % int(round( print >> file, " Max linker=c|py: %dKB" % int(round(
max_running_max_memory_size / 1024.)) max_running_max_memory_size / 1024.))
print " Memory saved by view: %dKB" % int(round( print >> file, " Memory saved by view: %dKB" % int(round(
max_node_memory_saved_by_view / 1024.)) max_node_memory_saved_by_view / 1024.))
print " Memory saved by inplace: %dKB" % int(round( print >> file, " Memory saved by inplace: %dKB" % int(round(
max_node_memory_saved_by_inplace / 1024.)) max_node_memory_saved_by_inplace / 1024.))
print " Memory saved by GC: %dKB" % int(round(( print >> file, " Memory saved by GC: %dKB" % int(round((
max_node_memory_size - max_running_max_memory_size) / 1024.)) max_node_memory_size - max_running_max_memory_size) / 1024.))
if (hasattr(theano, 'sandbox') and if (hasattr(theano, 'sandbox') and
hasattr(theano.sandbox, 'cuda') and hasattr(theano.sandbox, 'cuda') and
...@@ -715,31 +687,45 @@ class ProfileStats(object): ...@@ -715,31 +687,45 @@ class ProfileStats(object):
hasattr(theano.sandbox.cuda.cuda_ndarray.cuda_ndarray, hasattr(theano.sandbox.cuda.cuda_ndarray.cuda_ndarray,
'theano_allocated')): 'theano_allocated')):
_, gpu_max = theano.sandbox.cuda.cuda_ndarray.cuda_ndarray.theano_allocated() _, gpu_max = theano.sandbox.cuda.cuda_ndarray.cuda_ndarray.theano_allocated()
print " Max Memory allocated on the GPU(for all functions): %dKB" % int(round( print >> file, (" Max Memory allocated on the GPU"
gpu_max / 1024.)) "(for all functions): %dKB" %
int(round(gpu_max / 1024.)))
print print >> file, ""
if len(fct_memory) > 1: if len(fct_memory) > 1:
print " This list is based on all functions in the profile" print >> file, (
print " <Sum apply outputs (bytes)> <Apply outputs shape> <created/inplace/view> <Apply node>" " This list is based on all functions in the profile")
print " <created/inplace/view> is taked from the op declaration." print >> file, (" <Sum apply outputs (bytes)>"
print " Use DebugMode for warnings about inplace/view declaration being respected." " <Apply outputs shape>"
print " <created/inplace/view>"
for key, val in items[:N]: " <Apply node>")
print >> file, (
" <created/inplace/view> is taked from the op declaration.")
print >> file, (" Use DebugMode for warnings about inplace/view"
" declaration being respected.")
print >> file, ""
items = node_mem.items()
items.sort(key=lambda a: a[1])
items.reverse()
for node, node_outputs_size in items[:N]:
code = ['c'] * len(node.outputs) code = ['c'] * len(node.outputs)
for out, inp in getattr(key.op, 'destroy_map', {}).iteritems(): for out, inp in getattr(node.op, 'destroy_map', {}).iteritems():
code[out] = "i" code[out] = "i"
for out, inp in getattr(key.op, 'view_map', {}).iteritems(): for out, inp in getattr(node.op, 'view_map', {}).iteritems():
code[out] = "v" code[out] = "v"
shapes = str(fct_shapes[fgraph][key]) shapes = str(fct_shapes[node.fgraph][node])
print ' %9dB %s %s %s' % (sum(val), shapes, print >> file, ' %9dB %s %s %s' % (node_outputs_size,
' '.join(code), key) shapes,
' '.join(code), node)
sum_remaining = sum(sum(shapes) for key, shapes in items[N:])
print (' ... (remaining %i Apply account for %.2f%%(%.2fs) of' sum_remaining = sum(size for _, size in items[N:])
' the runtime)') % (max(0, len(nodes_mem) - N), size_sum = sum(node_mem.values())
print >> file, (
' ... (remaining %i Apply account for %.2f%%(%.2fs) of'
' the runtime)') % (max(0, len(node_mem) - N),
sum_remaining, sum_remaining,
sum_remaining / size_sum) sum_remaining / size_sum)
print >> file, ''
def summary(self, file=sys.stderr, n_ops_to_print=20, def summary(self, file=sys.stderr, n_ops_to_print=20,
n_applies_to_print=20): n_applies_to_print=20):
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论