提交 4abd8af7 authored 作者: Roy Xue's avatar Roy Xue

Add comments

上级 9d264092
......@@ -671,15 +671,20 @@ class ProfileStats(object):
node_memory_saved_by_inplace = 0
dependencies = fgraph.profile.dependencies
# two data structures used to mimic the Python gc
view_of = {} # {var1: original var viewed by var1}
# "Original" means we do not keep track of all the intermediate view relationships.
viewed_by = {}# {var1: [vars that view var1]}
# The length of the list is the value of the Python ref count. But we use a list, not just the ref count value.
# This is safer and helps detect potential bugs in the algorithm.
for var in fgraph.variable
for node in order:
idx = 0
dmap = getattr(node.op, 'destroy_map', None)
vmap = getattr(node.op, 'view_map', None)
val = nodes_mem[node]
viewed_by = {}
for i in node.inputs:
viewed_by[i] = []
view_of = {}
for v in val:
# TODO check the op returned a view
......@@ -690,19 +695,27 @@ class ProfileStats(object):
node_memory_saved_by_view += v
idx += 1
# Update the Python emulating dicts and add the memory allocated by the node
idx2 = 0
for out in node.outputs:
if (dmap or vmap):
for i in node.inputs:
view_of[out] = view_of.get(i, i)
viewed_by[i].append(out)
if (dmap and idx2 in dmap) or (vmap and idx2 in vmap):
# This is needed for destroy_map in case it returns a partial view that is destroyed,
# so the output could be different from the input.
for ins in node.inputs:
assert len[ins] == 1
view_of[out] = view_of.get(ins, ins)# This get make that we keep trac of view only again the original
viewed_by[ins].append(out)
else:
running_memory_size += var_mem[out]
node_memory_size += var_mem[out]
idx2 += 1
running_max_memory_size = max(running_max_memory_size, running_memory_size)
# Mimic the combination of Theano and Python gc
for ins in node.inputs:
assert not (ins in view_of and i in viewed_by)
# we keep track of the original var, so this shouldn't happen
if dependencies[ins] and ins not in fgraph.outputs:
if ins not in view_of and not viewed_by.get(ins, []):
running_memory_size -= var_mem[ins]
......@@ -711,9 +724,11 @@ class ProfileStats(object):
viewed_by[origin].remove(ins)
if not viewed_by[origin]:
running_memory_size -= var_mem[origin]
else:
# ins is viewed_by something else, so its memory isn't freed
pass
return [node_memory_size, running_memory_size, running_max_memory_size,
node_memory_saved_by_inplace, node_memory_saved_by_view]
return [node_memory_size, running_memory_size, running_max_memory_size, node_memory_saved_by_inplace, node_memory_saved_by_view]
def count_minimum_peak(node_list, fgraph, nodes_mem):
global maybe_executed, mem_count, mem_bound, max_mem_count
......@@ -754,10 +769,18 @@ class ProfileStats(object):
if c != "output" and check_node_state(c):
executable_nodes.add(c)
viewed_by = {}# {var1:[vars that view var1]}
# The length of the list is the value of the Python ref count. But we use a list, not just the ref count value.
# This is safer and helps detect potential bugs in the algorithm.
for i in node.inputs:
viewed_by[i] = []
view_of = {}# {var1: original var viewed by var1}
# "Original" means we do not keep track of all the intermediate view relationships.
def min_memory_generator(executable_nodes):
"""
Generate all valid node order from node_list
and compute its memory peaf
and compute its memory peak
:param executable_nodes: Set of executable nodes
"""
......@@ -778,29 +801,30 @@ class ProfileStats(object):
mem_freed = 0
max_storage = max_mem_count
# {var1:[vars that view var1]}
viewed_by = {}
for i in node.inputs:
viewed_by[i] = []
# {var1: original var viewed by var1}
view_of = {}
dmap = getattr(node.op, 'destroy_map', None)
vmap = getattr(node.op, 'view_map', None)
idx = 0
# Update the Python emulating dicts and add the memory allocated by the node
for out in node.outputs:
if (dmap or vmap):
for i in node.inputs:
view_of[out] = view_of.get(i, i)
viewed_by[i].append(out)
if (dmap and idx in dmap) or (vmap and idx in vmap):
# This is needed for destroy_map in case it returns a partial view that is destroyed,
# so the output could be different from the input.
for ins in node.inputs:
view_of[out] = view_of.get(ins, ins)# This get make that we keep trac of view only again the original
viewed_by[ins].append(out)
else:
mem_created += var_mem[out]
idx += 1
mem_count += mem_created
max_mem_count = max(max_mem_count, mem_count)
# Mimic the combination of Theano and Python gc.
for ins in node.inputs:
assert not (ins in view_of and i in viewed_by)
if dependencies[ins] and ins not in fgraph.outputs:
assert not (ins in view_of and ins in viewed_by)
# we keep track of the original var, so this shouldn't happen
if dependencies[ins] and ins not in fgraph.outputs and ins.owner:
if all(compute_map[v] for v in dependencies[ins]):
if ins not in view_of and not viewed_by.get(ins, []):
mem_freed += var_mem[ins]
......@@ -810,6 +834,7 @@ class ProfileStats(object):
if not viewed_by[origin]:
mem_freed += var_mem[origin]
else:
# ins is viewed_by something else, so its memory isn't freed
pass
mem_count -= mem_freed
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论