提交 d1ad4a1e authored 作者: Roy Xue's avatar Roy Xue

1. Modify the node_executed_order in ../gof/vm.py

2. Create the node_cleared_order in ../gof/vm.py
3. Add loops for new orders in ../compile/vm.py
上级 77639247
......@@ -651,6 +651,9 @@ class ProfileStats(object):
# after the execution of the corresponding node.
# It means that after executing the node,
# the corresponding variable can be garbage-collected.
new_order = fgraph.profile.node_executed_order
new_storage = fgraph.profile.node_cleared_order
post_thunk_old_storage = []
computed, last_user = theano.gof.link.gc_helper(order)
for node in order:
......@@ -683,6 +686,24 @@ class ProfileStats(object):
if not isinstance(old_v, str):
running_memory_size -= old_v
for node in new_order:
val = nodes_mem[node]
dmap = getattr(node.op, 'destroy_map', None)
vmap = getattr(node.op, 'view_map', None)
for idx, v in enumerate(val):
if dmap and idx in dmap:
new_node_memory_saved_by_inplace += v
elif vmap and idx in vmap:
new_node_memory_saved_by_view += v
elif not isinstance(v, str):
new_node_memory_size += v
new_running_memory_size += v
for new_s in new_storage:
new_v = var_mem[node.inputs[new_s]]
if not isinstance(new_v, str):
new_running_memory_size -= new_v
# Store the max of some stats by any function in this profile.
max_sum_size = max(max_sum_size, sum_size)
max_node_memory_size = max(max_node_memory_size, node_memory_size)
......
......@@ -141,6 +141,12 @@ class VM(object):
profile.variable_shape = self.variable_shape.copy()
profile.variable_strides = self.variable_strides.copy()
if hasattr(self, 'node_executed_order'):
profile.node_executed_order = self.node_executed_order.copy()
if hasattr(self, 'node_cleared_order'):
profile.node_cleared_order = self.node_cleared_order.copy()
# clear the timer info out of the buffers
for i in xrange(len(self.call_times)):
self.call_times[i] = 0.0
......@@ -298,7 +304,7 @@ class Stack(VM):
idx = self.node_idx[node]
t0 = time.time()
rval = self.thunks[idx]()
self.node_order.append(idx)
self.node_executed_order.append(idx)
# Some thunks on some computers run faster than the granularity
# of the time.time clock.
......@@ -319,7 +325,8 @@ class Stack(VM):
compute_map = self.compute_map
thunks = self.thunks
dependencies = self.dependencies
self.node_order = []
self.node_executed_order = []
self.node_cleared_order = []
for k in self.storage_map:
compute_map[k][0] = (k.owner is None)
......@@ -502,6 +509,7 @@ class Stack(VM):
break
if empty_storage_map:
storage_map[i][0] = None
self.node_cleared_order.append(storage_map[v])
# See the non-lazy gc code for an explanation
# of the compute_map change.
compute_map[i][0] = 2
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论