提交 d1ad4a1e 作者: Roy Xue

1. Modify the node_executed_order in ../gof/vm.py

2. Create the node_cleared_order in ../gof/vm.py

3. Add loops for new orders in ../compile/vm.py
上级 77639247
...@@ -651,6 +651,9 @@ class ProfileStats(object): ...@@ -651,6 +651,9 @@ class ProfileStats(object):
# after the execution of the corresponding node. # after the execution of the corresponding node.
# It mean that after executing the node, # It mean that after executing the node,
# the corresponding variable can be gc. # the corresponding variable can be gc.
new_order = fgraph.profile.node_executed_order
new_storage = fgraph.profile.node_cleared_order
post_thunk_old_storage = [] post_thunk_old_storage = []
computed, last_user = theano.gof.link.gc_helper(order) computed, last_user = theano.gof.link.gc_helper(order)
for node in order: for node in order:
...@@ -683,6 +686,24 @@ class ProfileStats(object): ...@@ -683,6 +686,24 @@ class ProfileStats(object):
if not isinstance(old_v, str): if not isinstance(old_v, str):
running_memory_size -= old_v running_memory_size -= old_v
for node in new_order:
val = nodes_mem[node]
dmap = getattr(node.op, 'destroy_map', None)
vmap = getattr(node.op, 'view_map', None)
for idx, v in enumerate(val):
if dmap and idx in dmap:
new_node_memory_saved_by_inplace += v
elif vmap and idx in vmap:
new_node_memory_saved_by_view += v
elif not isinstance(v, str):
new_node_memory_size += v
new_running_memory_size += v
for new_s in new_storage:
new_v = var_mem[node.inputs[new_s]]
if not isinstance(new_v, str):
new_running_memory_size -= new_v
# Store the max of some stats by any function in this profile. # Store the max of some stats by any function in this profile.
max_sum_size = max(max_sum_size, sum_size) max_sum_size = max(max_sum_size, sum_size)
max_node_memory_size = max(max_node_memory_size, node_memory_size) max_node_memory_size = max(max_node_memory_size, node_memory_size)
......
...@@ -141,6 +141,12 @@ class VM(object): ...@@ -141,6 +141,12 @@ class VM(object):
profile.variable_shape = self.variable_shape.copy() profile.variable_shape = self.variable_shape.copy()
profile.variable_strides = self.variable_strides.copy() profile.variable_strides = self.variable_strides.copy()
if hasattr(self, 'node_executed_order'):
profile.node_executed_order = self.node_executed_order.copy()
if hasattr(self, 'node_cleared_order'):
profile.node_cleared_order = self.node_cleared_order.copy()
# clear the timer info out of the buffers # clear the timer info out of the buffers
for i in xrange(len(self.call_times)): for i in xrange(len(self.call_times)):
self.call_times[i] = 0.0 self.call_times[i] = 0.0
...@@ -298,7 +304,7 @@ class Stack(VM): ...@@ -298,7 +304,7 @@ class Stack(VM):
idx = self.node_idx[node] idx = self.node_idx[node]
t0 = time.time() t0 = time.time()
rval = self.thunks[idx]() rval = self.thunks[idx]()
self.node_order.append(idx) self.node_executed_order.append(idx)
# Some thunks on some computers run faster than the granularity # Some thunks on some computers run faster than the granularity
# of the time.time clock. # of the time.time clock.
...@@ -319,7 +325,8 @@ class Stack(VM): ...@@ -319,7 +325,8 @@ class Stack(VM):
compute_map = self.compute_map compute_map = self.compute_map
thunks = self.thunks thunks = self.thunks
dependencies = self.dependencies dependencies = self.dependencies
self.node_order = [] self.node_executed_order = []
self.node_cleared_order = []
for k in self.storage_map: for k in self.storage_map:
compute_map[k][0] = (k.owner is None) compute_map[k][0] = (k.owner is None)
...@@ -502,6 +509,7 @@ class Stack(VM): ...@@ -502,6 +509,7 @@ class Stack(VM):
break break
if empty_storage_map: if empty_storage_map:
storage_map[i][0] = None storage_map[i][0] = None
self.node_cleared_order.append(storage_map[v])
#See the not lazy gc code for explanations #See the not lazy gc code for explanations
#of compute_map change #of compute_map change
compute_map[i][0] = 2 compute_map[i][0] = 2
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论