提交 6032d56b authored 作者: Roy Xue's avatar Roy Xue

Add CPU/GPU check, and cg mark for different mem info sets

上级 a2dc179b
...@@ -690,11 +690,12 @@ class ProfileStats(object): ...@@ -690,11 +690,12 @@ class ProfileStats(object):
The sum of memory saved by reusing the input instead of The sum of memory saved by reusing the input instead of
new allocation new allocation
""" """
node_memory_size = 0 # Initial Mem info values [CPU, GPU]
running_memory_size = 0 node_memory_size = [0, 0]
running_max_memory_size = 0 running_memory_size = [0, 0]
node_memory_saved_by_view = 0 running_max_memory_size = [0, 0]
node_memory_saved_by_inplace = 0 node_memory_saved_by_view = [0, 0]
node_memory_saved_by_inplace = [0, 0]
# This take only the inputs/outputs dependencies. # This take only the inputs/outputs dependencies.
dependencies = fgraph.profile.dependencies dependencies = fgraph.profile.dependencies
...@@ -723,17 +724,25 @@ class ProfileStats(object): ...@@ -723,17 +724,25 @@ class ProfileStats(object):
for v in val: for v in val:
# TODO check the op returned a view # TODO check the op returned a view
if isinstance(v.type, theano.sandbox.cuda.CudaNdarrayType):
cg = 1
else:
cg = 0
if dmap and idx in dmap: if dmap and idx in dmap:
node_memory_saved_by_inplace += v node_memory_saved_by_inplace[cg] += v
# TODO check the op returned a view # TODO check the op returned a view
elif vmap and idx in vmap: elif vmap and idx in vmap:
node_memory_saved_by_view += v node_memory_saved_by_view[cg] += v
idx += 1 idx += 1
# Update the Python emulating dicts and add the memory # Update the Python emulating dicts and add the memory
# allocated by the node # allocated by the node
idx2 = 0 idx2 = 0
for out in node.outputs: for out in node.outputs:
if isinstance(v.type, theano.sandbox.cuda.CudaNdarrayType):
cg = 1
else:
cg = 0
ins = None ins = None
if dmap and idx2 in dmap: if dmap and idx2 in dmap:
vidx = dmap[idx2] vidx = dmap[idx2]
...@@ -757,17 +766,23 @@ class ProfileStats(object): ...@@ -757,17 +766,23 @@ class ProfileStats(object):
view_of[out] = origin view_of[out] = origin
viewed_by[origin].append(out) viewed_by[origin].append(out)
else: else:
running_memory_size += var_mem[out] running_memory_size[cg] += var_mem[out]
node_memory_size += var_mem[out] node_memory_size[cg] += var_mem[out]
idx2 += 1 idx2 += 1
running_max_memory_size = max(running_max_memory_size, running_max_memory_size[0] = max(running_max_memory_size[0],
running_memory_size) running_memory_size[0])
running_max_memory_size[1] = max(running_max_memory_size[1],
running_memory_size[1])
# Mimic the combination of Theano and Python gc # Mimic the combination of Theano and Python gc
for ins in node.inputs: for ins in node.inputs:
assert not (ins in view_of and viewed_by[ins]) assert not (ins in view_of and viewed_by[ins])
# we trac the original var, so this shouldn't happen # we trac the original var, so this shouldn't happen
if isinstance(v.type, theano.sandbox.cuda.CudaNdarrayType):
cg = 1
else:
cg = 2
if (dependencies[ins] and if (dependencies[ins] and
ins not in fgraph.outputs and ins not in fgraph.outputs and
ins.owner and ins.owner and
...@@ -780,7 +795,7 @@ class ProfileStats(object): ...@@ -780,7 +795,7 @@ class ProfileStats(object):
if (not viewed_by[origin] and if (not viewed_by[origin] and
origin not in fgraph.inputs and origin not in fgraph.inputs and
not isinstance(origin, theano.Constant)): not isinstance(origin, theano.Constant)):
running_memory_size -= var_mem[origin] running_memory_size[cg] -= var_mem[origin]
else: else:
# ins is viewed_by something else, so its # ins is viewed_by something else, so its
# memory isn't freed # memory isn't freed
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论