Commit 061bb990 authored by Frederic

The old ProfileMode memory profile is now removed.

We print a message telling the user to use the new profiler in that case. This repairs ProfileMode.
Parent 3005d9d3
......@@ -50,7 +50,6 @@ class Profile_Maker(FunctionMaker):
#initialize the timers
for i, node in enumerate(ret.maker.fgraph.toposort()):
profile.apply_time[node] = 0.0
profile.outputs_size[node] = [0.0] * len(node.outputs)
# a thunk_group is a list of the thunks from each linker
# corresponding to the i'th position in the toposort.
......@@ -70,13 +69,13 @@ class Profile_Maker(FunctionMaker):
def new_fn():
self.mode.apply_time = self.mode.profile_stats[ret].apply_time
self.mode.outputs_size = self.mode.profile_stats[ret].outputs_size
self.mode.variable_shape = self.mode.profile_stats[ret].variable_shape
ret_fn()
# delete the old apply_time variable
# because it doesn't mean the same thing anymore.
# This prevents old code from looking like it still works.
del self.mode.apply_time
del self.mode.outputs_size
del self.mode.variable_shape
ret.fn = new_fn
......@@ -165,34 +164,10 @@ class ProfileMode(Mode):
t0 = time.time()
th()
dt = time.time() - t0
size = []
for o in th.outputs:
if not hasattr(o[0], 'size'):
#if the output type don't have a size attribute, set -1
#to signify we can't evaluate it.
#This happen at least for mtrand.RandomState type(in numpy)
size.append(-1)
continue
s = o[0].size
#can't use o[0].dtype.itemsize as dtype is a str for
#CudaNdarray
dtype = str(o[0].dtype)
dtype2 = dtype[-2:]
if dtype2 == '32':
s *= 4
elif dtype2 == '64':
s *= 8
elif dtype2 == '16':
s *= 2
elif dtype[-1] == '8':
s *= 1
elif dtype[-3:] == '128':
s *= 16
else:
raise Exception("Can't determine the memory size of dtype",
o[0].dtype)
size.append(s)
self.outputs_size[node] = size
for var, data in zip(node.outputs, th.outputs):
sh = getattr(data[0], 'shape', 'input no shape')
self.variable_shape[var] = sh
self.apply_time[node] += max(dt, 1e-14)
self.provided_linker = linker
......@@ -247,15 +222,14 @@ class ProfileMode(Mode):
print i, n
apply_cimpl = {}
outputs_size = {}
for fn, ps in self.profile_stats.items():
apply_cimpl.update(ps.apply_cimpl)
message = self.message
outputs_size = {}
variable_shape = {}
for fn, ps in self.profile_stats.items():
outputs_size.update(ps.outputs_size)
variable_shape.update(ps.variable_shape)
other_time = dict(
linker_time=sum(
......@@ -265,7 +239,7 @@ class ProfileMode(Mode):
self.print_summary_("print_summary",
compile_time, fct_call_time, fct_call,
apply_time, apply_cimpl, message, outputs_size,
apply_time, apply_cimpl, message, variable_shape,
self.local_time, other_time,
**kwargs)
......@@ -300,19 +274,19 @@ class ProfileMode(Mode):
apply_time = diff_dict(self.apply_time, other.apply_time)
apply_cimpl = self.apply_cimpl and other.apply_cimpl
message = self.message
outputs_size = diff_dict(self.outputs_size, other.outputs_size)
variable_shape = diff_dict(self.variable_shape, other.variable_shape)
other_time = {'linker_time': self.linker_time - other.linker_time,
'optimizer_time': self.optimizer_time -
other.optimizer_time}
self.print_summary_("print_diff_summary", compile_time,
fct_call_time, fct_call,
apply_time, apply_cimpl, message, outputs_size,
apply_time, apply_cimpl, message, variable_shape,
print_apply=False, other_time=other_time,
**kwargs)
@staticmethod
def print_summary_(fct_name, compile_time, fct_call_time, fct_call,
apply_time, apply_cimpl, message, outputs_size,
apply_time, apply_cimpl, message, variable_shape,
local_time, other_time,
n_apply_to_print=config.ProfileMode.n_apply_to_print,
n_ops_to_print=config.ProfileMode.n_ops_to_print,
......@@ -502,96 +476,17 @@ class ProfileMode(Mode):
print '(*) Op is running a c implementation'
for printer in profiler_printers:
printer(fct_name, compile_time, fct_call_time, fct_call,
apply_time, apply_cimpl, message, outputs_size,
apply_time, apply_cimpl, message, variable_shape,
other_time)
if not outputs_size:
if not variable_shape:
print """\nProfile of Theano intermediate memory disabled.
To enabled, put the Theano flag ProfileMode.profile_memory to True."""
else:
fct_memory={}#fgraph->dict(node->(outputs size))
var_mem = {}
for node, val in outputs_size.items():
fct_memory.setdefault(node.fgraph, {})
fct_memory[node.fgraph][node]=val
for out,v in zip(node.outputs,val):
var_mem[out]=v
print
print "Profile of Theano functions memory:"
print "(This check only the output of each apply node. It don't check the temporary memory used by the op in the apply node.)"
nb_skipped = 0
for fgraph, nodes_mem in fct_memory.iteritems():
size_sum=sum([sum(val) for key,val in nodes_mem.iteritems()])
if size_sum < min_memory_size:
nb_skipped += 1
continue
print "Theano fct:", [fct for fct in fct_call.keys() if fct.maker.fgraph is fgraph][0].name
print " Max without gc, inplace and view (KB)",size_sum/1024
node_memory_size = 0
node_memory_saved_by_view = 0
node_memory_saved_by_inplace = 0
running_memory_size = 0
running_max_memory_size = 0
post_thunk_old_storage = []
items = nodes_mem.items()
items.sort(key=lambda a: a[1])
items.reverse()
order = self.linker.schedule(fgraph)
computed, last_user = gof.link.gc_helper(order)
for node in order:
post_thunk_old_storage.append([ input_idx
for input_idx,input in enumerate(node.inputs)
if (input in computed) and (input not in fgraph.outputs) and node == last_user[input]])
for node,val in items[:n_apply_to_print]:
dmap = getattr(node.op,'destroy_map',None)
vmap = getattr(node.op,'view_map',None)
for idx,v in enumerate(val):
if dmap and idx in dmap:#TODO check the op returned a view
node_memory_saved_by_inplace += v
elif vmap and idx in vmap:#TODO check the op returned a view
node_memory_saved_by_view += v
else:
node_memory_size += v
running_memory_size += v
if running_memory_size > running_max_memory_size:
running_max_memory_size = running_memory_size
old_storage = post_thunk_old_storage[order.index(node)]
for old_s in old_storage:
running_memory_size -= var_mem[node.inputs[old_s]]
pass
pass
print " Max FAST_RUN_NO_GC (KB)", node_memory_size/1024
print " Max FAST_RUN (KB)", running_max_memory_size/1024
print " Memory saved by view (KB)", node_memory_saved_by_view/1024
print " Memory saved by inplace (KB)", node_memory_saved_by_inplace/1024
print " Memory saved by GC (KB)", (node_memory_size-running_max_memory_size)/1024
n_apply_to_print+=10#TODO remove this line
print " <Sum apply outputs (bytes)> <Apply outputs memory size(bytes)> <created/inplace/view> <Apply node>"
print " <created/inplace/view> is taked from the op declaration, not the op exeuction. Use DebugMode to have warning about inplace/view declaration being respected."
n_apply_printed = 0
for key,val in items[:n_apply_to_print]:
if sum(val) < min_memory_size:
break
code = ['c']*len(node.outputs)
for out,inp in getattr(key.op,'destroy_map',{}).iteritems():
code[out] = "i"
for out,inp in getattr(key.op,'view_map',{}).iteritems():
code[out] = "v"
print ' %9dB %s %s %s' % (sum(val), str(val), ' '.join(code), key)
n_apply_printed += 1
print ' ... (remaining %i Apply account for %.2f%%(%d bytes) of the total intermediate memory used)'\
%(max(0, len(nodes_mem)-n_apply_printed),
sum(sum(val) for key, val in items[n_apply_printed:])/float(size_sum),
sum(sum(val) for key, val in items[n_apply_printed:]))
if nb_skipped > 0:
print ' We skipped %d theano function(s). Each of them used less then %dB(theano flags ProfileMode.min_memory_size) of total intermediate memory size'%(nb_skipped, min_memory_size)
print """
The memory profile in ProfileMode is removed!
Use the new profiler. Use the Theano flags
profile=True,profile_memory=True to enable it."""
print
print """Here are tips to potentially make your code run faster
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论